summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-09-11 19:55:32 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-09-11 19:55:32 -0700
commitfc5935a9fe816d64291e66b1ca5133a27f5f0230 (patch)
treeffd7371d6ee3817aedf45d63e6606f072a16ac83
parent6417918274d7d7d8d5b7bb675906f8e38d25c073 (diff)
downloadtxr-fc5935a9fe816d64291e66b1ca5133a27f5f0230.tar.gz
txr-fc5935a9fe816d64291e66b1ca5133a27f5f0230.tar.bz2
txr-fc5935a9fe816d64291e66b1ca5133a27f5f0230.zip
regex: new function, regex-prefix-match.
This new function allows a program to determine whether a given string is the prefix of any of the strings denoted by a regular expression; or, in alternative words, whether a given string is the prefix of a possibly longer string which matches a regular expression. * regex.c (regex_machine_infer_init_state): New static function. (regex_prefix_match): New function. (regex_init): regex-prefix-match intrinsic registered. * regex.h (regex_prefix_match): Declared. * txr.1: Documented.
-rw-r--r--regex.c48
-rw-r--r--regex.h1
-rw-r--r--txr.169
3 files changed, 118 insertions, 0 deletions
diff --git a/regex.c b/regex.c
index 069066c8..ebfd9570 100644
--- a/regex.c
+++ b/regex.c
@@ -2398,6 +2398,14 @@ static void regex_machine_cleanup(regex_machine_t *regm)
}
}
+static regm_result_t regex_machine_infer_init_state(regex_machine_t *regm)
+{ /* Classifies the machine's state without feeding it any character.
+   * Yields REGM_INCOMPLETE or REGM_FAIL, never REGM_MATCH.
+   * regex_prefix_match calls this right after regex_machine_init and uses
+   * the value as its result when the input supplies no characters. */
+ if (regm->n.is_nfa) /* NFA variant: decided by whether n.nclos is nonzero */
+ return (regm->n.nclos != 0) ? REGM_INCOMPLETE : REGM_FAIL;
+ else /* non-NFA variant: decided by whether d.deriv differs from t */
+ return (regm->d.deriv != t) ? REGM_INCOMPLETE : REGM_FAIL; /* NOTE(review): presumably d.deriv == t denotes a regex that matches nothing -- confirm */
+}
+
static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch)
{
int accept = 0;
@@ -2694,6 +2702,44 @@ val match_regex_right(val str, val regex, val end)
return nil;
}
+val regex_prefix_match(val reg, val str, val pos)
+{ /* Feeds the characters of str, starting at index pos, into a regex
+   * machine built from reg. Returns t when the final machine result is
+   * REGM_INCOMPLETE or REGM_MATCH, and nil otherwise. Also returns nil,
+   * without building a machine, when pos is rejected by the checks below. */
+ regex_machine_t regm;
+ val i;
+ regm_result_t last_res;
+
+ if (null_or_missing_p(pos)) { /* position omitted: start at index zero */
+ pos = zero;
+ } else if (lt(pos, zero)) { /* negative position: add the length of str */
+ pos = plus(pos, length_str(str));
+ if (lt(pos, zero)) /* still negative after the adjustment */
+ return nil;
+ } else if (length_str_lt(str, pos)) { /* NOTE(review): presumably true when pos lies beyond the end of str -- confirm length_str_lt semantics */
+ return nil;
+ }
+
+ regex_machine_init(&regm, reg);
+
+ last_res = regex_machine_infer_init_state(&regm); /* result used if the loop below feeds nothing */
+
+ for (i = pos; length_str_gt(str, i); i = plus(i, one)) {
+ last_res = regex_machine_feed(&regm, c_chr(chr_str(str, i))); /* feed the character at index i */
+ if (last_res == REGM_FAIL) /* stop feeding once the machine reports failure */
+ break;
+ }
+
+ regex_machine_cleanup(&regm); /* runs on every path that initialized the machine */
+
+ switch (last_res) {
+ case REGM_INCOMPLETE: /* both non-failure results count as a viable prefix */
+ case REGM_MATCH:
+ return t;
+ default: /* any other value is treated the same as REGM_FAIL */
+ case REGM_FAIL:
+ return nil;
+ }
+}
+
val regsub(val regex, val repl, val str)
{
val isfunc = functionp(repl);
@@ -3124,6 +3170,8 @@ void regex_init(void)
reg_fun(intern(lit("match-regst-right"), user_package),
func_n3o((opt_compat && opt_compat <= 150) ?
match_regst_right_old : match_regst_right, 2));
+ reg_fun(intern(lit("regex-prefix-match"), user_package),
+ func_n3o(regex_prefix_match, 2));
reg_fun(intern(lit("regsub"), user_package), func_n3(regsub));
reg_fun(intern(lit("regex-parse"), user_package), func_n2o(regex_parse, 1));
diff --git a/regex.h b/regex.h
index 77375454..3ad158ba 100644
--- a/regex.h
+++ b/regex.h
@@ -42,6 +42,7 @@ val match_regex_right(val str, val regex, val end);
val search_regst(val haystack, val needle_regex, val start_num, val from_end);
val match_regst(val str, val regex, val pos);
val match_regst_right(val str, val regex, val end);
+val regex_prefix_match(val reg, val str, val pos);
val regsub(val regex, val repl, val str);
val read_until_match(val regex, val stream, val keep_match);
val regex_match_full(val regex, val arg1, val arg2);
diff --git a/txr.1 b/txr.1
index ec7ca80f..4cf4b2fe 100644
--- a/txr.1
+++ b/txr.1
@@ -37915,6 +37915,75 @@ the matching substring of
(match-regex-right-substring "ac" #/c*/) -> "c"
.cble
+.coNP Function @ regex-prefix-match
+.synb
+.mets (regex-prefix-match < regex < string <> [ position ])
+.syne
+.desc
+The
+.code regex-prefix-match
+function determines whether the input string
+might be the prefix of a string which matches regular expression
+.metn regex .
+
+The result is true if the input string matches
+.meta regex
+exactly. However, it is also true in situations in which
+the input string doesn't match
+.metn regex ,
+yet can be extended with one or more additional characters beyond the end such
+that the extended string
+.B does
+match.
+
+The
+.meta string
+argument must be a character string. The function takes the input string to be
+the suffix of
+.meta string
+which starts at the character position indicated by the
+.meta position
+argument. If that argument is omitted, then
+.meta string
+is taken as the input in its entirety. Negative values index backwards from
+the end of
+.meta string
+according to the usual conventions elsewhere in the library.
+
+Note: this function is not to be confused with the semantics
+of a regex matching a prefix of a string: that capability is
+provided by the functions
+.codn match-regex ,
+.codn m^ ,
+.codn r^ ,
+.code f^
+and
+.codn fr^ .
+
+.TP* Examples:
+
+.cblk
+ ;; The empty string is not a viable prefix match for
+ ;; a regex that matches no strings at all:
+ (regex-prefix-match #/~.*/ "") -> nil
+ (regex-prefix-match #/[]/ "") -> nil
+
+ ;; The empty string is a viable prefix of any regex
+ ;; which matches at least one string:
+ (regex-prefix-match #// "") -> t
+ (regex-prefix-match #/abc/ "") -> t
+
+ ;; This string doesn't match the regex because
+ ;; it doesn't end in b, but is a viable prefix:
+ (regex-prefix-match #/a*b/ "aa") -> t
+
+ (regex-prefix-match #/a*b/ "ab") -> t
+
+ (regex-prefix-match #/a*b/ "ac") -> nil
+
+ (regex-prefix-match #/a*b/ "abc") -> nil
+.cble
+
.coNP Function @ regsub
.synb
.mets (regsub >> { regex | << function } < replacement << string )