diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2017-09-11 19:55:32 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2017-09-11 19:55:32 -0700 |
commit | fc5935a9fe816d64291e66b1ca5133a27f5f0230 (patch) | |
tree | ffd7371d6ee3817aedf45d63e6606f072a16ac83 | |
parent | 6417918274d7d7d8d5b7bb675906f8e38d25c073 (diff) | |
download | txr-fc5935a9fe816d64291e66b1ca5133a27f5f0230.tar.gz txr-fc5935a9fe816d64291e66b1ca5133a27f5f0230.tar.bz2 txr-fc5935a9fe816d64291e66b1ca5133a27f5f0230.zip |
regex: new function, regex-prefix-match.
This new function allows a program to determine whether a
given string is the prefix of any of the strings denoted by a
regular expression; or, in alternative words, whether a given
string is the prefix of a possibly longer string which matches
a regular expression.
* regex.c (regex_machine_infer_init_state): New static
function.
(regex_prefix_match): New function.
(regex_init): regex-prefix-match intrinsic registered.
* regex.h (regex_prefix_match): Declared.
* txr.1: Documented.
-rw-r--r-- | regex.c | 48 | ||||
-rw-r--r-- | regex.h | 1 | ||||
-rw-r--r-- | txr.1 | 69 |
3 files changed, 118 insertions, 0 deletions
@@ -2398,6 +2398,14 @@ static void regex_machine_cleanup(regex_machine_t *regm) } } +static regm_result_t regex_machine_infer_init_state(regex_machine_t *regm) +{ + if (regm->n.is_nfa) + return (regm->n.nclos != 0) ? REGM_INCOMPLETE : REGM_FAIL; + else + return (regm->d.deriv != t) ? REGM_INCOMPLETE : REGM_FAIL; +} + static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch) { int accept = 0; @@ -2694,6 +2702,44 @@ val match_regex_right(val str, val regex, val end) return nil; } +val regex_prefix_match(val reg, val str, val pos) +{ + regex_machine_t regm; + val i; + regm_result_t last_res; + + if (null_or_missing_p(pos)) { + pos = zero; + } else if (lt(pos, zero)) { + pos = plus(pos, length_str(str)); + if (lt(pos, zero)) + return nil; + } else if (length_str_lt(str, pos)) { + return nil; + } + + regex_machine_init(&regm, reg); + + last_res = regex_machine_infer_init_state(&regm); + + for (i = pos; length_str_gt(str, i); i = plus(i, one)) { + last_res = regex_machine_feed(&regm, c_chr(chr_str(str, i))); + if (last_res == REGM_FAIL) + break; + } + + regex_machine_cleanup(&regm); + + switch (last_res) { + case REGM_INCOMPLETE: + case REGM_MATCH: + return t; + default: + case REGM_FAIL: + return nil; + } +} + val regsub(val regex, val repl, val str) { val isfunc = functionp(repl); @@ -3124,6 +3170,8 @@ void regex_init(void) reg_fun(intern(lit("match-regst-right"), user_package), func_n3o((opt_compat && opt_compat <= 150) ? 
match_regst_right_old : match_regst_right, 2)); + reg_fun(intern(lit("regex-prefix-match"), user_package), + func_n3o(regex_prefix_match, 2)); reg_fun(intern(lit("regsub"), user_package), func_n3(regsub)); reg_fun(intern(lit("regex-parse"), user_package), func_n2o(regex_parse, 1)); @@ -42,6 +42,7 @@ val match_regex_right(val str, val regex, val end); val search_regst(val haystack, val needle_regex, val start_num, val from_end); val match_regst(val str, val regex, val pos); val match_regst_right(val str, val regex, val end); +val regex_prefix_match(val reg, val str, val pos); val regsub(val regex, val repl, val str); val read_until_match(val regex, val stream, val keep_match); val regex_match_full(val regex, val arg1, val arg2); @@ -37915,6 +37915,75 @@ the matching substring of (match-regex-right-substring "ac" #/c*/) -> "c" .cble +.coNP Function @ regex-prefix-match +.synb +.mets (regex-prefix-match < regex < string <> [ position ]) +.syne +.desc +The +.code regex-prefix-match +function determines whether the input string might +be the prefix of a string which matches regular expression +.metn regex . + +The result is true if the input string matches +.meta regex +exactly. However, it is also true in situations in which +the input string doesn't match +.metn regex , +yet can be extended with one or more additional characters beyond the end such +that the extended string +.B does +match. + +The +.meta string +argument must be a character string. The function takes the input string to be +the suffix of +.meta string +which starts at the character position indicated by the +.meta position +argument. If that argument is omitted, then +.meta string +is taken as the input in its entirety. Negative values index backwards from +the end of +.meta string +according to the usual conventions elsewhere in the library. 
+ +Note: this function is not to be confused with the semantics +of a regex matching a prefix of a string: that capability is +provided by the functions +.codn match-regex , +.codn m^ , +.codn r^ , +.code f^ +and +.codn fr^ . + +.TP* Examples: + +.cblk + ;; The empty string is not a viable prefix match for + ;; a regex that matches no strings at all: + (regex-prefix-match #/~.*/ "") -> nil + (regex-prefix-match #/[]/ "") -> nil + + ;; The empty string is a viable prefix of any regex + ;; which matches at least one string: + (regex-prefix-match #// "") -> t + (regex-prefix-match #/abc/ "") -> t + + ;; This string doesn't match the regex because + ;; it doesn't end in b, but is a viable prefix: + (regex-prefix-match #/a*b/ "aa") -> t + + (regex-prefix-match #/a*b/ "ab") -> t + + (regex-prefix-match #/a*b/ "ac") -> nil + + (regex-prefix-match #/a*b/ "abc") -> nil +.cble + .coNP Function @ regsub .synb .mets (regsub >> { regex | << function } < replacement << string ) |