diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2016-09-23 06:39:56 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2016-09-23 06:39:56 -0700 |
commit | 83f7efdc4127c9807bdc46708ef5036d5fdafc51 (patch) | |
tree | d39d6b99503a98909ecb27db2fe5b6c703bcc24d | |
parent | 6cb0284cc3fe66c4c20a09a651ba897ed6e2f71e (diff) | |
download | txr-83f7efdc4127c9807bdc46708ef5036d5fdafc51.tar.gz txr-83f7efdc4127c9807bdc46708ef5036d5fdafc51.tar.bz2 txr-83f7efdc4127c9807bdc46708ef5036d5fdafc51.zip |
New regex functions: m^$, m^, m$, and others.
* regex.c (do_match_full, do_match_full_offs, do_match_left,
do_match_left_offs, do_match_right, do_match_right_offs):
New static functions.
(regex_match_full_fun, regex_match_left_fun, regex_match_right_fun,
regex_match_full, regex_match_left, regex_match_right,
regex_range_full, regex_range_left, regex_range_right):
New functions.
(regex_init): Register f^$, f^, f$, m^$, m^, m$, r^$,
r^ and r$ intrinsics.
* regex.h (regex_match_full_fun, regex_match_left_fun, regex_match_right_fun,
regex_match_full, regex_match_left, regex_match_right,
regex_range_full, regex_range_left, regex_range_right):
Declared.
* txr.1: Documented new functions.
-rw-r--r-- | regex.c | 130 | ||||
-rw-r--r-- | regex.h | 9 | ||||
-rw-r--r-- | txr.1 | 233 |
3 files changed, 372 insertions, 0 deletions
@@ -2655,6 +2655,127 @@ val match_regst_right(val str, val regex, val end)
                       sub_str(str, neg(len), t),
                       sub_str(str, minus(end, len), end)));
 }
+static val do_match_full(val regex, val str) /* f^$ closure, no position */
+{
+  return if2(eql(match_regex(str, regex, zero), length_str(str)), str);
+}
+
+static val do_match_full_offs(val env, val str) /* env is (regex . position) */
+{
+  cons_bind (regex, pos_in, env);
+  val len = length_str(str);
+  val pos = if3(minusp(pos_in), plus(pos_in, len), pos_in); /* negative: from end */
+  return if2(eql(match_regex(str, regex, pos), len),
+             sub_str(str, pos, t));
+}
+
+val regex_match_full_fun(val regex, val pos) /* f^$: returns a function of str */
+{
+  if (null_or_missing_p(pos))
+    return func_f1(regex, do_match_full);
+  return func_f1(cons(regex, pos), do_match_full_offs);
+}
+
+static val do_match_left(val regex, val str) /* f^ closure, no position */
+{
+  return match_regst(str, regex, zero);
+}
+
+static val do_match_left_offs(val env, val str) /* env is (regex . position) */
+{
+  cons_bind (regex, pos, env);
+  return match_regst(str, regex, pos);
+}
+
+val regex_match_left_fun(val regex, val pos) /* f^: returns a function of str */
+{
+  if (null_or_missing_p(pos))
+    return func_f1(regex, do_match_left);
+  return func_f1(cons(regex, pos), do_match_left_offs);
+}
+
+static val do_match_right(val regex, val str) /* f$ closure, no end position */
+{
+  return match_regst_right(str, regex, nil);
+}
+
+static val do_match_right_offs(val env, val str) /* env is (regex . end) */
+{
+  cons_bind (regex, end, env);
+  return match_regst_right(str, regex, end);
+}
+
+val regex_match_right_fun(val regex, val end) /* f$: returns a function of str */
+{
+  if (null_or_missing_p(end))
+    return func_f1(regex, do_match_right);
+  return func_f1(cons(regex, end), do_match_right_offs);
+}
+
+val regex_match_full(val regex, val arg1, val arg2) /* m^$: (regex [pos] str) */
+{
+  if (null_or_missing_p(arg2)) { /* two-argument call: arg1 is the string */
+    val str = arg1;
+    return if2(eql(match_regex(arg1, regex, arg2), length_str(str)), str);
+  } else { /* three-argument call: arg1 is the position, arg2 the string */
+    val str = arg2;
+    val len = length_str(str);
+    val pos = if3(minusp(arg1), plus(len, arg1), arg1);
+    return if2(eql(match_regex(str, regex, pos), len), sub_str(str, pos, t));
+  }
+}
+
+val regex_match_left(val regex, val arg1, val arg2) /* m^: (regex [pos] str) */
+{
+  if (null_or_missing_p(arg2))
+    return match_regst(arg1, regex, arg2);
+  return match_regst(arg2, regex, arg1);
+}
+
+val regex_match_right(val regex, val arg1, val arg2) /* m$: (regex [end] str) */
+{
+  if (null_or_missing_p(arg2))
+    return match_regst_right(arg1, regex, arg2);
+  return match_regst_right(arg2, regex, arg1);
+}
+
+val regex_range_full(val regex, val arg1, val arg2) /* r^$: (regex [pos] str) */
+{
+  if (null_or_missing_p(arg2)) {
+    val str = arg1;
+    val len = length_str(str);
+    return if2(eql(match_regex(str, regex, zero), len), rcons(zero, len));
+  } else {
+    val str = arg2;
+    val len = length_str(str);
+    val pos = if3(minusp(arg1), plus(len, arg1), arg1);
+    return if2(eql(match_regex(str, regex, pos), len), rcons(pos, len));
+  }
+}
+
+val regex_range_left(val regex, val arg1, val arg2) /* r^: (regex [pos] str) */
+{
+  if (null_or_missing_p(arg2)) {
+    val len = match_regex(arg1, regex, arg2);
+    return if2(len, rcons(zero, len));
+  } else {
+    val pos = if3(lt(arg1, zero), plus(arg1, length_str(arg2)), arg1);
+    val new_pos = match_regex(arg2, regex, pos);
+    return if2(new_pos, rcons(pos, new_pos));
+  }
+}
+
+val regex_range_right(val regex, val arg1, val arg2) /* r$: (regex [end] str) */
+{
+  if (null_or_missing_p(arg2)) { /* no end given: the suffix ends at string end */
+    val len = match_regex_right(arg1, regex, arg2); /* length of the suffix */
+    return if2(len, rcons(minus(length_str(arg1), len), length_str(arg1)));
+  } else {
+    val end = if3(lt(arg1, zero), plus(arg1, length_str(arg2)), arg1);
+    val len = match_regex_right(arg2, regex, end);
+    return if2(len, rcons(minus(end, len), end));
+  }
+}
 
 val read_until_match(val regex, val stream_in, val include_match_in)
 {
@@ -2832,6 +2953,15 @@ void regex_init(void)
           func_n1(reg_expand_nongreedy));
   reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize));
   reg_fun(intern(lit("read-until-match"), user_package), func_n3o(read_until_match, 1));
+  reg_fun(intern(lit("f^$"), user_package), func_n2o(regex_match_full_fun, 1));
+  reg_fun(intern(lit("f^"), user_package), func_n2o(regex_match_left_fun, 1));
+  reg_fun(intern(lit("f$"), user_package), func_n2o(regex_match_right_fun, 1));
+  reg_fun(intern(lit("m^$"), user_package), func_n3o(regex_match_full, 2));
+  reg_fun(intern(lit("m^"), user_package), func_n3o(regex_match_left, 2));
+  reg_fun(intern(lit("m$"), user_package), func_n3o(regex_match_right, 2));
+  reg_fun(intern(lit("r^$"), user_package), func_n3o(regex_range_full, 2));
+  reg_fun(intern(lit("r^"), user_package), func_n3o(regex_range_left, 2));
+  reg_fun(intern(lit("r$"), user_package), func_n3o(regex_range_right, 2));
   init_special_char_sets();
 }
 
@@ -41,6 +41,15 @@ val match_regst(val str, val regex, val pos);
 val match_regst_right(val str, val regex, val end);
 val regsub(val regex, val repl, val str);
 val read_until_match(val regex, val stream, val keep_match);
+val regex_match_full(val regex, val arg1, val arg2);
+val regex_match_full_fun(val regex, val pos);
+val regex_match_left_fun(val regex, val pos);
+val regex_match_right_fun(val regex, val end);
+val regex_match_left(val regex, val arg1, val arg2);
+val regex_match_right(val regex, val arg1, val arg2);
+val regex_range_full(val regex, val arg1, val arg2);
+val regex_range_left(val regex, val arg1, val arg2);
+val regex_range_right(val regex, val arg1, val arg2);
 int wide_display_char_p(wchar_t ch);
 void regex_init(void);
 void regex_free_all(void);
@@ -32216,6 +32216,239 @@ removed from the stream. If is true, that matching text is included in the returned string. Otherwise, it is discarded. +.coNP Functions @, m^$ @ m^ and @ m$ +.synb +.mets (m^$ < regex <> [ position ] << string ) +.mets (m^ < regex <> [ position ] << string ) +.mets (m$ < regex <> [ end-position ] << string ) +.syne +.desc +These functions provide functionality similar to the +.meta match-regst +and +.meta match-regst-right +functions, but under alternative interfaces which are more +convenient. + +The +.code ^ +and +.code $ +notation used in their names are an allusion to the +regular expression search anchoring operators found in +familiar POSIX utilities such as +.codn grep . 
+ +The +.meta position +argument, if omitted, +defaults to zero, so that the +entire +.meta string +is operated upon. + +The +.meta end-position +argument defaults to the length of +.metn string , +so that the end position coincides with the end of the +string. + +If the +.meta position +or +.meta end-position +arguments are negative, they index backwards +from the length of +.meta string +so that -1 denotes the last character. + +A value in either parameter which is excessively +negative or positive, such that it indexes before +the start of the string or exceeds its length +results in a failed match and consequently +.code nil +being returned. + + +The +.code m^$ +function tests whether the entire portion of +.meta string +starting at +.meta position +through to the end of the string is in the set of strings +matched by +.metn regex . +If this is true, then that portion of the string is +returned. Otherwise +.code nil +is returned. + +The +.code m^ +function tests whether the portion of the +.meta string +starting at +.meta position +has a prefix which matches +.metn regex . +If so, then this matching prefix is returned. +Otherwise +.code nil +is returned. + +The +.code m$ +function tests whether the portion of +.meta string +ending just before +.meta end-position +has a suffix which matches +.metn regex . +If so, then this matching suffix is returned. +Otherwise +.code nil +is returned. + +.coNP Functions @, r^$ @ r^ and @ r$ +.synb +.mets (r^$ < regex <> [ position ] << string ) +.mets (r^ < regex <> [ position ] << string ) +.mets (r$ < regex <> [ end-position ] << string ) +.syne +.desc +These functions perform the same operations as, +respectively, +.codn m^$ , +.code m^ +and +.codn m$ , +with the same argument conventions. They differ +in return value. When a match is found, they +return a range value indicating the extent of +the matching substring within +.meta string +rather than the matching substring itself. 
+ +The +.meta position +argument, if omitted, +defaults to zero, so that the +entire +.meta string +is operated upon. + +The +.meta end-position +argument defaults to the length of +.metn string , +so that the end position coincides with the end of the +string. + +A value in either parameter which is excessively +negative or positive, such that it indexes before +the start of the string or exceeds its length +results in a failed match and consequently +.code nil +being returned. + +The +.code r^$ +function tests whether the entire portion of +.meta string +starting at +.meta position +through to the end of the string is in the set of strings +matched by +.metn regex . +If this is true, then the matching range is returned, +as a range object. + +The +.code r^ +function tests whether the portion of the +.meta string +starting at +.meta position +has a prefix which matches +.metn regex . +If so, then the matching range is returned, as a range object. +Otherwise +.code nil +is returned. + +The +.code r$ +function tests whether the portion of +.meta string +ending just before +.meta end-position +has a suffix which matches +.metn regex . +If so, then the matching range is returned. +Otherwise +.code nil +is returned. + +.coNP Functions @, f^$ @ f^ and @ f$ +.synb +.mets (f^$ < regex <> [ position ]) +.mets (f^ < regex <> [ position ]) +.mets (f$ < regex <> [ end-position ]) +.syne +.desc +These regular expression functions do not directly +perform regex operations. Rather, they each return +a function of one argument which performs a regex +operation. + +The returned functions perform the same operations as, +respectively, +.codn m^$ , +.code m^ +and +.codn m$ . 
+ +The following equivalences nearly hold, except that the functions +on the right side produced by +.code op +can accept two arguments when only +.code r +is curried, whereas the functions on the left take only +one argument: + +.cblk + [f^$ r] <--> (op m^$ r) + [f^$ r p] <--> (op m^$ r p) + [f^ r] <--> (op m^ r) + [f^ r p] <--> (op m^ r p) + [f$ r] <--> (op m$ r) + [f$ r p] <--> (op m$ r p) +.cble + +That is to say, +.code f^$ +returns a function which binds +.meta regex +and possibly the optional +.metn position . +When this function is invoked, it must be given an argument +which is a string. It performs the same operation as +.code m^$ +being called on +.meta regex +and possibly +.metn position . +The same holds between +.code f^ +and +.codn m^ , +and between +.code f$ +and +.codn m$ . + .SS* Hashing Library .coNP Functions @ make-hash and @ hash .synb |