diff options
-rw-r--r-- | regex.c | 42 | ||||
-rw-r--r-- | regex.h | 2 | ||||
-rw-r--r-- | txr.1 | 48 |
3 files changed, 83 insertions, 9 deletions
@@ -43,6 +43,7 @@ #include "gc.h" #include "eval.h" #include "cadr.h" +#include "itypes.h" #include "regex.h" #include "txr.h" @@ -3113,11 +3114,12 @@ val regex_range_search_fun(val regex, val start, val from_end) return curry_1234_1(func_n4(range_regex), regex, start, from_end); } -val read_until_match(val regex, val stream_in, val include_match_in) +static val scan_until_common(val self, val regex, val stream_in, + val include_match_in, val accum) { - val self = lit("read-until-match"); regex_machine_t regm; val out = nil; + u64_t count = 0; val stack = nil; val match = nil; val stream = default_arg(stream_in, std_input); @@ -3153,10 +3155,14 @@ val read_until_match(val regex, val stream_in, val include_match_in) ch = get_char(stream); - if (!out) - out = mkstring(one, ch); - else - string_extend(out, ch); + if (accum) { + if (!out) + out = mkstring(one, ch); + else + string_extend(out, ch); + } else { + count++; + } regex_machine_reset(&regm); continue; @@ -3176,10 +3182,14 @@ val read_until_match(val regex, val stream_in, val include_match_in) out_match: while (stack && stack != match) unget_char(rcyc_pop(&stack), stream); - if (!out) + if (accum && !out) out = null_string; if (include_match) out = cat_str(cons(out, stack = nreverse(stack)), nil); + if (!accum && match) { + val c = unum_64(count); + out = if3(include_match, cons(c, out), c); + } } regex_machine_cleanup(&regm); @@ -3190,6 +3200,22 @@ out_match: return out; } +val read_until_match(val regex, val stream_in, val include_match_in) +{ + return scan_until_common(lit("read-until-match"), regex, stream_in, + include_match_in, t); +} + +val scan_until_match(val regex, val stream_in) +{ + return scan_until_common(lit("scan-until-match"), regex, stream_in, t, nil); +} + +val count_until_match(val regex, val stream_in) +{ + return scan_until_common(lit("count-until-match"), regex, stream_in, nil, nil); +} + static char_set_t *create_wide_cs(void) { #ifdef FULL_UNICODE @@ -3293,6 +3319,8 @@ void regex_init(void) 
func_n1(reg_expand_nongreedy)); reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize)); reg_fun(intern(lit("read-until-match"), user_package), func_n3o(read_until_match, 1)); + reg_fun(intern(lit("scan-until-match"), user_package), func_n2(scan_until_match)); + reg_fun(intern(lit("count-until-match"), user_package), func_n2(count_until_match)); reg_fun(intern(lit("f^$"), user_package), func_n2o(regex_match_full_fun, 1)); reg_fun(intern(lit("f^"), user_package), func_n2o(regex_match_left_fun, 1)); reg_fun(intern(lit("f$"), user_package), func_n2o(regex_match_right_fun, 1)); @@ -45,6 +45,8 @@ val match_regst_right(val str, val regex, val end); val regex_prefix_match(val reg, val str, val pos); val regsub(val regex, val repl, val str); val read_until_match(val regex, val stream, val keep_match); +val scan_until_match(val regex, val stream_in); +val count_until_match(val regex, val stream_in); val regex_match_full(val regex, val arg1, val arg2); val regex_match_full_fun(val regex, val pos); val regex_match_left_fun(val regex, val pos); @@ -40197,8 +40197,8 @@ If matches the stream before any characters are accumulated, then an empty string is returned. -If the stream ends or an error occurs before any characters -are accumulated, the function returns +If the stream ends or an non-exception-throwing error occurs before any +characters are accumulated, the function returns .codn nil . When the accumulation of characters terminates by a match on @@ -40209,6 +40209,50 @@ removed from the stream. If is true, that matching text is included in the returned string. Otherwise, it is discarded. 
+.coNP Functions @ scan-until-match and @ count-until-match +.synb +.mets (scan-until-match < regex <> [ stream ]) +.mets (count-until-match < regex <> [ stream ]) +.syne +.desc +The functions +.code scan-until-match +and +.code count-until-match +read characters from +.meta stream +until a match occurs in the stream for regular expression +.metn regex , +the stream runs out of characters, or an error occurs. + +If the stream runs out of characters, or a non-exception-throwing error +occurs, before a match for +.meta regex +is identified, these functions return +.codn nil . + +If a match for +.meta regex +occurs in +.metn stream , +then +.code count-until-match +returns the number of characters that were read and discarded prior to +encountering the first matching character. +In the same situation, the +.code scan-until-match +function returns a +.code cons +cell whose +.code car +holds the count of discarded characters, that being the same value as what +would be returned by +.codn count-until-match , +and whose +.code cdr +holds a character string that comprises the text matched by +.metn regex . + .coNP Functions @, m^$ @ m^ and @ m$ .synb .mets (m^$ < regex <> [ position ] << string ) |