summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--regex.c42
-rw-r--r--regex.h2
-rw-r--r--txr.148
3 files changed, 83 insertions, 9 deletions
diff --git a/regex.c b/regex.c
index 99be57e1..47f1e0c3 100644
--- a/regex.c
+++ b/regex.c
@@ -43,6 +43,7 @@
#include "gc.h"
#include "eval.h"
#include "cadr.h"
+#include "itypes.h"
#include "regex.h"
#include "txr.h"
@@ -3113,11 +3114,12 @@ val regex_range_search_fun(val regex, val start, val from_end)
return curry_1234_1(func_n4(range_regex), regex, start, from_end);
}
-val read_until_match(val regex, val stream_in, val include_match_in)
+static val scan_until_common(val self, val regex, val stream_in,
+ val include_match_in, val accum)
{
- val self = lit("read-until-match");
regex_machine_t regm;
val out = nil;
+ u64_t count = 0;
val stack = nil;
val match = nil;
val stream = default_arg(stream_in, std_input);
@@ -3153,10 +3155,14 @@ val read_until_match(val regex, val stream_in, val include_match_in)
ch = get_char(stream);
- if (!out)
- out = mkstring(one, ch);
- else
- string_extend(out, ch);
+ if (accum) {
+ if (!out)
+ out = mkstring(one, ch);
+ else
+ string_extend(out, ch);
+ } else {
+ count++;
+ }
regex_machine_reset(&regm);
continue;
@@ -3176,10 +3182,14 @@ val read_until_match(val regex, val stream_in, val include_match_in)
out_match:
while (stack && stack != match)
unget_char(rcyc_pop(&stack), stream);
- if (!out)
+ if (accum && !out)
out = null_string;
if (include_match)
out = cat_str(cons(out, stack = nreverse(stack)), nil);
+ if (!accum && match) {
+ val c = unum_64(count);
+ out = if3(include_match, cons(c, out), c);
+ }
}
regex_machine_cleanup(&regm);
@@ -3190,6 +3200,22 @@ out_match:
return out;
}
+val read_until_match(val regex, val stream_in, val include_match_in)
+{ /* Accumulating mode (accum = t): the shared scanner collects the characters it reads into the returned string. */
+ return scan_until_common(lit("read-until-match"), regex, stream_in,
+ include_match_in, t);
+}
+
+val scan_until_match(val regex, val stream_in)
+{ /* Counting mode with the match kept: on a match the shared scanner yields a cons of (skipped-count . matched-text); without a match, nil. */
+ return scan_until_common(lit("scan-until-match"), regex, stream_in, t, nil);
+}
+
+val count_until_match(val regex, val stream_in)
+{ /* Counting mode with the match dropped: on a match the shared scanner yields only the count of characters skipped; without a match, nil. */
+ return scan_until_common(lit("count-until-match"), regex, stream_in, nil, nil);
+}
+
static char_set_t *create_wide_cs(void)
{
#ifdef FULL_UNICODE
@@ -3293,6 +3319,8 @@ void regex_init(void)
func_n1(reg_expand_nongreedy));
reg_fun(intern(lit("reg-optimize"), system_package), func_n1(reg_optimize));
reg_fun(intern(lit("read-until-match"), user_package), func_n3o(read_until_match, 1));
+ reg_fun(intern(lit("scan-until-match"), user_package), func_n2o(scan_until_match, 1));
+ reg_fun(intern(lit("count-until-match"), user_package), func_n2o(count_until_match, 1));
reg_fun(intern(lit("f^$"), user_package), func_n2o(regex_match_full_fun, 1));
reg_fun(intern(lit("f^"), user_package), func_n2o(regex_match_left_fun, 1));
reg_fun(intern(lit("f$"), user_package), func_n2o(regex_match_right_fun, 1));
diff --git a/regex.h b/regex.h
index 63a7e181..73c92093 100644
--- a/regex.h
+++ b/regex.h
@@ -45,6 +45,8 @@ val match_regst_right(val str, val regex, val end);
val regex_prefix_match(val reg, val str, val pos);
val regsub(val regex, val repl, val str);
val read_until_match(val regex, val stream, val keep_match);
+val scan_until_match(val regex, val stream_in);
+val count_until_match(val regex, val stream_in);
val regex_match_full(val regex, val arg1, val arg2);
val regex_match_full_fun(val regex, val pos);
val regex_match_left_fun(val regex, val pos);
diff --git a/txr.1 b/txr.1
index 41d1568f..f915b133 100644
--- a/txr.1
+++ b/txr.1
@@ -40197,8 +40197,8 @@ If
matches the stream before any characters are accumulated,
then an empty string is returned.
-If the stream ends or an error occurs before any characters
-are accumulated, the function returns
+If the stream ends or a non-exception-throwing error occurs before any
+characters are accumulated, the function returns
.codn nil .
When the accumulation of characters terminates by a match on
@@ -40209,6 +40209,50 @@ removed from the stream. If
is true, that matching text is included in
the returned string. Otherwise, it is discarded.
+.coNP Functions @ scan-until-match and @ count-until-match
+.synb
+.mets (scan-until-match < regex <> [ stream ])
+.mets (count-until-match < regex <> [ stream ])
+.syne
+.desc
+The functions
+.code scan-until-match
+and
+.code count-until-match
+read characters from
+.meta stream
+until a match occurs in the stream for regular expression
+.metn regex ,
+the stream runs out of characters, or an error occurs.
+
+If the stream runs out of characters, or a non-exception-throwing error
+occurs, before a match for
+.meta regex
+is identified, these functions return
+.codn nil .
+
+If a match for
+.meta regex
+occurs in
+.metn stream ,
+then
+.code count-until-match
+returns the number of characters that were read and discarded prior to
+encountering the first matching character.
+In the same situation, the
+.code scan-until-match
+function returns a
+.code cons
+cell whose
+.code car
+holds the count of discarded characters, that being the same value as what
+would be returned by
+.codn count-until-match ,
+and whose
+.code cdr
+holds a character string that comprises the text matched by
+.metn regex .
+
.coNP Functions @, m^$ @ m^ and @ m$
.synb
.mets (m^$ < regex <> [ position ] << string )