summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2024-09-14 20:50:03 -0700
committerKaz Kylheku <kaz@kylheku.com>2024-09-14 21:08:22 -0700
commit3775eb7b3afe49d6b39e5a907747c23de8b5f42e (patch)
tree56bfa7b4905d350529f9c3f3a83f8b96d0c36896
parent99a1ff71bc2a4f8a6048449836400221a22b6094 (diff)
downloadtxr-3775eb7b3afe49d6b39e5a907747c23de8b5f42e.tar.gz
txr-3775eb7b3afe49d6b39e5a907747c23de8b5f42e.tar.bz2
txr-3775eb7b3afe49d6b39e5a907747c23de8b5f42e.zip
read-until-match: fix regression.
Commit 9aa751c8a4f845ef2d2bba091c81ffeded941afd broke things. This fix affects the functions read-until-match, scan-until-match and count-until-match, which share an implementation. * regex.c (scan_until_common): In the REGM_MATCH_DONE and REGM_MATCH cases, we must push the character onto the local stack before doing the match = stack assignment. Otherwise, it's possible that the stack is empty and so no match is recorded. The REGM_FAIL case will then behave as if no match was found, consuming a character and continuing. * txr.1: Codify an existing behavior: only non-empty matches for the regex are considered by read-until-match. * tests/015/regex.tl: New file. I am amazed to discover that we don't seem to have a test suite for regexes at all. Putting the tests here which confirm this fix and provide coverage for some edge cases in read-until-match.
-rw-r--r--regex.c2
-rw-r--r--tests/015/regex.tl18
-rw-r--r--txr.13
3 files changed, 21 insertions, 2 deletions
diff --git a/regex.c b/regex.c
index aaddec64..9437881e 100644
--- a/regex.c
+++ b/regex.c
@@ -3234,9 +3234,11 @@ static val scan_until_common(val self, val regex, val stream_in,
regex_machine_reset(&regm);
continue;
case REGM_MATCH_DONE:
+ push(ch, &stack);
match = stack;
goto out_match;
case REGM_MATCH:
+ push(ch, &stack);
match = stack;
continue;
case REGM_INCOMPLETE:
diff --git a/tests/015/regex.tl b/tests/015/regex.tl
new file mode 100644
index 00000000..68058eea
--- /dev/null
+++ b/tests/015/regex.tl
@@ -0,0 +1,18 @@
+(load "../common")
+
+(defun rum (str regex : include-match)
+ (with-in-string-stream (s str)
+ (list (read-until-match regex s include-match)
+ (read-until-match regex s include-match))))
+
+(mtest
+ (rum "a-b" #/-/) ("a" "b")
+ (rum "a-b" #/-/ t) ("a-" "b")
+ (rum "a----b" #/-+/) ("a" "b")
+ (rum "a----b" #/-+/ t) ("a----" "b")
+ (rum "a----b" #/-*/) ("a" "b")
+ (rum "a----b" #/-*/ t) ("a----" "b")
+ (rum "abc" #/-/) ("abc" nil)
+ (rum "abc" #/-/ t) ("abc" nil)
+ (rum "a___b___#c" #/_+#/) ("a___b" "c")
+ (rum "a___b___#c" #/_+#/ t) ("a___b___#" "c"))
diff --git a/txr.1 b/txr.1
index 277673c7..128fcebf 100644
--- a/txr.1
+++ b/txr.1
@@ -56676,10 +56676,9 @@ matched by
is included in the returned string. It defaults to
.codn nil .
-The accumulation of characters is terminated by a match on
+The accumulation of characters is terminated by a non-empty match on
.metn regex ,
the end of the stream, or an error.
-
This means that characters are read from the stream and accumulated while the
stream has more characters available, and while its prefix does not match
.metn regex .