summaryrefslogtreecommitdiffstats
path: root/lib.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2016-10-26 20:19:42 -0700
committerKaz Kylheku <kaz@kylheku.com>2016-10-26 20:19:42 -0700
commite0dbcc3a6455d990c0a0ecde74e279e8f3b53843 (patch)
tree835afaf66a49e1e9b0183f13705d83be76c7b07a /lib.c
parent88268ee75421084cc412d26250beb7483f49c1b3 (diff)
downloadtxr-e0dbcc3a6455d990c0a0ecde74e279e8f3b53843.tar.gz
txr-e0dbcc3a6455d990c0a0ecde74e279e8f3b53843.tar.bz2
txr-e0dbcc3a6455d990c0a0ecde74e279e8f3b53843.zip
Fix tok-str semantics once again.
The problem is that when the regular expression is capable of matching empty strings, tok-str will extract an empty token immediately following a non-empty token. For instance (tok-str "a,b" /[^,]*/) extracts ("a" "" "b") instead of just ("a" "b"). This is poor behavior, and the way to fix it is to impose a rule that an empty token must not be extracted immediately at the ending position of a previous token. Only a non-empty token can be consecutive to a token. * lib.c (tok_str): Rewrite the logic of the loop, using the prev_empty flag to suppress empty tokens which immediately follow non-empty tokens. The addition of 1 to the position when the token is empty, to skip a character, is done at the bottom of the loop, and a new last_end variable keeps track of the end position of the last extracted token for the purposes of extracting the keep-between area if keep_sep is true. The old loop is preserved intact and is enabled by the compatibility option (when opt_compat is set to 155 or lower). * tests/015/split.tl: Multiple empty-regex test cases for tok-str updated. * txr.1: Updated tok-str documentation and also added a note about the conditions under which split-str and tok-str, invoked with keep-sep true, produce equivalent output. Added compatibility notes.
Diffstat (limited to 'lib.c')
-rw-r--r--lib.c27
1 files changed, 26 insertions, 1 deletions
diff --git a/lib.c b/lib.c
index 5f92b6c1..f77ba01a 100644
--- a/lib.c
+++ b/lib.c
@@ -3994,11 +3994,13 @@ val tok_str(val str, val tok_regex, val keep_sep)
{
list_collect_decl (out, iter);
val pos = zero;
+ val last_end = zero;
val slen = length(str);
+ int prev_empty = 1;
keep_sep = default_bool_arg(keep_sep);
- for (;;) {
+ if (opt_compat && opt_compat <= 155) for (;;) {
cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil));
if (len == zero && new_pos != slen)
@@ -4015,6 +4017,29 @@ val tok_str(val str, val tok_regex, val keep_sep)
pos = plus(new_pos, len);
iter = list_collect(iter, sub_str(str, new_pos, pos));
+ } else for (;;) {
+ cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil));
+
+ if (!len || (new_pos == slen && !prev_empty)) {
+ if (keep_sep)
+ iter = list_collect(iter, sub_str(str, last_end, t));
+ break;
+ }
+
+ if (len != zero || prev_empty) {
+ if (keep_sep)
+ iter = list_collect(iter, sub_str(str, last_end, new_pos));
+ last_end = plus(new_pos, len);
+ iter = list_collect(iter, sub_str(str, new_pos, last_end));
+ prev_empty = (len == zero);
+ } else {
+ prev_empty = 1;
+ }
+
+ pos = plus(new_pos, len);
+
+ if (len == zero)
+ pos = succ(pos);
}
return out;