summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--eval.c2
-rw-r--r--lib.c10
-rw-r--r--lib.h2
-rw-r--r--txr.19
5 files changed, 28 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index e062c16b..a2bec990 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2013-06-11 Kaz Kylheku <kaz@kylheku.com>
+ * eval.c (eval_init): tok-str acquires new parameter.
+
+ * lib.c (tok_str): New parameter, keep_sep.
+
+ * lib.h (tok_str): Declaration updated.
+
+ * txr.1: Documentation for tok-str updated.
+
+2013-06-11 Kaz Kylheku <kaz@kylheku.com>
+
* eval.c (eval_init): lazy-str's third argument is optional.
Added lazy-stringp. Changing names of length-str-{gt,ge,lt,le}
to be consistent with the >, >=, < and <= functions.
diff --git a/eval.c b/eval.c
index 77209599..1c8664be 100644
--- a/eval.c
+++ b/eval.c
@@ -2368,7 +2368,7 @@ void eval_init(void)
reg_fun(intern(lit("cat-str"), user_package), func_n2o(cat_str, 1));
reg_fun(intern(lit("split-str"), user_package), func_n2(split_str));
reg_fun(intern(lit("split-str-set"), user_package), func_n2(split_str_set));
- reg_fun(intern(lit("tok-str"), user_package), func_n2(tok_str));
+ reg_fun(intern(lit("tok-str"), user_package), func_n3o(tok_str, 1));
reg_fun(intern(lit("list-str"), user_package), func_n1(list_str));
reg_fun(intern(lit("trim-str"), user_package), func_n1(trim_str));
reg_fun(intern(lit("string-lt"), user_package), func_n2(string_lt));
diff --git a/lib.c b/lib.c
index 97844d32..25ecb91d 100644
--- a/lib.c
+++ b/lib.c
@@ -2060,7 +2060,7 @@ val split_str_set(val str, val set)
return out;
}
-val tok_str(val str, val tok_regex)
+val tok_str(val str, val tok_regex, val keep_sep)
{
list_collect_decl (out, iter);
val pos = zero;
@@ -2069,11 +2069,17 @@ val tok_str(val str, val tok_regex)
cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil));
val end;
- if (!len)
+ if (!len) {
+ if (keep_sep)
+ list_collect(iter, sub_str(str, pos, t));
break;
+ }
end = plus(new_pos, len);
+ if (keep_sep)
+ list_collect(iter, sub_str(str, pos, new_pos));
+
list_collect(iter, sub_str(str, new_pos, end));
pos = end;
diff --git a/lib.h b/lib.h
index 2b87e7c8..c29eaa6d 100644
--- a/lib.h
+++ b/lib.h
@@ -501,7 +501,7 @@ val sub_str(val str_in, val from_num, val to_num);
val cat_str(val list, val sep);
val split_str(val str, val sep);
val split_str_set(val str, val set);
-val tok_str(val str, val tok_regex);
+val tok_str(val str, val tok_regex, val keep_sep);
val list_str(val str);
val trim_str(val str);
val string_cmp(val astr, val bstr);
diff --git a/txr.1 b/txr.1
index 8d56e113..73493413 100644
--- a/txr.1
+++ b/txr.1
@@ -7572,7 +7572,7 @@ This operation is nondestructive: <string> is not modified in any way.
.TP
Syntax:
- (tok-str <string> <regex>)
+ (tok-str <string> <regex> [<keep-between>])
.TP
Description:
@@ -7589,6 +7589,13 @@ list ("a" "" "" ""). After the token "a" is extracted from a non-empty match
for the regex, the regex is considered to match three more times: before the
"b", between "b" and "c", and after the "c".
+If the <keep-between> argument is specified, and is not nil, then the behavior
+of tok-str changes in the following way. The pieces of <string> which are
+skipped by the search for tokens are included in the output. If no token is
+found in <string>, then a list of one element is returned, containing <string>.
+Generally, if N tokens are found, then the returned list consists of 2N + 1
+elements. The first element of the list is the (possibly empty) substring which had to be skipped to find the first token. Then the token follows. The next element is the next skipped substring and so on. The last element is the substring of <string> between the last token and the end.
+
.SS Function list-str
.TP