* eval.c (eval_init): tok-str acquires new parameter

* lib.c (tok_str): New parameter, keep_sep. * lib.h (tok_str): Declaration updated. * txr.1: Documentation for tok-str updated.
author: Kaz Kylheku <kaz@kylheku.com> 2013-06-11 16:15:34 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2013-06-11 16:15:34 -0700
commit: 399b4b6a5082aa6f14a98bc50b29baeca686aa39 (patch)
tree: fcf71aad074e38aecc2a13a192756791a1e02783
parent: a6b0130ceaeadce6845d698fb68712dc2786e918 (diff)
download: txr-399b4b6a5082aa6f14a98bc50b29baeca686aa39.tar.gz
txr-399b4b6a5082aa6f14a98bc50b29baeca686aa39.tar.bz2
txr-399b4b6a5082aa6f14a98bc50b29baeca686aa39.zip
5 files changed, 28 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index e062c16b..a2bec990 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
 2013-06-11  Kaz Kylheku  <kaz@kylheku.com>
 
+	* eval.c (eval_init): tok-str acquires new parameter
+
+	* lib.c (tok_str): New parameter, keep_sep.
+
+	* lib.h (tok_str): Declaration updated.
+
+	* txr.1: Documentation for tok-str updated.
+
+2013-06-11  Kaz Kylheku  <kaz@kylheku.com>
+
 	* eval.c (eval_init): lazy-str's third argument is optional.
 	Added lazy-stringp. Changing names of length-str-{gt,ge,lt,le}
 	to be consistent with the >, >=, < and <= functions.
diff --git a/eval.c b/eval.c
index 77209599..1c8664be 100644
--- a/eval.c
+++ b/eval.c
@@ -2368,7 +2368,7 @@ void eval_init(void)
   reg_fun(intern(lit("cat-str"), user_package), func_n2o(cat_str, 1));
   reg_fun(intern(lit("split-str"), user_package), func_n2(split_str));
   reg_fun(intern(lit("split-str-set"), user_package), func_n2(split_str_set));
-  reg_fun(intern(lit("tok-str"), user_package), func_n2(tok_str));
+  reg_fun(intern(lit("tok-str"), user_package), func_n3o(tok_str, 1));
   reg_fun(intern(lit("list-str"), user_package), func_n1(list_str));
   reg_fun(intern(lit("trim-str"), user_package), func_n1(trim_str));
   reg_fun(intern(lit("string-lt"), user_package), func_n2(string_lt));
diff --git a/lib.c b/lib.c
index 97844d32..25ecb91d 100644
--- a/lib.c
+++ b/lib.c
@@ -2060,7 +2060,7 @@ val split_str_set(val str, val set)
   return out;
 }
 
-val tok_str(val str, val tok_regex)
+val tok_str(val str, val tok_regex, val keep_sep)
 {
   list_collect_decl (out, iter);
   val pos = zero;
@@ -2069,11 +2069,17 @@ val tok_str(val str, val tok_regex)
     cons_bind (new_pos, len, search_regex(str, tok_regex, pos, nil));
     val end;
 
-    if (!len)
+    if (!len) {
+      if (keep_sep)
+        list_collect(iter, sub_str(str, pos, t));
       break;
+    }
 
     end = plus(new_pos, len);
 
+    if (keep_sep)
+      list_collect(iter, sub_str(str, pos, new_pos));
+
     list_collect(iter, sub_str(str, new_pos, end));
 
     pos = end;
diff --git a/lib.h b/lib.h
index 2b87e7c8..c29eaa6d 100644
--- a/lib.h
+++ b/lib.h
@@ -501,7 +501,7 @@ val sub_str(val str_in, val from_num, val to_num);
 val cat_str(val list, val sep);
 val split_str(val str, val sep);
 val split_str_set(val str, val set);
-val tok_str(val str, val tok_regex);
+val tok_str(val str, val tok_regex, val keep_sep);
 val list_str(val str);
 val trim_str(val str);
 val string_cmp(val astr, val bstr);
diff --git a/txr.1 b/txr.1
index 8d56e113..73493413 100644
--- a/txr.1
+++ b/txr.1
@@ -7572,7 +7572,7 @@ This operation is nondestructive: <string> is not modified in any way.
 .TP
 Syntax:
 
-  (tok-str <string> <regex>)
+  (tok-str <string> <regex> [<keep-between>])
 
 .TP
 Description:
@@ -7589,6 +7589,13 @@ list ("a" "" "" ""). After the token "a" is extracted from a non-empty match
 for the regex, the regex is considered to match three more times: before the
 "b", between "b" and "c", and after the "c".
 
+If the <keep-between> argument is specified, and is not nil, then the behavior
+of tok-str changes in the following way. The pieces of <string> which are
+skipped by the search for tokens are included in the output. If no token is
+found in <string>, then a list of one element is returned, containing <string>.
+Generally, if N tokens are found, then the returned list consists of 2N + 1
+elements. The first element of the list is the (possibly empty) substring which had to be skipped to find the first token, and it is followed by that token. Skipped substrings and tokens then alternate in this way. The last element is the (possibly empty) substring of <string> between the last token and the end of the string.
+
 .SS Function list-str
 
 .TP
author	Kaz Kylheku <kaz@kylheku.com>	2013-06-11 16:15:34 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2013-06-11 16:15:34 -0700
commit	399b4b6a5082aa6f14a98bc50b29baeca686aa39 (patch)
tree	fcf71aad074e38aecc2a13a192756791a1e02783
parent	a6b0130ceaeadce6845d698fb68712dc2786e918 (diff)
download	txr-399b4b6a5082aa6f14a98bc50b29baeca686aa39.tar.gz txr-399b4b6a5082aa6f14a98bc50b29baeca686aa39.tar.bz2 txr-399b4b6a5082aa6f14a98bc50b29baeca686aa39.zip