summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 11
-rw-r--r-- dep.mk 2
-rw-r--r-- lib.c 25
-rw-r--r-- txr.1 31
4 files changed, 67 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 33590a1b..80681bd7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2014-11-17 Kaz Kylheku <kaz@kylheku.com>
+
+ * lib.c (split_str): If the separator string is empty,
+ then unless opt_compat is 100 or less, provide a more
+ consistent behavior, rather than splitting the string
+ into characters. This latter behavior was never documented.
+
+ * txr.1: Documented.
+
+ * dep.mk: Updated.
+
2014-11-15 Kaz Kylheku <kaz@kylheku.com>
* lib.c (max2, min2): Use the less comparison function
diff --git a/dep.mk b/dep.mk
index bc7458e0..4fa08832 100644
--- a/dep.mk
+++ b/dep.mk
@@ -2,7 +2,7 @@
./lex.yy.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./stream.h $(top_srcdir)/./utf8.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./hash.h $(top_srcdir)/./parser.h $(top_srcdir)/./eval.h y.tab.h
./y.tab.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./regex.h $(top_srcdir)/./utf8.h $(top_srcdir)/./match.h $(top_srcdir)/./hash.h $(top_srcdir)/./eval.h $(top_srcdir)/./stream.h $(top_srcdir)/./parser.h
./match.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./regex.h $(top_srcdir)/./stream.h $(top_srcdir)/./parser.h $(top_srcdir)/./txr.h $(top_srcdir)/./utf8.h $(top_srcdir)/./filter.h $(top_srcdir)/./hash.h $(top_srcdir)/./debug.h $(top_srcdir)/./eval.h $(top_srcdir)/./match.h
-./lib.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./arith.h $(top_srcdir)/./rand.h $(top_srcdir)/./hash.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./stream.h $(top_srcdir)/./utf8.h $(top_srcdir)/./filter.h $(top_srcdir)/./eval.h $(top_srcdir)/./sysif.h $(top_srcdir)/./regex.h
+./lib.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./arith.h $(top_srcdir)/./rand.h $(top_srcdir)/./hash.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./stream.h $(top_srcdir)/./utf8.h $(top_srcdir)/./filter.h $(top_srcdir)/./eval.h $(top_srcdir)/./sysif.h $(top_srcdir)/./regex.h $(top_srcdir)/./txr.h
./regex.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./parser.h $(top_srcdir)/./signal.h $(top_srcdir)/./unwind.h $(top_srcdir)/./stream.h $(top_srcdir)/./gc.h $(top_srcdir)/./regex.h $(top_srcdir)/./txr.h
./gc.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./stream.h $(top_srcdir)/./hash.h $(top_srcdir)/./txr.h $(top_srcdir)/./eval.h $(top_srcdir)/./gc.h $(top_srcdir)/./signal.h
./unwind.o: config.h $(top_srcdir)/./lib.h $(top_srcdir)/./gc.h $(top_srcdir)/./stream.h $(top_srcdir)/./txr.h $(top_srcdir)/./signal.h $(top_srcdir)/./eval.h $(top_srcdir)/./parser.h $(top_srcdir)/./unwind.h
diff --git a/lib.c b/lib.c
index 62d43b67..796a126c 100644
--- a/lib.c
+++ b/lib.c
@@ -57,6 +57,7 @@
#include "eval.h"
#include "sysif.h"
#include "regex.h"
+#include "txr.h"
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -2778,7 +2779,29 @@ val split_str(val str, val sep)
size_t len_sep = c_num(length_str(sep));
if (len_sep == 0) {
- return list_str(str);
+ if (opt_compat && opt_compat <= 100) {
+ return list_str(str);
+ } else {
+ const wchar_t *cstr = c_str(str);
+
+ if (*cstr) {
+ list_collect_decl (out, iter);
+
+ prot1(&str);
+
+ for (; *cstr; cstr++) {
+ val piece = mkustring(one);
+ init_str(piece, cstr);
+ iter = list_collect(iter, piece);
+ }
+
+ rel1(&str);
+
+ return out;
+ } else {
+ return cons(str, nil);
+ }
+ }
} else {
const wchar_t *cstr = c_str(str);
const wchar_t *csep = c_str(sep);
diff --git a/txr.1 b/txr.1
index 409fe8a3..c4ae19d8 100644
--- a/txr.1
+++ b/txr.1
@@ -15874,6 +15874,15 @@ The string is broken into pieces
according to the gaps left behind by the removed separators, and a list
of the remaining pieces is returned.
+If
+.meta sep
+is the empty string, then the separator pieces removed from the
+string are considered to be the empty strings between its
+characters. In this case, if
+.meta string
+is of length one or zero, then it is considered to have no such pieces, and a
+list of one element is returned containing the original string.
+
If a match for
.meta sep
is not found in the string at all, then the string is not
@@ -15895,6 +15904,17 @@ This operation is nondestructive:
.meta string
is not modified in any way.
+Note: To split a string into pieces of length one such that an empty string
+produces
+.code nil
+rather than
+.codn ("") ,
+use the
+.cblk
+.meti (tok-str < string #/./)
+.cble
+pattern.
+
.coNP Function @ split-str-set
.synb
.mets (split-str-set < string << set )
@@ -26036,6 +26056,17 @@ can be emulated is \*(TX 97.
Here are values which have a special meaning as arguments to the
.code -C
option, along with a description of what behaviors are affected:
+.IP 100
+Up to \*(TX 100, the
+.code split-str
+function had an undocumented behavior. When the
+.code sep
+argument was an empty string, it split the string into
+individual characters as if by calling
+.codn list-str .
+This behavior changed to the currently
+documented behavior starting in \*(TX 101.
+
.IP 99
Up to \*(TX 99, the substitution of TXR Lisp expressions in
.code @(output)