From 1e78321bcd7838565fb6ea6ecf330db7d2422547 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Mon, 27 May 2024 20:20:14 -0700
Subject: quasiliterals: buffers in hex, separation for strings and buffers.

In this commit, output variables in the TXR Pattern language and
in TXR Lisp quasiliterals now support separator strings for values
that are strings and buffers.  Values which are buffers appear
differently: they are rendered as a sequence of lower case hex
digit pairs. When a string-valued variable specifies a separator,
the separator appears between characters of the string value.
Previously, the separator was ignored. When a buffer-valued
variable specifies a separator, the separator appears between
pairs of digits, not between digits. For instance if ethaddr
is a variable holding #b'08:00:27:79:c7:f5', then the quasiliteral
`@ethaddr` produces "08002779c7f5" whereas `@{ethaddr ":"}`
produces "08:00:27:79:c7:f5".

* buf.[ch] (buf_str_sep): New function.

* lib.[ch] (fmt_str_sep): New function.

* eval.c (fmt_cat): If the argument is a string, and a separator
is present, replace the value with the result of calling
fmt_str_sep. If the argument is a buffer, and a separator is
present, use buf_str_sep to convert to a string, otherwise
use tostringp.

* txr.1: Section on Output Variables updated.

* tests/012/readprint.tl: New tests.
---
 buf.c                  | 24 ++++++++++++++++++
 buf.h                  |  2 ++
 eval.c                 |  8 ++++--
 lib.c                  | 34 +++++++++++++++++++++++++
 lib.h                  |  1 +
 tests/012/readprint.tl | 19 ++++++++++++--
 txr.1                  | 68 ++++++++++++++++++++++++++++++++++++++++++++++----
 7 files changed, 147 insertions(+), 9 deletions(-)

diff --git a/buf.c b/buf.c
index 625b9f60..452b1fcc 100644
--- a/buf.c
+++ b/buf.c
@@ -887,6 +887,30 @@ val buf_pprint(val buf, val stream_in)
   return t;
 }
 
+val buf_str_sep(val buf, val sep, val self)
+{
+  struct buf *b = buf_handle(buf, self);
+  ucnum len = c_unum(b->len, self);
+  val ret = null_string;
+
+  if (len > 0) {
+    val stream = make_string_output_stream();
+    ucnum i = 0;
+
+    goto first;
+
+    while (i < len) {
+      put_string(sep, stream);
+    first:
+      format(stream, lit("~,02x"), num_fast(b->data[i++]), nao);
+    }
+
+    ret = get_string_from_stream(stream);
+  }
+
+  return ret;
+}
+
 void buf_hex(val buf, char *hex, size_t sz, int caps)
 {
   val self = lit("buf-hex");
diff --git a/buf.h b/buf.h
index 3d2ebc01..666152f4 100644
--- a/buf.h
+++ b/buf.h
@@ -115,6 +115,8 @@ val buf_get_cptr(val buf, val pos);
 val buf_print(val buf, val stream);
 val buf_pprint(val buf, val stream);
 
+val buf_str_sep(val buf, val sep, val self);
+
 void buf_hex(val buf, char *, size_t, int);
 
 val make_buf_stream(val buf_opt);
diff --git a/eval.c b/eval.c
index eef6ba44..f63e01a1 100644
--- a/eval.c
+++ b/eval.c
@@ -2979,14 +2979,18 @@ static val fmt_tostring(val obj)
 
 static val fmt_cat(val obj, val sep)
 {
+  val self = lit("quasistring formatting");
+
   switch (type(obj)) {
   case LIT:
   case STR:
   case LSTR:
-    return obj;
+    return if3(null_or_missing_p(sep), obj, fmt_str_sep(sep, obj, self));
   case BUF:
     if (!opt_compat || opt_compat > 294)
-      return tostringp(obj);
+      return if3(null_or_missing_p(sep),
+                 fmt_cat(tostringp(obj), sep),
+                 buf_str_sep(obj, sep, self));
     /* fallthrough */
   default:
     return if3(if3(opt_compat && opt_compat <= 174, listp(obj), seqp(obj)),
diff --git a/lib.c b/lib.c
index 7a0d2522..363b55a3 100644
--- a/lib.c
+++ b/lib.c
@@ -6237,6 +6237,40 @@ val fmt_join(varg args)
   return join_with(nil, args);
 }
 
+val fmt_str_sep(val sep, val str, val self)
+{
+  ucnum lsep = c_unum(length_str(sep), self);
+  ucnum lstr = c_unum(length_str(str), self);
+  const wchar_t *csep = c_str(sep, self);
+  const wchar_t *cstr = c_str(str, self);
+  ucnum total = (lstr > 0 ? (lstr - 1) * lsep + lstr : 0) + 1;
+  ucnum i;
+  wchar_t *out = chk_wmalloc(total);
+  wchar_t *ptr = out, *end = out + total;
+
+  for (i = 0; i < lstr; i++) {
+    if (i > 0) {
+      if (end - ptr <= convert(ptrdiff_t, lsep))
+        break;
+      wcscpy(ptr, csep);
+      ptr += lsep;
+    }
+    if (end - ptr <= 1)
+      break;
+    *ptr++ = cstr[i];
+  }
+
+  if (i < lstr) {
+    free(out);
+    uw_throwf(error_s, lit("~a: string length overflow"), self, nao);
+  }
+
+  *ptr = 0;
+
+  return string_own(out);
+}
+
+
 val split_str_keep(val str, val sep, val keep_sep_opt, val count_opt)
 {
   val self = lit("split-str");
diff --git a/lib.h b/lib.h
index dfa23c6e..07c37b9f 100644
--- a/lib.h
+++ b/lib.h
@@ -1103,6 +1103,7 @@ val scat2(val s1, val s2);
 val scat3(val s1, val sep, val s2);
 val join_with(val sep, varg args);
 val fmt_join(varg args);
+val fmt_str_sep(val sep, val str, val self);
 val split_str(val str, val sep);
 val split_str_keep(val str, val sep, val keep_sep_opt, val count_opt);
 val spl(val sep, val arg1, val arg2);
diff --git a/tests/012/readprint.tl b/tests/012/readprint.tl
index b3c2ff14..deebeba0 100644
--- a/tests/012/readprint.tl
+++ b/tests/012/readprint.tl
@@ -21,6 +21,21 @@
     `@b` "abcdef"
     `@{b [0..1]}` "ab"
     `@{b [-1..:]}` "ef"
-    `@{b ":"}` "abcdef"
-    `@{b [0..2] ":"}` "abcd"
+    `@{b ":"}` "ab:cd:ef"
+    `@{b ""}` "abcdef"
+    `@{b [0..2] ":"}` "ab:cd"
+    `@{b [0..1] ":"}` "ab"
+    `@{b [0..0] ":"}` ""
     `@{b [-1]}` "239"))
+
+(let ((b "abcdef"))
+  (mtest
+    `@b` "abcdef"
+    `@{b [0..1]}` "a"
+    `@{b [-1..:]}` "f"
+    `@{b ":"}` "a:b:c:d:e:f"
+    `@{b ""}` "abcdef"
+    `@{b [0..2] ":"}` "a:b"
+    `@{b [0..1] ":"}` "a"
+    `@{b [0..0] ":"}` ""
+    `@{b [-1]}` "f"))
diff --git a/txr.1 b/txr.1
index 2cd40c1e..4dffdf9b 100644
--- a/txr.1
+++ b/txr.1
@@ -9632,11 +9632,14 @@ than a list or string, it will be converted to a string as if by the
 .code tostring
 function in \*(TL.
 
-A list is converted to a string in a special way: the elements are
-individually converted to a string and then they are catenated together.
-The default separator string is a single space: an alternate separation
-can be specified as an argument in the brace substitution syntax.
-Empty lists turn into an empty string.
+A value which is a sequence is converted to a string in a special way: the
+elements are individually converted to strings and then they are catenated
+together.  The default separator string for most sequences is a single space:
+an alternate separation can be specified as an argument in the brace
+substitution syntax.  Empty sequences turn into an empty string.
+More details are given in the 
+.B "Output Variables: Separation"
+section below.
 
 Lists may be output within
 .code @(repeat)
@@ -9670,6 +9673,56 @@ for the output clause. The syntax for this is
 The filter specification syntax is the same as in the output clause.
 See Output Filtering below.
 
+.NP* Output Variables: Buffer Objects
+
+When the value of an output variable is a buffer (object of type
+.codn buf ),
+it is rendered as a sequence of hexadecimal digit pairs, with
+no line breaks. The digits
+.code a
+through
+.code f
+are rendered in lower case.
+
+.NP* Output Variables: Separation
+
+As mentioned in the previous section, the value of a variable
+can be a sequence. The individual elements of a sequence are turned
+into strings, and then catenated together with the separator,
+which may be specified as a string modifier in the variable syntax.
+
+For most sequences, the default separator is a space.
+
+When the value of a variable is a character string, and the
+separator is not specified, the string is output as-is.
+Effectively, the string is treated as a sequence but with an
+empty default separator.
+
+When the value of a variable is a buffer, it is rendered in
+hexadecimal, as described in the previous section.
+If a separator string modifier is specified, it separates
+pairs of digits, rather than individual digits.
+
+Example:
+
+.verb
+  @(bind str "string")
+  @(bind buf #b'cafef00d')
+  @(output)
+  @{str[0..3] "--"}
+  @{buf[0..2] ":"}
+  @{buf[2..4] "/"}
+  @(end)
+.brev
+
+The above example produces the output
+
+.verb
+  s--t--r
+  ca:fe
+  f0/0d
+.brev
+
 .NP* Output Variables: Indexing
 
 Additional syntax is supported in output variables that does not appear
@@ -9725,6 +9778,11 @@ separator string, and
 .code 10
 specifies the field width.
 
+When a variable includes indexing, separation and a field width,
+the indexing operation is first applied to select a subsequence.
+Then separation is applied to produce a textual representation.
+Finally the representation is rendered in the specified field width.
+
 .NP* Output Substitutions
 
 The brace syntax has another syntactic and semantic extension in
-- 
cgit v1.2.3