summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2014-06-15 21:48:04 -0700
committerKaz Kylheku <kaz@kylheku.com>2014-06-15 21:48:04 -0700
commit4a223e77f8bf67c9236232bce354d60951b25bed (patch)
tree9e9a1bb72884b56dfaa240763f979ca630cea23e
parent548dd7697516a2fea8930d3fa9e88ea48d5ab630 (diff)
downloadtxr-4a223e77f8bf67c9236232bce354d60951b25bed.tar.gz
txr-4a223e77f8bf67c9236232bce354d60951b25bed.tar.bz2
txr-4a223e77f8bf67c9236232bce354d60951b25bed.zip
* lib.c (obj_print): Render character DC00 as "pnul".
Clean up code which chooses rendering for characters. Print C0 and C1 control characters, as well as D800-DFFF, FFFE and FFFF and characters above FFFF using hex; others are printed using the #\<char> notation. * parser.y (char_from_name): map "pnul" to DC00. * txr.1: Documented pnul, clarified character printing rules, and added a cautionary note about possible ambiguity in printing.
-rw-r--r--ChangeLog14
-rw-r--r--lib.c11
-rw-r--r--parser.y1
-rw-r--r--txr.124
4 files changed, 45 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 4ecc3fae..f4eb7b50 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
2014-06-16 Kaz Kylheku <kaz@kylheku.com>
+ * lib.c (obj_print): Render character DC00 as "pnul".
+ Clean up code which chooses rendering for characters.
+ Print C0 and C1 control characters, as well as D800-DFFF,
+ FFFE and FFFF and characters above FFFF using hex;
+ others are printed using the #\<char> notation.
+
+ * parser.y (char_from_name): map "pnul" to DC00.
+
+ * txr.1: Documented pnul, clarified character
+ printing rules, and added a cautionary note about
+ possible ambiguity in printing.
+
+2014-06-16 Kaz Kylheku <kaz@kylheku.com>
+
* eval.c (eval_init): Register pos_max, pos_min, find_max,
find_min and seqp as intrinsics.
diff --git a/lib.c b/lib.c
index e2b42506..52c7fa5d 100644
--- a/lib.c
+++ b/lib.c
@@ -5754,11 +5754,16 @@ finish:
case '\r': put_string(lit("return"), out); break;
case 27: put_string(lit("esc"), out); break;
case ' ': put_string(lit("space"), out); break;
+ case 0xDC00: put_string(lit("pnul"), out); break;
default:
- if (ch >= ' ')
- put_char(chr(ch), out);
- else
+ if ((ch < 0x20) || (ch >= 0x80 && ch < 0xA0))
format(out, lit("x~,02x"), num(ch), nao);
+ else if ((ch >= 0xD800 && ch < 0xE000) || ch == 0xFFFE || ch == 0xFFFF)
+ format(out, lit("x~,04x"), num(ch), nao);
+ else if (ch >= 0xFFFF)
+ format(out, lit("x~,06x"), num(ch), nao);
+ else
+ put_char(chr(ch), out);
}
}
return obj;
diff --git a/parser.y b/parser.y
index 951427c1..cda1aaf8 100644
--- a/parser.y
+++ b/parser.y
@@ -1276,6 +1276,7 @@ static wchar_t char_from_name(const wchar_t *name)
{ L"return", L'\r' },
{ L"esc", 27 },
{ L"space", L' ' },
+ { L"pnul", 0xDC00 },
{ 0, 0 },
};
int i;
diff --git a/txr.1 b/txr.1
index d52c2a84..b0accf49 100644
--- a/txr.1
+++ b/txr.1
@@ -1123,8 +1123,12 @@ Character literals are introduced by the #\e syntax, which is either
followed by a character name, the letter x followed by hex digits,
the letter o followed by octal digits, or a single character. Valid character
names are: nul, alarm, backspace, tab, linefeed, newline, vtab, page, return,
-esc, space. This convention for character literals is similar to that of the
-Scheme language. Note that #\elinefeed and #\enewline are the same character.
+esc, space and pnul. This convention for character literals is similar to that
+of the Scheme language. Note that #\elinefeed and #\enewline are the same
+character. The #\epnul character is specific to TXR and denotes the U+DC00
+code in Unicode; the name stands for "pseudo-null", which is related to
+its special function. For more information about this, see the section
+"Character Handling and International Characters".
.SS String Literals
@@ -12136,6 +12140,22 @@ and calls to these functions:
For pprint, tostringp and pprinl, the equivalence is produced by using "~a" in
format rather than "~s".
+Note: for characters, the print function behaves as follows: most control
+characters in the Unicode C0 and C1 range are rendered using the #\ex notation,
+using two hex digits. Codes in the range D800 to DFFF, and the codes
+FFFE and FFFF are printed in the #\exNNNN notation with four hexadecimal digits,
+and characters above this range are printed using the same notation, but with six
+hexadecimal digits. Certain characters in the C0 range are printed using
+their names such as #\enul and #\ereturn, which are documented
+in the Character Literals section not far from the start of this document.
+The DC00 character is printed as #\epnul. All other characters are printed as
+#\e<char>, where <char> is the actual character.
+
+Caution: read-print consistency is affected by trailing material. If additional
+digits are printed immediately after a number without intervening whitespace,
+they extend that number. If hex digits are printed after the character x,
+which is rendered as #\ex, they look like a hex character code.
+
.SS Function streamp
.TP