6 files changed, 139 insertions, 57 deletions
diff --git a/ChangeLog b/ChangeLog
index b50b75db..c91fae43 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,32 @@
+2011-11-15  Kaz Kylheku  <kaz@kylheku.com>
+
+	Changing read syntax for character literals, because we are going to
+	need the single quote in the Lisp way for suppressing evaluation,
+	eventually.
+
+	I'm going with a Scheme-compatible syntax for character literals.
+	It has a richer repertoire of standard character names than Common
+	Lisp, and has an x convention (e.g. #\x41) for coding characters in hex.
+
+	* lib.c (obj_print): Print characters in a Scheme-like way.
+
+	* parser.h (end_of_char): New function declared.
+
+	* parser.l (grammar): Implement rules for #\ syntax,
+	involving new HASH_BACKSLASH token.
+	(end_of_regex): Enhancement: added check that end_of_regex is
+	called in correct state, like the one in end_of_char.
+	(end_of_char): New function.
+
+	* parser.y (repeat_rep_helper, o_elems_transform, define_transform,
+	lit_char_helper): Functions changed to static.
+	(rl): Function moved down, past the grammar section.
+	(HASH_BACKSLASH): New terminal symbol.
+	(chrlit): Grammar redesigned.
+	(char_from_name): New function.
+
+	* txr.1: Character syntax documented.
+
 2011-11-14  Kaz Kylheku  <kaz@kylheku.com>
 
 	Bugfix: horizontal directives were being treated as vertical,
diff --git a/lib.c b/lib.c
index 7b3f9765..10782cc1 100644
--- a/lib.c
+++ b/lib.c
@@ -2565,25 +2565,24 @@ void obj_print(val obj, val out)
     {
       wchar_t ch = c_chr(obj);
 
-      put_char(out, chr('\''));
+      put_string(out, lit("#\\"));
       switch (ch) {
-      case '\a': put_string(out, lit("\\a")); break;
-      case '\b': put_string(out, lit("\\b")); break;
-      case '\t': put_string(out, lit("\\t")); break;
-      case '\n': put_string(out, lit("\\n")); break;
-      case '\v': put_string(out, lit("\\v")); break;
-      case '\f': put_string(out, lit("\\f")); break;
-      case '\r': put_string(out, lit("\\r")); break;
-      case '"': put_string(out, lit("\\\"")); break;
-      case '\\': put_string(out, lit("\\\\")); break;
-      case 27: put_string(out, lit("\\e")); break;
+      case '\0': put_string(out, lit("nul")); break;
+      case '\a': put_string(out, lit("alarm")); break;
+      case '\b': put_string(out, lit("backspace")); break;
+      case '\t': put_string(out, lit("tab")); break;
+      case '\n': put_string(out, lit("newline")); break;
+      case '\v': put_string(out, lit("vtab")); break;
+      case '\f': put_string(out, lit("page")); break;
+      case '\r': put_string(out, lit("return")); break;
+      case 27: put_string(out, lit("esc")); break;
+      case ' ': put_string(out, lit("space")); break;
       default:
         if (iswprint(ch))
           put_char(out, chr(ch));
         else
-          format(out, lit("\\~03o"), num(ch), nao);
+          format(out, lit("x~x"), num(ch), nao);
       }
-      put_char(out, chr('\''));
     }
     return;
   case NUM:
diff --git a/parser.h b/parser.h
index cca988da..754282d5 100644
--- a/parser.h
+++ b/parser.h
@@ -37,6 +37,7 @@ void yyerror(const char *s);
 void yyerrorf(val s, ...);
 void yybadtoken(int tok, val context);
 void end_of_regex(void);
+void end_of_char(void);
 int yylex(void);
 void parse_init(void);
 val source_loc(val form);
diff --git a/parser.l b/parser.l
index 1d1e0643..6514a39b 100644
--- a/parser.l
+++ b/parser.l
@@ -386,9 +386,9 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                           return '"';
                         }
 
-<SPECIAL,NESTED>\'      {
+<SPECIAL,NESTED>#\\     {
                           yy_push_state(CHRLIT);
-                          return '\'';
+                          return HASH_BACKSLASH;
                         }
 
 <SPECIAL,NESTED>`       {
@@ -550,29 +550,30 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                   return yytext[0];
                 }
 
-<CHRLIT>\'      {
-                  yy_pop_state();
-                  return yytext[0];
-                }
-
 <QSILIT>`       {
                   yy_pop_state();
                   return yytext[0];
                 }
 
-<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'\\] {
-                                            yylval.chr = char_esc(yytext[1]);
-                                            return LITCHAR;
-                                          }
+<STRLIT,QSILIT>[\\][abtnvfre"`'\\]      {
+                                          yylval.chr = char_esc(yytext[1]);
+                                          return LITCHAR;
+                                        }
 
 <STRLIT,QSILIT>{WS}[\\]\n{WS}   {
                                   lineno++;
                                 }
                                 
-<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+)     {
-                                          yylval.chr = num_esc(yytext + 1);
-                                          return LITCHAR;
-                                        }
+<CHRLIT>(x{HEX}+|o{OCT}+) {
+                            yylval.chr = num_esc(yytext);
+                            return LITCHAR;
+                          }
+
+<CHRLIT>{SYM}           {
+                          yylval.lexeme = utf8_dup_from(yytext);
+                          return IDENT;
+                        }
+
 <STRLIT>\n              {
                           yyerrprepf(lit("newline in string literal"), nao);
                           lineno++;
@@ -615,12 +616,23 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 
 void end_of_regex(void)
 {
+  if (YYSTATE != REGEX)
+    internal_error("end_of_regex called in wrong scanner state");
+
   yy_pop_state();
   if (yy_top_state() == INITIAL
       || yy_top_state() == QSILIT)
     yy_pop_state();
 }
 
+void end_of_char(void)
+{
+  if (YYSTATE != CHRLIT)
+    internal_error("end_of_char called in wrong scanner state");
+
+  yy_pop_state();
+}
+
 val source_loc(val form)
 {
   return gethash(form_to_ln_hash, form);
diff --git a/parser.y b/parser.y
index 3c0823e3..9089004c 100644
--- a/parser.y
+++ b/parser.y
@@ -43,21 +43,15 @@
 int yylex(void);
 void yyerror(const char *);
 
-val repeat_rep_helper(val sym, val main, val parts);
-val o_elems_transform(val output_form);
-val define_transform(val define_form);
-val lit_char_helper(val litchars);
+static val repeat_rep_helper(val sym, val main, val parts);
+static val o_elems_transform(val output_form);
+static val define_transform(val define_form);
+static val lit_char_helper(val litchars);
+static val rl(val form, val lineno);
+static wchar_t char_from_name(wchar_t *name);
 
 static val parsed_spec;
 
-static val rl(val form, val lineno)
-{
-  sethash(form_to_ln_hash, form, lineno);
-  pushhash(ln_to_forms_hash, lineno, form);
-  return form;
-}
-
-
 %}
 
 %union {
@@ -73,6 +67,7 @@ static val rl(val form, val lineno)
 %token <lineno> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE
 %token <lineno> TRY CATCH FINALLY
 %token <lineno> ERRTOK /* deliberately not used in grammar */
+%token <lineno> HASH_BACKSLASH
 
 %token <num> NUMBER
 
@@ -110,7 +105,7 @@ static val rl(val form, val lineno)
 
 spec : clauses                  { parsed_spec = $1; }
      | /* empty */              { parsed_spec = nil; }
-     | error '\n'                   { parsed_spec = nil;
+     | error '\n'               { parsed_spec = nil;
                                   if (errors >= 8)
                                     YYABORT;
                                   yyerrok;
@@ -699,13 +694,16 @@ strlit : '"' '"'                { $$ = null_string; }
                                   yybadtoken(yychar, lit("string literal")); }
        ;
 
-chrlit : '\'' '\''              { $$ = nil;
-                                  yyerror("empty character literal"); }
-       | '\'' litchars '\''     { $$ = car($2);
-                                  if (cdr($2))
-                                    yyerror("multiple characters in "
-                                            "character literal"); }
-       | '\'' error             { $$ = nil;
+chrlit : HASH_BACKSLASH IDENT   { wchar_t ch = char_from_name($2);
+                                  val str = string_own($2);
+                                  end_of_char();
+                                  if (ch == L'!')
+                                  { yyerrorf(lit("unknown character name: ~a"),
+                                             str, nao); }
+                                  $$ = chr(ch); }
+       | HASH_BACKSLASH LITCHAR { $$ = chr($2);
+                                  end_of_char(); }
+       | HASH_BACKSLASH error   { $$ = nil;
                                   yybadtoken(yychar,
                                              lit("character literal")); }
        ;
@@ -733,7 +731,7 @@ litchars : LITCHAR              { $$ = cons(chr($1), nil); }
 
 %%
 
-val repeat_rep_helper(val sym, val main, val parts)
+static val repeat_rep_helper(val sym, val main, val parts)
 {
   val single_parts = nil;
   val first_parts = nil;
@@ -762,7 +760,7 @@ val repeat_rep_helper(val sym, val main, val parts)
               last_parts, empty_parts, nao);
 }
 
-val o_elems_transform(val o_elems)
+static val o_elems_transform(val o_elems)
 {
   list_collect_decl(o_elems_out, ptail);
   val iter;
@@ -786,7 +784,7 @@ val o_elems_transform(val o_elems)
   return o_elems_out;
 }
 
-val define_transform(val define_form)
+static val define_transform(val define_form)
 {
   val sym = first(define_form);
   val args = second(define_form);
@@ -825,7 +823,7 @@ val define_transform(val define_form)
   return define_form;
 }
 
-val lit_char_helper(val litchars)
+static val lit_char_helper(val litchars)
 {
   val ret = nil;
 
@@ -844,6 +842,42 @@ val lit_char_helper(val litchars)
   return ret;
 }
 
+static val rl(val form, val lineno)
+{
+  sethash(form_to_ln_hash, form, lineno);
+  pushhash(ln_to_forms_hash, lineno, form);
+  return form;
+}
+
+static wchar_t char_from_name(wchar_t *name)
+{
+  static struct {
+    wchar_t *name;
+    wchar_t ch;
+  } map[] = {
+    { L"nul", 0 },
+    { L"alarm", L'\a' },
+    { L"backspace", L'\b' },
+    { L"tab", L'\t' },
+    { L"linefeed", L'\n' },
+    { L"newline", L'\n' },
+    { L"vtab", L'\v' },
+    { L"page", L'\f' },
+    { L"return", L'\r' },
+    { L"esc", 27 },
+    { L"space", L' ' },
+    { 0, 0 },
+  };
+  int i;
+
+  for (i = 0; map[i].name; i++) {
+    if (wcscmp(map[i].name, name) == 0)
+      return map[i].ch;
+  }
+
+  return L'!'; /* code meaning not found */
+}
+
 val get_spec(void)
 {
   return parsed_spec;
diff --git a/txr.1 b/txr.1
index b9158d02..32de2aa5 100644
--- a/txr.1
+++ b/txr.1
@@ -920,7 +920,7 @@ directives are:
 
   @(  a (b (c d) (e  ) ))
 
-  @("apple" 'b' 3)
+  @("apple" #\eb #\espace 3)
 
   @(a /[a-z]*/ b)
 
@@ -929,10 +929,17 @@ directives are:
 A symbol is lexically the same thing as a variable and the same rules
 apply. Tokens that look like numbers are treated as numbers.
 
-String and character literals are delimited by double and single quotes,
-respectively, and may not span multiple lines. Character literals must contain
-exactly one character. Character and numeric escapes may be used within
-literals to escape the quotes, and to denote control characters.
+Character literals are introduced by the #\e syntax, which is followed by
+either a character name, the letter x followed by hex digits,
+or a single character. Valid character names are: nul, alarm, backspace, tab,
+linefeed, newline, vtab, page, return, esc, space. This convention
+for character literals is similar to that of the Scheme language.
+
+String literals are delimited by double quotes, and may not span multiple
+lines. A double quote within a string literal is encoded using \e"
+and a backslash is encoded as \e\e. Backslash escapes like \en and \et
+are recognized, as are hexadecimal escapes like \exFF and octal
+escapes like \e123.
 
 Quasiliterals are similar to string literals, except that they may
 contain variable references denoted by the usual @ syntax. The quasiliteral