summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog29
-rw-r--r--lib.c25
-rw-r--r--parser.h1
-rw-r--r--parser.l42
-rw-r--r--parser.y82
-rw-r--r--txr.117
6 files changed, 139 insertions, 57 deletions
diff --git a/ChangeLog b/ChangeLog
index b50b75db..c91fae43 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,32 @@
+2011-11-15 Kaz Kylheku <kaz@kylheku.com>
+
+ Changing read syntax for character literals, because we are going to
+ need the single quote in the Lisp way for suppressing evaluation,
+ eventually.
+
+ I'm going with a Scheme-compatible syntax for character literals.
+ It has a richer repertoire of standard character names than Common
+ Lisp, and has an x convention for coding characters in hex.
+
+ * lib.c (obj_print): Print characters in a Scheme-like way.
+
+ * parser.h (end_of_char): New function declared.
+
+ * parser.l (grammar): Implement rules for #\ syntax,
+ involving a new HASH_BACKSLASH token.
+ (end_of_regex): Enhancement: added check that end_of_regex is
+ called in correct state, like the one in end_of_char.
+ (end_of_char): New function.
+
+ * parser.y (repeat_rep_helper, o_elems_transform, define_transform,
+ lit_char_helper): Functions changed to static.
+ (rl): Function moved down, past the grammar section.
+ (HASH_BACKSLASH): New terminal symbol.
+ (chrlit): Grammar redesigned.
+ (char_from_name): New function.
+
+ * txr.1: Character syntax documented.
+
2011-11-14 Kaz Kylheku <kaz@kylheku.com>
Bugfix: horizontal directives were being treated as vertical,
diff --git a/lib.c b/lib.c
index 7b3f9765..10782cc1 100644
--- a/lib.c
+++ b/lib.c
@@ -2565,25 +2565,24 @@ void obj_print(val obj, val out)
{
wchar_t ch = c_chr(obj);
- put_char(out, chr('\''));
+ put_string(out, lit("#\\"));
switch (ch) {
- case '\a': put_string(out, lit("\\a")); break;
- case '\b': put_string(out, lit("\\b")); break;
- case '\t': put_string(out, lit("\\t")); break;
- case '\n': put_string(out, lit("\\n")); break;
- case '\v': put_string(out, lit("\\v")); break;
- case '\f': put_string(out, lit("\\f")); break;
- case '\r': put_string(out, lit("\\r")); break;
- case '"': put_string(out, lit("\\\"")); break;
- case '\\': put_string(out, lit("\\\\")); break;
- case 27: put_string(out, lit("\\e")); break;
+ case '\0': put_string(out, lit("nul")); break;
+ case '\a': put_string(out, lit("alarm")); break;
+ case '\b': put_string(out, lit("backspace")); break;
+ case '\t': put_string(out, lit("tab")); break;
+ case '\n': put_string(out, lit("newline")); break;
+ case '\v': put_string(out, lit("vtab")); break;
+ case '\f': put_string(out, lit("page")); break;
+ case '\r': put_string(out, lit("return")); break;
+ case 27: put_string(out, lit("esc")); break;
+ case ' ': put_string(out, lit("space")); break;
default:
if (iswprint(ch))
put_char(out, chr(ch));
else
- format(out, lit("\\~03o"), num(ch), nao);
+ format(out, lit("x~x"), num(ch), nao);
}
- put_char(out, chr('\''));
}
return;
case NUM:
diff --git a/parser.h b/parser.h
index cca988da..754282d5 100644
--- a/parser.h
+++ b/parser.h
@@ -37,6 +37,7 @@ void yyerror(const char *s);
void yyerrorf(val s, ...);
void yybadtoken(int tok, val context);
void end_of_regex(void);
+void end_of_char(void);
int yylex(void);
void parse_init(void);
val source_loc(val form);
diff --git a/parser.l b/parser.l
index 1d1e0643..6514a39b 100644
--- a/parser.l
+++ b/parser.l
@@ -386,9 +386,9 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return '"';
}
-<SPECIAL,NESTED>\' {
+<SPECIAL,NESTED>#\\ {
yy_push_state(CHRLIT);
- return '\'';
+ return HASH_BACKSLASH;
}
<SPECIAL,NESTED>` {
@@ -550,29 +550,30 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return yytext[0];
}
-<CHRLIT>\' {
- yy_pop_state();
- return yytext[0];
- }
-
<QSILIT>` {
yy_pop_state();
return yytext[0];
}
-<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'\\] {
- yylval.chr = char_esc(yytext[1]);
- return LITCHAR;
- }
+<STRLIT,QSILIT>[\\][abtnvfre"`'\\] {
+ yylval.chr = char_esc(yytext[1]);
+ return LITCHAR;
+ }
<STRLIT,QSILIT>{WS}[\\]\n{WS} {
lineno++;
}
-<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) {
- yylval.chr = num_esc(yytext + 1);
- return LITCHAR;
- }
+<CHRLIT>(x{HEX}+|o{OCT}+) {
+ yylval.chr = num_esc(yytext);
+ return LITCHAR;
+ }
+
+<CHRLIT>{SYM} {
+ yylval.lexeme = utf8_dup_from(yytext);
+ return IDENT;
+ }
+
<STRLIT>\n {
yyerrprepf(lit("newline in string literal"), nao);
lineno++;
@@ -615,12 +616,23 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
void end_of_regex(void)
{
+ if (YYSTATE != REGEX)
+ internal_error("end_of_regex called in wrong scanner state");
+
yy_pop_state();
if (yy_top_state() == INITIAL
|| yy_top_state() == QSILIT)
yy_pop_state();
}
+void end_of_char(void)
+{
+ if (YYSTATE != CHRLIT)
+ internal_error("end_of_char called in wrong scanner state");
+
+ yy_pop_state();
+}
+
val source_loc(val form)
{
return gethash(form_to_ln_hash, form);
diff --git a/parser.y b/parser.y
index 3c0823e3..9089004c 100644
--- a/parser.y
+++ b/parser.y
@@ -43,21 +43,15 @@
int yylex(void);
void yyerror(const char *);
-val repeat_rep_helper(val sym, val main, val parts);
-val o_elems_transform(val output_form);
-val define_transform(val define_form);
-val lit_char_helper(val litchars);
+static val repeat_rep_helper(val sym, val main, val parts);
+static val o_elems_transform(val output_form);
+static val define_transform(val define_form);
+static val lit_char_helper(val litchars);
+static val rl(val form, val lineno);
+static wchar_t char_from_name(wchar_t *name);
static val parsed_spec;
-static val rl(val form, val lineno)
-{
- sethash(form_to_ln_hash, form, lineno);
- pushhash(ln_to_forms_hash, lineno, form);
- return form;
-}
-
-
%}
%union {
@@ -73,6 +67,7 @@ static val rl(val form, val lineno)
%token <lineno> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE
%token <lineno> TRY CATCH FINALLY
%token <lineno> ERRTOK /* deliberately not used in grammar */
+%token <lineno> HASH_BACKSLASH
%token <num> NUMBER
@@ -110,7 +105,7 @@ static val rl(val form, val lineno)
spec : clauses { parsed_spec = $1; }
| /* empty */ { parsed_spec = nil; }
- | error '\n' { parsed_spec = nil;
+ | error '\n' { parsed_spec = nil;
if (errors >= 8)
YYABORT;
yyerrok;
@@ -699,13 +694,16 @@ strlit : '"' '"' { $$ = null_string; }
yybadtoken(yychar, lit("string literal")); }
;
-chrlit : '\'' '\'' { $$ = nil;
- yyerror("empty character literal"); }
- | '\'' litchars '\'' { $$ = car($2);
- if (cdr($2))
- yyerror("multiple characters in "
- "character literal"); }
- | '\'' error { $$ = nil;
+chrlit : HASH_BACKSLASH IDENT { wchar_t ch = char_from_name($2);
+ val str = string_own($2);
+ end_of_char();
+ if (ch == L'!')
+ { yyerrorf(lit("unknown character name: ~a"),
+ str, nao); }
+ $$ = chr(ch); }
+ | HASH_BACKSLASH LITCHAR { $$ = chr($2);
+ end_of_char(); }
+ | HASH_BACKSLASH error { $$ = nil;
yybadtoken(yychar,
lit("character literal")); }
;
@@ -733,7 +731,7 @@ litchars : LITCHAR { $$ = cons(chr($1), nil); }
%%
-val repeat_rep_helper(val sym, val main, val parts)
+static val repeat_rep_helper(val sym, val main, val parts)
{
val single_parts = nil;
val first_parts = nil;
@@ -762,7 +760,7 @@ val repeat_rep_helper(val sym, val main, val parts)
last_parts, empty_parts, nao);
}
-val o_elems_transform(val o_elems)
+static val o_elems_transform(val o_elems)
{
list_collect_decl(o_elems_out, ptail);
val iter;
@@ -786,7 +784,7 @@ val o_elems_transform(val o_elems)
return o_elems_out;
}
-val define_transform(val define_form)
+static val define_transform(val define_form)
{
val sym = first(define_form);
val args = second(define_form);
@@ -825,7 +823,7 @@ val define_transform(val define_form)
return define_form;
}
-val lit_char_helper(val litchars)
+static val lit_char_helper(val litchars)
{
val ret = nil;
@@ -844,6 +842,42 @@ val lit_char_helper(val litchars)
return ret;
}
+static val rl(val form, val lineno)
+{
+ sethash(form_to_ln_hash, form, lineno);
+ pushhash(ln_to_forms_hash, lineno, form);
+ return form;
+}
+
+static wchar_t char_from_name(wchar_t *name)
+{
+ static struct {
+ wchar_t *name;
+ wchar_t ch;
+ } map[] = {
+ { L"nul", 0 },
+ { L"alarm", L'\a' },
+ { L"backspace", L'\b' },
+ { L"tab", L'\t' },
+ { L"linefeed", L'\n' },
+ { L"newline", L'\n' },
+ { L"vtab", L'\v' },
+ { L"page", L'\f' },
+ { L"return", L'\r' },
+ { L"esc", 27 },
+ { L"space", L' ' },
+ { 0, 0 },
+ };
+ int i;
+
+ for (i = 0; map[i].name; i++) {
+ if (wcscmp(map[i].name, name) == 0)
+ return map[i].ch;
+ }
+
+ return L'!'; /* code meaning not found */
+}
+
val get_spec(void)
{
return parsed_spec;
diff --git a/txr.1 b/txr.1
index b9158d02..32de2aa5 100644
--- a/txr.1
+++ b/txr.1
@@ -920,7 +920,7 @@ directives are:
@( a (b (c d) (e ) ))
- @("apple" 'b' 3)
+ @("apple" #\eb #\espace 3)
@(a /[a-z]*/ b)
@@ -929,10 +929,17 @@ directives are:
A symbol is lexically the same thing as a variable and the same rules
apply. Tokens that look like numbers are treated as numbers.
-String and character literals are delimited by double and single quotes,
-respectively, and may not span multiple lines. Character literals must contain
-exactly one character. Character and numeric escapes may be used within
-literals to escape the quotes, and to denote control characters.
+Character literals are introduced by the #\e syntax, which is followed by
+either a character name, the letter x followed by hex digits,
+or a single character. Valid character names are: nul, alarm, backspace, tab,
+linefeed, newline, vtab, page, return, esc, space. This convention
+for character literals is similar to that of the Scheme language.
+
+String literals are delimited by double quotes, and may not span multiple
+lines. A double quote within a string literal is encoded using \e"
+and a backslash is encoded as \e\e. Backslash escapes like \en and \et
+are recognized, as are hexadecimal escapes like \exFF and octal
+escapes like \e123.
Quasiliterals are similar to string literals, except that they may
contain variable references denoted by the usual @ syntax. The quasiliteral