diff options
-rw-r--r-- | ChangeLog | 29 | ||||
-rw-r--r-- | lib.c | 25 | ||||
-rw-r--r-- | parser.h | 1 | ||||
-rw-r--r-- | parser.l | 42 | ||||
-rw-r--r-- | parser.y | 82 | ||||
-rw-r--r-- | txr.1 | 17 |
6 files changed, 139 insertions, 57 deletions
@@ -1,3 +1,32 @@ +2011-11-15 Kaz Kylheku <kaz@kylheku.com> + + Changing read syntax for character literals, because we are going to + need the single quote in the Lisp way for suppressing evaluation, + eventually. + + I'm going with a Scheme-compatible syntax for character literals. + It has a richer repertoire of standard character names than Common + Lisp, and has an x convention for coding characters in hex. + + * lib.c (obj_print): Print characters in a Scheme-like way. + + * parser.h (end_of_char): New function declared. + + * parser.l (grammar): Implement rules for #\ syntax, + involving the new HASH_BACKSLASH token. + (end_of_regex): Enhancement: added check that end_of_regex is + called in correct state, like the one in end_of_char. + (end_of_char): New function. + + * parser.y (repeat_rep_helper, o_elems_transform, define_transform, + lit_char_helper): Functions changed to static. + (rl): Function moved down, past the grammar section. + (HASH_BACKSLASH): New terminal symbol. + (chrlit): Grammar redesigned. + (char_from_name): New function. + + * txr.1: Character syntax documented. 
+ 2011-11-14 Kaz Kylheku <kaz@kylheku.com> Bugfix: horizontal directives were being treated as vertical, @@ -2565,25 +2565,24 @@ void obj_print(val obj, val out) { wchar_t ch = c_chr(obj); - put_char(out, chr('\'')); + put_string(out, lit("#\\")); switch (ch) { - case '\a': put_string(out, lit("\\a")); break; - case '\b': put_string(out, lit("\\b")); break; - case '\t': put_string(out, lit("\\t")); break; - case '\n': put_string(out, lit("\\n")); break; - case '\v': put_string(out, lit("\\v")); break; - case '\f': put_string(out, lit("\\f")); break; - case '\r': put_string(out, lit("\\r")); break; - case '"': put_string(out, lit("\\\"")); break; - case '\\': put_string(out, lit("\\\\")); break; - case 27: put_string(out, lit("\\e")); break; + case '\0': put_string(out, lit("nul")); break; + case '\a': put_string(out, lit("alarm")); break; + case '\b': put_string(out, lit("backspace")); break; + case '\t': put_string(out, lit("tab")); break; + case '\n': put_string(out, lit("newline")); break; + case '\v': put_string(out, lit("vtab")); break; + case '\f': put_string(out, lit("page")); break; + case '\r': put_string(out, lit("return")); break; + case 27: put_string(out, lit("esc")); break; + case ' ': put_string(out, lit("space")); break; default: if (iswprint(ch)) put_char(out, chr(ch)); else - format(out, lit("\\~03o"), num(ch), nao); + format(out, lit("x~x"), num(ch), nao); } - put_char(out, chr('\'')); } return; case NUM: @@ -37,6 +37,7 @@ void yyerror(const char *s); void yyerrorf(val s, ...); void yybadtoken(int tok, val context); void end_of_regex(void); +void end_of_char(void); int yylex(void); void parse_init(void); val source_loc(val form); @@ -386,9 +386,9 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return '"'; } -<SPECIAL,NESTED>\' { +<SPECIAL,NESTED>#\\ { yy_push_state(CHRLIT); - return '\''; + return HASH_BACKSLASH; } <SPECIAL,NESTED>` { @@ -550,29 +550,30 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return yytext[0]; } -<CHRLIT>\' { - yy_pop_state(); - 
return yytext[0]; - } - <QSILIT>` { yy_pop_state(); return yytext[0]; } -<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'\\] { - yylval.chr = char_esc(yytext[1]); - return LITCHAR; - } +<STRLIT,QSILIT>[\\][abtnvfre"`'\\] { + yylval.chr = char_esc(yytext[1]); + return LITCHAR; + } <STRLIT,QSILIT>{WS}[\\]\n{WS} { lineno++; } -<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) { - yylval.chr = num_esc(yytext + 1); - return LITCHAR; - } +<CHRLIT>(x{HEX}+|o{OCT}+) { + yylval.chr = num_esc(yytext); + return LITCHAR; + } + +<CHRLIT>{SYM} { + yylval.lexeme = utf8_dup_from(yytext); + return IDENT; + } + <STRLIT>\n { yyerrprepf(lit("newline in string literal"), nao); lineno++; @@ -615,12 +616,23 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} void end_of_regex(void) { + if (YYSTATE != REGEX) + internal_error("end_of_regex called in wrong scanner state"); + yy_pop_state(); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); } +void end_of_char(void) +{ + if (YYSTATE != CHRLIT) + internal_error("end_of_char called in wrong scanner state"); + + yy_pop_state(); +} + val source_loc(val form) { return gethash(form_to_ln_hash, form); @@ -43,21 +43,15 @@ int yylex(void); void yyerror(const char *); -val repeat_rep_helper(val sym, val main, val parts); -val o_elems_transform(val output_form); -val define_transform(val define_form); -val lit_char_helper(val litchars); +static val repeat_rep_helper(val sym, val main, val parts); +static val o_elems_transform(val output_form); +static val define_transform(val define_form); +static val lit_char_helper(val litchars); +static val rl(val form, val lineno); +static wchar_t char_from_name(wchar_t *name); static val parsed_spec; -static val rl(val form, val lineno) -{ - sethash(form_to_ln_hash, form, lineno); - pushhash(ln_to_forms_hash, lineno, form); - return form; -} - - %} %union { @@ -73,6 +67,7 @@ static val rl(val form, val lineno) %token <lineno> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token <lineno> TRY CATCH FINALLY 
%token <lineno> ERRTOK /* deliberately not used in grammar */ +%token <lineno> HASH_BACKSLASH %token <num> NUMBER @@ -110,7 +105,7 @@ static val rl(val form, val lineno) spec : clauses { parsed_spec = $1; } | /* empty */ { parsed_spec = nil; } - | error '\n' { parsed_spec = nil; + | error '\n' { parsed_spec = nil; if (errors >= 8) YYABORT; yyerrok; @@ -699,13 +694,16 @@ strlit : '"' '"' { $$ = null_string; } yybadtoken(yychar, lit("string literal")); } ; -chrlit : '\'' '\'' { $$ = nil; - yyerror("empty character literal"); } - | '\'' litchars '\'' { $$ = car($2); - if (cdr($2)) - yyerror("multiple characters in " - "character literal"); } - | '\'' error { $$ = nil; +chrlit : HASH_BACKSLASH IDENT { wchar_t ch = char_from_name($2); + val str = string_own($2); + end_of_char(); + if (ch == L'!') + { yyerrorf(lit("unknown character name: ~a"), + str, nao); } + $$ = chr(ch); } + | HASH_BACKSLASH LITCHAR { $$ = chr($2); + end_of_char(); } + | HASH_BACKSLASH error { $$ = nil; yybadtoken(yychar, lit("character literal")); } ; @@ -733,7 +731,7 @@ litchars : LITCHAR { $$ = cons(chr($1), nil); } %% -val repeat_rep_helper(val sym, val main, val parts) +static val repeat_rep_helper(val sym, val main, val parts) { val single_parts = nil; val first_parts = nil; @@ -762,7 +760,7 @@ val repeat_rep_helper(val sym, val main, val parts) last_parts, empty_parts, nao); } -val o_elems_transform(val o_elems) +static val o_elems_transform(val o_elems) { list_collect_decl(o_elems_out, ptail); val iter; @@ -786,7 +784,7 @@ val o_elems_transform(val o_elems) return o_elems_out; } -val define_transform(val define_form) +static val define_transform(val define_form) { val sym = first(define_form); val args = second(define_form); @@ -825,7 +823,7 @@ val define_transform(val define_form) return define_form; } -val lit_char_helper(val litchars) +static val lit_char_helper(val litchars) { val ret = nil; @@ -844,6 +842,42 @@ val lit_char_helper(val litchars) return ret; } +static val rl(val form, val 
lineno) +{ + sethash(form_to_ln_hash, form, lineno); + pushhash(ln_to_forms_hash, lineno, form); + return form; +} + +static wchar_t char_from_name(wchar_t *name) +{ + static struct { + wchar_t *name; + wchar_t ch; + } map[] = { + { L"nul", 0 }, + { L"alarm", L'\a' }, + { L"backspace", L'\b' }, + { L"tab", L'\t' }, + { L"linefeed", L'\n' }, + { L"newline", L'\n' }, + { L"vtab", L'\v' }, + { L"page", L'\f' }, + { L"return", L'\r' }, + { L"esc", 27 }, + { L"space", L' ' }, + { 0, 0 }, + }; + int i; + + for (i = 0; map[i].name; i++) { + if (wcscmp(map[i].name, name) == 0) + return map[i].ch; + } + + return L'!'; /* code meaning not found */ +} + val get_spec(void) { return parsed_spec; @@ -920,7 +920,7 @@ directives are: @( a (b (c d) (e ) )) - @("apple" 'b' 3) + @("apple" #\eb #\espace 3) @(a /[a-z]*/ b) @@ -929,10 +929,17 @@ directives are: A symbol is lexically the same thing as a variable and the same rules apply. Tokens that look like numbers are treated as numbers. -String and character literals are delimited by double and single quotes, -respectively, and may not span multiple lines. Character literals must contain -exactly one character. Character and numeric escapes may be used within -literals to escape the quotes, and to denote control characters. +Character literals are introduced by the #\e syntax, which is either +followed by a character name, the letter x followed by hex digits, +or a single character. Valid character names are: nul, alarm, backspace, tab, +linefeed, newline, vtab, page, return, esc, space. This convention +for character literals is similar to that of the Scheme language. + +String literals are delimited by double quotes, and may not span multiple +lines. A double quote within a string literal is encoded using \e" +and a backslash is encoded as \e\e. Backslash escapes like \en and \et +are recognized, as are hexadecimal escapes like \exFF and octal +escapes like \e123. 
Quasiliterals are similar to string literals, except that they may contain variable references denoted by the usual @ syntax. The quasiliteral |