diff options
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | parser.l | 35 | ||||
-rw-r--r-- | parser.y | 20 | ||||
-rw-r--r-- | txr.1 | 40 |
4 files changed, 102 insertions, 11 deletions
@@ -1,3 +1,21 @@ +2014-03-25 Kaz Kylheku <kaz@kylheku.com> + + Introducing word list literals. + + * parser.l (WLIT): New exclusive start state. + Extend lexical grammar to transition to WLIT state upon + the #" or #*" sequence which kicks off a word literal, + and in that state, piecewise lexically analyze the literal, + mostly by borrowing rules from other literals. + + * parser.y (WORDS, WSPLICE): New tokens. + (n_exprs): Integrate splicing form of word list literal syntax. + (n_expr): Integrate non-splicing form of word list literal syntax. + (litchars): Propagate line number info. + (wordslit): New grammar rule. + + * txr.1: Updated. + 2014-03-23 Kaz Kylheku <kaz@kylheku.com> * eval.c (eval_init): Register last function as intrinsic. @@ -190,7 +190,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} -%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL +%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT %% @@ -574,6 +574,16 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return '`'; } +<SPECIAL,QSPECIAL,NESTED,BRACED>#\" { + yy_push_state(WLIT); + return WORDS; +} + +<SPECIAL,QSPECIAL,NESTED,BRACED>#\*\" { + yy_push_state(WLIT); + return WSPLICE; +} + <NESTED,BRACED># { return '#'; } @@ -761,7 +771,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} /* comment to end of line */ } -<STRLIT>\" { +<STRLIT,WLIT>\" { yy_pop_state(); return yytext[0]; } @@ -771,21 +781,21 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return yytext[0]; } -<STRLIT,QSILIT>[\\][abtnvfre "`'\\ ] { +<STRLIT,QSILIT,WLIT>[\\][abtnvfre "`'\\ ] { yylval.chr = char_esc(yytext[1]); return LITCHAR; } -<STRLIT,QSILIT>{WS}[\\]\n{WS} { +<STRLIT,QSILIT,WLIT>{WS}[\\]\n{WS} { lineno++; } -<STRLIT,QSILIT>[\\](x{HEX}+|{OCT}+);? { +<STRLIT,QSILIT,WLIT>[\\](x{HEX}+|{OCT}+);? { yylval.chr = num_esc(yytext+1); return LITCHAR; } -<STRLIT,QSILIT>[\\]. { +<STRLIT,QSILIT,WLIT>[\\]. 
{ yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } @@ -825,18 +835,27 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return ERRTOK; } +<WLIT>\n { + lineno++; + return '\n'; +} + <QSILIT>@ { yy_push_state(QSPECIAL); } -<STRLIT,CHRLIT,QSILIT>{UANYN} { +<WLIT>{WS} { + return ' '; +} + +<STRLIT,CHRLIT,QSILIT,WLIT>{UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); yylval.chr = buf[0]; return LITCHAR; } -<STRLIT,CHRLIT,QSILIT>. { +<STRLIT,CHRLIT,QSILIT,WLIT>. { yyerrprepf(lit("non-UTF-8 byte in literal: '\\x~02x'"), num((unsigned char) yytext[0]), nao); return ERRTOK; @@ -79,6 +79,7 @@ static val parsed_spec; %token <lineno> MOD MODLAST DEFINE TRY CATCH FINALLY %token <lineno> ERRTOK /* deliberately not used in grammar */ %token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H +%token <lineno> WORDS WSPLICE %token <lineno> SECRET_ESCAPE_R SECRET_ESCAPE_E %token <val> NUMBER METANUM @@ -100,7 +101,7 @@ static val parsed_spec; %type <val> o_elems_opt o_elems o_elem o_var rep_elem rep_parts_opt %type <val> regex lisp_regex regexpr regbranch %type <val> regterm regtoken regclass regclassterm regrange -%type <val> strlit chrlit quasilit quasi_items quasi_item litchars +%type <val> strlit chrlit quasilit quasi_items quasi_item litchars wordslit %type <val> not_a_clause %type <chr> regchar %type <lineno> '(' '[' '@' @@ -753,6 +754,9 @@ n_exprs : n_expr { $$ = rlcp(cons($1, nil), $1); } $$ = rlcp(cons(list(cons_s, $1, car($3), nao), cdr($3)), or2($1, $3)); } + | WSPLICE wordslit { $$ = rl($2, num($1)); } + | WSPLICE wordslit + n_exprs { $$ = nappend2(rl($2, num($1)), $3); } ; n_expr : SYMTOK { $$ = sym_helper($1, t); } @@ -768,6 +772,7 @@ n_expr : SYMTOK { $$ = sym_helper($1, t); } | chrlit { $$ = $1; } | strlit { $$ = $1; } | quasilit { $$ = $1; } + | WORDS wordslit { $$ = rl($2, num($1)); } | '\'' n_expr { $$ = rlcp(list(quote_s, $2, nao), $2); } | '^' n_expr { $$ = rlcp(list(sys_qquote_s, $2, nao), $2); } | ',' n_expr { $$ = rlcp(list(sys_unquote_s, $2, nao), $2); 
} @@ -923,8 +928,17 @@ quasi_item : litchars { $$ = lit_char_helper($1); } | list { $$ = rlcp(cons(expr_s, $1), $1); } ; -litchars : LITCHAR { $$ = cons(chr($1), nil); } - | LITCHAR litchars { $$ = cons(chr($1), $2); } +litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(lineno)); } + | LITCHAR litchars { $$ = rl(cons(chr($1), $2), num(lineno)); } + ; + +wordslit : '"' { $$ = nil; } + | ' ' wordslit { $$ = $2; } + | '\n' wordslit { $$ = $2; } + | litchars wordslit { val word = lit_char_helper($1); + $$ = rlcp(cons(word, $2), $1); } + | error { $$ = nil; + yybadtoken(yychar, lit("word literal")); } ; not_a_clause : ALL { $$ = make_expr(all_s, nil, num(lineno)); } @@ -1129,6 +1129,46 @@ Example: The first string literal is the string "foobar". The second two are "foo bar". +.SS Word List Literals + +A word list literal provides a convenient way to write a list of strings +when such a list can be given as whitespace-delimited words. + +There are two flavors of the word list literal: the regular word list +literal which begins with #" (hash, double-quote) and the splicing +list literal which begins with #*" (hash, star, double-quote). + +Both literals are terminated by a double quote, which may be escaped +as \e" in order to include it as a character. All the escaping conventions +used in string literals can be used in word literals. + +Unlike in string literals, whitespace (tabs, spaces and newlines) is not +significant in word literals: it separates words. Whitespace may be +escaped with a backslash in order to include it as a literal character. + + +Example: + + #"abc def ghi" --> notates ("abc" "def" "ghi") + + #"abc def + ghi" --> notates ("abc" "def" "ghi") + + #"abc\ def ghi" --> notates ("abc def" "ghi") + +A splicing word literal differs from a word literal in that it does not +produce a list of string literals, but rather it produces a sequence of string +literal tokens that is merged into the surrounding syntax. 
+ +Example: + + (1 2 3 #*"abc def" 4 5 #"abc def") + + --> (1 2 3 "abc" "def" 4 5 ("abc" "def")) + +The regular word list literal produced a single list object, but the splicing +word list literal expanded into multiple string literal objects. + .SS String Quasiliterals Quasiliterals are similar to string literals, except that they may |