diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2014-03-25 22:10:31 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2014-03-25 22:10:31 -0700 |
commit | f4a6c56c8e8841c1991c1bb44546681ccbdb8f3a (patch) | |
tree | 943413daf7f15ed66562552b9cb43c8ada7053d0 | |
parent | 4c6d387b73dca86caed48b1e786c5c1bc2c4716b (diff) | |
download | txr-f4a6c56c8e8841c1991c1bb44546681ccbdb8f3a.tar.gz txr-f4a6c56c8e8841c1991c1bb44546681ccbdb8f3a.tar.bz2 txr-f4a6c56c8e8841c1991c1bb44546681ccbdb8f3a.zip |
* eval.c (me_quasilist): New static function.
(eval_init): Register me_quasilist as quasilist macro expander.
* lib.c (quasilist_s): New global variable.
(obj_init): quasilist_s initialized.
* lib.h (quasilist_s): Declared.
* match.c (do_txreval): Handle quasilist syntax.
* parser.l (QWLIT): New exclusive state.
Extend lexical grammar to transition to QWLIT state upon
the #` or #*` sequence which kicks off a word literal,
and in that state, piecewise lexically analyze the QLL,
mostly by borrowing rules from quasiliterals.
* parser.y (QWORDS, QWSPLICE): New tokens.
(n_exprs): Integrate splicing form of QLL syntax.
(n_expr): Integrate non-splicing form of QLL syntax.
(litchars): Propagate line number info.
(quasilit): Fix "string literal" wording in error message.
* txr.1: Introduced WLL abbreviation for word list literals,
cleaned up the text a little, and documented QLL's.
-rw-r--r-- | ChangeLog | 27 | ||||
-rw-r--r-- | eval.c | 6 | ||||
-rw-r--r-- | lib.c | 4 | ||||
-rw-r--r-- | lib.h | 3 | ||||
-rw-r--r-- | match.c | 18 | ||||
-rw-r--r-- | parser.l | 88 | ||||
-rw-r--r-- | parser.y | 27 | ||||
-rw-r--r-- | txr.1 | 58 |
8 files changed, 179 insertions, 52 deletions
@@ -1,5 +1,32 @@ 2014-03-25 Kaz Kylheku <kaz@kylheku.com> + * eval.c (me_quasilist): New static function. + (eval_init): Register me_quasilist as quasilist macro expander. + + * lib.c (quasilist_s): New global variable. + (obj_init): quasilist_s initialized. + + * lib.h (quasilist_s): Declared. + + * match.c (do_txreval): Handle quasilist syntax. + + * parser.l (QWLIT): New exclusive state. + Extend lexical grammar to transition to QWLIT state upon + the #` or #*` sequence which kicks off a word literal, + and in that state, piecewise lexically analyze the QLL, + mostly by borrowing rules from quasiliterals. + + * parser.y (QWORDS, QWSPLICE): New tokens. + (n_exprs): Integrate splicing form of QLL syntax. + (n_expr): Integrate non-splicing form of QLL syntax. + (litchars): Propagate line number info. + (quasilit): Fix "string literal" wording in error message. + + * txr.1: Introduced WLL abbreviation for word list literals, + cleaned up the text a little, and documented QLL's. + +2014-03-25 Kaz Kylheku <kaz@kylheku.com> + * eval.c (expand_quasi): Bugfix: incorrect logic, failing to macro-expand the embedded forms in a quasiliteral except when they are the very first item. @@ -2048,6 +2048,11 @@ static val me_until(val form, val menv) rest(rest(form)), nao)); } +static val me_quasilist(val form, val menv) +{ + return cons(list_s, cdr(form)); +} + val expand_forms(val form, val menv) { if (atom(form)) { @@ -3214,6 +3219,7 @@ void eval_init(void) reg_mac(intern(lit("unless"), user_package), me_unless); reg_mac(intern(lit("while"), user_package), me_while); reg_mac(intern(lit("until"), user_package), me_until); + reg_mac(quasilist_s, me_quasilist); reg_fun(cons_s, func_n2(cons)); reg_fun(intern(lit("make-lazy-cons"), user_package), func_n1(make_lazy_cons)); @@ -76,7 +76,8 @@ val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; val nongreedy_s, compiled_regex_s; val quote_s, qquote_s, unquote_s, splice_s; val sys_qquote_s, sys_unquote_s, sys_splice_s; -val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; +val zeroplus_s, optional_s, compl_s, compound_s; +val or_s, and_s, quasi_s, quasilist_s; val skip_s, trailer_s, block_s, next_s, freeform_s, fail_s, accept_s; val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s; val define_s, output_s, single_s, first_s, last_s, empty_s; @@ -5218,6 +5219,7 @@ static void obj_init(void) or_s = intern(lit("or"), user_package); and_s = intern(lit("and"), user_package); quasi_s = intern(lit("quasi"), system_package); + quasilist_s = intern(lit("quasilist"), system_package); skip_s = intern(lit("skip"), user_package); trailer_s = intern(lit("trailer"), user_package); block_s = intern(lit("block"), user_package); @@ -330,7 +330,8 @@ extern val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; extern val nongreedy_s, compiled_regex_s; extern val quote_s, qquote_s, unquote_s, splice_s; extern val sys_qquote_s, sys_unquote_s, sys_splice_s; -extern val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; +extern val zeroplus_s, optional_s, compl_s, compound_s; +extern val or_s, and_s, quasi_s, quasilist_s; extern val skip_s, trailer_s, block_s, next_s, freeform_s, fail_s, accept_s; extern val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s; extern val define_s, output_s, single_s, first_s, last_s, empty_s; @@ -1449,19 +1449,29 @@ static val do_txeval(val spec, val form, val bindings, val allow_unbound) ret = cdr(binding); } } else if (consp(form)) { - if (first(form) == quasi_s) { + val sym = first(form); + if (sym == quasi_s) { uw_env_begin; uw_set_match_context(cons(spec, bindings)); ret = cat_str(subst_vars(rest(form), bindings, nil), nil); uw_env_end; - } else if (regexp(car(form))) { + } else if (sym == quasilist_s) { + uw_env_begin; + val iter; + list_collect_decl (out, tail); + uw_set_match_context(cons(spec, bindings)); + for (iter = rest(form); iter != nil; iter = cdr(iter)) + list_collect(tail, subst_vars(cdr(car(iter)), bindings, nil)); + ret = out; + uw_env_end; + } else if (regexp(sym)) { ret = form; - } else if (first(form) == var_s) { + } else if (sym == var_s) { uw_env_begin; uw_set_match_context(cons(spec, bindings)); ret = eval(second(form), make_env(bindings, nil, nil), form); uw_env_end; - } else if (first(form) == expr_s) { + } else if (sym == expr_s) { uw_env_begin; uw_set_match_context(cons(spec, bindings)); ret = eval(rest(form), make_env(bindings, nil, nil), form); @@ -190,7 +190,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} -%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT +%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT QWLIT %% @@ -198,7 +198,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(10)); @@ -209,7 +210,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(16)); @@ -220,7 +222,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(8)); @@ -231,7 +234,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(2)); @@ -242,7 +246,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = flo_str(str); @@ -253,7 +258,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = flo_str(str); @@ -268,48 +274,53 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} yyerrorf(lit("trailing junk in floating-point literal: ~a"), str, nao); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = flo_str(str); return NUMBER; } -<NESTED,QSILIT>@{NUM} { +<NESTED,QSILIT,QWLIT>@{NUM} { val str = string_own(utf8_dup_from(yytext + 1)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(10)); return METANUM; } -<NESTED,QSILIT>@{XNUM} { +<NESTED,QSILIT,QWLIT>@{XNUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(16)); return METANUM; } -<NESTED,QSILIT>@{ONUM} { +<NESTED,QSILIT,QWLIT>@{ONUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(8)); return METANUM; } -<NESTED,QSILIT>@{BNUM} { +<NESTED,QSILIT,QWLIT>@{BNUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(2)); return METANUM; @@ -319,7 +330,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <BRACED>{BTOK} | <NESTED>{NTOK} { if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.lexeme = utf8_dup_from(yytext); @@ -537,7 +549,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <BRACED>[}] { yy_pop_state(); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); return yytext[0]; } @@ -545,7 +558,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <SPECIAL,QSPECIAL,NESTED>[)\]] { yy_pop_state(); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); return yytext[0]; } @@ -584,6 +598,16 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return WSPLICE; } +<SPECIAL,QSPECIAL,NESTED,BRACED>#\` { + yy_push_state(QWLIT); + return QWORDS; +} + +<SPECIAL,QSPECIAL,NESTED,BRACED>#\*\` { + yy_push_state(QWLIT); + return QWSPLICE; +} + <NESTED,BRACED># { return '#'; } @@ -776,26 +800,26 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return yytext[0]; } -<QSILIT>` { +<QSILIT,QWLIT>\` { yy_pop_state(); return yytext[0]; } -<STRLIT,QSILIT,WLIT>[\\][abtnvfre "`'\\ ] { +<STRLIT,QSILIT,WLIT,QWLIT>[\\][abtnvfre "`'\\ ] { yylval.chr = char_esc(yytext[1]); return LITCHAR; } -<STRLIT,QSILIT,WLIT>{WS}[\\]\n{WS} { +<STRLIT,QSILIT,WLIT,QWLIT>{WS}[\\]\n{WS} { lineno++; } -<STRLIT,QSILIT,WLIT>[\\](x{HEX}+|{OCT}+);? { +<STRLIT,QSILIT,WLIT,QWLIT>[\\](x{HEX}+|{OCT}+);? { yylval.chr = num_esc(yytext+1); return LITCHAR; } -<STRLIT,QSILIT,WLIT>[\\]. { +<STRLIT,QSILIT,WLIT,QWLIT>[\\]. { yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } @@ -835,27 +859,27 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return ERRTOK; } -<WLIT>\n { +<WLIT,QWLIT>\n { lineno++; - return '\n'; + return ' '; } -<QSILIT>@ { +<QSILIT,QWLIT>@ { yy_push_state(QSPECIAL); } -<WLIT>{WS} { +<WLIT,QWLIT>{WS} { return ' '; } -<STRLIT,CHRLIT,QSILIT,WLIT>{UANYN} { +<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); yylval.chr = buf[0]; return LITCHAR; } -<STRLIT,CHRLIT,QSILIT,WLIT>. { +<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>. { yyerrprepf(lit("non-UTF-8 byte in literal: '\\x~02x'"), num((unsigned char) yytext[0]), nao); return ERRTOK; @@ -871,7 +895,9 @@ void end_of_regex(void) yy_pop_state(); if (YYSTATE != INITIAL) { - if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); } } @@ -79,7 +79,7 @@ static val parsed_spec; %token <lineno> MOD MODLAST DEFINE TRY CATCH FINALLY %token <lineno> ERRTOK /* deliberately not used in grammar */ %token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H -%token <lineno> WORDS WSPLICE +%token <lineno> WORDS WSPLICE QWORDS QWSPLICE %token <lineno> SECRET_ESCAPE_R SECRET_ESCAPE_E %token <val> NUMBER METANUM @@ -102,7 +102,7 @@ static val parsed_spec; %type <val> regex lisp_regex regexpr regbranch %type <val> regterm regtoken regclass regclassterm regrange %type <val> strlit chrlit quasilit quasi_items quasi_item litchars wordslit -%type <val> not_a_clause +%type <val> wordsqlit not_a_clause %type <chr> regchar %type <lineno> '(' '[' '@' @@ -757,6 +757,9 @@ n_exprs : n_expr { $$ = rlcp(cons($1, nil), $1); } | WSPLICE wordslit { $$ = rl($2, num($1)); } | WSPLICE wordslit n_exprs { $$ = nappend2(rl($2, num($1)), $3); } + | QWSPLICE wordsqlit { $$ = rl($2, num($1)); } + | QWSPLICE wordsqlit + n_exprs { $$ = nappend2(rl($2, num($1)), $3); } ; n_expr : SYMTOK { $$ = sym_helper($1, t); } @@ -773,6 +776,7 @@ n_expr : SYMTOK { $$ = sym_helper($1, t); } | strlit { $$ = $1; } | quasilit { $$ = $1; } | WORDS wordslit { $$ = rl($2, num($1)); } + | QWORDS wordsqlit { $$ = rl(cons(quasilist_s, $2), num($1)); } | '\'' n_expr { $$ = rlcp(list(quote_s, $2, nao), $2); } | '^' n_expr { $$ = rlcp(list(sys_qquote_s, $2, nao), $2); } | ',' n_expr { $$ = rlcp(list(sys_unquote_s, $2, nao), $2); } @@ -911,7 +915,7 @@ quasilit : '`' '`' { $$ = null_string; } rlcp($$, $2); rl($$, num(lineno)); } | '`' error { $$ = nil; - yybadtoken(yychar, lit("string literal")); } + yybadtoken(yychar, lit("quasistring")); } ; quasi_items : quasi_item { $$ = cons($1, nil); @@ -934,13 +938,24 @@ litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(lineno)); } wordslit : '"' { $$ = nil; } | ' ' wordslit { $$ = $2; } - | '\n' wordslit { $$ = $2; } | litchars wordslit { val word = lit_char_helper($1); $$ = rlcp(cons(word, $2), $1); } | error { $$ = nil; - yybadtoken(yychar, lit("word literal")); } + yybadtoken(yychar, lit("word list")); } ; +wordsqlit : '`' { $$ = nil; } + | ' ' wordsqlit { $$ = $2; } + | quasi_items '`' { val qword = cons(quasi_s, + o_elems_transform($1)); + $$ = rlcp(cons(qword, nil), $1); } + | quasi_items ' ' + wordsqlit + { val qword = cons(quasi_s, + o_elems_transform($1)); + $$ = rlcp(cons(qword, $3), $1); } + ; + not_a_clause : ALL { $$ = make_expr(all_s, nil, num(lineno)); } | SOME { $$ = make_expr(some_s, nil, num(lineno)); } | NONE { $$ = make_expr(none_s, nil, num(lineno)); } @@ -1345,6 +1360,8 @@ void yybadtoken(int tok, val context) case HASH_H: problem = lit("#H"); break; case WORDS: problem = lit("#\""); break; case WSPLICE: problem = lit("#*\""); break; + case QWORDS: problem = lit("#`"); break; + case QWSPLICE: problem = lit("#*`"); break; } if (problem != 0) @@ -1131,16 +1131,16 @@ The first string literal is the string "foobar". The second two are "foo bar". .SS Word List Literals -A word list literal provides a convenient way to write a list of strings +A word list literal (WLL) provides a convenient way to write a list of strings when such a list can be given as whitespace-delimited words. -There are two flavors of the word list literal: the regular word list -literal which begins with #" (hash, double-quote) and the splicing -list literal which begins with #*" (hash, star, double-quote). +There are two flavors of the WLL: the regular WLL which begins with #" (hash, +double-quote) and the splicing list literal which begins with #*" (hash, star, +double-quote). -Both literals are terminated by a double quote, which may be escaped +Both types are terminated by a double quote, which may be escaped as \e" in order to include it as a character. All the escaping conventions -used in string literals can be used in words literals. +used in string literals can be used in word literals. Unlike in string literals, whitespace (tabs, spaces and newlines) is not significant in word literals: it separates words. Whitespace may be @@ -1156,9 +1156,9 @@ Example: #"abc\ def ghi" --> notates ("abc def" "ghi") -A splicing word literal differs from a word literal in that it deos not +A splicing word literal differs from a word literal in that it does not produce a list of string literals, but rather it produces a sequence of string -literal tokens that is merged into the surrounding syntax. +literals that is merged into the surrounding syntax. Example: @@ -1166,8 +1166,8 @@ Example: --> (1 2 3 "abc" "def" 4 5 ("abc" "def")) -The regular word list literal produced a single list object, but the splicing -word list literal expanded into multiple string literal objects. +The regular WLL produced a single list object, but the splicing +WLL expanded into multiple string literal objects. .SS String Quasiliterals @@ -1186,6 +1186,44 @@ the TXR pattern language when the quasiliteral occurs in the pattern language. Quasliterals can be split into multiple lines in the same way as ordinary string literals. +.SS Quasiword Lists Literals + +The quasiword list literals (QLL-s) are to quasiliterals what WLL-s are to +ordinary literals. (See the above section Word List Literals.) + +A QLL combines the convenience of the WLL +with the power of quasistrings. + +Just as in the case of WLL-s, there are two flavors of the +QLL: the regular QLL which begins with #` +(hash, backquote) and the splicing list literal which begins with #*` (hash, +star, backquote). + +Both types are terminated by a backquote, which may be escaped +as \e` in order to include it as a character. All the escaping conventions +used in quasiliterals can be used in QLL. + +Unlike in quasiliterals, whitespace (tabs, spaces and newlines) is not +significant in QLL: it separates words. Whitespace may be +escaped with a backslash in order to include it as a literal character. + +Note that the delimiting into words is done before the variable +substitution. If the variable a contains spaces, then #`@a` nevertheless +expands into a list of one item: the string derived from a. + +Example: + + #`abc @a ghi` --> notates (`abc` `@a` `ghi`) + + #`abc @d@e@f + ghi` --> notates (`abc` `@d@e@f` `ghi`) + + #`@a\ @b @c` --> notates (`@a @b` `@c`) + +A splicing QLL differs from an ordinary QLL in that it does not produce a list +of quasiliterals, but rather it produces a sequence of quasiliterals that is +merged into the surrounding syntax. + .SS Numbers TXR supports integers and floating-point numbers. |