parser: don't generate special lits outside quasiquote.

The parser generates a sys:hash-lit, sys:struct-lit or sys:vector-lit whenever a hash, struct or vector literal contains unquotes. This allows the quasiquote expander to treat these objects as ordinary list structure when interpolating inside them, and then recognize these symbols and construct the implied real objects. The issue is that these literals are generated even if the unquotes occur outside of a backquote. For instance if a vector literal like #(,a) occurs out of the blue, not in any backquote, this is still a (sys:vector-lit (sys:unquote a)) and not an actual vector. The issue is compounded because this substitution takes place even if there is no actual comma or splice notation. Even the following is a sys:vector-lit: #((sys:unquote x)). In any case, it causes problems for compiled files, because such material can occur in the data vector of a compiled toplevel form. In this patch we modify the parser to keep track of the quasiquote/unquote level. The special literals are generated only when the object occurs inside a quasiquote. * parser.h (struct parser): New member, quasi_level. * parser.c (parser_common_init): Initialize the parser's new quasi_level member. * parser.y (vector, hash, struct): To decide whether to generate the special literal, don't just check whether unquotes occur in the list. Check that we are in a quasiquote, indicated by the quasiquoting level being positive. (i_expr, n_expr): Use mid-rule actions on the quasiquote, unquote and splice rules to bump the quasiquoting level in one direction before recognizing the object, and then bump in the opposite direction when reducing the rule. (parse): Initialize quasi_level.
author: Kaz Kylheku <kaz@kylheku.com> 2018-04-04 23:30:12 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2018-04-04 23:30:12 -0700
commit: c93846acd4905a20fbb1a0613954f20e34650f92 (patch)
tree: e6494589a4cb6bc9b097e47bcdde5cc20a6d500a
parent: b4a10f1775d4e2d6ec77fd42f008cdcf638861f7 (diff)
download: txr-c93846acd4905a20fbb1a0613954f20e34650f92.tar.gz
txr-c93846acd4905a20fbb1a0613954f20e34650f92.tar.bz2
txr-c93846acd4905a20fbb1a0613954f20e34650f92.zip
3 files changed, 24 insertions, 9 deletions
diff --git a/parser.c b/parser.c
index a5109a18..9d5d2577 100644
--- a/parser.c
+++ b/parser.c
@@ -122,6 +122,7 @@ void parser_common_init(parser_t *p)
   p->circ_ref_hash = nil;
   p->circ_count = 0;
   p->syntax_tree = nil;
+  p->quasi_level = 0;
   yylex_init(&yyscan);
   p->scanner = convert(scanner_t *, yyscan);
   yyset_extra(p, p->scanner);
diff --git a/parser.h b/parser.h
index 12378631..812d5c50 100644
--- a/parser.h
+++ b/parser.h
@@ -55,6 +55,7 @@ struct parser {
   val name;
   val prepared_msg;
   val syntax_tree;
+  int quasi_level;
   val circ_ref_hash;
   cnum circ_count;
   int circ_suppress;
diff --git a/parser.y b/parser.y
index 33d850dc..421f8a1d 100644
--- a/parser.y
+++ b/parser.y
@@ -816,7 +816,7 @@ q_var : '@' '{' n_expr n_exprs_opt '}'
       ;
 
 
-vector : '#' list               { if (unquotes_occur($2, 0))
+vector : '#' list               { if (parser->quasi_level > 0 && unquotes_occur($2, 0))
                                     $$ = rlc(cons(vector_lit_s,
                                                    cons($2, nil)), $2);
                                   else
@@ -825,7 +825,7 @@ vector : '#' list               { if (unquotes_occur($2, 0))
                                   yybadtok(yychar, lit("unassigned/reserved # notation")); }
        ;
 
-hash : HASH_H list              { if (unquotes_occur($2, 0))
+hash : HASH_H list              { if (parser->quasi_level > 0 && unquotes_occur($2, 0))
                                     $$ = rl(cons(hash_lit_s, $2), num($1));
                                   else
                                     $$ = rl(hash_construct(first($2),
@@ -835,7 +835,7 @@ hash : HASH_H list              { if (unquotes_occur($2, 0))
                                     yybadtok(yychar, lit("hash literal")); }
      ;
 
-struct : HASH_S list            { if (unquotes_occur($2, 0))
+struct : HASH_S list            { if (parser->quasi_level > 0 && unquotes_occur($2, 0))
                                     $$ = rl(cons(struct_lit_s, $2),
                                               num($1));
                                   else
@@ -967,11 +967,17 @@ i_expr : SYMTOK                 { $$ = symhlpr($1, t); }
        | buflit                 { $$ = $1; }
        | '\'' i_dot_expr        { $$ = rl(rlc(list(quote_s, $2, nao), $2),
                                           num(parser->lineno)); }
-       | '^' i_dot_expr         { $$ = rl(rlc(list(sys_qquote_s, $2, nao), $2),
+       | '^'                    { parser->quasi_level++; }
+         i_dot_expr             { parser->quasi_level--;
+                                  $$ = rl(rlc(list(sys_qquote_s, $3, nao), $3),
                                           num(parser->lineno)); }
-       | ',' i_dot_expr         { $$ = rl(rlc(list(sys_unquote_s, $2, nao), $2),
+       | ','                    { parser->quasi_level--; }
+         i_dot_expr             { parser->quasi_level++;
+                                  $$ = rl(rlc(list(sys_unquote_s, $3, nao), $3),
                                           num(parser->lineno)); }
-       | SPLICE i_dot_expr      { $$ = rl(rlc(list(sys_splice_s, $2, nao), $2),
+       | SPLICE                 { parser->quasi_level--; }
+         i_dot_expr             { parser->quasi_level++;
+                                  $$ = rl(rlc(list(sys_splice_s, $3, nao), $3),
                                           num(parser->lineno)); }
        | HASH_N_EQUALS          { parser_circ_def(parser, $1, unique_s); }
          i_dot_expr             { parser_circ_def(parser, $1, $3);
@@ -1000,11 +1006,17 @@ n_expr : SYMTOK                 { $$ = symhlpr($1, t); }
        | buflit                 { $$ = $1; }
        | '\'' n_dot_expr        { $$ = rl(rlc(list(quote_s, $2, nao), $2),
                                           num(parser->lineno)); }
-       | '^' n_dot_expr         { $$ = rl(rlc(list(sys_qquote_s, $2, nao), $2),
+       | '^'                    { parser->quasi_level++; }
+         n_dot_expr             { parser->quasi_level--;
+                                  $$ = rl(rlc(list(sys_qquote_s, $3, nao), $3),
                                           num(parser->lineno)); }
-       | ',' n_dot_expr         { $$ = rl(rlc(list(sys_unquote_s, $2, nao), $2),
+       | ','                    { parser->quasi_level--; }
+         n_dot_expr             { parser->quasi_level++;
+                                  $$ = rl(rlc(list(sys_unquote_s, $3, nao), $3),
                                           num(parser->lineno)); }
-       | SPLICE n_dot_expr      { $$ = rl(rlc(list(sys_splice_s, $2, nao), $2),
+       | SPLICE                 { parser->quasi_level--; }
+         n_dot_expr             { parser->quasi_level++;
+                                  $$ = rl(rlc(list(sys_splice_s, $3, nao), $3),
                                           num(parser->lineno)); }
        | n_expr DOTDOT n_expr   { uses_or2;
                                   $$ = rlc(list(rcons_s, $1, $3, nao),
@@ -1886,6 +1898,7 @@ int parse(parser_t *parser, val name, enum prime_parser prim)
   parser->circ_count = 0;
   parser->circ_suppress = 0;
   parser->syntax_tree = nil;
+  parser->quasi_level = 0;
 
   prime_parser(parser, name, prim);
author	Kaz Kylheku <kaz@kylheku.com>	2018-04-04 23:30:12 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2018-04-04 23:30:12 -0700
commit	c93846acd4905a20fbb1a0613954f20e34650f92 (patch)
tree	e6494589a4cb6bc9b097e47bcdde5cc20a6d500a
parent	b4a10f1775d4e2d6ec77fd42f008cdcf638861f7 (diff)
download	txr-c93846acd4905a20fbb1a0613954f20e34650f92.tar.gz txr-c93846acd4905a20fbb1a0613954f20e34650f92.tar.bz2 txr-c93846acd4905a20fbb1a0613954f20e34650f92.zip