diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2017-08-17 19:26:42 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2017-08-17 19:26:42 -0700 |
commit | b5bcecee77e513b656973476dd7b76be9038b097 (patch) | |
tree | 5f847ec77abb9ae15dec3a727ddb29697811c4ea | |
parent | 2399bf36c10ec9f7011008a76f96847be7255c9d (diff) | |
download | txr-b5bcecee77e513b656973476dd7b76be9038b097.tar.gz txr-b5bcecee77e513b656973476dd7b76be9038b097.tar.bz2 txr-b5bcecee77e513b656973476dd7b76be9038b097.zip |
parser: more efficient treatment of string literals.
In this patch we switch the string literal parser from right
recursion to left recursion, so that it doesn't require a Yacc
stack depth proportional to the number of characters in the
literal. Secondly, we build the string directly in a
syntax-directed way, rather than building a list of characters
and then walking the list to build a string.
This was discovered as a byacc regression, though the
fix is not for the sake of byacc but basic efficiency.
The Sep 29, 2015 commit
111650e235ab2e529fa1529b1c9a23688a11cd1f
"Implementation of static slots for structures."
extended the string literal in the struct.tl test case in such
a way that if the parser is generated by byacc rather than GNU
Bison, the test case fails with a "yacc stack overflow". I
haven't done any regression testing with byacc in over two
years so I didn't notice this.
Quasiliterals could use this treatment also.
Word list literals benefit from this change, but they still
use a Yacc stack depth proportional to the number of words,
since the accumulation of words is right recursive.
* parser.y (lit_char_helper): Static function removed.
(restlitchar): New grammar nonterminal symbol.
(strlit, quasi_item, wordslit): No need to call
lit_char_helper.
(litchars): A litchars is now either a single LITCHAR,
or else a LITCHARS followed by a sequence of more.
This sequence is a separate production rule called
restlitchar, which is purely left recursive. (If
litchars is made directly left recursive without
this helper rule, intractable reduce/reduce and
shift/reduce conflicts arise.)
-rw-r--r-- | parser.y | 39 |
1 files changed, 12 insertions, 27 deletions
@@ -63,7 +63,6 @@ static val sym_helper(parser_t *parser, wchar_t *lexeme, val meta_allowed); static val repeat_rep_helper(val sym, val args, val main, val parts); static void process_catch_exprs(val exprs); static val define_transform(parser_t *parser, val define_form); -static val lit_char_helper(val litchars); static val optimize_text(val text_form); static val unquotes_occur(val quoted_form, int level); static val rlrec(parser_t *, val form, val line); @@ -147,8 +146,8 @@ INLINE val expand_form_ver(val form, int ver) %type <val> o_elems_opt o_elems o_elem o_var q_var rep_elem rep_parts_opt %type <val> regex lisp_regex regexpr regbranch %type <val> regterm regtoken regclass regclassterm regrange -%type <val> strlit chrlit quasilit quasi_items quasi_item litchars wordslit -%type <val> wordsqlit buflit buflit_items buflit_item not_a_clause +%type <val> strlit chrlit quasilit quasi_items quasi_item litchars restlitchar +%type <val> wordslit wordsqlit buflit buflit_items buflit_item not_a_clause %type <chr> regchar %type <val> byacc_fool %type <lineno> '(' '[' '@' @@ -1141,7 +1140,7 @@ newl : '\n' ; strlit : '"' '"' { $$ = null_string; } - | '"' litchars '"' { $$ = lit_char_helper($2); + | '"' litchars '"' { $$ = $2; rl($$, num(parser->lineno)); } | '"' error { $$ = nil; yybadtok(yychar, lit("string literal")); } @@ -1181,7 +1180,7 @@ quasi_items : quasi_item { $$ = cons($1, nil); rl($$, num(parser->lineno)); } ; -quasi_item : litchars { $$ = lit_char_helper($1); } +quasi_item : litchars { $$ = $1; } | TEXT { $$ = string_own($1); } | q_var { $$ = $1; } | METANUM { $$ = cons(var_s, cons($1, nil)); @@ -1193,13 +1192,18 @@ quasi_item : litchars { $$ = lit_char_helper($1); } $$ = $2; } ; -litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(parser->lineno)); } - | LITCHAR litchars { $$ = rl(cons(chr($1), $2), num(parser->lineno)); } +litchars : LITCHAR { $$ = mkstring(one, chr($1)); } + | LITCHAR restlitchar { val ch = mkstring(one, chr($1)); + $$ = string_extend(ch, $2); } ; +restlitchar : LITCHAR { $$ = mkstring(one, chr($1)); } + | restlitchar LITCHAR { $$ = string_extend($1, chr($2)); } + ; + wordslit : '"' { $$ = nil; } | ' ' wordslit { $$ = $2; } - | litchars wordslit { val word = lit_char_helper($1); + | litchars wordslit { val word = $1; $$ = rlcp(cons(word, $2), $1); } | error { $$ = nil; yybadtok(yychar, lit("word list")); } @@ -1519,25 +1523,6 @@ static val define_transform(parser_t *parser, val define_form) return define_form; } -static val lit_char_helper(val litchars) -{ - val ret = nil; - - if (litchars) { - val len = length_list(litchars), iter, ix; - ret = mkustring(len); - for (iter = litchars, ix = zero; - iter; - iter = cdr(iter), ix = plus(ix, one)) - { - chr_str_set(ret, ix, car(iter)); - } - } else { - ret = nil; - } - return ret; -} - static val optimize_text(val text_form) { if (all_satisfy(rest(text_form), func_n1(stringp), nil)) |