summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-08-17 19:26:42 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-08-17 19:26:42 -0700
commitb5bcecee77e513b656973476dd7b76be9038b097 (patch)
tree5f847ec77abb9ae15dec3a727ddb29697811c4ea
parent2399bf36c10ec9f7011008a76f96847be7255c9d (diff)
downloadtxr-b5bcecee77e513b656973476dd7b76be9038b097.tar.gz
txr-b5bcecee77e513b656973476dd7b76be9038b097.tar.bz2
txr-b5bcecee77e513b656973476dd7b76be9038b097.zip
parser: more efficient treatment of string literals.
In this patch we switch the string literal parser from right recursion to left recursion, so that it doesn't require a Yacc stack depth proportional to the number of characters in the literal. Secondly, we build the string directly in a syntax-directed way, rather than building a list of characters and then walking the list to build a string. This was discovered as a byacc regression, though the fix is not for the sake of byacc but basic efficiency. The Sep 29, 2015 commit 111650e235ab2e529fa1529b1c9a23688a11cd1f "Implementation of static slots for structures." extended the string literal in the struct.tl test case in such a way that if the parser is generated by byacc rather than GNU Bison, the test case fails with a "yacc stack overflow". I haven't done any regression testing with byacc in over two years so I didn't notice this. Quasiliterals could use this treatment also. Word list literals benefit from this change, but they still use a Yacc stack depth proportional to the number of words, since the accumulation of words is right recursive. * parser.y (lit_char_helper): Static function removed. (restlitchar): New grammar nonterminal symbol. (strlit, quasi_item, wordslit): No need to call lit_char_helper. (litchars): A litchars is now either a single LITCHAR, or else a LITCHARS followed by a sequence of more. This sequence is a separate production rule called restlitchar, which is purely left recursive. (If litchars is made directly left recursive without this helper rule, intractable reduce/reduce and shift/reduce conflicts arise.)
-rw-r--r--parser.y39
1 files changed, 12 insertions, 27 deletions
diff --git a/parser.y b/parser.y
index 2746f9e9..92f223d4 100644
--- a/parser.y
+++ b/parser.y
@@ -63,7 +63,6 @@ static val sym_helper(parser_t *parser, wchar_t *lexeme, val meta_allowed);
static val repeat_rep_helper(val sym, val args, val main, val parts);
static void process_catch_exprs(val exprs);
static val define_transform(parser_t *parser, val define_form);
-static val lit_char_helper(val litchars);
static val optimize_text(val text_form);
static val unquotes_occur(val quoted_form, int level);
static val rlrec(parser_t *, val form, val line);
@@ -147,8 +146,8 @@ INLINE val expand_form_ver(val form, int ver)
%type <val> o_elems_opt o_elems o_elem o_var q_var rep_elem rep_parts_opt
%type <val> regex lisp_regex regexpr regbranch
%type <val> regterm regtoken regclass regclassterm regrange
-%type <val> strlit chrlit quasilit quasi_items quasi_item litchars wordslit
-%type <val> wordsqlit buflit buflit_items buflit_item not_a_clause
+%type <val> strlit chrlit quasilit quasi_items quasi_item litchars restlitchar
+%type <val> wordslit wordsqlit buflit buflit_items buflit_item not_a_clause
%type <chr> regchar
%type <val> byacc_fool
%type <lineno> '(' '[' '@'
@@ -1141,7 +1140,7 @@ newl : '\n'
;
strlit : '"' '"' { $$ = null_string; }
- | '"' litchars '"' { $$ = lit_char_helper($2);
+ | '"' litchars '"' { $$ = $2;
rl($$, num(parser->lineno)); }
| '"' error { $$ = nil;
yybadtok(yychar, lit("string literal")); }
@@ -1181,7 +1180,7 @@ quasi_items : quasi_item { $$ = cons($1, nil);
rl($$, num(parser->lineno)); }
;
-quasi_item : litchars { $$ = lit_char_helper($1); }
+quasi_item : litchars { $$ = $1; }
| TEXT { $$ = string_own($1); }
| q_var { $$ = $1; }
| METANUM { $$ = cons(var_s, cons($1, nil));
@@ -1193,13 +1192,18 @@ quasi_item : litchars { $$ = lit_char_helper($1); }
$$ = $2; }
;
-litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(parser->lineno)); }
- | LITCHAR litchars { $$ = rl(cons(chr($1), $2), num(parser->lineno)); }
+litchars : LITCHAR { $$ = mkstring(one, chr($1)); }
+ | LITCHAR restlitchar { val ch = mkstring(one, chr($1));
+ $$ = string_extend(ch, $2); }
;
+restlitchar : LITCHAR { $$ = mkstring(one, chr($1)); }
+ | restlitchar LITCHAR { $$ = string_extend($1, chr($2)); }
+ ;
+
wordslit : '"' { $$ = nil; }
| ' ' wordslit { $$ = $2; }
- | litchars wordslit { val word = lit_char_helper($1);
+ | litchars wordslit { val word = $1;
$$ = rlcp(cons(word, $2), $1); }
| error { $$ = nil;
yybadtok(yychar, lit("word list")); }
@@ -1519,25 +1523,6 @@ static val define_transform(parser_t *parser, val define_form)
return define_form;
}
-static val lit_char_helper(val litchars)
-{
- val ret = nil;
-
- if (litchars) {
- val len = length_list(litchars), iter, ix;
- ret = mkustring(len);
- for (iter = litchars, ix = zero;
- iter;
- iter = cdr(iter), ix = plus(ix, one))
- {
- chr_str_set(ret, ix, car(iter));
- }
- } else {
- ret = nil;
- }
- return ret;
-}
-
static val optimize_text(val text_form)
{
if (all_satisfy(rest(text_form), func_n1(stringp), nil))