From 600f1bda2366b82d1492eb04a7419d12f199349b Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 6 Dec 2016 21:56:25 -0800 Subject: parser: fix problems at EOF involving #; syntax. This patch addresses a problem whereby if a TXR Lisp file ends with an erased object notation such as #;(a b c), there is a syntax error. The strategy is to simplify the grammar so that a single yyparse primed with SECRET_ESCAPE_E or SECRET_ESCAPE_I will read either an object, or just one instance of the #; notation. If #;OBJ is read, then the parse tree is returned as the nao value. The caller knows that #;OBJ must have occurred because there are no errors and the parser isn't at EOF, yet there is no parse tree. Then in lisp_parse we can loop on this situation, and make adjustments elsewhere also. So that iread continues to work, we must separate the parser_eof condition from the lookahead token. Under iread, we were clearing the token in prime_parser_post, but that was having the side effect of making the parser look like it is in EOF. We now preserve the EOF indication in a flag, so we can manipulate the token. * parser.h (struct parser): new member, eof. * parser.c (parser_common_init): Initialize new eof flag in parser structure to zero. (prime_parser_post): Set the eof flag if the parser's most recent token is zero. (lisp_parse_impl): Call the parser repeatedly while there are no errors, and no EOF, yet no object has been produced. This indicates that a #; erasure has been processed. (read_eval_stream): Restructure the logic here for clarity. Do not break the loop if error_val was returned from the parser, but there are no errors, and the parser isn't at EOF. This behavior is probably redundant with respect to the loop in lisp_parse_impl. (read_eval_ret_last): Bugfixes here. Pass an error indicating value down to lisp_parse, like in read_eval_stream and make the logic similar. (parser_eof): Just return an indication based on the eof flag. 
* parser.y (hash_semis_n_expr, hash_semis_i_expr, ignored_i_exprs, ignored_n_exprs): Grammar rules removed. (hash_semi_or_n_expr, hash_semi_or_i_expr): New grammar rules. (spec): Retarget SECRET_ESCAPE_E and SECRET_ESCAPE_I cases to new rules. (parse): Clear eof flag to zero. --- parser.c | 39 +++++++++++++++++++++++++++++---------- parser.h | 1 + parser.y | 38 ++++++++++++++------------------------ 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/parser.c b/parser.c index f4f1c1ce..f2a7e310 100644 --- a/parser.c +++ b/parser.c @@ -109,6 +109,7 @@ void parser_common_init(parser_t *p) p->parser = nil; p->lineno = 1; p->errors = 0; + p->eof = 0; p->stream = nil; p->name = nil; p->prepared_msg = nil; @@ -201,6 +202,7 @@ void prime_parser(parser_t *p, val name, enum prime_parser prim) void prime_parser_post(parser_t *p, enum prime_parser prim) { + p->eof = (p->recent_tok.yy_char == 0); if (prim == prime_interactive) p->recent_tok.yy_char = 0; } @@ -524,14 +526,20 @@ static val lisp_parse_impl(val interactive, val source_in, val error_stream, env_vbind(dyn_env, stderr_s, error_stream); - { + for (;;) { int gc = gc_state(0); enum prime_parser prime = if3(interactive, prime_interactive, prime_lisp); parse(pi, if3(std_error != std_null, name, lit("")), prime); gc_state(gc); - parsed = t; + + if (pi->syntax_tree == nao && pi->errors == 0 && !parser_eof(parser)) + continue; + + break; } + parsed = t; + uw_unwind { dyn_env = saved_dyn; if (!parsed) { @@ -587,12 +595,15 @@ val read_eval_stream(val stream, val error_stream, val hash_bang_support) val parser = get_parser(stream); if (form == error_val) { - if (parser_errors(parser) == zero) + if (parser_errors(parser) != zero) + return nil; + if (parser_eof(parser)) break; - return nil; + continue; } (void) eval_intrinsic(form, nil); + if (parser_eof(parser)) break; } @@ -858,19 +869,27 @@ static val read_eval_ret_last(val env, val counter, val in_stream, val out_stream) { val lineno = one; + val error_val = 
gensym(nil); val name = format(nil, lit("paste-~a"), counter, nao); + val value = nil; for (;; lineno = succ(lineno)) { - val form = lisp_parse(in_stream, out_stream, colon_k, name, lineno); + val form = lisp_parse(in_stream, out_stream, error_val, name, lineno); val parser = get_parser(in_stream); - val value = eval_intrinsic(form, nil); - if (parser_eof(parser)) { - prinl(value, out_stream); - break; + if (form == error_val) { + if (parser_errors(parser) != zero || parser_eof(parser)) + break; + continue; } + + value = eval_intrinsic(form, nil); + + if (parser_eof(parser)) + break; } + prinl(value, out_stream); return t; } @@ -1071,7 +1090,7 @@ val parser_errors(val parser) val parser_eof(val parser) { parser_t *p = coerce(parser_t *, cobj_handle(parser, parser_s)); - return tnil(p->recent_tok.yy_char == 0); + return tnil(p->eof); } static val circref(val n) diff --git a/parser.h b/parser.h index 6980e0f4..658178c7 100644 --- a/parser.h +++ b/parser.h @@ -50,6 +50,7 @@ struct parser { val parser; cnum lineno; int errors; + int eof; val stream; val name; val prepared_msg; diff --git a/parser.y b/parser.y index 24114b88..5703bef0 100644 --- a/parser.y +++ b/parser.y @@ -111,8 +111,7 @@ int yyparse(scanner_t *, parser_t *); %token REGCHAR REGTOKEN LITCHAR SPLICE CONSDOT LAMBDOT -%type spec hash_semis_n_expr hash_semis_i_expr -%type ignored_i_exprs ignored_n_exprs +%type spec hash_semi_or_n_expr hash_semi_or_i_expr %type clauses_rev clauses_opt clause %type all_clause some_clause none_clause maybe_clause block_clause %type cases_clause choose_clause gather_clause collect_clause until_last @@ -151,10 +150,10 @@ int yyparse(scanner_t *, parser_t *); spec : clauses_opt { parser->syntax_tree = $1; } | SECRET_ESCAPE_R regexpr { parser->syntax_tree = $2; end_of_regex(scnr); } - | SECRET_ESCAPE_E hash_semis_n_expr + | SECRET_ESCAPE_E hash_semi_or_n_expr { parser->syntax_tree = $2; YYACCEPT; } byacc_fool { internal_error("notreached"); } - | SECRET_ESCAPE_I hash_semis_i_expr 
+ | SECRET_ESCAPE_I hash_semi_or_i_expr { parser->syntax_tree = $2; YYACCEPT; } byacc_fool { internal_error("notreached"); } | SECRET_ESCAPE_E { if (yychar == YYEOF) { @@ -179,29 +178,19 @@ spec : clauses_opt { parser->syntax_tree = $1; } ; -hash_semis_n_expr : ignored_n_exprs n_expr { $$ = $2; } - | n_expr { $$ = $1; } - ; -ignored_n_exprs : ignored_n_exprs HASH_SEMI { parser->circ_suppress = 1; } - n_expr { parser->circ_suppress = 0; - $$ = nil; } - | HASH_SEMI { parser->circ_suppress = 1; } - n_expr { parser->circ_suppress = 0; - $$ = nil; } - ; +hash_semi_or_n_expr : HASH_SEMI { parser->circ_suppress = 1; } + n_expr { parser->circ_suppress = 0; + $$ = nao; } + | n_expr { $$ = $1; } + ; -hash_semis_i_expr : ignored_i_exprs i_expr { $$ = $2; } - | i_expr { $$ = $1; } - ; +hash_semi_or_i_expr : HASH_SEMI { parser->circ_suppress = 1; } + i_expr { parser->circ_suppress = 0; + $$ = nao; } + | i_expr { $$ = $1; } + ; -ignored_i_exprs : ignored_i_exprs HASH_SEMI { parser->circ_suppress = 1; } - i_expr { parser->circ_suppress = 0; - $$ = nil; } - | HASH_SEMI { parser->circ_suppress = 1; } - i_expr { parser->circ_suppress = 0; - $$ = nil; } - ; /* Hack needed for Berkeley Yacc */ byacc_fool : n_expr { internal_error("notreached"); } @@ -1787,6 +1776,7 @@ int parse(parser_t *parser, val name, enum prime_parser prim) int res = 0; parser->errors = 0; + parser->eof = 0; parser->prepared_msg = nil; parser->circ_ref_hash = nil; parser->circ_count = 0; -- cgit v1.2.3