From 600f1bda2366b82d1492eb04a7419d12f199349b Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Tue, 6 Dec 2016 21:56:25 -0800
Subject: parser: fix problems at EOF involving #; syntax.

This patch addresses a problem whereby if a TXR Lisp file ends
with an erased object notation such as #;(a b c), there is a
syntax error.

The strategy is to simplify the grammar so that a single
yyparse primed with SECRET_ESCAPE_E or SECRET_ESCAPE_I will
read either an object, or just one instance of the #;
notation. If #;OBJ is read, then the parse tree is returned as
the nao value. The caller knows that #;OBJ must have occurred
because there are no errors and the parser isn't at EOF, yet
there is no parse tree. Then in lisp_parse we can loop on this
situation, and make adjustments elsewhere also.

So that iread continues to work, we must separate the
parser_eof condition from the lookahead token. Under iread,
we were clearing the token in prime_parser_post, but that
was having the side effect of making the parser look like
it is in EOF. We now preserve the EOF indication in a flag,
so we can manipulate the token.

* parser.h (struct parser): new member, eof.

* parser.c (parser_common_init): Initialize new eof flag
in parser structure to zero.
(prime_parser_post): Set the eof flag if the parser's most
recent token is zero.
(lisp_parse_impl): Call the parser repeatedly while there
are no errors, and no EOF, yet no object has been produced.
This indicates that a #; erasure has been processed.
(read_eval_stream): Restructure the logic here for
clarity. Do not break the loop if error_val was returned
from the parser, but there are no errors, and the parser isn't
at EOF. This behavior is probably redundant with respect
to the loop in lisp_parse_impl.
(read_eval_ret_last): Bugfixes here. Pass an error indicating
value down to lisp_parse, like in read_eval_stream and
make the logic similar.
(parser_eof): Just return an indication based on the
eof flag.

* parser.y (hash_semis_n_expr, hash_semis_i_expr,
ignored_i_exprs, ignored_n_exprs): Grammar rules removed.
(hash_semi_or_n_expr, hash_semi_or_i_expr): New grammar
rules.
(spec): Retarget SECRET_ESCAPE_E and SECRET_ESCAPE_I
cases to new rules.
(parse): Clear eof flag to zero.
---
 parser.c | 39 +++++++++++++++++++++++++++++----------
 parser.h |  1 +
 parser.y | 38 ++++++++++++++------------------------
 3 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/parser.c b/parser.c
index f4f1c1ce..f2a7e310 100644
--- a/parser.c
+++ b/parser.c
@@ -109,6 +109,7 @@ void parser_common_init(parser_t *p)
   p->parser = nil;
   p->lineno = 1;
   p->errors = 0;
+  p->eof = 0;
   p->stream = nil;
   p->name = nil;
   p->prepared_msg = nil;
@@ -201,6 +202,7 @@ void prime_parser(parser_t *p, val name, enum prime_parser prim)
 
 void prime_parser_post(parser_t *p, enum prime_parser prim)
 {
+  p->eof = (p->recent_tok.yy_char == 0);
   if (prim == prime_interactive)
     p->recent_tok.yy_char = 0;
 }
@@ -524,14 +526,20 @@ static val lisp_parse_impl(val interactive, val source_in, val error_stream,
 
   env_vbind(dyn_env, stderr_s, error_stream);
 
-  {
+  for (;;) {
     int gc = gc_state(0);
     enum prime_parser prime = if3(interactive, prime_interactive, prime_lisp);
     parse(pi, if3(std_error != std_null, name, lit("")), prime);
     gc_state(gc);
-    parsed = t;
+
+    if (pi->syntax_tree == nao && pi->errors == 0 && !parser_eof(parser))
+      continue;
+
+    break;
   }
 
+  parsed = t;
+
   uw_unwind {
     dyn_env = saved_dyn;
     if (!parsed) {
@@ -587,12 +595,15 @@ val read_eval_stream(val stream, val error_stream, val hash_bang_support)
     val parser = get_parser(stream);
 
     if (form == error_val) {
-      if (parser_errors(parser) == zero)
+      if (parser_errors(parser) != zero)
+        return nil;
+      if (parser_eof(parser))
         break;
-      return nil;
+      continue;
     }
 
     (void) eval_intrinsic(form, nil);
+
     if (parser_eof(parser))
       break;
   }
@@ -858,19 +869,27 @@ static val read_eval_ret_last(val env, val counter,
                               val in_stream, val out_stream)
 {
   val lineno = one;
+  val error_val = gensym(nil);
   val name = format(nil, lit("paste-~a"), counter, nao);
+  val value = nil;
 
   for (;; lineno = succ(lineno)) {
-    val form = lisp_parse(in_stream, out_stream, colon_k, name, lineno);
+    val form = lisp_parse(in_stream, out_stream, error_val, name, lineno);
     val parser = get_parser(in_stream);
-    val value = eval_intrinsic(form, nil);
 
-    if (parser_eof(parser)) {
-      prinl(value, out_stream);
-      break;
+    if (form == error_val) {
+      if (parser_errors(parser) != zero || parser_eof(parser))
+        break;
+      continue;
     }
+
+    value = eval_intrinsic(form, nil);
+
+    if (parser_eof(parser))
+      break;
   }
 
+  prinl(value, out_stream);
   return t;
 }
 
@@ -1071,7 +1090,7 @@ val parser_errors(val parser)
 val parser_eof(val parser)
 {
   parser_t *p = coerce(parser_t *, cobj_handle(parser, parser_s));
-  return tnil(p->recent_tok.yy_char == 0);
+  return tnil(p->eof);
 }
 
 static val circref(val n)
diff --git a/parser.h b/parser.h
index 6980e0f4..658178c7 100644
--- a/parser.h
+++ b/parser.h
@@ -50,6 +50,7 @@ struct parser {
   val parser;
   cnum lineno;
   int errors;
+  int eof;
   val stream;
   val name;
   val prepared_msg;
diff --git a/parser.y b/parser.y
index 24114b88..5703bef0 100644
--- a/parser.y
+++ b/parser.y
@@ -111,8 +111,7 @@ int yyparse(scanner_t *, parser_t *);
 
 %token <chr> REGCHAR REGTOKEN LITCHAR SPLICE CONSDOT LAMBDOT
 
-%type <val> spec hash_semis_n_expr hash_semis_i_expr
-%type <val> ignored_i_exprs ignored_n_exprs
+%type <val> spec hash_semi_or_n_expr hash_semi_or_i_expr
 %type <val> clauses_rev clauses_opt clause
 %type <val> all_clause some_clause none_clause maybe_clause block_clause
 %type <val> cases_clause choose_clause gather_clause collect_clause until_last
@@ -151,10 +150,10 @@ int yyparse(scanner_t *, parser_t *);
 
 spec : clauses_opt              { parser->syntax_tree = $1; }
      | SECRET_ESCAPE_R regexpr  { parser->syntax_tree = $2; end_of_regex(scnr); }
-     | SECRET_ESCAPE_E hash_semis_n_expr
+     | SECRET_ESCAPE_E hash_semi_or_n_expr
                                 { parser->syntax_tree = $2; YYACCEPT; }
        byacc_fool               { internal_error("notreached"); }
-     | SECRET_ESCAPE_I hash_semis_i_expr
+     | SECRET_ESCAPE_I hash_semi_or_i_expr
                                 { parser->syntax_tree = $2; YYACCEPT; }
        byacc_fool               { internal_error("notreached"); }
      | SECRET_ESCAPE_E          { if (yychar == YYEOF) {
@@ -179,29 +178,19 @@ spec : clauses_opt              { parser->syntax_tree = $1; }
 
      ;
 
-hash_semis_n_expr : ignored_n_exprs n_expr      { $$ = $2; }
-                  | n_expr                      { $$ = $1; }
-                  ;
 
-ignored_n_exprs : ignored_n_exprs HASH_SEMI     { parser->circ_suppress = 1; }
-                  n_expr                        { parser->circ_suppress = 0;
-                                                  $$ = nil; }
-                | HASH_SEMI                     { parser->circ_suppress = 1; }
-                  n_expr                        { parser->circ_suppress = 0;
-                                                  $$ = nil; }
-                ;
+hash_semi_or_n_expr : HASH_SEMI                 { parser->circ_suppress = 1; }
+                      n_expr                    { parser->circ_suppress = 0;
+                                                  $$ = nao; }
+                    | n_expr                    { $$ = $1; }
+                    ;
 
-hash_semis_i_expr : ignored_i_exprs i_expr      { $$ = $2; }
-                  | i_expr                      { $$ = $1; }
-                  ;
+hash_semi_or_i_expr : HASH_SEMI                 { parser->circ_suppress = 1; }
+                      i_expr                    { parser->circ_suppress = 0;
+                                                  $$ = nao; }
+                    | i_expr                    { $$ = $1; }
+                    ;
 
-ignored_i_exprs : ignored_i_exprs HASH_SEMI     { parser->circ_suppress = 1; }
-                  i_expr                        { parser->circ_suppress = 0;
-                                                  $$ = nil; }
-                | HASH_SEMI                     { parser->circ_suppress = 1; }
-                  i_expr                        { parser->circ_suppress = 0;
-                                                  $$ = nil; }
-                ;
 
 /* Hack needed for Berkeley Yacc */
 byacc_fool : n_expr { internal_error("notreached"); }
@@ -1787,6 +1776,7 @@ int parse(parser_t *parser, val name, enum prime_parser prim)
   int res = 0;
 
   parser->errors = 0;
+  parser->eof = 0;
   parser->prepared_msg = nil;
   parser->circ_ref_hash = nil;
   parser->circ_count = 0;
-- 
cgit v1.2.3