summaryrefslogtreecommitdiffstats
path: root/parser.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2015-08-12 06:59:15 -0700
committerKaz Kylheku <kaz@kylheku.com>2015-08-12 06:59:15 -0700
commit08bd6d07429bfaa2abd6ddccc4812272eb0b08cb (patch)
tree1f6056e7e25e69b24e120fc491d5ea512686c219 /parser.c
parent4da607e09383e71134c5ba1622f3c31803f8ea9b (diff)
downloadtxr-08bd6d07429bfaa2abd6ddccc4812272eb0b08cb.tar.gz
txr-08bd6d07429bfaa2abd6ddccc4812272eb0b08cb.tar.bz2
txr-08bd6d07429bfaa2abd6ddccc4812272eb0b08cb.zip
Crafting a better parser-priming hack.
The method of inserting a character sequence which generates a SECRET_ESCAPE_E token is being replaced with a purely token-based method. Because we don't manipulate the input stream, the lexer is not involved. We don't have to flush its state and deal with the carry-over of the yy_hold_char. This comes about because recent changes expose a weakness in the old scheme. Now that a top-level expression can have the form expr.expr, it means that the Yacc parser reads one token ahead, to see whether there is a dot or something else. This lookahead token is discarded. We must re-create it when we call yyparse again. This re-creation is done by creating a custom yylex function, which can maintain pushback tokens. We can prime this array of pushback tokens to generate the SECRET_ESCAPE_E, as well as to re-inject the lookahead symbol that was thrown away by the previous yyparse. To know which lookahead symbol to re-inject is simple: the scanner just keeps a copy of the most recent token that it returns to the parser. When the parser returns, that token must be the lookahead one. The tokens we keep now in the parser structure are subject to garbage collection, and so we must mark them. Since the YYSTYPE union has no type field, a new API is opened up into the garbage collector to help implement a conservative GC technique. * gc.c (gc_is_heap_obj): New function. * gc.h (gc_is_heap_obj): Declared. * match.c: Include y.tab.h. This is now needed by any module that needs to instantiate a parser_t structure, because members of type YYSTYPE occur in the structure. (parser.h can still be included without y.tab.h, but only an incomplete declaration for the parser structure is then given, and a few functions are not declared.) * parser.c (yy_tok_mark): New static function. (parser_mark): Mark the recent token and the pushback tokens. (parser_common_init): Initialize the recent token, the pushback tokens, and the pushback stack index. (pushback_token): New static function.
(prime_parser): hold_byte argument removed. Body considerably simplified. The catenated stream trick is no longer required. All we do here is set up two pushback tokens and prime the scanner, if necessary, so it is in the right start state for Lisp. * parser.l (YY_DECL): Take over definition of scanning function, renaming to yylex_impl, so we can implement yylex. (grammar): Rule which produces SECRET_ESCAPE_E token removed. (reset_scanner): Function removed. (yylex): New function. * parser.h (struct parser): Now only forward-declared unless y.tab.h has been included. New members, recent_tok, tok_pushback and tok_idx. (yyset_hold_char): Declared. (reset_scanner): Declaration removed. (yylex): Declared (if y.tab.h included). (prime_parser): Declaration updated. (prime_scanner): Declared. * Makefile: Express the new dependency of txr.o, match.o and parser.o on the existence of y.tab.h.
Diffstat (limited to 'parser.c')
-rw-r--r--parser.c49
1 files changed, 33 insertions, 16 deletions
diff --git a/parser.c b/parser.c
index b79bf8ea..03677164 100644
--- a/parser.c
+++ b/parser.c
@@ -45,14 +45,24 @@
#include "hash.h"
#include "eval.h"
#include "stream.h"
+#include "y.tab.h"
#include "parser.h"
val parser_s, unique_s;
static val stream_parser_hash;
+static void yy_tok_mark(struct yy_token *tok) /* GC-mark the semantic value held in a saved parser token. */
+{
+ obj_t *ptr = tok->yy_lval.val; /* read the union through its val member; per the commit notes YYSTYPE has no type field */
+
+ if (gc_is_heap_obj(ptr)) /* conservative filter: mark only when gc_is_heap_obj accepts the pointer */
+ gc_mark(ptr);
+}
+
static void parser_mark(val obj)
{
+ int i;
parser_t *p = coerce(parser_t *, obj->co.handle);
assert (p->parser == nil || p->parser == obj);
@@ -61,6 +71,9 @@ static void parser_mark(val obj)
gc_mark(p->prepared_msg);
if (p->syntax_tree != nao)
gc_mark(p->syntax_tree);
+ yy_tok_mark(&p->recent_tok);
+ for (i = 0; i < 4; i++)
+ yy_tok_mark(&p->tok_pushback[i]);
}
static void parser_destroy(val obj)
@@ -79,6 +92,8 @@ static struct cobj_ops parser_ops = {
void parser_common_init(parser_t *p)
{
+ int i;
+
p->parser = nil;
p->lineno = 1;
p->errors = 0;
@@ -89,6 +104,13 @@ void parser_common_init(parser_t *p)
yylex_init(&p->yyscan);
p->scanner = convert(scanner_t *, p->yyscan);
yyset_extra(p, p->scanner);
+ p->recent_tok.yy_char = 0;
+ p->recent_tok.yy_lval.val = 0;
+ for (i = 0; i < 4; i++) {
+ p->tok_pushback[i].yy_char = 0;
+ p->tok_pushback[i].yy_lval.val = 0;
+ }
+ p->tok_idx = 0;
}
void parser_cleanup(parser_t *p)
@@ -124,25 +146,20 @@ static val ensure_parser(val stream)
return set(cdr_l(cell), parser(stream, one));
}
-void prime_parser(parser_t *p, int hold_byte, val name)
+static void pushback_token(parser_t *p, struct yy_token *tok) /* Append a copy of *tok to the parser's pushback array. */
{
- val secret_token_stream;
-
- if (hold_byte) {
- val secret_token_string = format(nil, lit("@\x01" "E~a"),
- chr(hold_byte + 0xDC00), nao);
- secret_token_stream = make_string_byte_input_stream(secret_token_string);
- } else {
- secret_token_stream = make_string_byte_input_stream(lit("@\x01" "E"));
- }
+ assert (p->tok_idx < 4); /* 4 matches the loop bound used for tok_pushback in parser_mark and parser_common_init */
+ p->tok_pushback[p->tok_idx++] = *tok; /* struct copy into the next free slot, then advance tok_idx */
+}
- if (catenated_stream_p(p->stream)) {
- catenated_stream_push(secret_token_stream, p->stream);
- } else {
- set(mkloc(p->stream, p->parser),
- make_catenated_stream(list(secret_token_stream, p->stream, nao)));
- }
+void prime_parser(parser_t *p, val name) /* Queue pushback tokens for the next parse and store the name in the parser. */
+{
+ struct yy_token secret_escape_e = { SECRET_ESCAPE_E }; /* initializes the first member (presumably yy_char -- confirm in parser.h); the rest are zeroed */
+ if (p->recent_tok.yy_char) /* nonzero means a token has been recorded; parser_common_init sets this to 0 */
+ pushback_token(p, &p->recent_tok); /* re-inject it; the commit notes describe it as the lookahead discarded by the previous yyparse */
+ pushback_token(p, &secret_escape_e); /* pushed last -- presumably delivered first if yylex pops the array as a stack; confirm in parser.l */
+ prime_scanner(p->scanner); /* defined elsewhere; per the commit notes, puts the scanner in the right start state for Lisp if necessary */
set(mkloc(p->name, p->parser), name);
}