summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2015-08-12 06:59:15 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2015-08-12 06:59:15 -0700
commit: 08bd6d07429bfaa2abd6ddccc4812272eb0b08cb (patch)
tree: 1f6056e7e25e69b24e120fc491d5ea512686c219
parent: 4da607e09383e71134c5ba1622f3c31803f8ea9b (diff)
download: txr-08bd6d07429bfaa2abd6ddccc4812272eb0b08cb.tar.gz
txr-08bd6d07429bfaa2abd6ddccc4812272eb0b08cb.tar.bz2
txr-08bd6d07429bfaa2abd6ddccc4812272eb0b08cb.zip
Crafting a better parser-priming hack.
The method of inserting a character sequence which generates a SECRET_ESCAPE_E token is being replaced with a purely token-based method. Because we don't manipulate the input stream, the lexer is not involved. We don't have to flush its state and deal with the carry-over of the yy_hold_char. This comes about because recent changes expose a weakness in the old scheme. Now that a top-level expression can have the form expr.expr, it means that the Yacc parser reads one token ahead, to see whether there is a dot or something else. This lookahead token is discarded. We must re-create it when we call yyparse again. This re-creation is done by creating a custom yylex function, which can maintain pushback tokens. We can prime this array of pushback tokens to generate the SECRET_ESCAPE_E, as well as to re-inject the lookahead symbol that was thrown away by the previous yyparse. To know which lookahead symbol to re-inject is simple: the scanner just keeps a copy of the most recent token that it returns to the parser. When the parser returns, that token must be the lookahead one. The tokens we keep now in the parser structure are subject to garbage collection, and so we must mark them. Since the YYSTYPE union has no type field, a new API is opened up into the garbage collector to help implement a conservative GC technique. * gc.c (gc_is_heap_obj): New function. * gc.h (gc_is_heap_obj): Declared. * match.c: Include y.tab.h. This is now needed by any module that needs to instantiate a parser_t structure, because members of type YYSTYPE occur in the structure. (parser.h can still be included without y.tab.h, but only an incomplete declaration for the parser structure is then given, and a few functions are not declared.) * parser.c (yy_tok_mark): New static function. (parser_mark): Mark the recent token and the pushback tokens. (parser_common_init): Initialize the recent token, the pushback tokens, and the pushback stack index. (pushback_token): New static function. 
(prime_parser): hold_byte argument removed. Body considerably simplified. The catenated stream trick is no longer required. All we do here is set up two pushback tokens and prime the scanner, if necessary, so it is in the right start state for Lisp. * parser.l (YY_DECL): Take over definition of scanning function, renaming to yylex_impl, so we can implement yylex. (grammar): Rule which produces SECRET_ESCAPE_E token removed. (reset_scanner): Function removed. (yylex): New function. * parser.h (struct parser): Now only forward-declared unless y.tab.h has been included. New members, recent_tok, tok_pushback and tok_idx. (yyset_hold_char): Declared. (reset_scanner): Declaration removed. (yylex): Declared (if y.tab.h included). (prime_parser): Declaration updated. (prime_scanner): Declared. * Makefile: Express new dependency of txr.o, match.o and parser.o on the existence of y.tab.h.
-rw-r--r-- Makefile 3
-rw-r--r-- gc.c 5
-rw-r--r-- gc.h 1
-rw-r--r-- match.c 1
-rw-r--r-- parser.c 49
-rw-r--r-- parser.h 25
-rw-r--r-- parser.l 50
-rw-r--r-- parser.y 3
-rw-r--r-- txr.c 1
9 files changed, 97 insertions, 41 deletions
diff --git a/Makefile b/Makefile
index b6e98487..ef4591c4 100644
--- a/Makefile
+++ b/Makefile
@@ -184,7 +184,8 @@ endef
$(call DEP,$(OBJS) $(EXTRA_OBJS-y),\
$(conf_dir)/config.make $(conf_dir)/config.h)
-$(call DEP,opt/lex.yy.o dbg/lex.yy.o,y.tab.h)
+$(eval $(foreach item,lex.yy.o txr.o match.o parser.o,\
+ $(call DEP,opt/$(item) dbg/$(item),y.tab.h)))
lex.yy.c: $(top_srcdir)parser.l
$(call ABBREV,LEX)
diff --git a/gc.c b/gc.c
index ebd487ed..c4ae9274 100644
--- a/gc.c
+++ b/gc.c
@@ -395,6 +395,11 @@ static int in_heap(val ptr)
return 0;
}
+int gc_is_heap_obj(union obj *ptr)
+{
+ return in_heap(ptr);
+}
+
static void mark_mem_region(val *low, val *high)
{
if (low > high) {
diff --git a/gc.h b/gc.h
index cf53f272..b24b470e 100644
--- a/gc.h
+++ b/gc.h
@@ -34,6 +34,7 @@ void gc(void);
int gc_state(int);
void gc_mark(val);
int gc_is_reachable(val);
+int gc_is_heap_obj(union obj *ptr);
#if CONFIG_GEN_GC
val gc_set(loc, val);
diff --git a/match.c b/match.c
index 26efcc6c..b6775232 100644
--- a/match.c
+++ b/match.c
@@ -40,6 +40,7 @@
#include "unwind.h"
#include "regex.h"
#include "stream.h"
+#include "y.tab.h"
#include "parser.h"
#include "txr.h"
#include "utf8.h"
diff --git a/parser.c b/parser.c
index b79bf8ea..03677164 100644
--- a/parser.c
+++ b/parser.c
@@ -45,14 +45,24 @@
#include "hash.h"
#include "eval.h"
#include "stream.h"
+#include "y.tab.h"
#include "parser.h"
val parser_s, unique_s;
static val stream_parser_hash;
+static void yy_tok_mark(struct yy_token *tok)
+{
+ obj_t *ptr = tok->yy_lval.val;
+
+ if (gc_is_heap_obj(ptr))
+ gc_mark(ptr);
+}
+
static void parser_mark(val obj)
{
+ int i;
parser_t *p = coerce(parser_t *, obj->co.handle);
assert (p->parser == nil || p->parser == obj);
@@ -61,6 +71,9 @@ static void parser_mark(val obj)
gc_mark(p->prepared_msg);
if (p->syntax_tree != nao)
gc_mark(p->syntax_tree);
+ yy_tok_mark(&p->recent_tok);
+ for (i = 0; i < 4; i++)
+ yy_tok_mark(&p->tok_pushback[i]);
}
static void parser_destroy(val obj)
@@ -79,6 +92,8 @@ static struct cobj_ops parser_ops = {
void parser_common_init(parser_t *p)
{
+ int i;
+
p->parser = nil;
p->lineno = 1;
p->errors = 0;
@@ -89,6 +104,13 @@ void parser_common_init(parser_t *p)
yylex_init(&p->yyscan);
p->scanner = convert(scanner_t *, p->yyscan);
yyset_extra(p, p->scanner);
+ p->recent_tok.yy_char = 0;
+ p->recent_tok.yy_lval.val = 0;
+ for (i = 0; i < 4; i++) {
+ p->tok_pushback[i].yy_char = 0;
+ p->tok_pushback[i].yy_lval.val = 0;
+ }
+ p->tok_idx = 0;
}
void parser_cleanup(parser_t *p)
@@ -124,25 +146,20 @@ static val ensure_parser(val stream)
return set(cdr_l(cell), parser(stream, one));
}
-void prime_parser(parser_t *p, int hold_byte, val name)
+static void pushback_token(parser_t *p, struct yy_token *tok)
{
- val secret_token_stream;
-
- if (hold_byte) {
- val secret_token_string = format(nil, lit("@\x01" "E~a"),
- chr(hold_byte + 0xDC00), nao);
- secret_token_stream = make_string_byte_input_stream(secret_token_string);
- } else {
- secret_token_stream = make_string_byte_input_stream(lit("@\x01" "E"));
- }
+ assert (p->tok_idx < 4);
+ p->tok_pushback[p->tok_idx++] = *tok;
+}
- if (catenated_stream_p(p->stream)) {
- catenated_stream_push(secret_token_stream, p->stream);
- } else {
- set(mkloc(p->stream, p->parser),
- make_catenated_stream(list(secret_token_stream, p->stream, nao)));
- }
+void prime_parser(parser_t *p, val name)
+{
+ struct yy_token secret_escape_e = { SECRET_ESCAPE_E };
+ if (p->recent_tok.yy_char)
+ pushback_token(p, &p->recent_tok);
+ pushback_token(p, &secret_escape_e);
+ prime_scanner(p->scanner);
set(mkloc(p->name, p->parser), name);
}
diff --git a/parser.h b/parser.h
index 9b336d70..bedebfbe 100644
--- a/parser.h
+++ b/parser.h
@@ -31,7 +31,16 @@ typedef struct yyguts_t scanner_t;
typedef void *yyscan_t;
#endif
-typedef struct {
+typedef struct parser parser_t;
+
+#ifdef SPACE
+
+struct yy_token {
+ int yy_char;
+ YYSTYPE yy_lval;
+};
+
+struct parser {
val parser;
cnum lineno;
int errors;
@@ -41,7 +50,11 @@ typedef struct {
val syntax_tree;
yyscan_t yyscan;
scanner_t *scanner;
-} parser_t;
+ struct yy_token recent_tok;
+ struct yy_token tok_pushback[4];
+ int tok_idx;
+};
+#endif
extern const wchar_t *spec_file;
extern val form_to_ln_hash;
@@ -53,14 +66,18 @@ void yyerrorf(scanner_t *scanner, val s, ...);
void yybadtoken(parser_t *, int tok, val context);
void end_of_regex(scanner_t *scanner);
void end_of_char(scanner_t *scanner);
-int reset_scanner(scanner_t *scanner);
+#ifdef SPACE
+int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner);
+#endif
int yylex_init(yyscan_t *pscanner);
int yylex_destroy(yyscan_t scanner);
parser_t *yyget_extra(yyscan_t scanner);
void yyset_extra(parser_t *, yyscan_t);
+void yyset_hold_char(yyscan_t, int);
void parser_l_init(void);
void open_txr_file(val spec_file, val *txr_lisp_p, val *name, val *stream);
-void prime_parser(parser_t *, int hold_byte, val name);
+void prime_parser(parser_t *, val name);
+void prime_scanner(scanner_t *);
int parse_once(val stream, val name, parser_t *parser);
int parse(parser_t *parser, val name);
val source_loc(val form);
diff --git a/parser.l b/parser.l
index 59189385..92bad198 100644
--- a/parser.l
+++ b/parser.l
@@ -47,10 +47,10 @@
#include "signal.h"
#include "unwind.h"
#include "hash.h"
+#include "y.tab.h"
#include "parser.h"
#include "eval.h"
#include "txr.h"
-#include "y.tab.h"
#define YY_INPUT(buf, result, max_size) \
do { \
@@ -61,6 +61,9 @@
result = n; \
} while (0)
+#define YY_DECL \
+ static int yylex_impl(YYSTYPE *yylval_param, yyscan_t yyscanner)
+
int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */
val form_to_ln_hash;
@@ -860,12 +863,6 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return SECRET_ESCAPE_R;
}
-<INITIAL>@\x01E {
- yy_push_state(SPECIAL, yyscanner);
- yy_push_state(NESTED, yyscanner);
- return SECRET_ESCAPE_E;
-}
-
<INITIAL>^@[#;].*\n {
/* eat whole line comment */
yyextra->lineno++;
@@ -1015,18 +1012,6 @@ void end_of_char(scanner_t *yyg)
yy_pop_state(yyg);
}
-int reset_scanner(scanner_t *yyg)
-{
- int hold_byte = yyg->yy_hold_char;
-
- while (YYSTATE != INITIAL)
- yy_pop_state(yyg);
-
- yy_flush_buffer(YY_CURRENT_BUFFER, yyg);
-
- return hold_byte;
-}
-
val source_loc(val form)
{
return gethash(form_to_ln_hash, form);
@@ -1040,6 +1025,33 @@ val source_loc_str(val form, val alt)
return if3(line, format(nil, lit("~a:~a"), file, line, nao), alt);
}
+int yylex(YYSTYPE *yylval_param, yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = convert(struct yyguts_t *, yyscanner);
+ int yy_char;
+
+ if (yyextra->tok_idx > 0) {
+ struct yy_token *tok = &yyextra->tok_pushback[--yyextra->tok_idx];
+ yyextra->recent_tok = *tok;
+ *yylval_param = tok->yy_lval;
+ return tok->yy_char;
+ }
+
+ yy_char = yyextra->recent_tok.yy_char = yylex_impl(yylval_param, yyscanner);
+ yyextra->recent_tok.yy_lval = *yylval_param;
+
+ return yy_char;
+}
+
+void prime_scanner(scanner_t *yyg)
+{
+ if (YYSTATE == INITIAL) {
+ yy_push_state(SPECIAL, yyg);
+ yy_push_state(NESTED, yyg);
+ yy_push_state(NESTED, yyg);
+ }
+}
+
void parser_l_init(void)
{
prot1(&form_to_ln_hash);
diff --git a/parser.y b/parser.y
index 1902633d..4db7ae90 100644
--- a/parser.y
+++ b/parser.y
@@ -45,6 +45,7 @@
#include "hash.h"
#include "eval.h"
#include "stream.h"
+#include "y.tab.h"
#include "parser.h"
static val sym_helper(parser_t *parser, wchar_t *lexeme, val meta_allowed);
@@ -1491,7 +1492,7 @@ int parse(parser_t *parser, val name)
parser->errors = 0;
parser->prepared_msg = nil;
parser->syntax_tree = nil;
- prime_parser(parser, reset_scanner(parser->scanner), name);
+ prime_parser(parser, name);
res = yyparse(parser->scanner, parser);
diff --git a/txr.c b/txr.c
index b9a65f80..6dedb8a8 100644
--- a/txr.c
+++ b/txr.c
@@ -45,6 +45,7 @@
#include "gc.h"
#include "signal.h"
#include "unwind.h"
+#include "y.tab.h"
#include "parser.h"
#include "match.h"
#include "utf8.h"