diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2014-08-02 18:31:31 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2014-08-02 18:31:31 -0700 |
commit | 894c655f0214531ad7eff3d68d72792ae18d40cd (patch) | |
tree | c7da31229ce40626fa937de8e688ac645ace2309 | |
parent | 73a7ae605d364be49dc5bd1cc15c1116fe47a446 (diff) | |
download | txr-894c655f0214531ad7eff3d68d72792ae18d40cd.tar.gz txr-894c655f0214531ad7eff3d68d72792ae18d40cd.tar.bz2 txr-894c655f0214531ad7eff3d68d72792ae18d40cd.zip |
Big switch to reentrant lexing and parsing.
* parser.l (YY_INPUT): Stop relying on removed yyin_stream;
refer to stream via yyextra.
(yyin_stream, lineno, errors, spec_file_str,
prepared_error_message): Global variables removed.
(yyget_column, yyset_column): Prototypes that flex does not generate
in bison-bridge mode are declared by us to avoid compiler
warnings.
(yyerror): Takes parser and scanner as parameters. Prepared error
message is now in the parser context. Calls to other error handling
functions receive scanner context.
(yyerr): New function.
(yyerrorf, yyerrprepf): Take scanner argument, chase extra data to
get to parser, and refer to parser variables instead of globals.
(num_esc): Scanner argument added.
(%option reentrant, %option bison-bridge, %option extra-type): New
flex options.
(grammar): yyscanner added everywhere.
(end_of_char): Takes scanner argument.
(parse_init): Removed references to yyin_stream and
prepared_error_message.
(parse_reset): Function renamed to open_txr_file. Returns
results via pointers instead of setting global variables.
(regex_parse, lisp_parse): Use reentrant parser interface.
* parser.y (yyerror): Prototype removed.
(yylex): Prototype moved after grammar, with new arguments.
(sym_helper, define_transform): Take scanner argument.
(make_expr): Takes parser argument.
(rlrec): New static function.
(rl): Function turned into macro.
(mkexp, symhlpr): New macros.
(%pure-parser, %parse-param, %lex-param): New Yacc options.
(grammar): Actions re-worked for reentrance. Parser and scanner
contexts are passed down to helper functions, in some cases
via the three new macros. The result of the parse is stored
in the syntax_tree member of the parser_t structure instead
of a global. The yylex function receives the scanner instance.
(get_spec): Function removed.
(parse): New function.
* parser.h (lineno, errors, yyin_stream, spec_file_str):
Declarations removed.
(parser_t): New struct.
(yyerr): New function declared.
(yyparse, yyerror, yyerrorf, end_of_regex, end_of_char,
yylex, yylex_destroy): Declarations updated.
-rw-r--r-- | ChangeLog | 60 | ||||
-rw-r--r-- | match.c | 12 | ||||
-rw-r--r-- | parser.h | 36 | ||||
-rw-r--r-- | parser.l | 522 | ||||
-rw-r--r-- | parser.y | 280 | ||||
-rw-r--r-- | txr.c | 23 |
6 files changed, 513 insertions, 420 deletions
@@ -1,5 +1,65 @@ 2014-08-02 Kaz Kylheku <kaz@kylheku.com> + Big switch to reentrant lexing and parsing. + + * parser.l (YY_INPUT): Stop relying on removed yyin_stream; + refer to stream via yyextra. + (yyin_stream, lineno, errors, spec_file_str, + prepared_error_message): Global variables removed. + (yyget_column, yyset_column): Missing prototypes not generated by flex + in bison bridge mode have to be added by us to avoid + warning. + (yyerror): Takes parser and scanner as parameters. Prepared error + message is now in the parser context. Calls to other error handling + functions receive scanner context. + (yyerr): New function. + (yyerrorf, yyerrprepf): Takes scanner argument, chases extra data to + get to parser, and refers to parser variables instead of globals. + (num_esc): Scanner argument added. + (%option reentrant, %option bison-bridge, %option extra-type): New + flex options. + (grammar): yyscanner added everywhere. + (end_of_char): Takes scanner argument. + (parse_init): Removed references to yyin_stream and + prepared_error_message. + (parse_reset): Function renamed to open_txr_file. Returns + results via pointers instead of setting global variables. + (regex_parse, lisp_parse): Use reentrant parser interface. + + * parser.y (yyerror): Prototype removed. + (yylex): Prototype moved after grammar, with new arguments. + (sym_helper, define_transform): Take scanner argument. + (make_expr): Takes parser argument. + (rlrec): New static function. + (rl): Function turned into macro. + (mkexp, symhlpr): New macros. + (%purse-parser, %parse-param, %lex-param): New Yacc options. + (grammar): Actions re-worked for reentrance. Parser and scanner + contexts are passed down to helper functions, in some cases + via the three new macros. The result of the parse is stored + in the syntax_tree member of the parser_t structure instead + of a global. The yylex function receives the scanner instance. + (get_spec): Function removed. + (parse): New function. 
+ + * parser.h (lineno, errors, yyin_stream, spec_file_str): + Declarations removed. + (parser_t): New struct. + (yyerr): New function declared. + (yyparse, yyerror, yyerrorf, end_of_regex, end_of_char, + yylex, yylex_destroy): Declarations updated. + (yylex_init, yyget_extra, yyset_extra): Declared. + (parse_reset, rl): Declaration removed. + (open_txr_file): Declaration added. + (parse): New function. + + * match.c (v_load): Use new reentrant parser interface. + + * txr.c (txr_main): Stop using parser-related global variables; + call parser using new reentrant interface. + +2014-08-02 Kaz Kylheku <kaz@kylheku.com> + * signal.c (interrupt_count): New global variable. (sig_handler): Increment and decrement interrupt count. If the interrupt count is already positive, treat @@ -3660,16 +3660,18 @@ static val v_load(match_files_ctx *c) zero, negone), cons(target, nil)), lit("/"))); int gc = gc_state(0); - parse_reset(path); - yyparse(); - yylex_destroy(); + val stream, name; + parser_t parser; + + open_txr_file(path, &stream, &name); + parse(stream, name, &parser); gc_state(gc); - if (errors) + if (parser.errors) sem_error(specline, lit("load: errors encountered in ~s"), path, nao); { - val spec = get_spec(); + val spec = parser.syntax_tree; val result = match_files(mf_spec(*c, spec)); if (!result) { @@ -24,26 +24,34 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -extern cnum lineno; -extern int errors; -extern val yyin_stream; +typedef struct { + cnum lineno; + int errors; + val stream; + val name; + val prepared_msg; + val syntax_tree; + void *scanner; +} parser_t; + extern const wchar_t *spec_file; -extern val spec_file_str; extern val form_to_ln_hash; -int yyparse(void); -val get_spec(void); -void yyerror(const char *s); -void yyerrorf(val s, ...); +int yyparse(parser_t *, void *scanner); +void yyerror(parser_t *, void *scanner, const char *s); +void yyerr(void *scanner, const char *s); +void yyerrorf(void *scanner, val s, ...); void yybadtoken(int tok, val context); -void end_of_regex(void); -void end_of_char(void); -int yylex(void); -int yylex_destroy(void); +void end_of_regex(void *scanner); +void end_of_char(void *scanner); +int yylex_init(void **pscanner); +int yylex_destroy(void *scanner); +parser_t *yyget_extra(void *scanner); +void yyset_extra(parser_t *, void *scanner); void parse_init(void); -void parse_reset(val spec_file); +void open_txr_file(val spec_file, val *name, val *stream); +int parse(val stream, val name, parser_t *parser); val source_loc(val form); val source_loc_str(val form); -val rl(val form, val lineno); val rlset(val form, val info); INLINE val rlcp(val to, val from) { @@ -52,25 +52,17 @@ #define YY_INPUT(buf, result, max_size) \ do { \ - val c = get_byte(yyin_stream); \ + val c = get_byte(yyextra->stream); \ int n = 0; \ if (c) \ buf[n++] = (char) c_num(c); \ result = n; \ } while (0) -val yyin_stream; - -cnum lineno = 1; int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */ -int errors; -val spec_file_str; - val form_to_ln_hash; -static val prepared_error_message; - #define FLEX_NUM_VERSION 10000*YY_FLEX_MAJOR_VERSION + \ 100*YY_FLEX_MINOR_VERSION + \ YY_FLEX_SUBMINOR_VERSION @@ -82,35 +74,48 @@ int yylex_destroy(void) } #endif -void yyerror(const char *s) +/* Missing prototypes not generated by flex. 
*/ +int yyget_column(void *); +void yyset_column (int column_no , yyscan_t yyscanner); + +void yyerror(parser_t *parser, void *scanner, const char *s) { - yyerrorf(lit("~a"), string_utf8(s), nao); - if (prepared_error_message) { - yyerrorf(lit("~a"), prepared_error_message, nao); - prepared_error_message = nil; + yyerrorf(scanner, lit("~a"), string_utf8(s), nao); + if (parser->prepared_msg) { + yyerrorf(scanner, lit("~a"), parser->prepared_msg, nao); + parser->prepared_msg = nil; } } -void yyerrorf(val fmt, ...) +void yyerr(void *scanner, const char *s) { + yyerror(yyget_extra(scanner), scanner, s); +} + +void yyerrorf(void *scanner, val fmt, ...) +{ + parser_t *parser = yyget_extra(scanner); + if (opt_loglevel >= 1) { va_list vl; va_start (vl, fmt); format(std_error, lit("~a: (~a:~a): "), prog_string, - spec_file_str, num(lineno), nao); + parser->name, num(parser->lineno), nao); vformat(std_error, fmt, vl); put_char(chr('\n'), std_error); va_end (vl); } - errors++; + parser->errors++; } -static void yyerrprepf(val fmt, ...) +static void yyerrprepf(void *scanner, val fmt, ...) 
{ + parser_t *parser = yyget_extra(scanner); + if (opt_loglevel >= 1) { va_list vl; va_start (vl, fmt); - prepared_error_message = vformat_to_string(fmt, vl); + parser->prepared_msg = vformat_to_string(fmt, vl); va_end (vl); } } @@ -137,26 +142,24 @@ static wchar_t char_esc(int letter) internal_error("unhandled escape character"); } -static wchar_t num_esc(char *num) +static wchar_t num_esc(void *scn, char *num) { if (num[0] == 'x') { if (strlen(num) > 7) - yyerror("too many digits in hex character escape"); + yyerror(yyget_extra(scn), scn, "too many digits in hex character escape"); return strtol(num + 1, 0, 16); } else { if (num[0] == 'o') num++; if (strlen(num) > 8) - yyerror("too many digits in octal character escape"); + yyerror(yyget_extra(scn), scn, "too many digits in octal character escape"); return strtol(num, 0, 8); } } %} -%option stack -%option nounput -%option noinput +%option stack nounput noinput reentrant bison-bridge extra-type="parser_t *" SYM [a-zA-Z0-9_]+ SGN [+\-] @@ -208,72 +211,72 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <SPECIAL,QSPECIAL,NESTED,BRACED>{NUM} { val str = string_own(utf8_dup_from(yytext)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.val = int_str(str, num(10)); + yylval->val = int_str(str, num(10)); return NUMBER; } <SPECIAL,QSPECIAL,NESTED,BRACED>{XNUM} { val str = string_own(utf8_dup_from(yytext + 2)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.val = int_str(str, num(16)); + yylval->val = int_str(str, num(16)); return NUMBER; } <SPECIAL,QSPECIAL,NESTED,BRACED>{ONUM} { val str = 
string_own(utf8_dup_from(yytext + 2)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.val = int_str(str, num(8)); + yylval->val = int_str(str, num(8)); return NUMBER; } <SPECIAL,QSPECIAL,NESTED,BRACED>{BNUM} { val str = string_own(utf8_dup_from(yytext + 2)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.val = int_str(str, num(2)); + yylval->val = int_str(str, num(2)); return NUMBER; } <SPECIAL,QSPECIAL,NESTED,BRACED>{FLO} { val str = string_own(utf8_dup_from(yytext)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.val = flo_str(str); + yylval->val = flo_str(str); return NUMBER; } <SPECIAL,QSPECIAL,NESTED,BRACED>{FLODOT}/[^.] 
{ val str = string_own(utf8_dup_from(yytext)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.val = flo_str(str); + yylval->val = flo_str(str); return NUMBER; } @@ -282,301 +285,301 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <NESTED>({FLO}|{FLODOT}){NTOK} { val str = string_utf8(yytext); - yyerrorf(lit("trailing junk in floating-point literal: ~a"), str, nao); + yyerrorf(yyscanner, lit("trailing junk in floating-point literal: ~a"), str, nao); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.val = flo_str(str); + yylval->val = flo_str(str); return NUMBER; } <NESTED,QSILIT,QWLIT>@{NUM} { val str = string_own(utf8_dup_from(yytext + 1)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); - yylval.val = int_str(str, num(10)); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); + yylval->val = int_str(str, num(10)); return METANUM; } <NESTED,QSILIT,QWLIT>@{XNUM} { val str = string_own(utf8_dup_from(yytext + 3)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); - yylval.val = int_str(str, num(16)); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); + yylval->val = int_str(str, num(16)); return METANUM; } <NESTED,QSILIT,QWLIT>@{ONUM} { val str = string_own(utf8_dup_from(yytext + 3)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT 
- || yy_top_state() == QWLIT) - yy_pop_state(); - yylval.val = int_str(str, num(8)); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); + yylval->val = int_str(str, num(8)); return METANUM; } <NESTED,QSILIT,QWLIT>@{BNUM} { val str = string_own(utf8_dup_from(yytext + 3)); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); - yylval.val = int_str(str, num(2)); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); + yylval->val = int_str(str, num(2)); return METANUM; } <SPECIAL,QSPECIAL>{TOK} | <BRACED>{BTOK} | <NESTED>{NTOK} { - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); - yylval.lexeme = utf8_dup_from(yytext); + yylval->lexeme = utf8_dup_from(yytext); return SYMTOK; } <SPECIAL>\({WS}all{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return ALL; } <SPECIAL>\({WS}some/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return SOME; } <SPECIAL>\({WS}none{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return NONE; } <SPECIAL>\({WS}maybe{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return MAYBE; } <SPECIAL>\({WS}cases{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return CASES; } <SPECIAL>\({WS}block/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + 
yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return BLOCK; } <SPECIAL>\({WS}choose/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return CHOOSE; } <SPECIAL>\({WS}gather/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return GATHER; } <SPECIAL>\({WS}and{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return AND; } <SPECIAL>\({WS}or{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return OR; } <SPECIAL>\({WS}end{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return END; } <SPECIAL>\({WS}collect/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return COLLECT; } <SPECIAL>\({WS}coll/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return COLL; } <SPECIAL>\({WS}until{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return UNTIL; } <SPECIAL>\({WS}output/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return OUTPUT; } <SPECIAL>\({WS}repeat/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return REPEAT; } <SPECIAL>\({WS}rep/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return REP; } <SPECIAL>\({WS}single{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + 
yylval->lineno = yyextra->lineno; return SINGLE; } <SPECIAL>\({WS}first{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return FIRST; } <SPECIAL>\({WS}last{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return LAST; } <SPECIAL>\({WS}empty{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return EMPTY; } <SPECIAL>\({WS}mod/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return MOD; } <SPECIAL>\({WS}modlast/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return MODLAST; } <SPECIAL>\({WS}define/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return DEFINE; } <SPECIAL>\({WS}try{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return TRY; } <SPECIAL>\({WS}catch/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return CATCH; } <SPECIAL>\({WS}finally{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return FINALLY; } <SPECIAL>\({WS}if/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return IF; } <SPECIAL>\({WS}elif/{ID_END} { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return ELIF; } <SPECIAL>\({WS}else{WS}\) { - yy_pop_state(); - yylval.lineno = lineno; + yy_pop_state(yyscanner); + yylval->lineno = yyextra->lineno; return ELSE; } 
<SPECIAL,QSPECIAL>[{] { - yy_push_state(BRACED); - yylval.lineno = lineno; + yy_push_state(BRACED, yyscanner); + yylval->lineno = yyextra->lineno; return yytext[0]; } <SPECIAL,QSPECIAL,NESTED,BRACED>[(\[] { - yy_push_state(NESTED); - yylval.lineno = lineno; + yy_push_state(NESTED, yyscanner); + yylval->lineno = yyextra->lineno; return yytext[0]; } <NESTED,BRACED>@ { - yylval.lineno = lineno; + yylval->lineno = yyextra->lineno; return yytext[0]; } <NESTED,QSPECIAL,BRACED>,[*] { - yylval.chr = '*'; + yylval->chr = '*'; return SPLICE; } <NESTED>[,'^] { - yylval.chr = yytext[0]; + yylval->chr = yytext[0]; return yytext[0]; } <QSPECIAL,BRACED>[,'] { - yylval.chr = yytext[0]; + yylval->chr = yytext[0]; return yytext[0]; } <BRACED>[}] { - yy_pop_state(); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + yy_pop_state(yyscanner); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); return yytext[0]; } <SPECIAL,QSPECIAL,NESTED>[)\]] { - yy_pop_state(); - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + yy_pop_state(yyscanner); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); return yytext[0]; } @@ -585,42 +588,42 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <SPECIAL,QSPECIAL,NESTED,BRACED>\" { - yy_push_state(STRLIT); + yy_push_state(STRLIT, yyscanner); return '"'; } <SPECIAL,QSPECIAL,NESTED,BRACED>#\\ { - yy_push_state(CHRLIT); + yy_push_state(CHRLIT, yyscanner); return HASH_BACKSLASH; } <SPECIAL,QSPECIAL,NESTED,BRACED>#[/] { - yy_push_state(REGEX); + yy_push_state(REGEX, yyscanner); return HASH_SLASH; } <SPECIAL,QSPECIAL,NESTED,BRACED>` { - yy_push_state(QSILIT); + yy_push_state(QSILIT, yyscanner); return '`'; } <SPECIAL,QSPECIAL,NESTED,BRACED>#\" { - 
yy_push_state(WLIT); + yy_push_state(WLIT, yyscanner); return WORDS; } <SPECIAL,QSPECIAL,NESTED,BRACED>#\*\" { - yy_push_state(WLIT); + yy_push_state(WLIT, yyscanner); return WSPLICE; } <SPECIAL,QSPECIAL,NESTED,BRACED>#\` { - yy_push_state(QWLIT); + yy_push_state(QWLIT, yyscanner); return QWORDS; } <SPECIAL,QSPECIAL,NESTED,BRACED>#\*\` { - yy_push_state(QWLIT); + yy_push_state(QWLIT, yyscanner); return QWSPLICE; } @@ -629,61 +632,61 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <NESTED,BRACED>#H { - yylval.lineno = lineno; + yylval->lineno = yyextra->lineno; return HASH_H; } <NESTED>\.\. { - yylval.lineno = lineno; + yylval->lineno = yyextra->lineno; return DOTDOT; } <SPECIAL>@ { - yy_pop_state(); - yylval.lexeme = chk_strdup(L"@"); + yy_pop_state(yyscanner); + yylval->lexeme = chk_strdup(L"@"); return TEXT; } <SPECIAL,QSPECIAL,NESTED,BRACED>\n { - lineno++; + yyextra->lineno++; } <SPECIAL,BRACED>[/] { - yy_push_state(REGEX); + yy_push_state(REGEX, yyscanner); return '/'; } <SPECIAL,QSPECIAL,NESTED>\. { - yylval.chr = '.'; + yylval->chr = '.'; return '.'; } <SPECIAL,QSPECIAL,NESTED,BRACED>[\\]\n{WS} { if (YYSTATE == SPECIAL) - yy_pop_state(); /* @\ continuation */ - lineno++; + yy_pop_state(yyscanner); /* @\ continuation */ + yyextra->lineno++; } <SPECIAL>[\\][abtnvfre ] { wchar_t lexeme[2]; lexeme[0] = char_esc(yytext[1]); lexeme[1] = 0; - yylval.lexeme = chk_strdup(lexeme); - yy_pop_state(); + yylval->lexeme = chk_strdup(lexeme); + yy_pop_state(yyscanner); return TEXT; } <SPECIAL>[\\](x{HEX}+|{OCT}+) { wchar_t lexeme[2]; - lexeme[0] = num_esc(yytext + 1); + lexeme[0] = num_esc(yyscanner, yytext + 1); lexeme[1] = 0; - yylval.lexeme = chk_strdup(lexeme); - yy_pop_state(); + yylval->lexeme = chk_strdup(lexeme); + yy_pop_state(yyscanner); return TEXT; } <SPECIAL>[\\]. 
{ - yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); + yyerrorf(yyscanner, lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } <SPECIAL,QSPECIAL,NESTED,BRACED>[;].* { @@ -704,53 +707,53 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <REGEX>[/] { - yylval.chr = '/'; + yylval->chr = '/'; return '/'; } <REGEX>[\\][abtnvfre\\ ] { - yylval.chr = char_esc(yytext[1]); + yylval->chr = char_esc(yytext[1]); return REGCHAR; } <REGEX>[\\](x{HEX}+|{OCT}+);? { - yylval.chr = num_esc(yytext + 1); + yylval->chr = num_esc(yyscanner, yytext + 1); return REGCHAR; } <REGEX>[\\][sSdDwW] { - yylval.chr = yytext[1]; + yylval->chr = yytext[1]; return REGTOKEN; } <REGEX>{WS}[\\]\n{WS} { - lineno++; + yyextra->lineno++; } <REGEX>\n { - lineno++; + yyextra->lineno++; yyerrprepf(lit("newline in regex"), nao); return ERRTOK; } <REGEX>[.*?+~&%] { - yylval.chr = yytext[0]; + yylval->chr = yytext[0]; return yytext[0]; } <REGEX>[\[\]\-] { - yylval.chr = yytext[0]; + yylval->chr = yytext[0]; return yytext[0]; } <REGEX>[()|] { - yylval.chr = yytext[0]; + yylval->chr = yytext[0]; return yytext[0]; } <REGEX>[\\]. 
{ - yylval.chr = yytext[1]; + yylval->chr = yytext[1]; return REGCHAR; } @@ -762,7 +765,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <REGEX>{UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); - yylval.chr = buf[0]; + yylval->chr = buf[0]; return REGCHAR; } @@ -773,43 +776,43 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <INITIAL>[ ]+ { - yylval.lexeme = utf8_dup_from(yytext); + yylval->lexeme = utf8_dup_from(yytext); return SPACE; } <INITIAL>({UONLY}|[^@\n ])+ { - yylval.lexeme = utf8_dup_from(yytext); + yylval->lexeme = utf8_dup_from(yytext); return TEXT; } <INITIAL>\n { - lineno++; + yyextra->lineno++; return '\n'; } <INITIAL>@{WS}\* { - yy_push_state(SPECIAL); + yy_push_state(SPECIAL, yyscanner); return '*'; } <INITIAL>@ { - yy_push_state(SPECIAL); + yy_push_state(SPECIAL, yyscanner); } <INITIAL>@\x01R { - yy_push_state(REGEX); + yy_push_state(REGEX, yyscanner); return SECRET_ESCAPE_R; } <INITIAL>@\x01E { - yy_push_state(SPECIAL); - yy_push_state(NESTED); + yy_push_state(SPECIAL, yyscanner); + yy_push_state(NESTED, yyscanner); return SECRET_ESCAPE_E; } <INITIAL>^@[#;].*\n { /* eat whole line comment */ - lineno++; + yyextra->lineno++; } <INITIAL>@[#;].* { @@ -817,76 +820,76 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <STRLIT,WLIT>\" { - yy_pop_state(); + yy_pop_state(yyscanner); return yytext[0]; } <QSILIT,QWLIT>\` { - yy_pop_state(); + yy_pop_state(yyscanner); return yytext[0]; } <STRLIT,QSILIT,WLIT,QWLIT>[\\][abtnvfre "`'\\ ] { - yylval.chr = char_esc(yytext[1]); + yylval->chr = char_esc(yytext[1]); return LITCHAR; } <STRLIT,QSILIT,WLIT,QWLIT>{WS}[\\]\n{WS} { - lineno++; + yyextra->lineno++; } <STRLIT,QSILIT,WLIT,QWLIT>[\\](x{HEX}+|{OCT}+);? { - yylval.chr = num_esc(yytext+1); + yylval->chr = num_esc(yyscanner, yytext+1); return LITCHAR; } <STRLIT,QSILIT,WLIT,QWLIT>[\\]. 
{ - yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); + yyerrorf(yyscanner, lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } <CHRLIT>(x{HEX}+|o{OCT}+) { - yylval.chr = num_esc(yytext); + yylval->chr = num_esc(yyscanner, yytext); return LITCHAR; } <CHRLIT>{SYM} { - yylval.lexeme = utf8_dup_from(yytext); + yylval->lexeme = utf8_dup_from(yytext); return SYMTOK; } <CHRLIT>[^ \t\n] { - yylval.lexeme = utf8_dup_from(yytext); + yylval->lexeme = utf8_dup_from(yytext); return SYMTOK; /* hack */ } <STRLIT>\n { yyerrprepf(lit("newline in string literal"), nao); - lineno++; - yylval.chr = yytext[0]; + yyextra->lineno++; + yylval->chr = yytext[0]; return ERRTOK; } <CHRLIT>\n { yyerrprepf(lit("newline in character literal"), nao); - lineno++; - yylval.chr = yytext[0]; + yyextra->lineno++; + yylval->chr = yytext[0]; return ERRTOK; } <QSILIT>\n { yyerrprepf(lit("newline in string quasiliteral"), nao); - lineno++; - yylval.chr = yytext[0]; + yyextra->lineno++; + yylval->chr = yytext[0]; return ERRTOK; } <WLIT,QWLIT>\n { - lineno++; + yyextra->lineno++; return ' '; } <QSILIT,QWLIT>@ { - yy_push_state(QSPECIAL); + yy_push_state(QSPECIAL, yyscanner); } <WLIT,QWLIT>{WS} { @@ -896,7 +899,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); - yylval.chr = buf[0]; + yylval->chr = buf[0]; return LITCHAR; } @@ -908,27 +911,31 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} %% -void end_of_regex(void) +void end_of_regex(yyscan_t yyscanner) { + struct yyguts_t *yyg = (struct yyguts_t *) yyscanner; + if (YYSTATE != REGEX) internal_error("end_of_regex called in wrong scanner state"); - yy_pop_state(); + yy_pop_state(yyscanner); if (YYSTATE != INITIAL) { - if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT - || yy_top_state() == QWLIT) - yy_pop_state(); + if (yy_top_state(yyscanner) == INITIAL + || yy_top_state(yyscanner) == QSILIT + || yy_top_state(yyscanner) == QWLIT) + yy_pop_state(yyscanner); 
} } -void end_of_char(void) +void end_of_char(yyscan_t yyscanner) { + struct yyguts_t *yyg = (struct yyguts_t *) yyscanner; + if (YYSTATE != CHRLIT) internal_error("end_of_char called in wrong scanner state"); - yy_pop_state(); + yy_pop_state(yyscanner); } val source_loc(val form) @@ -946,26 +953,22 @@ val source_loc_str(val form) void parse_init(void) { - protect(&yyin_stream, &prepared_error_message, - &form_to_ln_hash, (val *) 0); - + prot1(&form_to_ln_hash); form_to_ln_hash = make_hash(t, nil, nil); } -void parse_reset(val spec_file) +void open_txr_file(val spec_file, val *name, val *stream) { - errors = 0; - lineno = 1; - spec_file_str = spec_file; { - FILE *in = w_fopen(c_str(spec_file_str), L"r"); + FILE *in = w_fopen(c_str(spec_file), L"r"); if (in == 0) { - spec_file_str = cat_str(list(spec_file_str, lit("txr"), nao), lit(".")); - in = w_fopen(c_str(spec_file_str), L"r"); + spec_file = cat_str(list(spec_file, lit("txr"), nao), lit(".")); + in = w_fopen(c_str(spec_file), L"r"); if (in == 0) uw_throwf(file_error_s, lit("unable to open ~a"), spec_file, nao); } - yyin_stream = make_stdio_stream(in, spec_file_str); + *stream = make_stdio_stream(in, spec_file); + *name = spec_file; } } @@ -974,22 +977,22 @@ val regex_parse(val string, val error_stream) uses_or2; val parse_string = cat_str(list(lit("@\x01R"), string, nao), nil); val save_stream = std_error; - yyin_stream = make_string_byte_input_stream(parse_string); - errors = 0; - lineno = 1; + val stream = make_string_byte_input_stream(parse_string); + parser_t parser; + error_stream = default_bool_arg(error_stream); std_error = if3(error_stream == t, std_output, or2(error_stream, std_null)); + { int gc = gc_state(0); - spec_file_str = if3(std_error != std_null, - format(nil, lit("regex --> ~a"), string, nao), - lit("")); - yyparse(); - yylex_destroy(); + val name = if3(std_error != std_null, + format(nil, lit("regex --> ~a"), string, nao), + lit("")); + parse(stream, name, &parser); gc_state(gc); } std_error 
= save_stream; - return errors ? nil : get_spec(); + return parser.errors ? nil : parser.syntax_tree; } val lisp_parse(val source_in, val error_stream) @@ -1004,18 +1007,17 @@ val lisp_parse(val source_in, val error_stream) format(nil, lit("expr --> ~a"), source, nao), stream_get_prop(input_stream, name_k)); val save_stream = std_error; - yyin_stream = make_catenated_stream(list(secret_token_stream, input_stream, nao)); - errors = 0; - lineno = 1; + val stream = make_catenated_stream(list(secret_token_stream, input_stream, nao)); + parser_t parser; + error_stream = default_bool_arg(error_stream); std_error = if3(error_stream == t, std_output, or2(error_stream, std_null)); { int gc = gc_state(0); - spec_file_str = if3(std_error != std_null, name, lit("")); - yyparse(); - yylex_destroy(); + name = if3(std_error != std_null, name, lit("")); + parse(stream, name, &parser); gc_state(gc); } std_error = save_stream; - return errors ? nil : get_spec(); + return parser.errors ? nil : parser.syntax_tree; } @@ -47,24 +47,29 @@ #include "stream.h" #include "parser.h" -int yylex(void); -void yyerror(const char *); - -static val sym_helper(wchar_t *lexeme, val meta_allowed); +static val sym_helper(void *scnr, wchar_t *lexeme, val meta_allowed); static val repeat_rep_helper(val sym, val args, val main, val parts); static val o_elems_transform(val output_form); -static val define_transform(val define_form); +static val define_transform(void *scnr, val define_form); static val lit_char_helper(val litchars); static val optimize_text(val text_form); static val unquotes_occur(val quoted_form, int level); static val expand_meta(val form, val menv); +static val rlrec(parser_t *, val form, val line); static wchar_t char_from_name(const wchar_t *name); -static val make_expr(val sym, val rest, val lineno); +static val make_expr(parser_t *, val sym, val rest, val lineno); -static val parsed_spec; +#define rl(form, line) rlrec(parser, form, line) +#define mkexp(sym, rest, lineno) 
make_expr(parser, sym, rest, lineno) +#define symhlpr(lexeme, meta_allowed) sym_helper(scnr, lexeme, meta_allowed) %} +%pure-parser +%parse-param{parser_t *parser} +%parse-param{void *scnr} +%lex-param{void *scnr} + %union { wchar_t *lexeme; union obj *val; @@ -121,12 +126,12 @@ static val parsed_spec; %% -spec : clauses { parsed_spec = $1; } - | /* empty */ { parsed_spec = nil; } - | SECRET_ESCAPE_R regexpr { parsed_spec = $2; end_of_regex(); } - | SECRET_ESCAPE_E n_expr { parsed_spec = $2; YYACCEPT; } - | error '\n' { parsed_spec = nil; - if (errors >= 8) +spec : clauses { parser->syntax_tree = $1; } + | /* empty */ { parser->syntax_tree = nil; } + | SECRET_ESCAPE_R regexpr { parser->syntax_tree = $2; end_of_regex(scnr); } + | SECRET_ESCAPE_E n_expr { parser->syntax_tree = $2; YYACCEPT; } + | error '\n' { parser->syntax_tree = nil; + if (parser->errors >= 8) YYABORT; yyerrok; yybadtoken(yychar, nil); } @@ -150,7 +155,7 @@ clause : all_clause { $$ = cons($1, nil); rlcp($$, $1); } | choose_clause { $$ = cons($1, nil); rlcp($$, $1); } | collect_clause { $$ = cons($1, nil); rlcp($$, $1); } | gather_clause { $$ = cons($1, nil); rlcp($$, $1); } - | define_clause { $$ = list(define_transform($1), nao); + | define_clause { $$ = list(define_transform(scnr, $1), nao); rlcp(car($$), $1); rlcp($$, $1); } | try_clause { $$ = cons($1, nil); rlcp($$, $1); } @@ -165,7 +170,7 @@ all_clause : ALL newl clause_parts { $$ = list(all_s, $3, nao); yybadtoken(yychar, lit("all clause")); } | ALL newl END newl { $$ = nil; - yyerror("empty all clause"); } + yyerr(scnr, "empty all clause"); } ; @@ -179,7 +184,7 @@ some_clause : SOME exprs_opt ')' lit("some clause")); } | SOME exprs_opt ')' newl END newl { $$ = nil; - yyerror("empty some clause"); } + yyerr(scnr, "empty some clause"); } ; none_clause : NONE newl clause_parts { $$ = list(none_s, $3, nao); @@ -188,7 +193,7 @@ none_clause : NONE newl clause_parts { $$ = list(none_s, $3, nao); yybadtoken(yychar, lit("none clause")); } | NONE 
newl END newl { $$ = nil; - yyerror("empty none clause"); } + yyerr(scnr, "empty none clause"); } ; maybe_clause : MAYBE newl clause_parts { $$ = list(maybe_s, $3, nao); @@ -197,7 +202,7 @@ maybe_clause : MAYBE newl clause_parts { $$ = list(maybe_s, $3, nao); yybadtoken(yychar, lit("maybe clause")); } | MAYBE newl END newl { $$ = nil; - yyerror("empty maybe clause"); } + yyerr(scnr, "empty maybe clause"); } ; cases_clause : CASES newl clause_parts { $$ = list(cases_s, $3, nao); @@ -206,16 +211,17 @@ cases_clause : CASES newl clause_parts { $$ = list(cases_s, $3, nao); yybadtoken(yychar, lit("cases clause")); } | CASES newl END newl { $$ = nil; - yyerror("empty cases clause"); } + yyerr(scnr, "empty cases clause"); } ; block_clause : BLOCK exprs_opt ')' newl clauses_opt END newl { val name = first($2); if (gt(length($2), one)) - yyerror("block: takes zero or no arguments"); + yyerr(scnr, "block: takes zero or no arguments"); if (name && !bindable(name)) - yyerrorf(lit("block: ~s is not a bindable symbol"), + yyerrorf(scnr, + lit("block: ~s is not a bindable symbol"), name, nao); $$ = list(block_s, name, $5, nao); rl($$, num($1)); } @@ -234,7 +240,7 @@ choose_clause : CHOOSE exprs_opt ')' lit("choose clause")); } | CHOOSE exprs_opt ')' newl END newl { $$ = nil; - yyerror("empty choose clause"); } + yyerr(scnr, "empty choose clause"); } ; gather_clause : GATHER exprs_opt ')' @@ -261,7 +267,7 @@ gather_clause : GATHER exprs_opt ')' lit("gather clause")); } | GATHER exprs_opt ')' newl END newl { $$ = nil; - yyerror("empty gather clause"); } + yyerr(scnr, "empty gather clause"); } ; gather_parts : clauses additional_gather_parts { $$ = cons($1, $2); } @@ -289,7 +295,7 @@ collect_clause : collect_repeat exprs_opt ')' newl if (yychar == UNTIL || yychar == END || yychar == LAST) - yyerror("empty collect"); + yyerr(scnr, "empty collect"); else yybadtoken(yychar, lit("collect clause")); } @@ -349,22 +355,22 @@ elems : elem { $$ = cons($1, nil); | elem elems { $$ = cons($1, 
$2); rlcp($$, $1); } | rep_elem { $$ = nil; - yyerror("rep outside of output"); } + yyerr(scnr, "rep outside of output"); } ; -text : TEXT { $$ = rl(string_own($1), num(lineno)); } +text : TEXT { $$ = rl(string_own($1), num(parser->lineno)); } | SPACE { if ($1[0] == ' ' && $1[1] == 0) { val spaces = list(oneplus_s, chr(' '), nao); $$ = cons(regex_compile(spaces, nil), spaces); - rl($$, num(lineno)); + rl($$, num(parser->lineno)); free($1); } else - { $$ = rl(string_own($1), num(lineno)); }} + { $$ = rl(string_own($1), num(parser->lineno)); }} | regex { $$ = cons(regex_compile(rest($1), nil), rest($1)); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | EMPTY { $$ = null_string; } ; @@ -374,7 +380,7 @@ texts : text %prec LOW { $$ = rlcp(cons($1, nil), $1); } elem : texts { $$ = rlcp(cons(text_s, $1), $1); $$ = rlcp(optimize_text($$), $$); } - | var { $$ = rl($1, num(lineno)); } + | var { $$ = rl($1, num(parser->lineno)); } | list { val sym = first($1); if (sym == do_s || sym == require_s) $$ = rlcp(cons(sym, @@ -394,20 +400,20 @@ elem : texts { $$ = rlcp(cons(text_s, $1), $1); | COLL error { $$ = nil; yybadtoken(yychar, lit("coll clause")); } | ALL clause_parts_h { $$ = rl(list(all_s, t, $2, nao), num($1)); } - | ALL END { yyerror("empty all clause"); } + | ALL END { yyerr(scnr, "empty all clause"); } | SOME exprs_opt ')' clause_parts_h { $$ = rl(list(some_s, t, $4, $2, nao), num($1)); } - | SOME exprs_opt ')' END { yyerror("empty some clause"); } + | SOME exprs_opt ')' END { yyerr(scnr, "empty some clause"); } | NONE clause_parts_h { $$ = rl(list(none_s, t, $2, nao), num($1)); } - | NONE END { yyerror("empty none clause"); } + | NONE END { yyerr(scnr, "empty none clause"); } | MAYBE clause_parts_h { $$ = rl(list(maybe_s, t, $2, nao), num($1)); } - | MAYBE END { yyerror("empty maybe clause"); } + | MAYBE END { yyerr(scnr, "empty maybe clause"); } | CASES clause_parts_h { $$ = rl(list(cases_s, t, $2, nao), num($1)); } - | CASES END { yyerror("empty cases 
clause"); } + | CASES END { yyerr(scnr, "empty cases clause"); } | CHOOSE exprs_opt ')' clause_parts_h { $$ = list(choose_s, t, $4, $2, nao); rl($$, num($1)); } - | CHOOSE exprs_opt ')' END { yyerror("empty cases clause"); } + | CHOOSE exprs_opt ')' END { yyerr(scnr, "empty cases clause"); } | DEFINE exprs ')' elems END { $$ = list(define_s, t, $4, $2, nao); rl($$, num($1)); } @@ -450,7 +456,7 @@ try_clause : TRY newl error { $$ = nil; if (yychar == END || yychar == CATCH || yychar == FINALLY) - yyerror("empty try clause"); + yyerr(scnr, "empty try clause"); else yybadtoken(yychar, lit("try clause")); } | TRY newl @@ -493,7 +499,7 @@ catch_clauses_opt : CATCH ')' newl output_clause : OUTPUT ')' o_elems '\n' out_clauses END newl { $$ = nil; - yyerror("obsolete output syntax: trailing material"); } + yyerr(scnr, "obsolete output syntax: trailing material"); } | OUTPUT ')' newl END newl { $$ = rl(list(output_s, nao), num($1)); } | OUTPUT ')' newl @@ -507,8 +513,8 @@ output_clause : OUTPUT ')' o_elems '\n' | OUTPUT exprs ')' o_elems '\n' out_clauses END newl { $$ = nil; - yyerror("invalid combination of old and " - "new syntax in output directive"); } + yyerr(scnr, "invalid combination of old and " + "new syntax in output directive"); } | OUTPUT error { $$ = nil; yybadtoken(yychar, lit("list expression")); } | OUTPUT ')' o_elems '\n' @@ -583,7 +589,7 @@ o_line : o_elems_opt '\n' { $$ = $1; } ; o_elems_opt : o_elems { $$ = o_elems_transform($1); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | { $$ = nil; } ; @@ -594,9 +600,9 @@ o_elems : o_elem { $$ = cons($1, nil); } ; o_elem : TEXT { $$ = string_own($1); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | SPACE { $$ = string_own($1); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | o_var { $$ = $1; } | list { $$ = rlcp(cons(expr_s, expand($1, nil)), $1); } @@ -642,33 +648,33 @@ rep_parts_opt : SINGLE o_elems_opt /* This sucks, but factoring '*' into a nonterminal * that generates an 
empty phrase causes reduce/reduce conflicts. */ -var : SYMTOK { $$ = list(var_s, sym_helper($1, nil), nao); } - | SYMTOK elem { $$ = list(var_s, sym_helper($1, nil), +var : SYMTOK { $$ = list(var_s, symhlpr($1, nil), nao); } + | SYMTOK elem { $$ = list(var_s, symhlpr($1, nil), $2, nao); } - | '{' SYMTOK '}' { $$ = list(var_s, sym_helper($2, nil), nao); } - | '{' SYMTOK '}' elem { $$ = list(var_s, sym_helper($2, nil), + | '{' SYMTOK '}' { $$ = list(var_s, symhlpr($2, nil), nao); } + | '{' SYMTOK '}' elem { $$ = list(var_s, symhlpr($2, nil), $4, nao); } - | '{' SYMTOK modifiers '}' { $$ = list(var_s, sym_helper($2, nil), + | '{' SYMTOK modifiers '}' { $$ = list(var_s, symhlpr($2, nil), nil, $3, nao); } | '{' SYMTOK modifiers '}' elem - { $$ = list(var_s, sym_helper($2, nil), + { $$ = list(var_s, symhlpr($2, nil), $5, $3, nao); } - | var_op SYMTOK { $$ = list(var_s, sym_helper($2, nil), + | var_op SYMTOK { $$ = list(var_s, symhlpr($2, nil), nil, $1, nao); } - | var_op SYMTOK elem { $$ = list(var_s, sym_helper($2, nil), + | var_op SYMTOK elem { $$ = list(var_s, symhlpr($2, nil), $3, $1, nao); } - | var_op '{' SYMTOK '}' { $$ = list(var_s, sym_helper($3, nil), + | var_op '{' SYMTOK '}' { $$ = list(var_s, symhlpr($3, nil), nil, $1, nao); } | var_op '{' SYMTOK '}' elem - { $$ = list(var_s, sym_helper($3, nil), + { $$ = list(var_s, symhlpr($3, nil), $5, $1, nao); } | var_op '{' SYMTOK regex '}' { $$ = nil; - yyerror("longest match " - "not useable with regex"); } + yyerr(scnr, "longest match " + "not useable with regex"); } | var_op '{' SYMTOK NUMBER '}' { $$ = nil; - yyerror("longest match " - "not useable with " - "fixed width match"); } + yyerr(scnr, "longest match " + "not useable with " + "fixed width match"); } | SYMTOK error { $$ = nil; yybadtoken(yychar, lit("variable spec")); } | var_op error { $$ = nil; @@ -686,32 +692,32 @@ modifiers : NUMBER { $$ = cons($1, nil); } nil), $1); } ; -o_var : SYMTOK { $$ = list(var_s, sym_helper($1, nil), nao); - rl($$, 
num(lineno)); } - | SYMTOK o_elem { $$ = list(var_s, sym_helper($1, nil), +o_var : SYMTOK { $$ = list(var_s, symhlpr($1, nil), nao); + rl($$, num(parser->lineno)); } + | SYMTOK o_elem { $$ = list(var_s, symhlpr($1, nil), $2, nao); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | '{' expr exprs_opt '}' { $$ = list(var_s, $2, nil, $3, nao); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | '{' expr exprs_opt '}' o_elem { $$ = list(var_s, $2, $5, $3, nao); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | SYMTOK error { $$ = nil; yybadtoken(yychar, lit("variable spec")); } ; -q_var : SYMTOK { $$ = list(var_s, sym_helper($1, nil), nao); - rl($$, num(lineno)); } - | SYMTOK quasi_item { $$ = list(var_s, sym_helper($1, nil), +q_var : SYMTOK { $$ = list(var_s, symhlpr($1, nil), nao); + rl($$, num(parser->lineno)); } + | SYMTOK quasi_item { $$ = list(var_s, symhlpr($1, nil), $2, nao); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | '{' n_expr n_exprs_opt '}' { $$ = list(var_s, $2, nil, $3, nao); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | '{' n_expr n_exprs_opt '}' quasi_item { $$ = list(var_s, $2, $5, $3, nao); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | SYMTOK error { $$ = nil; yybadtoken(yychar, lit("variable spec")); } ; @@ -778,9 +784,9 @@ n_exprs : n_expr { $$ = rlcp(cons($1, nil), $1); } n_exprs { $$ = nappend2(rl($2, num($1)), $3); } ; -n_expr : SYMTOK { $$ = sym_helper($1, t); } +n_expr : SYMTOK { $$ = symhlpr($1, t); } | METANUM { $$ = cons(var_s, cons($1, nil)); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | NUMBER { $$ = $1; } | list { $$ = $1; } | vector { $$ = $1; } @@ -803,20 +809,20 @@ n_exprs_opt : n_exprs { $$ = $1; } | /* empty */ { $$ = nil; } ; -regex : '/' regexpr '/' { $$ = cons(regex_s, $2); end_of_regex(); - rl($$, num(lineno)); } +regex : '/' regexpr '/' { $$ = cons(regex_s, $2); end_of_regex(scnr); + rl($$, num(parser->lineno)); } | '/' error { $$ = nil; 
yybadtoken(yychar, lit("regex")); - end_of_regex(); } + end_of_regex(scnr); } ; lisp_regex : HASH_SLASH regexpr '/' - { $$ = cons(regex_s, $2); end_of_regex(); - rl($$, num(lineno)); } + { $$ = cons(regex_s, $2); end_of_regex(scnr); + rl($$, num(parser->lineno)); } | HASH_SLASH error { $$ = nil; yybadtoken(yychar, lit("regex")); - end_of_regex(); } + end_of_regex(scnr); } ; regexpr : regbranch { $$ = if3(cdr($1), @@ -899,13 +905,13 @@ regtoken : REGTOKEN { switch ($1) $$ = cword_char_k; break; }} newl : '\n' - | error '\n' { yyerror("newline expected after directive"); + | error '\n' { yyerr(scnr, "newline expected after directive"); yyerrok; } ; strlit : '"' '"' { $$ = null_string; } | '"' litchars '"' { $$ = lit_char_helper($2); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | '"' error { $$ = nil; yybadtoken(yychar, lit("string literal")); } ; @@ -919,12 +925,12 @@ chrlit : HASH_BACKSLASH SYMTOK { wchar_t ch; else { ch = char_from_name(cstr); if (ch == L'!') - { yyerrorf(lit("unknown character name: ~a"), + { yyerrorf(scnr, lit("unknown character name: ~a"), str, nao); }} - end_of_char(); + end_of_char(scnr); $$ = chr(ch); } | HASH_BACKSLASH LITCHAR { $$ = chr($2); - end_of_char(); } + end_of_char(scnr); } | HASH_BACKSLASH error { $$ = nil; yybadtoken(yychar, lit("character literal")); } @@ -933,29 +939,29 @@ chrlit : HASH_BACKSLASH SYMTOK { wchar_t ch; quasilit : '`' '`' { $$ = null_string; } | '`' quasi_items '`' { $$ = cons(quasi_s, o_elems_transform($2)); rlcp($$, $2); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | '`' error { $$ = nil; yybadtoken(yychar, lit("quasistring")); } ; quasi_items : quasi_item { $$ = cons($1, nil); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | quasi_item quasi_items { $$ = cons($1, $2); - rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } ; quasi_item : litchars { $$ = lit_char_helper($1); } | TEXT { $$ = string_own($1); } | q_var { $$ = $1; } | METANUM { $$ = cons(var_s, cons($1, nil)); 
- rl($$, num(lineno)); } + rl($$, num(parser->lineno)); } | list { $$ = rlcp(cons(expr_s, $1), $1); } | ',' n_expr { $$ = rlcp(cons(expr_s, list(sys_unquote_s, $2, nao)), $2); } | SPLICE n_expr { $$ = rlcp(cons(expr_s, list(sys_splice_s, $2, nao)), $2); } ; -litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(lineno)); } - | LITCHAR litchars { $$ = rl(cons(chr($1), $2), num(lineno)); } +litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(parser->lineno)); } + | LITCHAR litchars { $$ = rl(cons(chr($1), $2), num(parser->lineno)); } ; wordslit : '"' { $$ = nil; } @@ -978,49 +984,51 @@ wordsqlit : '`' { $$ = nil; } $$ = rlcp(cons(qword, $3), $1); } ; -not_a_clause : ALL { $$ = make_expr(all_s, nil, num(lineno)); } - | SOME { $$ = make_expr(some_s, nil, num(lineno)); } - | NONE { $$ = make_expr(none_s, nil, num(lineno)); } - | MAYBE { $$ = make_expr(maybe_s, nil, num(lineno)); } - | CASES { $$ = make_expr(cases_s, nil, num(lineno)); } - | AND { $$ = make_expr(and_s, nil, num(lineno)); } - | OR { $$ = make_expr(or_s, nil, num(lineno)); } - | TRY { $$ = make_expr(try_s, nil, num(lineno)); } - | FINALLY { $$ = make_expr(finally_s, nil, num(lineno)); } - | ELSE { $$ = make_expr(intern(lit("else"), nil), - nil, num(lineno)); } - | ELIF { $$ = make_expr(intern(lit("elif"), nil), - nil, num(lineno)); } +not_a_clause : ALL { $$ = mkexp(all_s, nil, num(parser->lineno)); } + | SOME { $$ = mkexp(some_s, nil, num(parser->lineno)); } + | NONE { $$ = mkexp(none_s, nil, num(parser->lineno)); } + | MAYBE { $$ = mkexp(maybe_s, nil, num(parser->lineno)); } + | CASES { $$ = mkexp(cases_s, nil, num(parser->lineno)); } + | AND { $$ = mkexp(and_s, nil, num(parser->lineno)); } + | OR { $$ = mkexp(or_s, nil, num(parser->lineno)); } + | TRY { $$ = mkexp(try_s, nil, num(parser->lineno)); } + | FINALLY { $$ = mkexp(finally_s, nil, num(parser->lineno)); } + | ELSE { $$ = mkexp(intern(lit("else"), nil), + nil, num(parser->lineno)); } + | ELIF { $$ = mkexp(intern(lit("elif"), nil), + nil, 
num(parser->lineno)); } | BLOCK - exprs_opt ')' { $$ = make_expr(block_s, $2, nil); } + exprs_opt ')' { $$ = mkexp(block_s, $2, nil); } | CHOOSE - exprs_opt ')' { $$ = make_expr(choose_s, $2, nil); } + exprs_opt ')' { $$ = mkexp(choose_s, $2, nil); } | COLLECT - exprs_opt ')' { $$ = make_expr(collect_s, $2, nil); } + exprs_opt ')' { $$ = mkexp(collect_s, $2, nil); } | COLL - exprs_opt ')' { $$ = make_expr(coll_s, $2, nil); } + exprs_opt ')' { $$ = mkexp(coll_s, $2, nil); } | GATHER - exprs_opt ')' { $$ = make_expr(gather_s, $2, nil); } + exprs_opt ')' { $$ = mkexp(gather_s, $2, nil); } | DEFINE - exprs_opt ')' { $$ = make_expr(define_s, $2, nil); } + exprs_opt ')' { $$ = mkexp(define_s, $2, nil); } | CATCH - exprs_opt ')' { $$ = make_expr(catch_s, $2, nil); } + exprs_opt ')' { $$ = mkexp(catch_s, $2, nil); } | IF - exprs_opt ')' { $$ = make_expr(intern(lit("if"), nil), + exprs_opt ')' { $$ = mkexp(intern(lit("if"), nil), $2, nil); } | OUTPUT - exprs_opt ')' { yyerror("@(output) doesn't nest"); } + exprs_opt ')' { yyerr(scnr, "@(output) doesn't nest"); } ; %% +int yylex(YYSTYPE *, void *scanner); + /* C99 inline instantiations. 
*/ #if __STDC_VERSION__ >= 199901L val rlcp(val to, val from); #endif -static val sym_helper(wchar_t *lexeme, val meta_allowed) +static val sym_helper(void *scnr, wchar_t *lexeme, val meta_allowed) { int leading_at = *lexeme == L'@'; wchar_t *tokfree = lexeme; @@ -1030,7 +1038,7 @@ static val sym_helper(wchar_t *lexeme, val meta_allowed) if (leading_at) { if (!meta_allowed) { val tok = string_own(lexeme); - yyerrorf(lit("~a: meta variable not allowed in this context"), tok, nao); + yyerrorf(scnr, lit("~a: meta variable not allowed in this context"), tok, nao); return nil; } lexeme++; @@ -1049,7 +1057,7 @@ static val sym_helper(wchar_t *lexeme, val meta_allowed) sym_name = string(colon + 1); free(tokfree); if (!package) { - yyerrorf(lit("~a:~a: package ~a not found"), pkg_name, sym_name, pkg_name, nao); + yyerrorf(scnr, lit("~a:~a: package ~a not found"), pkg_name, sym_name, pkg_name, nao); return nil; } } else { @@ -1139,7 +1147,7 @@ static val o_elems_transform(val o_elems) return rlcp(o_elems_out, o_elems); } -static val define_transform(val define_form) +static val define_transform(void *scnr, val define_form) { val sym = first(define_form); val args = second(define_form); @@ -1150,29 +1158,29 @@ static val define_transform(val define_form) assert (sym == define_s); if (args == nil) { - yyerror("define requires arguments"); + yyerr(scnr, "define requires arguments"); return define_form; } if (!consp(args) || !listp(cdr(args))) { - yyerror("bad define argument syntax"); + yyerr(scnr, "bad define argument syntax"); return define_form; } else { val name = first(args); val params = second(args); if (!symbolp(name)) { - yyerror("function name must be a symbol"); + yyerr(scnr, "function name must be a symbol"); return define_form; } if (!proper_listp(params)) { - yyerror("invalid function parameter list"); + yyerr(scnr, "invalid function parameter list"); return define_form; } if (!all_satisfy(params, func_n1(symbolp), nil)) - yyerror("function parameters must be 
symbols"); + yyerr(scnr, "function parameters must be symbols"); } return define_form; @@ -1268,9 +1276,9 @@ val rlset(val form, val info) return form; } -val rl(val form, val lineno) +val rlrec(parser_t *parser, val form, val line) { - rlset(form, cons(lineno, spec_file_str)); + rlset(form, cons(line, parser->name)); return form; } @@ -1316,7 +1324,7 @@ static wchar_t char_from_name(const wchar_t *name) return L'!'; /* code meaning not found */ } -static val make_expr(val sym, val rest, val lineno) +static val make_expr(parser_t *parser, val sym, val rest, val lineno) { val expr = cons(sym, rest); val ret = cons(expr_s, expr); @@ -1332,11 +1340,6 @@ static val make_expr(val sym, val rest, val lineno) return ret; } -val get_spec(void) -{ - return parsed_spec; -} - #ifndef YYEOF #define YYEOF 0 #endif @@ -1413,3 +1416,24 @@ void yybadtoken(int tok, val context) else yyerrorf(lit("unexpected ~s"), chr(tok), nao); } + +int parse(val stream, val name, parser_t *parser) +{ + int res; + + parser->lineno = 1; + parser->errors = 0; + parser->stream = stream; + parser->name = name; + parser->prepared_msg = nil; + parser->syntax_tree = nil; + yylex_init(&parser->scanner); + + yyset_extra(parser, parser->scanner); + + res = yyparse(parser, parser->scanner); + + yylex_destroy(parser->scanner); + + return res; +} @@ -301,17 +301,14 @@ int txr_main(int argc, char **argv) val spec_file = nil; val bindings = nil; val evaled = nil; + val spec_file_str; int match_loglevel = opt_loglevel; val arg_undo = nil, arg; + val parse_stream = std_input; list_collect_decl(arg_list, arg_tail); - prot1(&spec_file_str); - setvbuf(stderr, 0, _IOLBF, 0); - - yyin_stream = std_input; - if (argc <= 1) { hint(); return EXIT_FAILURE; @@ -519,7 +516,7 @@ int txr_main(int argc, char **argv) if (gt(length_str(specstring), zero) && chr_str(specstring, minus(length_str(specstring), one)) != chr('\n')) specstring = cat_str(list(specstring, string(L"\n"), nao), nil); - yyin_stream = 
make_string_byte_input_stream(specstring); + parse_stream = make_string_byte_input_stream(specstring); if (arg) arg_list = arg_undo; } else if (spec_file) { @@ -527,7 +524,7 @@ int txr_main(int argc, char **argv) FILE *in = w_fopen(c_str(spec_file), L"r"); if (in == 0) uw_throwf(file_error_s, lit("unable to open ~a"), spec_file, nao); - yyin_stream = make_stdio_stream(in, spec_file); + parse_stream = make_stdio_stream(in, spec_file); spec_file_str = spec_file; } else { spec_file_str = lit("stdin"); @@ -546,7 +543,7 @@ int txr_main(int argc, char **argv) FILE *in = w_fopen(c_str(arg), L"r"); if (in == 0) uw_throwf(file_error_s, lit("unable to open ~a"), arg, nao); - yyin_stream = make_stdio_stream(in, arg); + parse_stream = make_stdio_stream(in, arg); spec_file_str = arg; } else { spec_file_str = lit("stdin"); @@ -557,14 +554,14 @@ int txr_main(int argc, char **argv) { int gc = gc_state(0); - yyparse(); - yylex_destroy(); + parser_t parser; + parse(parse_stream, spec_file_str, &parser); gc_state(gc); - if (errors) + if (parser.errors) return EXIT_FAILURE; - spec = remove_hash_bang_line(get_spec()); + spec = remove_hash_bang_line(parser.syntax_tree); opt_loglevel = match_loglevel; @@ -577,7 +574,7 @@ int txr_main(int argc, char **argv) { int retval = extract(spec, arg_list, bindings); - return errors ? EXIT_FAILURE : retval; + return parser.errors ? EXIT_FAILURE : retval; } } } |