diff options
-rw-r--r-- | ChangeLog | 26 | +26 -0
-rw-r--r-- | parser.l | 94 | +32 -62
-rw-r--r-- | parser.y | 73 | +72 -1
-rw-r--r-- | stream.c | 12 | +10 -2
-rw-r--r-- | stream.h | 1 | +1 -0
5 files changed, 141 insertions, 65 deletions
@@ -1,5 +1,31 @@ 2011-10-26 Kaz Kylheku <kaz@kylheku.com> + Parse error handling improvements. + + * parser.l (prepared_error_message): New static variable. + (yyerror): Emit and clear prepared error message. + (yyerrprepf): New static function. + (yybadtoken): Function moved into parser.y. + (grammar): For irrecoverable lexical errors, stash error message + with yyerrprepf and return the special error token ERRTOK to generate a + syntax error. I could find no other interface to the parser to make it + cleanly exit. + + * parser.y (ERRTOK): New terminal symbol, does not appear anywhere + in the grammar. + (spec): Bail after 8 errors, recover to nearest newline, and + use yyerrok to clear error situation. + (YYEOF): Provided by Bison, conditionally defined for other yacc-s. + (yybadtoken): Function moved from parser.l. Checks for the next + token being YYEMPTY or YYEOF, and also handles ERRTOK. + + * stream.c (vformat_to_string): New function. + (format): If stream is nil, format to string and return it. + + * stream.h (vformat_to_string): Declared. + +2011-10-26 Kaz Kylheku <kaz@kylheku.com> + * match.c (v_cat): Bugfix: unterminated variable argument list. * tests/001/query-3.txr: Updated to new cat syntax. @@ -68,9 +68,15 @@ int opt_arraydims = 1; int errors; +static val prepared_error_message; + void yyerror(const char *s) { yyerrorf(lit("~a"), string_utf8(s), nao); + if (prepared_error_message) { + yyerrorf(lit("~a"), prepared_error_message, nao); + prepared_error_message = nil; + } } void yyerrorf(val fmt, ...) @@ -87,55 +93,14 @@ void yyerrorf(val fmt, ...) errors++; } -void yybadtoken(int tok, val context) +static void yyerrprepf(val fmt, ...) 
{ - val problem = nil; - - switch (tok) { - case SPACE: problem = lit("space"); break; - case TEXT: problem = lit("text"); break; - case IDENT: problem = lit("identifier"); break; - case KEYWORD: problem = lit("keyword"); break; - case METAVAR: problem = lit("metavar"); break; - case ALL: problem = lit("\"all\""); break; - case SOME: problem = lit("\"some\""); break; - case NONE: problem = lit("\"none\""); break; - case MAYBE: problem = lit("\"maybe\""); break; - case CASES: problem = lit("\"cases\""); break; - case CHOOSE: problem = lit("\"choose\""); break; - case AND: problem = lit("\"and\""); break; - case OR: problem = lit("\"or\""); break; - case END: problem = lit("\"end\""); break; - case COLLECT: problem = lit("\"collect\""); break; - case UNTIL: problem = lit("\"until\""); break; - case COLL: problem = lit("\"coll\""); break; - case OUTPUT: problem = lit("\"output\""); break; - case REPEAT: problem = lit("\"repeat\""); break; - case REP: problem = lit("\"rep\""); break; - case SINGLE: problem = lit("\"single\""); break; - case FIRST: problem = lit("\"first\""); break; - case LAST: problem = lit("\"last\""); break; - case EMPTY: problem = lit("\"empty\""); break; - case DEFINE: problem = lit("\"define\""); break; - case TRY: problem = lit("\"try\""); break; - case CATCH: problem = lit("\"catch\""); break; - case FINALLY: problem = lit("\"finally\""); break; - case NUMBER: problem = lit("\"number\""); break; - case REGCHAR: problem = lit("regular expression character"); break; - case LITCHAR: problem = lit("string literal character"); break; - case METAPAR: problem = lit("@("); break; + if (opt_loglevel >= 1) { + va_list vl; + va_start (vl, fmt); + prepared_error_message = vformat_to_string(fmt, vl); + va_end (vl); } - - if (problem != 0) - if (context) - yyerrorf(lit("misplaced ~a in ~a"), problem, context, nao); - else - yyerrorf(lit("unexpected ~a"), problem, nao); - else - if (context) - yyerrorf(lit("unterminated ~a"), context, nao); - else - 
yyerrorf(lit("unexpected end of input"), nao); } static wchar_t char_esc(int letter) @@ -444,14 +409,16 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <SPECIAL,NESTED>{UANYN} { - yyerrorf(lit("bad character in directive: '~a'"), - string_utf8(yytext), nao); + yyerrprepf(lit("bad character in directive: '~a'"), + string_utf8(yytext), nao); + return ERRTOK; } <SPECIAL,NESTED>. { - yyerrorf(lit("non-UTF-8 byte in directive: " - "'\\x~02x'"), + yyerrprepf(lit("non-UTF-8 byte in directive: " + "'\\x~02x'"), num((unsigned char) yytext[0]), nao); + return ERRTOK; } <REGEX>[/] { @@ -476,7 +443,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} <REGEX>\n { lineno++; - yyerror("newline in regex"); + yyerrprepf(lit("newline in regex"), nao); + return ERRTOK; } <REGEX>[.*?+~&%] { @@ -508,8 +476,9 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <REGEX>. { - yyerrorf(lit("non-UTF-8 byte in regex: '\\x~02x'"), - num((unsigned char) yytext[0]), nao); + yyerrprepf(lit("non-UTF-8 byte in regex: '\\x~02x'"), + num((unsigned char) yytext[0]), nao); + return ERRTOK; } <INITIAL>[ ]+ { @@ -574,24 +543,24 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return LITCHAR; } <STRLIT>\n { - yyerror("newline in string literal"); + yyerrprepf(lit("newline in string literal"), nao); lineno++; yylval.chr = yytext[0]; - return LITCHAR; + return ERRTOK; } <CHRLIT>\n { - yyerror("newline in character literal"); + yyerrprepf(lit("newline in character literal"), nao); lineno++; yylval.chr = yytext[0]; - return LITCHAR; + return ERRTOK; } <QSILIT>\n { - yyerror("newline in string quasiliteral"); + yyerrprepf(lit("newline in string quasiliteral"), nao); lineno++; yylval.chr = yytext[0]; - return LITCHAR; + return ERRTOK; } <QSILIT>@ { @@ -606,8 +575,9 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } <STRLIT,CHRLIT,QSILIT>. 
{ - yyerrorf(lit("non-UTF-8 byte in regex: '\\x~02x'"), - num((unsigned char) yytext[0]), nao); + yyerrprepf(lit("non-UTF-8 byte in regex: '\\x~02x'"), + num((unsigned char) yytext[0]), nao); + return ERRTOK; } %% @@ -63,6 +63,7 @@ static val parsed_spec; %token <lexeme> AND OR END COLLECT %token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token <lexeme> TRY CATCH FINALLY +%token <lexeme> ERRTOK %token <num> NUMBER @@ -99,8 +100,12 @@ static val parsed_spec; spec : clauses { parsed_spec = $1; } | /* empty */ { parsed_spec = nil; } - | error { parsed_spec = nil; + | error '\n' { parsed_spec = nil; + if (errors >= 8) + YYABORT; + yyerrok; yybadtoken(yychar, nil); } + ; clauses : clause { $$ = cons($1, nil); } @@ -778,3 +783,69 @@ val get_spec(void) { return parsed_spec; } + +#ifndef YYEOF +#define YYEOF YYEMPTY +#endif + +void yybadtoken(int tok, val context) +{ + val problem = nil; + + switch (tok) { + case ERRTOK: + return; + case SPACE: problem = lit("space"); break; + case TEXT: problem = lit("text"); break; + case IDENT: problem = lit("identifier"); break; + case KEYWORD: problem = lit("keyword"); break; + case METAVAR: problem = lit("metavar"); break; + case ALL: problem = lit("\"all\""); break; + case SOME: problem = lit("\"some\""); break; + case NONE: problem = lit("\"none\""); break; + case MAYBE: problem = lit("\"maybe\""); break; + case CASES: problem = lit("\"cases\""); break; + case CHOOSE: problem = lit("\"choose\""); break; + case AND: problem = lit("\"and\""); break; + case OR: problem = lit("\"or\""); break; + case END: problem = lit("\"end\""); break; + case COLLECT: problem = lit("\"collect\""); break; + case UNTIL: problem = lit("\"until\""); break; + case COLL: problem = lit("\"coll\""); break; + case OUTPUT: problem = lit("\"output\""); break; + case REPEAT: problem = lit("\"repeat\""); break; + case REP: problem = lit("\"rep\""); break; + case SINGLE: problem = lit("\"single\""); break; + case FIRST: problem = 
lit("\"first\""); break; + case LAST: problem = lit("\"last\""); break; + case EMPTY: problem = lit("\"empty\""); break; + case DEFINE: problem = lit("\"define\""); break; + case TRY: problem = lit("\"try\""); break; + case CATCH: problem = lit("\"catch\""); break; + case FINALLY: problem = lit("\"finally\""); break; + case NUMBER: problem = lit("\"number\""); break; + case REGCHAR: problem = lit("regular expression character"); break; + case LITCHAR: problem = lit("string literal character"); break; + case METAPAR: problem = lit("@("); break; + } + + if (problem != 0) + if (context) + yyerrorf(lit("misplaced ~a in ~a"), problem, context, nao); + else + yyerrorf(lit("unexpected ~a"), problem, nao); + else + if (context) + if (tok == YYEOF || tok == YYEMPTY) + yyerrorf(lit("unterminated ~a"), context, nao); + else + yyerrorf(lit("misplaced ~s in ~a"), chr(tok), context, nao); + else + if (tok == YYEOF) + yyerrorf(lit("unexpected end of input"), nao); + else if (tok == YYEMPTY) + return; + else + yyerrorf(lit("unexpected ~s"), chr(tok), nao); +} + @@ -998,8 +998,16 @@ toobig: internal_error("ridiculous precision or field width in format"); } +val vformat_to_string(val fmtstr, va_list vl) +{ + val stream = make_string_output_stream(); + (void) vformat(stream, fmtstr, vl); + return get_string_from_stream(stream); +} + val format(val stream, val str, ...) { + val st = or2(stream, make_string_output_stream()); type_check (stream, COBJ); type_assert (stream->co.cls == stream_s, (lit("~a is not a stream"), stream, nao)); @@ -1008,9 +1016,9 @@ val format(val stream, val str, ...) va_list vl; val ret; va_start (vl, str); - ret = vformat(stream, str, vl); + ret = vformat(st, str, vl); va_end (vl); - return ret; + return (stream) ? 
ret : get_string_from_stream(st); } } @@ -40,6 +40,7 @@ val get_line(val); val get_char(val); val get_byte(val); val vformat(val stream, val string, va_list); +val vformat_to_string(val string, va_list); val format(val stream, val string, ...); val put_string(val stream, val string); val put_line(val stream, val string); |