From f50d67df2057c2cd1d26668f47bc604338ce33f0 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku
Date: Wed, 13 Jan 2010 18:03:52 -0800
Subject: Bugfix: allow unescaped / to be used in regex character classes.

---
 ChangeLog | 22 ++++++++++++++++++++++
 parser.h  |  1 +
 parser.l  | 14 +++++++++-----
 parser.y  |  6 ++++--
 4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 00158574..116fc67d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2010-01-13 Kaz Kylheku
+
+	Bugfix: allow unescaped / to be used in regex character classes.
+
+	To do this, we no longer make the lexer look for the terminating
+	slash which ends the regex syntax. This is driven by the parser,
+	which calls a special function in the lexer to indicate that
+	the regex parsing is done.
+
+	* parser.h (end_of_regex): New function declared.
+
+	* parser.l (REGCLASS): Unused start condition removed.
+	(grammar): A slash character in the REGEX start condition is now simply
+	returned as an operator token; no popping of the state stack takes
+	place. The scanner stays in REGEX mode.
+	(end_of_regex): New function.
+
+	* parser.y (regex): Call end_of_regex when a regex is successfully
+	scanned through to terminating slash, or if a syntax error occurs.
+	(regchar): Can derive a / terminal now, thus including it in a
+	regex character class.
+
 2010-01-13 Kaz Kylheku
 
 	* parser.y (precedence): bugfix: character classes like this [^*]
diff --git a/parser.h b/parser.h
index a5066f32..e4f712b9 100644
--- a/parser.h
+++ b/parser.h
@@ -35,3 +35,4 @@ val get_spec(void);
 void yyerror(const char *s);
 void yyerrorf(val s, ...);
 void yybadtoken(int tok, val context);
+void end_of_regex(void);
diff --git a/parser.l b/parser.l
index e0c0f2d5..d6840a9d 100644
--- a/parser.l
+++ b/parser.l
@@ -187,7 +187,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 
-%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT
+%x SPECIAL NESTED REGEX STRLIT CHRLIT QSILIT
 
 %%
 
@@ -420,10 +420,6 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 [/] {
-  yy_pop_state();
-  if (yy_top_state() == INITIAL
-      || yy_top_state() == QSILIT)
-    yy_pop_state();
   yylval.chr = '/';
   return '/';
 }
@@ -567,3 +563,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 %%
+
+void end_of_regex(void)
+{
+  yy_pop_state();
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state();
+}
diff --git a/parser.y b/parser.y
index e8be187d..a7773c08 100644
--- a/parser.y
+++ b/parser.y
@@ -449,9 +449,10 @@ expr : IDENT { $$ = intern(string_own($1), nil); }
      | quasilit { $$ = $1; }
      ;
 
-regex : '/' regexpr '/' { $$ = $2; }
+regex : '/' regexpr '/' { $$ = $2; end_of_regex(); }
       | '/' error { $$ = nil;
-                    yybadtoken(yychar, lit("regex")); }
+                    yybadtoken(yychar, lit("regex"));
+                    end_of_regex(); }
       ;
 
 regexpr : regbranch { $$ = if3(cdr($1),
@@ -504,6 +505,7 @@ regchar : '?' { $$ = '?'; }
         | ')' { $$ = ')'; }
         | '^' { $$ = '^'; }
         | '|' { $$ = '|'; }
+        | '/' { $$ = '/'; }
         | REGCHAR { $$ = $1; }
         ;
 
-- 
cgit v1.2.3