summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 22
-rw-r--r-- parser.h 1
-rw-r--r-- parser.l 14
-rw-r--r-- parser.y 6
4 files changed, 36 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 00158574..116fc67d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,27 @@
2010-01-13 Kaz Kylheku <kkylheku@gmail.com>
+ Bugfix: allow unescaped / to be used in regex character classes.
+
+ To do this, we no longer make the lexer look for the terminating
+ slash which ends the regex syntax. This is driven by the parser,
+ which calls a special function in the lexer to indicate that
+ the regex parsing is done.
+
+ * parser.h (end_of_regex): New function declared.
+
+ * parser.l (REGCLASS): Unused start condition removed.
+ (grammar): A slash character in the REGEX start condition is now simply
+ returned as an operator token; no popping of the state stack takes
+ place. The scanner stays in REGEX mode.
+ (end_of_regex): New function.
+
+ * parser.y (regex): Call end_of_regex when a regex is successfully
+ scanned through to the terminating slash, or if a syntax error occurs.
+ (regchar): Can derive a / terminal now, thus including it in a
+ regex character class.
+
+2010-01-13 Kaz Kylheku <kkylheku@gmail.com>
+
* parser.y (precedence): bugfix: character classes like this [^*]
being treated as a non-complemented set of two characters.
diff --git a/parser.h b/parser.h
index a5066f32..e4f712b9 100644
--- a/parser.h
+++ b/parser.h
@@ -35,3 +35,4 @@ val get_spec(void);
void yyerror(const char *s);
void yyerrorf(val s, ...);
void yybadtoken(int tok, val context);
+void end_of_regex(void);
diff --git a/parser.l b/parser.l
index e0c0f2d5..d6840a9d 100644
--- a/parser.l
+++ b/parser.l
@@ -187,7 +187,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
-%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT
+%x SPECIAL NESTED REGEX STRLIT CHRLIT QSILIT
%%
@@ -420,10 +420,6 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
}
<REGEX>[/] {
- yy_pop_state();
- if (yy_top_state() == INITIAL
- || yy_top_state() == QSILIT)
- yy_pop_state();
yylval.chr = '/';
return '/';
}
@@ -567,3 +563,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
}
%%
+
+void end_of_regex(void) /* invoked by the parser's regex rule actions when a regex ends (complete or syntax error) */
+{
+ yy_pop_state(); /* pop the scanner state stack here; the <REGEX> '/' rule no longer pops it itself */
+ if (yy_top_state() == INITIAL
+ || yy_top_state() == QSILIT)
+ yy_pop_state(); /* extra pop when INITIAL or QSILIT is now on top -- NOTE(review): rationale not visible in this patch; confirm against where REGEX is pushed */
+}
diff --git a/parser.y b/parser.y
index e8be187d..a7773c08 100644
--- a/parser.y
+++ b/parser.y
@@ -449,9 +449,10 @@ expr : IDENT { $$ = intern(string_own($1), nil); }
| quasilit { $$ = $1; }
;
-regex : '/' regexpr '/' { $$ = $2; }
+regex : '/' regexpr '/' { $$ = $2; end_of_regex(); }
| '/' error { $$ = nil;
- yybadtoken(yychar, lit("regex")); }
+ yybadtoken(yychar, lit("regex"));
+ end_of_regex(); }
;
regexpr : regbranch { $$ = if3(cdr($1),
@@ -504,6 +505,7 @@ regchar : '?' { $$ = '?'; }
| ')' { $$ = ')'; }
| '^' { $$ = '^'; }
| '|' { $$ = '|'; }
+ | '/' { $$ = '/'; }
| REGCHAR { $$ = $1; }
;