From f50d67df2057c2cd1d26668f47bc604338ce33f0 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Wed, 13 Jan 2010 18:03:52 -0800
Subject: Bugfix: allow unescaped / to be used in regex character classes.

---
 ChangeLog | 22 ++++++++++++++++++++++
 parser.h  |  1 +
 parser.l  | 14 +++++++++-----
 parser.y  |  6 ++++--
 4 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 00158574..116fc67d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2010-01-13  Kaz Kylheku  <kkylheku@gmail.com>
+
+	Bugfix: allow unescaped / to be used in regex character classes.
+
+	To do this, we no longer make the lexer look for the terminating
+	slash which ends the regex syntax. Instead, the end of the regex is
+	determined by the parser, which calls a special function in the lexer
+	to indicate that the regex parsing is done.
+
+	* parser.h (end_of_regex): New function declared.
+
+	* parser.l (REGCLASS): Unused start condition removed.
+	(grammar): A slash character in the REGEX start condition is now simply
+	returned as an operator token; no popping of the state stack takes
+	place. The scanner stays in REGEX mode.
+	(end_of_regex): New function.
+
+	* parser.y (regex): Call end_of_regex when a regex is successfully
+	scanned through to the terminating slash, or if a syntax error occurs.
+	(regchar): Can derive a / terminal now, thus including it in a
+	regex character class.
+
 2010-01-13  Kaz Kylheku  <kkylheku@gmail.com>
 
 	* parser.y (precedence): bugfix: character classes like this [^*]
diff --git a/parser.h b/parser.h
index a5066f32..e4f712b9 100644
--- a/parser.h
+++ b/parser.h
@@ -35,3 +35,4 @@ val get_spec(void);
 void yyerror(const char *s);
 void yyerrorf(val s, ...);
 void yybadtoken(int tok, val context);
+void end_of_regex(void);
diff --git a/parser.l b/parser.l
index e0c0f2d5..d6840a9d 100644
--- a/parser.l
+++ b/parser.l
@@ -187,7 +187,7 @@ UANY    {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 UANYN   {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 
-%x      SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT
+%x      SPECIAL NESTED REGEX STRLIT CHRLIT QSILIT
 
 %%
 
@@ -420,10 +420,6 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                         }
 
 <REGEX>[/]      {
-                  yy_pop_state();
-                  if (yy_top_state() == INITIAL
-                      || yy_top_state() == QSILIT)
-                    yy_pop_state();
                   yylval.chr = '/';
                   return '/';
                 }
@@ -567,3 +563,11 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
                         }
 
 %%
+
+void end_of_regex(void) /* called from the parser's regex rule actions when regex parsing is done */
+{
+  yy_pop_state(); /* pop one level off the scanner's start-condition stack */
+  if (yy_top_state() == INITIAL
+      || yy_top_state() == QSILIT)
+    yy_pop_state(); /* pop one more level when INITIAL or QSILIT is on top after the first pop */
+}
diff --git a/parser.y b/parser.y
index e8be187d..a7773c08 100644
--- a/parser.y
+++ b/parser.y
@@ -449,9 +449,10 @@ expr : IDENT                    { $$ = intern(string_own($1), nil); }
      | quasilit                 { $$ = $1; }
      ;
 
-regex : '/' regexpr '/'         { $$ = $2; }
+regex : '/' regexpr '/'         { $$ = $2; end_of_regex(); }
       | '/' error               { $$ = nil;
-                                  yybadtoken(yychar, lit("regex")); }
+                                  yybadtoken(yychar, lit("regex"));
+                                  end_of_regex(); }
       ;
 
 regexpr : regbranch                     { $$ = if3(cdr($1), 
@@ -504,6 +505,7 @@ regchar : '?'                   { $$ = '?'; }
         | ')'                   { $$ = ')'; }
         | '^'                   { $$ = '^'; }
         | '|'                   { $$ = '|'; }
+        | '/'                   { $$ = '/'; }
         | REGCHAR               { $$ = $1; }
         ;
 
-- 
cgit v1.2.3