summary refs log tree commit diff stats
path: root/parser.l
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2015-08-15 08:41:30 -0700
committer Kaz Kylheku <kaz@kylheku.com> 2015-08-15 08:41:30 -0700
commit 6742e3e96b3387bbea484c7278305cab1bd5397e (patch)
tree f0fcf43a263172806aefb0b327d415d9c3dafb04 /parser.l
parent 11ab30bd76f3cc28b364f79398229212e9537de1 (diff)
download txr-6742e3e96b3387bbea484c7278305cab1bd5397e.tar.gz
txr-6742e3e96b3387bbea484c7278305cab1bd5397e.tar.bz2
txr-6742e3e96b3387bbea484c7278305cab1bd5397e.zip
Allow slashes in regex passed to regex-parse.
* parser.l (SREGEX): New start state, for stand-alone regex parsing.
(grammar): All REGEX state rules are active in the SREGEX state also. The rule for the / character returns a REGCHAR if in the SREGEX state, so it is treated as an ordinary character.
* txr.1: Updated regex-parse documentation about the treatment of the slash. Also added notes about double escaping when a string literal is passed to regex-parse.
Diffstat (limited to 'parser.l')
-rw-r--r-- parser.l 31
1 file changed, 15 insertions, 16 deletions
diff --git a/parser.l b/parser.l
index 66a51cfc..af838a63 100644
--- a/parser.l
+++ b/parser.l
@@ -218,7 +218,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
-%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT QWLIT
+%x SPECIAL BRACED NESTED REGEX SREGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT QWLIT
%%
@@ -765,48 +765,47 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return ERRTOK;
}
-<REGEX>[/] {
+<REGEX,SREGEX>[/] {
yylval->chr = '/';
- return '/';
+ return (YYSTATE == SREGEX) ? REGCHAR : '/';
}
-
-<REGEX>[\\][abtnvfre\\ ] {
+<REGEX,SREGEX>[\\][abtnvfre\\ ] {
yylval->chr = char_esc(yytext[1]);
return REGCHAR;
}
-<REGEX>[\\](x{HEX}+|{OCT}+);? {
+<REGEX,SREGEX>[\\](x{HEX}+|{OCT}+);? {
yylval->chr = num_esc(yyg, yytext + 1);
return REGCHAR;
}
-<REGEX>[\\][sSdDwW] {
+<REGEX,SREGEX>[\\][sSdDwW] {
yylval->chr = yytext[1];
return REGTOKEN;
}
-<REGEX>{WS}[\\]\n{WS} {
+<REGEX,SREGEX>{WS}[\\]\n{WS} {
yyextra->lineno++;
}
-<REGEX>\n {
+<REGEX,SREGEX>\n {
yyextra->lineno++;
yyerrprepf(yyg, lit("newline in regex"), nao);
return ERRTOK;
}
-<REGEX>{REGOP} {
+<REGEX,SREGEX>{REGOP} {
yylval->chr = yytext[0];
return yytext[0];
}
-<REGEX>[\\]{REGOP} {
+<REGEX,SREGEX>[\\]{REGOP} {
yylval->chr = yytext[1];
return REGCHAR;
}
-<REGEX>[\\]. {
+<REGEX,SREGEX>[\\]. {
if (opt_compat && opt_compat <= 105) {
yylval->chr = yytext[1];
return REGCHAR;
@@ -816,12 +815,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return ERRTOK;
}
-<REGEX>[\\] {
+<REGEX,SREGEX>[\\] {
yyerrprepf(yyg, lit("dangling backslash in regex"), nao);
return ERRTOK;
}
-<REGEX>{UANYN} {
+<REGEX,SREGEX>{UANYN} {
wchar_t buf[8];
utf8_from(buf, yytext);
yylval->chr = buf[0];
@@ -986,7 +985,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
void end_of_regex(scanner_t *yyg)
{
- if (YYSTATE != REGEX)
+ if (YYSTATE != REGEX && YYSTATE != SREGEX)
internal_error("end_of_regex called in wrong scanner state");
yy_pop_state(yyg);
@@ -1050,7 +1049,7 @@ void prime_scanner(scanner_t *yyg, enum prime_parser prim)
yy_push_state(NESTED, yyg);
break;
case prime_regex:
- yy_push_state(REGEX, yyg);
+ yy_push_state(SREGEX, yyg);
break;
}
}