summaryrefslogtreecommitdiffstats
path: root/parser.l
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2015-09-16 07:29:35 -0700
committerKaz Kylheku <kaz@kylheku.com>2015-09-16 07:29:35 -0700
commit49a8cbbca600bb587216cb6b114c4fef2f762e99 (patch)
treeccca3c3fcb8203be547f643bed9b9ef390ea55fe /parser.l
parent15346886504761400e6db3930b74cc59adfce6f9 (diff)
downloadtxr-49a8cbbca600bb587216cb6b114c4fef2f762e99.tar.gz
txr-49a8cbbca600bb587216cb6b114c4fef2f762e99.tar.bz2
txr-49a8cbbca600bb587216cb6b114c4fef2f762e99.zip
syntax: be tolerant of carriage returns.
This is needed for multi-line mode with CR line breaks. It also makes TXR tolerant when code is ported among systems with different line endings. * parser.l (NL): New lex named pattern, matching three possible line terminators: CR, NL or CR-NL. (grammar): In places where \n was previously matched, use {NL}. In a few places where \n is in a character class, add \r. In one place (comment matching), the pattern . which implicitly doesn't match newlines had to be replaced with [^\r\n].
Diffstat (limited to 'parser.l')
-rw-r--r--parser.l31
1 files changed, 16 insertions, 15 deletions
diff --git a/parser.l b/parser.l
index 087a3db4..79c23eec 100644
--- a/parser.l
+++ b/parser.l
@@ -207,6 +207,7 @@ NTKEY @?:{NT2}?
NTOK {NTREG}|{NTKEY}
WS [\t ]*
REQWS [\t ]+
+NL (\n|\r|\r\n)
HEX [0-9A-Fa-f]
OCT [0-7]
@@ -700,7 +701,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return TEXT;
}
-<SPECIAL,QSPECIAL,NESTED,BRACED>\n {
+<SPECIAL,QSPECIAL,NESTED,BRACED>{NL} {
yyextra->lineno++;
}
@@ -724,7 +725,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return '.';
}
-<SPECIAL,QSPECIAL,NESTED,BRACED>[\\]\n{WS} {
+<SPECIAL,QSPECIAL,NESTED,BRACED>[\\]{NL}{WS} {
if (YYSTATE == SPECIAL)
yy_pop_state(yyscanner); /* @\ continuation */
yyextra->lineno++;
@@ -759,7 +760,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
yyerrorf(yyg, lit("unrecognized escape: \\~a"), chr(yytext[1]), nao);
}
-<SPECIAL,QSPECIAL,NESTED,BRACED>[;].* {
+<SPECIAL,QSPECIAL,NESTED,BRACED>[;][^\n\r]* {
/* comment */
}
@@ -795,11 +796,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return REGTOKEN;
}
-<REGEX,SREGEX>{WS}[\\]\n{WS} {
+<REGEX,SREGEX>{WS}[\\]{NL}{WS} {
yyextra->lineno++;
}
-<REGEX,SREGEX>\n {
+<REGEX,SREGEX>{NL} {
yyextra->lineno++;
yyerrprepf(yyg, lit("newline in regex"), nao);
return ERRTOK;
@@ -848,12 +849,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return SPACE;
}
-<INITIAL>({UONLY}|[^@\n ])+ {
+<INITIAL>({UONLY}|[^@\n\r ])+ {
yylval->lexeme = utf8_dup_from(yytext);
return TEXT;
}
-<INITIAL>\n {
+<INITIAL>{NL} {
yyextra->lineno++;
return '\n';
}
@@ -867,7 +868,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
yy_push_state(SPECIAL, yyscanner);
}
-<INITIAL>^@[#;].*\n {
+<INITIAL>^@[#;][^\r\n]*{NL} {
/* eat whole line comment */
yyextra->lineno++;
}
@@ -896,11 +897,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return LITCHAR;
}
-<STRLIT,QSILIT>{WS}[\\]\n{WS} {
+<STRLIT,QSILIT>{WS}[\\]{NL}{WS} {
yyextra->lineno++;
}
-<STRLIT,QSILIT,WLIT,QWLIT>{WS}[\\]\n{WS} {
+<STRLIT,QSILIT,WLIT,QWLIT>{WS}[\\]{NL}{WS} {
yyextra->lineno++;
if (!opt_compat || opt_compat > 109)
@@ -927,33 +928,33 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return SYMTOK;
}
-<CHRLIT>[^ \t\n] {
+<CHRLIT>[^ \t\n\r] {
yylval->lexeme = utf8_dup_from(yytext);
return SYMTOK; /* hack */
}
-<STRLIT>\n {
+<STRLIT>{NL} {
yyerrprepf(yyg, lit("newline in string literal"), nao);
yyextra->lineno++;
yylval->chr = yytext[0];
return ERRTOK;
}
-<CHRLIT>\n {
+<CHRLIT>{NL} {
yyerrprepf(yyg, lit("newline in character literal"), nao);
yyextra->lineno++;
yylval->chr = yytext[0];
return ERRTOK;
}
-<QSILIT>\n {
+<QSILIT>{NL} {
yyerrprepf(yyg, lit("newline in string quasiliteral"), nao);
yyextra->lineno++;
yylval->chr = yytext[0];
return ERRTOK;
}
-<WLIT,QWLIT>\n {
+<WLIT,QWLIT>{NL} {
yyextra->lineno++;
if (opt_compat && opt_compat <= 109)