From 49a8cbbca600bb587216cb6b114c4fef2f762e99 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Wed, 16 Sep 2015 07:29:35 -0700 Subject: syntax: be tolerant of carriage returns. This is needed for multi-line mode with CR line breaks. It also makes TXR tolerant when code is ported among systems with different line endings. * parser.l (NL): New lex named pattern, matching three possible line terminators: CR, NL or CR-NL. (grammar): In places where \n was previously matched, use {NL}. In a few places where \n is in a character class, add \r. In one place (comment matching), the pattern . (which implicitly doesn't match newlines) had to be replaced with [^\r\n]. --- parser.l | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/parser.l b/parser.l index 087a3db4..79c23eec 100644 --- a/parser.l +++ b/parser.l @@ -207,6 +207,7 @@ NTKEY @?:{NT2}? NTOK {NTREG}|{NTKEY} WS [\t ]* REQWS [\t ]+ +NL (\n|\r|\r\n) HEX [0-9A-Fa-f] OCT [0-7] @@ -700,7 +701,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return TEXT; } -\n { +{NL} { yyextra->lineno++; } @@ -724,7 +725,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return '.'; } -[\\]\n{WS} { +[\\]{NL}{WS} { if (YYSTATE == SPECIAL) yy_pop_state(yyscanner); /* @\ continuation */ yyextra->lineno++; @@ -759,7 +760,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} yyerrorf(yyg, lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } -[;].* { +[;][^\n\r]* { /* comment */ } @@ -795,11 +796,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return REGTOKEN; } -{WS}[\\]\n{WS} { +{WS}[\\]{NL}{WS} { yyextra->lineno++; } -\n { +{NL} { yyextra->lineno++; yyerrprepf(yyg, lit("newline in regex"), nao); return ERRTOK; @@ -848,12 +849,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return SPACE; } -({UONLY}|[^@\n ])+ { +({UONLY}|[^@\n\r ])+ { yylval->lexeme = utf8_dup_from(yytext); return TEXT; } -\n { +{NL} { yyextra->lineno++; return '\n'; } @@ -867,7 +868,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} 
yy_push_state(SPECIAL, yyscanner); } -^@[#;].*\n { +^@[#;].*{NL} { /* eat whole line comment */ yyextra->lineno++; } @@ -896,11 +897,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return LITCHAR; } -{WS}[\\]\n{WS} { +{WS}[\\]{NL}{WS} { yyextra->lineno++; } -{WS}[\\]\n{WS} { +{WS}[\\]{NL}{WS} { yyextra->lineno++; if (!opt_compat || opt_compat > 109) @@ -927,33 +928,33 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return SYMTOK; } -[^ \t\n] { +[^ \t\n\r] { yylval->lexeme = utf8_dup_from(yytext); return SYMTOK; /* hack */ } -\n { +{NL} { yyerrprepf(yyg, lit("newline in string literal"), nao); yyextra->lineno++; yylval->chr = yytext[0]; return ERRTOK; } -\n { +{NL} { yyerrprepf(yyg, lit("newline in character literal"), nao); yyextra->lineno++; yylval->chr = yytext[0]; return ERRTOK; } -\n { +{NL} { yyerrprepf(yyg, lit("newline in string quasiliteral"), nao); yyextra->lineno++; yylval->chr = yytext[0]; return ERRTOK; } -\n { +{NL} { yyextra->lineno++; if (opt_compat && opt_compat <= 109) -- cgit v1.2.3