parser: allow non-UTF-8 bytes in literals and regexes.

* parser.l (grammar): Just like we do in SREGEX, allow an arbitrary byte in REGEX, mapping it to the DCxx range. Do the same inside string literals of all types.
* lex.yy.c.shipped: Updated.
* tests/012/parse.tl: New tests.
author: Kaz Kylheku <kaz@kylheku.com> 2021-04-08 21:25:58 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2021-04-08 21:25:58 -0700
commit: 60cd468d75eb0fef11a26238ddc3588b7c7b2f15 (patch)
tree: c08ecfbf6929101a4e077a8a39db4dff96d47616 /parser.l
parent: 9218460118760c2e4f3910b6c7ee73e0e644a401 (diff)
download: txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.tar.gz
txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.tar.bz2
txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.zip
1 files changed, 3 insertions, 11 deletions
diff --git a/parser.l b/parser.l
index d7e53c49..98cdf344 100644
--- a/parser.l
+++ b/parser.l
@@ -945,18 +945,11 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
   return REGCHAR;
 }
 
-<SREGEX>. {
-  /* Allow non-UTF-8 byte for regexes scanned from string */
+<SREGEX,REGEX>. {
   yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00;
   return REGCHAR;
 }
 
-<REGEX>. {
-  yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
-             num(convert(unsigned char, yytext[0])), nao);
-  return ERRTOK;
-}
-
 <INITIAL>[ ]+ {
   yylval->lexeme = utf8_dup_from(yytext);
   return SPACE;
@@ -1128,9 +1121,8 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 <STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>. {
-  yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
-             num(convert(unsigned char, yytext[0])), nao);
-  return ERRTOK;
+  yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00;
+  return LITCHAR;
 }
 
 %%
author	Kaz Kylheku <kaz@kylheku.com>	2021-04-08 21:25:58 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2021-04-08 21:25:58 -0700
commit	60cd468d75eb0fef11a26238ddc3588b7c7b2f15 (patch)
tree	c08ecfbf6929101a4e077a8a39db4dff96d47616 /parser.l
parent	9218460118760c2e4f3910b6c7ee73e0e644a401 (diff)
download	txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.tar.gz txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.tar.bz2 txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.zip