diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2021-04-08 21:25:58 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2021-04-08 21:25:58 -0700 |
commit | 60cd468d75eb0fef11a26238ddc3588b7c7b2f15 (patch) | |
tree | c08ecfbf6929101a4e077a8a39db4dff96d47616 /parser.l | |
parent | 9218460118760c2e4f3910b6c7ee73e0e644a401 (diff) | |
download | txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.tar.gz txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.tar.bz2 txr-60cd468d75eb0fef11a26238ddc3588b7c7b2f15.zip |
parser: allow non-UTF-8 bytes in literals and regexes.
* parser.l (grammar): Just like we do in SREGEX, allow an
arbitrary byte in REGEX, mapping it to the DCxx range.
Do the same inside string literals of all types.
* lex.yy.c.shipped: Updated.
* tests/012/parse.tl: New tests.
Diffstat (limited to 'parser.l')
-rw-r--r-- | parser.l | 14 |
1 file changed, 3 insertions, 11 deletions
@@ -945,18 +945,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
   return REGCHAR;
 }
 
-<SREGEX>. {
-  /* Allow non-UTF-8 byte for regexes scanned from string */
+<SREGEX,REGEX>. {
   yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00;
   return REGCHAR;
 }
 
-<REGEX>. {
-  yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
-             num(convert(unsigned char, yytext[0])), nao);
-  return ERRTOK;
-}
-
 <INITIAL>[ ]+ {
   yylval->lexeme = utf8_dup_from(yytext);
   return SPACE;
@@ -1128,9 +1121,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 <STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>. {
-  yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
-             num(convert(unsigned char, yytext[0])), nao);
-  return ERRTOK;
+  yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00;
+  return LITCHAR;
 }
 
 %%