diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2016-04-21 06:57:42 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2016-04-21 06:57:42 -0700 |
commit | ef8fe557841c440bf9e3e13ee0801bc127091b7e (patch) | |
tree | 5f351a20c31d7a9e8d4c832cd2a536b6cc9aa89f | |
parent | 96ca2e8a7799a2b820e6e3e6727f3fe15d56c061 (diff) | |
download | txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.gz txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.bz2 txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.zip |
Handle non-UTF-8 byte in regex scanned from string.
The current behavior is that there is no lex rule for this, so such a
byte gets echoed.
parser.l (grammar): Add fallback rule to match one byte
in SREGEX state and turn it into 0xDCxx character.
-rw-r--r-- | parser.l | 6 |
1 files changed, 6 insertions, 0 deletions
@@ -872,6 +872,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
   return REGCHAR;
 }
 
+<SREGEX>. {
+  /* Allow non-UTF-8 byte for regexes scanned from string */
+  yylval->chr = (unsigned char) yytext[0] + 0xDC00;
+  return REGCHAR;
+}
+
 <REGEX>. {
   yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
              num(convert(unsigned char, yytext[0])), nao);