Handle non-UTF-8 byte in regex scanned from string.

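Currently there is no lex rule for this case, so such a byte falls through to the scanner's default action and gets echoed.

parser.l (grammar): Add a fallback rule which matches a single byte in the SREGEX state and turns it into a character in the range 0xDC00-0xDCFF (0xDC00 plus the byte value).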
author: Kaz Kylheku <kaz@kylheku.com> 2016-04-21 06:57:42 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2016-04-21 06:57:42 -0700
commit: ef8fe557841c440bf9e3e13ee0801bc127091b7e (patch)
tree: 5f351a20c31d7a9e8d4c832cd2a536b6cc9aa89f
parent: 96ca2e8a7799a2b820e6e3e6727f3fe15d56c061 (diff)
download: txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.gz
txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.bz2
txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.zip
1 files changed, 6 insertions, 0 deletions
diff --git a/parser.l b/parser.l
index d87e03eb..9ac79228 100644
--- a/parser.l
+++ b/parser.l
@@ -872,6 +872,12 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
   return REGCHAR;
 }
 
+<SREGEX>. {
+  /* Allow non-UTF-8 byte for regexes scanned from string */
+  yylval->chr = (unsigned char) yytext[0] + 0xDC00;
+  return REGCHAR;
+}
+
 <REGEX>. {
   yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
              num(convert(unsigned char, yytext[0])), nao);
author	Kaz Kylheku <kaz@kylheku.com>	2016-04-21 06:57:42 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2016-04-21 06:57:42 -0700
commit	ef8fe557841c440bf9e3e13ee0801bc127091b7e (patch)
tree	5f351a20c31d7a9e8d4c832cd2a536b6cc9aa89f
parent	96ca2e8a7799a2b820e6e3e6727f3fe15d56c061 (diff)
download	txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.gz txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.bz2 txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.zip