summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- parser.l 10
-rw-r--r-- parser.y 10
-rw-r--r-- tests/012/parse.expected 0
-rw-r--r-- tests/012/parse.tl 7
4 files changed, 20 insertions, 7 deletions
diff --git a/parser.l b/parser.l
index 778c632a..d7e53c49 100644
--- a/parser.l
+++ b/parser.l
@@ -938,9 +938,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
<REGEX,SREGEX>{UANYN} {
wchar_t wchr[8];
if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
- yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
- num(convert(unsigned char, yytext[0])), nao);
- return ERRTOK;
+ yylval->lexeme = chk_strdup(wchr);
+ return TEXT;
}
yylval->chr = wchr[0];
return REGCHAR;
@@ -1100,9 +1099,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} {
wchar_t wchr[8];
if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
- yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
- num(convert(unsigned char, yytext[0])), nao);
- return ERRTOK;
+ yylval->lexeme = chk_strdup(wchr);
+ return TEXT;
}
yylval->chr = wchr[0];
return LITCHAR;
diff --git a/parser.y b/parser.y
index 7e5b898d..98c2aa5c 100644
--- a/parser.y
+++ b/parser.y
@@ -1187,6 +1187,7 @@ regterm : regterm '*' { $$ = list(zeroplus_s, $1, nao); }
| '-' { $$ = chr('-'); }
| REGCHAR { $$ = chr($1); }
| regtoken { $$ = $1; }
+ | TEXT { $$ = list(compound_s, string_own($1), nao); }
| '(' regexpr ')' { $$ = $2; }
| '(' error { $$ = nil;
yybadtok(yychar, lit("regex subexpression")); }
@@ -1258,6 +1259,10 @@ chrlit : HASH_BACKSLASH SYMTOK { wchar_t ch;
$$ = chr(ch); }
| HASH_BACKSLASH LITCHAR { $$ = chr($2);
end_of_char(scnr); }
+ | HASH_BACKSLASH TEXT { free($2);
+ yyerrorf(scnr,
+ lit("invalid UTF-8 used as character name"),
+ nao); }
| HASH_BACKSLASH error { $$ = nil;
yybadtok(yychar,
lit("character literal")); }
@@ -1278,7 +1283,6 @@ quasi_items : quasi_item { $$ = cons($1, nil);
;
quasi_item : litchars { $$ = $1; }
- | TEXT { $$ = string_own($1); }
| q_var { $$ = $1; }
| METANUM { $$ = cons(var_s, cons($1, nil));
rl($$, num(parser->lineno)); }
@@ -1292,10 +1296,14 @@ quasi_item : litchars { $$ = $1; }
litchars : LITCHAR { $$ = mkstring(one, chr($1)); }
| LITCHAR restlitchar { val ch = mkstring(one, chr($1));
$$ = string_extend(ch, $2); }
+ | TEXT { $$ = string_own($1); }
+ | TEXT restlitchar { $$ = string_extend(string_own($1), $2); }
;
restlitchar : LITCHAR { $$ = mkstring(one, chr($1)); }
| restlitchar LITCHAR { $$ = string_extend($1, chr($2)); }
+ | TEXT { $$ = string_own($1); }
+ | restlitchar TEXT { $$ = string_extend($1, string_own($2)); }
;
wordslit : '"' { $$ = nil; }
diff --git a/tests/012/parse.expected b/tests/012/parse.expected
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/tests/012/parse.expected
diff --git a/tests/012/parse.tl b/tests/012/parse.tl
new file mode 100644
index 00000000..8e3e7afc
--- /dev/null
+++ b/tests/012/parse.tl
@@ -0,0 +1,7 @@
+(load "../common")
+
+(test (read `"@(str-buf #b'EDB081')"`)
+ "\xDCED\xDCB0\xDC81")
+
+(test (regex-parse (str-buf #b'EDB081'))
+ (compound "\xDCED\xDCB0\xDC81"))