diff options
-rw-r--r-- | parser.l | 20 | ||||
-rw-r--r-- | utf8.c | 6 | ||||
-rw-r--r-- | utf8.h | 1 |
3 files changed, 14 insertions, 13 deletions
@@ -903,9 +903,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 <REGEX,SREGEX>{UANYN} {
-  wchar_t buf[8];
-  utf8_from(buf, yytext);
-  yylval->chr = buf[0];
+  wchar_t wchr[8];
+  if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+    yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
+               num(convert(unsigned char, yytext[0])), nao);
+    return ERRTOK;
+  }
+  yylval->chr = wchr[0];
   return REGCHAR;
 }
 
@@ -1057,9 +1061,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 <STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} {
-  wchar_t buf[8];
-  utf8_from(buf, yytext);
-  yylval->chr = buf[0];
+  wchar_t wchr[8];
+  if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+    yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
+               num(convert(unsigned char, yytext[0])), nao);
+    return ERRTOK;
+  }
+  yylval->chr = wchr[0];
   return LITCHAR;
 }
 
@@ -139,12 +139,6 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes)
   return nchar;
 }
 
-size_t utf8_from(wchar_t *wdst, const char *src)
-{
-  size_t nbytes = strlen(src);
-  return utf8_from_buf(wdst, coerce(const unsigned char *, src), nbytes);
-}
-
 size_t utf8_to_buf(unsigned char *dst, const wchar_t *wsrc, int null_term)
 {
   size_t nbyte = 0;
@@ -26,7 +26,6 @@
  */
 
 size_t utf8_from_buf(wchar_t *, const unsigned char *, size_t nbytes);
-size_t utf8_from(wchar_t *, const char *);
 size_t utf8_to_buf(unsigned char *dst, const wchar_t *wsrc, int null_term);
 size_t utf8_to(char *, const wchar_t *);
 wchar_t *utf8_dup_from(const char *);