diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2019-02-05 08:16:34 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2019-02-05 08:16:34 -0800 |
commit | 0c364bffbc87487ea32ec49d000cb84164fe6135 (patch) | |
tree | 14e5b15fc363fbf97707d5ecfa5afa9efa6b3b8e /parser.l | |
parent | 5725cb7635e93a0c09d6af97881c75f907edd225 (diff) | |
download | txr-0c364bffbc87487ea32ec49d000cb84164fe6135.tar.gz txr-0c364bffbc87487ea32ec49d000cb84164fe6135.tar.bz2 txr-0c364bffbc87487ea32ec49d000cb84164fe6135.zip |
parser: security: UTF-8 and NUL handling in literals.
A null byte in regex and string literals is being processed as
a #\nul instead of correctly turning into #\pnul. Bad UTF-8 is
not being rejected.
* parser.l (REGCHAR, LITCHAR): Use utf8_from_buf to
properly convert yytext using its true length (yyleng), rather
than utf8_from, which assumes a null-terminated string. Thus
null bytes (including the case of yytext being a single NUL)
are handled properly. Check that the result is exactly one
character: the conversion must return 2, i.e. one wide
character plus the null terminator. Otherwise, report the
offending byte and return ERRTOK.
* utf8.c (utf8_from): Unused function removed.
* utf8.h (utf8_from): Declaration removed.
Diffstat (limited to 'parser.l')
-rw-r--r-- | parser.l | 20 |
1 files changed, 14 insertions, 6 deletions
@@ -903,9 +903,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 <REGEX,SREGEX>{UANYN} {
-  wchar_t buf[8];
-  utf8_from(buf, yytext);
-  yylval->chr = buf[0];
+  wchar_t wchr[8];
+  if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+    yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
+               num(convert(unsigned char, yytext[0])), nao);
+    return ERRTOK;
+  }
+  yylval->chr = wchr[0];
   return REGCHAR;
 }
 
@@ -1057,9 +1061,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 <STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} {
-  wchar_t buf[8];
-  utf8_from(buf, yytext);
-  yylval->chr = buf[0];
+  wchar_t wchr[8];
+  if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+    yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
+               num(convert(unsigned char, yytext[0])), nao);
+    return ERRTOK;
+  }
+  yylval->chr = wchr[0];
   return LITCHAR;
 }
 