summaryrefslogtreecommitdiffstats
path: root/parser.l
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2019-02-05 08:16:34 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2019-02-05 08:16:34 -0800
commit: 0c364bffbc87487ea32ec49d000cb84164fe6135 (patch)
tree: 14e5b15fc363fbf97707d5ecfa5afa9efa6b3b8e /parser.l
parent: 5725cb7635e93a0c09d6af97881c75f907edd225 (diff)
downloadtxr-0c364bffbc87487ea32ec49d000cb84164fe6135.tar.gz
txr-0c364bffbc87487ea32ec49d000cb84164fe6135.tar.bz2
txr-0c364bffbc87487ea32ec49d000cb84164fe6135.zip
parser: security: UTF-8 and NUL handling in literals.
A null byte in regex and string literals is being processed as a #\nul instead of correctly turning into #\pnul. Bad UTF-8 is not being rejected.

* parser.l (REGCHAR, LITCHAR): Use utf8_from_buf to properly convert yytext using its true length (yyleng), rather than utf8_from, which assumes a null-terminated string. Thus null bytes (including the case of yytext being a single NUL) are handled properly. Check that the result is exactly one character, i.e. that the conversion produces a null-terminated buffer two wide characters long; otherwise report the offending byte and return ERRTOK.

* utf8.c (utf8_from): Unused function removed.

* utf8.h (utf8_from): Declaration removed.
Diffstat (limited to 'parser.l')
-rw-r--r-- parser.l 20
1 files changed, 14 insertions, 6 deletions
diff --git a/parser.l b/parser.l
index da2f8116..7a9d8d8b 100644
--- a/parser.l
+++ b/parser.l
@@ -903,9 +903,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
}
<REGEX,SREGEX>{UANYN} {
- wchar_t buf[8];
- utf8_from(buf, yytext);
- yylval->chr = buf[0];
+ wchar_t wchr[8];
+ if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+ yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
+ num(convert(unsigned char, yytext[0])), nao);
+ return ERRTOK;
+ }
+ yylval->chr = wchr[0];
return REGCHAR;
}
@@ -1057,9 +1061,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
}
<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} {
- wchar_t buf[8];
- utf8_from(buf, yytext);
- yylval->chr = buf[0];
+ wchar_t wchr[8];
+ if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+ yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
+ num(convert(unsigned char, yytext[0])), nao);
+ return ERRTOK;
+ }
+ yylval->chr = wchr[0];
return LITCHAR;
}