From 8bc5fc7a77eb1a6707f3c742235ab38ca210f55e Mon Sep 17 00:00:00 2001 From: Kaz Kylheku <kaz@kylheku.com> Date: Fri, 28 May 2021 06:52:26 -0700 Subject: json: handling for bad UTF-8 bytes, NUL and \u0000. * parser.l <JLIT>: Convert \u0000 sequence to U+DC00 code point, the pseudo-null. Also include JLIT in the rule for catching bad bytes that are not matched by {UANYN}. * txr.1: Document this treatment as extensions to JSON. * lex.yy.c.shipped: Updated. --- parser.l | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'parser.l') diff --git a/parser.l b/parser.l index fab8dd9c..86472c03 100644 --- a/parser.l +++ b/parser.l @@ -1121,7 +1121,8 @@ NJPUNC [^(){},:\[\]"~*^ \t\n] } <JLIT>[\\]u{HEX}{4} { - yylval->chr = num_esc(yyg, yytext + 1); + wchar_t ch = num_esc(yyg, yytext + 1); + yylval->chr = if3(ch, ch, 0xDC00); return LITCHAR; } @@ -1171,7 +1172,7 @@ NJPUNC [^(){},:\[\]"~*^ \t\n] chr(yytext[0]), nao); } -<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>. { +<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT,JLIT>. { yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00; return LITCHAR; } -- cgit v1.2.3