diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2021-05-28 06:52:26 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2021-05-28 06:52:26 -0700 |
commit | 8bc5fc7a77eb1a6707f3c742235ab38ca210f55e (patch) | |
tree | bd5f91229ca61ff30dad2868b64c836a421468f1 /parser.l | |
parent | c0c5e8836a89c8439a675bbd52d6fed134792477 (diff) | |
download | txr-8bc5fc7a77eb1a6707f3c742235ab38ca210f55e.tar.gz txr-8bc5fc7a77eb1a6707f3c742235ab38ca210f55e.tar.bz2 txr-8bc5fc7a77eb1a6707f3c742235ab38ca210f55e.zip |
json: handling for bad UTF-8 bytes, NUL and \u0000.
* parser.l <JLIT>: Convert \u0000 sequence to U+DC00
code point, the pseudo-null. Also include JLIT
in the rule for catching bad bytes that are not
matched by {UANYN}.
* txr.1: Document this treatment as extensions to JSON.
* lex.yy.c.shipped: Updated.
Diffstat (limited to 'parser.l')
-rw-r--r-- | parser.l | 5 |
1 files changed, 3 insertions, 2 deletions
@@ -1121,7 +1121,8 @@ NJPUNC [^(){},:\[\]"~*^ \t\n]
 }
 
 <JLIT>[\\]u{HEX}{4} {
-  yylval->chr = num_esc(yyg, yytext + 1);
+  wchar_t ch = num_esc(yyg, yytext + 1);
+  yylval->chr = if3(ch, ch, 0xDC00);
   return LITCHAR;
 }
 
@@ -1171,7 +1172,7 @@ NJPUNC [^(){},:\[\]"~*^ \t\n]
              chr(yytext[0]), nao);
 }
 
-<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>. {
+<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT,JLIT>. {
   yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00;
   return LITCHAR;
 }