summaryrefslogtreecommitdiffstats
path: root/parser.l
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2021-05-28 06:52:26 -0700
committerKaz Kylheku <kaz@kylheku.com>2021-05-28 06:52:26 -0700
commit8bc5fc7a77eb1a6707f3c742235ab38ca210f55e (patch)
treebd5f91229ca61ff30dad2868b64c836a421468f1 /parser.l
parentc0c5e8836a89c8439a675bbd52d6fed134792477 (diff)
downloadtxr-8bc5fc7a77eb1a6707f3c742235ab38ca210f55e.tar.gz
txr-8bc5fc7a77eb1a6707f3c742235ab38ca210f55e.tar.bz2
txr-8bc5fc7a77eb1a6707f3c742235ab38ca210f55e.zip
json: handling for bad UTF-8 bytes, NUL and \u0000.
* parser.l <JLIT>: Convert the \u0000 escape sequence to the U+DC00 code point, the pseudo-null. Also include JLIT in the rule for catching bad bytes that are not matched by {UANYN}. * txr.1: Document this treatment as extensions to JSON. * lex.yy.c.shipped: Updated.
Diffstat (limited to 'parser.l')
-rw-r--r--parser.l5
1 files changed, 3 insertions, 2 deletions
diff --git a/parser.l b/parser.l
index fab8dd9c..86472c03 100644
--- a/parser.l
+++ b/parser.l
@@ -1121,7 +1121,8 @@ NJPUNC [^(){},:\[\]"~*^ \t\n]
}
<JLIT>[\\]u{HEX}{4} {
- yylval->chr = num_esc(yyg, yytext + 1);
+ wchar_t ch = num_esc(yyg, yytext + 1);
+ yylval->chr = if3(ch, ch, 0xDC00);
return LITCHAR;
}
@@ -1171,7 +1172,7 @@ NJPUNC [^(){},:\[\]"~*^ \t\n]
chr(yytext[0]), nao);
}
-<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>. {
+<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT,JLIT>. {
yylval->chr = convert(unsigned char, yytext[0]) + 0xDC00;
return LITCHAR;
}