summaryrefslogtreecommitdiffstats
path: root/parser.l
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2021-05-26 22:48:52 -0700
committerKaz Kylheku <kaz@kylheku.com>2021-05-26 22:48:52 -0700
commit2df469c3cda02c07f17e65cdaae5d9d16dc6b978 (patch)
treeba88b0ceff362bf8f544fbac9bd389eda7da486a /parser.l
parent644a28827acc238aefcd907820eed3378d41e3eb (diff)
downloadtxr-2df469c3cda02c07f17e65cdaae5d9d16dc6b978.tar.gz
txr-2df469c3cda02c07f17e65cdaae5d9d16dc6b978.tar.bz2
txr-2df469c3cda02c07f17e65cdaae5d9d16dc6b978.zip
New #J syntax for JSON objects in TXR Lisp.
(needs buffer literal error message cleanup) * parser.c (json_s): New symbol variable. (is_balanced_line): Follow braces out of initial state. This concession allows the listener to accept input like #J{"a":"b"}. (me_json): New static function (macro expander). The #J X syntax produces a (json Y) form, with the JSON syntax X translated to a Lisp object Y. If that is evaluated, this macro translates it to (quote Y). (parse_init): Initialize json_s variable with interned symbol, and register the json macro. * parser.h (json_s): Declared. (end_of_json): Declared. * parser.l (num_esc): Treat u escape sequences in the same way as x. This function can then be used for handling the \u escapes in JSON string literals. (DIG19, JNUM, JPUNC, NJPUNC): New lex named patterns. (JSON, JLIT): New lex start conditions. (grammar): Recognize #J syntax, mapping to HASH_J token, which transitions into JSON start state. In JSON start state, handle all the elements: numbers, keywords, arrays and objects. Transition into JLIT state. In JLIT start state, handle all the elements of JSON string literals, including surrogate pair escapes. JSON literals share the {UANY} fallback pattern with other literals. (end_of_json): New function. * parser.y (HASH_J, JSKW): New token symbols. (json, json_val, json_vals, json_pairs): New nonterminal symbols, and rules. (i_expr, n_expr): Generate json nonterminal, to hook the stuff into the grammar. (yybadtoken): Handle JSKW and HASH_J tokens. * lex.yy.c.shipped, y.tab.c.shipped, y.tab.h.shipped: Updated.
Diffstat (limited to 'parser.l')
-rw-r--r--parser.l126
1 files changed, 124 insertions, 2 deletions
diff --git a/parser.l b/parser.l
index 55da6331..060b1023 100644
--- a/parser.l
+++ b/parser.l
@@ -163,7 +163,7 @@ static wchar_t num_esc(scanner_t *scn, char *num)
{
long val = 0;
- if (num[0] == 'x') {
+ if (num[0] == 'x' || num[0] == 'u') {
if (strlen(num) > 7)
yyerror(scn, yyget_extra(scn), "too many digits in hex character escape");
else
@@ -235,6 +235,7 @@ TOK [a-zA-Z0-9_]+
SGN [+\-]
EXP [eE][+\-]?[0-9]+
DIG [0-9]
+DIG19 [1-9]
XDIG [0-9A-Fa-f]
NUM {SGN}?{DIG}+
FLO {SGN}?({DIG}*[.]{DIG}+{EXP}?|{DIG}+[.]?{EXP})
@@ -278,8 +279,13 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
+/* JSON number: optional minus, integer part (a lone 0 or a digit run starting with 1-9), optional fraction, optional exponent. */
+JNUM -?(0|{DIG19}{DIG}*)([.]{DIG}+)?{EXP}?
+/* Single punctuation characters that are returned as tokens in the JSON start condition. */
+JPUNC [(){},:\[\]"]
+/* Any character that is neither one of the JPUNC characters nor a space, tab or newline. */
+NJPUNC [^(){},:\[\]" \t\n]
+
%x SPECIAL BRACED NESTED REGEX SREGEX STRLIT CHRLIT
%x QSILIT QSPECIAL WLIT QWLIT BUFLIT
+%x JSON JLIT
%%
@@ -738,6 +744,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return HASH_T;
}
+<NESTED,BRACED>#J {
+/* #J introduces a JSON literal: store the current line number in the token value, push the JSON start condition, and return HASH_J to the parser. */
+yylval->lineno = yyextra->lineno;
+yy_push_state(JSON, yyscanner);
+return HASH_J;
+}
+
<NESTED,BRACED>#; {
yylval->lineno = yyextra->lineno;
return HASH_SEMI;
@@ -1089,7 +1101,46 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return ' ';
}
-<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT>{UANYN} {
+<JLIT>\" {
+/* Closing quote of a JSON string: pop the JLIT start condition and return the quote character itself as the token. */
+yy_pop_state(yyscanner);
+return yytext[0];
+}
+
+<JLIT>[\\][bfnrt"\\/] {
+/* Single-character escape: yytext[1] is the character after the backslash; the value char_esc returns for it is delivered as a LITCHAR. */
+yylval->chr = char_esc(yytext[1]);
+return LITCHAR;
+}
+
+<JLIT>[\\]u[Dd][89ABab]{HEX}{2}[\\]u[Dd][C-Fc-f]{HEX}{2} {
+  /* UTF-16 surrogate pair spelled as two consecutive \u escapes.
+   * The first escape must be a high (leading) surrogate, D800-DBFF, and
+   * the second a low (trailing) surrogate, DC00-DFFF. Restricting the
+   * first escape to D8xx-DBxx matters: if a low surrogate were accepted
+   * in the leading position, the arithmetic below would produce values
+   * above 0x10FFFF, which are not Unicode code points. Escapes in the
+   * surrogate range that do not form a valid pair are left to the plain
+   * four-digit \u rule.
+   *
+   * yytext layout: [0]='\\' [1]='u' [2..5]=hex [6]='\\' [7]='u' [8..11]=hex
+   */
+  wchar_t hi, lo;
+  yytext[6] = 0; /* terminate the first escape so num_esc stops there */
+  hi = num_esc(yyg, yytext + 1);
+  lo = num_esc(yyg, yytext + 7);
+  /* Combine the two 10-bit halves and add the supplementary-plane offset. */
+  yylval->chr = (((hi - 0xD800) << 10) | (lo - 0xDC00)) + 0x10000;
+  return LITCHAR;
+}
+
+<JLIT>[\\]u{HEX}{4} {
+/* Four-digit \u escape: yytext + 1 skips the backslash so that num_esc receives the 'u' prefix followed by the hex digits; the result is delivered as a LITCHAR. */
+yylval->chr = num_esc(yyg, yytext + 1);
+return LITCHAR;
+}
+
+<JLIT>[\\]u {
+/* A \u that is not followed by four hex digits (longer \u matches are taken by the four-digit rules). The error is reported and this action returns no token. */
+yyerrorf(yyg, lit("JSON \\u escape needs four digits"), nao);
+}
+
+<JLIT>[\\]. {
+/* Backslash followed by a character not handled by the other escape rules: report it, showing the character after the backslash. This action returns no token. */
+yyerrorf(yyg, lit("unrecognized JSON escape: \\~a"), chr(yytext[1]), nao);
+}
+
+<JLIT>{NL} {
+/* Line break inside a JSON string: pass the message to yyerrprepf, bump the line counter, and return ERRTOK with the first matched character as its value. */
+yyerrprepf(yyg, lit("newline in JSON string"), nao);
+yyextra->lineno++;
+yylval->chr = yytext[0];
+return ERRTOK;
+}
+
+<STRLIT,CHRLIT,QSILIT,WLIT,QWLIT,JLIT>{UANYN} {
wchar_t wchr[8];
if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
yylval->lexeme = chk_strdup(wchr);
@@ -1125,6 +1176,66 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return LITCHAR;
}
+<JSON>{JNUM} {
+/* JSON number: the matched text is turned into a string object and converted with flo_str. A nil result is handed to out_of_range_float; NUMBER is returned in either case. */
+val str = string_own(utf8_dup_from(yytext));
+if ((yylval->val = flo_str(str)) == nil)
+out_of_range_float(yyg, str);
+return NUMBER;
+}
+
+<JSON>true/({JPUNC}|[ \t\n]) {
+/* Keyword true, matched only when followed by JSON punctuation or whitespace (trailing context, which is not consumed). Yields JSKW with value t. */
+yylval->val = t;
+return JSKW;
+}
+
+<JSON>false/({JPUNC}|[ \t\n]) {
+/* Keyword false, matched only when followed by JSON punctuation or whitespace (trailing context, which is not consumed). Yields JSKW with value nil. */
+yylval->val = nil;
+return JSKW;
+}
+
+<JSON>{NJPUNC}+ {
+  /* A run of characters containing no JSON punctuation and no space,
+   * tab or newline. Exactly "true" or "false" is still accepted as a
+   * keyword here (this covers a keyword that is not followed by
+   * punctuation or whitespace, e.g. at the very end of the input);
+   * any other run is reported as unrecognized syntax, and in that case
+   * the action returns no token.
+   */
+  if (strcmp("true", yytext) == 0) {
+    yylval->val = t;
+    return JSKW;
+  } else if (strcmp("false", yytext) == 0) {
+    yylval->val = nil;
+    return JSKW;
+  } else {
+    val junk = string_own(utf8_dup_from(yytext));
+    yyerrorf(yyg, lit("unrecognized JSON syntax: ~a"), junk, nao);
+  }
+}
+
+<JSON>\" {
+/* Opening quote of a JSON string: push the JLIT start condition and return the quote character itself as the token. */
+yy_push_state(JLIT, yyscanner);
+return yytext[0];
+}
+
+<JSON>{JPUNC} {
+/* Any other JSON punctuation character is returned as its own token. */
+return yytext[0];
+}
+
+<JSON>{NL} {
+/* A line break between JSON tokens produces no token; it only advances the line counter. */
+yyextra->lineno++;
+}
+
+<JSON>{WS} {
+/* Text matching {WS} between JSON tokens is discarded. */
+}
+
+<JSON>. {
+  /* Catch-all for any single character that no other <JSON> rule
+   * accepts. Keep this as the last <JSON> rule: among matches of equal
+   * length flex picks the rule listed first, so a single-character rule
+   * placed after this one could never be matched.
+   */
+  yyerrorf(yyg, lit("bad character ~s in JSON literal"),
+           chr(yytext[0]), nao);
+}
+
%%
static int directive_tok(scanner_t *yyscanner, int tok, int state)
@@ -1200,6 +1311,17 @@ void end_of_buflit(scanner_t *yyg)
yy_pop_state(yyg);
}
+/* Leaves JSON scanning mode on the scanner's start-condition stack.
+ * If the scanner is currently inside a JSON string literal (JLIT), that
+ * state is popped first. The JSON state must then be current; anything
+ * else is reported through internal_error. Finally the JSON state itself
+ * is popped.
+ * NOTE(review): presumably called by the parser once a #J object has
+ * been fully parsed -- confirm against the caller in parser.y.
+ */
+void end_of_json(scanner_t *yyg)
+{
+if (YYSTATE == JLIT)
+yy_pop_state(yyg);
+
+if (YYSTATE != JSON)
+internal_error("end_of_json called in wrong scanner state");
+
+yy_pop_state(yyg);
+}
+
val source_loc(val form)
{
return gethash(form_to_ln_hash, form);