From f9c0def5b6049bc600051b928910f724e14af4f5 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 10 Aug 2015 22:31:01 -0700 Subject: Dot with no whitespace generates qref syntax. a.b.(expr ...).c -> (qref a b (expr ...) c) Consing dot requires whitespace. * eval.c (qref_s): New symbol global variable. (eval_init): Initialize qref_s. * eval.h (qref_s): Declared. * parser.l (REQWS): New pattern definition, required whitespace. (grammar): New rules to scan CONSDOT (space required on both sides) and LAMBDOT (space required after). * parser.y (CONSDOT, LAMBDOT): New token types. (list): (. n_expr) rule replaced with LAMBDOT and CONSDOT. (r_exprs): r_exprs . n_expr consing dot rule replaced with CONSDOT. (n_expr): New n_expr . n_expr rule introduced here for producing qref expressions. (yybadtoken): Handle CONSDOT and LAMBDOT. * txr.1: Documented qref dot. --- eval.c | 3 ++- eval.h | 2 +- parser.l | 11 +++++++++++ parser.y | 20 ++++++++++++++++---- txr.1 | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 6 deletions(-) diff --git a/eval.c b/eval.c index 614e7992..0432931a 100644 --- a/eval.c +++ b/eval.c @@ -86,7 +86,7 @@ val gen_s, gun_s, generate_s, rest_s, plus_s; val promise_s, promise_forced_s, promise_inprogress_s, force_s; val op_s, ap_s, identity_s, apf_s, ipf_s; val ret_s, aret_s; -val hash_lit_s, hash_construct_s; +val hash_lit_s, hash_construct_s, qref_s; val vector_lit_s, vector_list_s; val macro_time_s, with_saved_vars_s, macrolet_s; val defsymacro_s, symacrolet_s, prof_s; @@ -4074,6 +4074,7 @@ void eval_init(void) rest_s = intern(lit("rest"), user_package); hash_lit_s = intern(lit("hash-construct"), system_package); hash_construct_s = intern(lit("hash-construct"), user_package); + qref_s = intern(lit("qref"), user_package); vector_lit_s = intern(lit("vector-lit"), system_package); vector_list_s = intern(lit("vector-list"), user_package); macro_time_s = intern(lit("macro-time"), user_package); diff --git a/eval.h b/eval.h 
index ea74148d..8519a83e 100644 --- a/eval.h +++ b/eval.h @@ -25,7 +25,7 @@ */ extern val dwim_s, lambda_s, vector_lit_s, vector_list_s; -extern val hash_lit_s, hash_construct_s; +extern val hash_lit_s, hash_construct_s, qref_s; extern val eval_error_s; extern val last_form_evaled, last_form_expanded; diff --git a/parser.l b/parser.l index ca7c893d..dc083867 100644 --- a/parser.l +++ b/parser.l @@ -197,6 +197,7 @@ NTREG ({NT0}|{NT1})?:{NT2}?|({NT0}|{NT1})(:{NT2})? NTKEY @?:{NT2}? NTOK {NTREG}|{NTKEY} WS [\t ]* +REQWS [\t ]+ HEX [0-9A-Fa-f] OCT [0-7] @@ -665,6 +666,16 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return '/'; } +{REQWS}\.{REQWS} { + yylval->chr = '.'; + return CONSDOT; +} + +\.{REQWS} { + yylval->chr = '.'; + return LAMBDOT; +} + \. { yylval->chr = '.'; return '.'; diff --git a/parser.y b/parser.y index 6df8fe17..1902633d 100644 --- a/parser.y +++ b/parser.y @@ -98,7 +98,7 @@ int yylex(union YYSTYPE *, yyscan_t scanner); %token NUMBER METANUM -%token REGCHAR REGTOKEN LITCHAR SPLICE +%token REGCHAR REGTOKEN LITCHAR SPLICE CONSDOT LAMBDOT %type spec clauses_rev clauses clauses_opt clause %type all_clause some_clause none_clause maybe_clause block_clause @@ -130,7 +130,7 @@ int yylex(union YYSTYPE *, yyscan_t scanner); %left '|' '/' %left '&' %right '~' '*' '?' '+' '%' -%right '.' REGCHAR REGTOKEN LITCHAR +%right '.' CONSDOT LAMBDOT REGCHAR REGTOKEN LITCHAR %right DOTDOT %% @@ -713,7 +713,8 @@ hash : HASH_H list { if (unquotes_occur($2, 0)) list : '(' n_exprs ')' { $$ = rl($2, num($1)); } | '(' ')' { $$ = nil; } - | '(' '.' n_expr ')' { $$ = $3; } + | '(' LAMBDOT n_expr ')' { $$ = $3; } + | '(' CONSDOT n_expr ')' { $$ = $3; } | '[' n_exprs ']' { $$ = rl(cons(dwim_s, $2), num($1)); } | '[' ']' { $$ = rl(cons(dwim_s, nil), num($1)); } | '@' n_expr { if (consp($2)) @@ -756,7 +757,8 @@ r_exprs : n_expr { val exprs = cons($1, nil); rplacd(term_atom_cons, rlcp(cons($2, exprs), or2($2, exprs))); $$ = term_atom_cons; } - | r_exprs '.' 
n_expr { val term_atom_cons = $1; + | r_exprs CONSDOT n_expr + { val term_atom_cons = $1; misplaced_consing_dot_check(scnr, term_atom_cons); rplaca(term_atom_cons, $3); $$ = $1; } @@ -813,6 +815,14 @@ n_expr : SYMTOK { $$ = symhlpr($1, t); } num(parser->lineno)); } | SPLICE n_expr { $$ = rl(rlcp(list(sys_splice_s, $2, nao), $2), num(parser->lineno)); } + | n_expr '.' n_expr { uses_or2; + if (consp($3) && car($3) == qref_s) { + rplacd($3, rlcp(cons($1, cdr($3)), $1)); + $$ = $3; + } else { + $$ = rlcp(list(qref_s, $1, $3, nao), + or2($1, $3)); + } } ; n_exprs_opt : n_exprs { $$ = $1; } @@ -1424,6 +1434,8 @@ void yybadtoken(parser_t *parser, int tok, val context) case REGCHAR: problem = lit("regular expression character"); break; case REGTOKEN: problem = lit("regular expression token"); break; case LITCHAR: problem = lit("string literal character"); break; + case CONSDOT: problem = lit("consing dot"); break; + case LAMBDOT: problem = lit("consing dot"); break; case DOTDOT: problem = lit(".."); break; case HASH_BACKSLASH: problem = lit("#\\"); break; case HASH_SLASH: problem = lit("#/"); break; diff --git a/txr.1 b/txr.1 index a2c1ee32..2954fc4b 100644 --- a/txr.1 +++ b/txr.1 @@ -9181,6 +9181,47 @@ or an atom as .codn "(. sym)" . +.NP* Referencing Dot + +A dot token which is flanked by expressions on both sides, without any +intervening whitespace, is the referencing dot, and not the consing dot. +The referencing dot is a syntactic sugar which translates to the +.code qref +syntax ("quoted ref"), which currently has no assigned meaning and is reserved +for a future language extension. The referencing dot obeys these +correspondences: + +.cblk + ;; a.b may be almost any expressions + a.b <--> (qref a b) + a.b.c <--> (qref a b c) + a.(qref b c) <--> (qref a b c) + (qref a b).c <--> (qref (qref a b) c) +.cble + +That is to say, this dot operator constructs a +.code qref +expression out of its left and right arguments. 
If the right argument +of the dot is already a qref expression (whether produced by another instance +of the dot operator, or expressed directly) it is merged. And the qref dot +operator is right-to-left associative, so that +.code a.b.c +first produces +.code (qref b c) +via the right dot, and then +.code a +is adjoined into the syntax via the left dot. + +Integer tokens cannot be involved in this syntax, because they +form floating-point constants when juxtaposed with a dot. +Such ambiguous uses of floating-point tokens are diagnosed as syntax errors: + +.cblk + (a.4) ;; error: cramped floating-point literal + (a .4) ;; good: a followed by 0.4 +.cble + + .NP* Quote and Quasiquote .meIP >> ' expr -- cgit v1.2.3