From 35eb4dbc80f857007f99278c48e22f8557e13b68 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 13 Oct 2011 08:41:56 -0700 Subject: * Makefile (%.ok: %.txr): Use unified diff for showing differences between expected and actual test output. * parser.l (yybadtoken): Handle new terminal symbol, SPACE. New rule for producing SPACE token out of an extent of tabs and spaces. * parser.y (SPACE): New terminal symbol. (o_var): New nonterminal. I noticed that the var rule was being used for output elements, and the var rule refers to elem rather than o_elem. A new o_var rule is a simplified duplicate of var. (elem): Handle SPACE token. Transform to regex if it is a single space, otherwise to literal text. (o_elem): Handle SPACE token in output. * tests/001/query-2.txr: This query depends on matching single spaces and so needs to use escapes. * tests/001/query-4.txr, tests/001/query-4.expected: New test case, based on query-2.txr. It produces the same output, but is simpler thanks to the new semantics of space. * txr.1: Documented. 
--- parser.y | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'parser.y') diff --git a/parser.y b/parser.y index 96fec8f6..b814c1d6 100644 --- a/parser.y +++ b/parser.y @@ -58,11 +58,14 @@ static val parsed_spec; cnum num; } -%token TEXT IDENT KEYWORD METAVAR ALL SOME NONE MAYBE CASES CHOOSE +%token SPACE TEXT IDENT KEYWORD METAVAR +%token ALL SOME NONE MAYBE CASES CHOOSE %token AND OR END COLLECT %token UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token TRY CATCH FINALLY + %token NUMBER + %token REGCHAR LITCHAR %token METAPAR @@ -75,16 +78,17 @@ static val parsed_spec; %type elem var var_op meta_expr %type list exprs exprs_opt expr out_clauses out_clauses_opt out_clause %type repeat_clause repeat_parts_opt o_line -%type o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt +%type o_elems_opt o_elems_opt2 o_elems o_elem o_var rep_elem rep_parts_opt %type regex regexpr regbranch %type regterm regclass regclassterm regrange %type strlit chrlit quasilit quasi_items quasi_item litchars %type regchar + %nonassoc LOW /* used for precedence assertion */ %nonassoc ALL SOME NONE MAYBE CASES CHOOSE AND OR END COLLECT UNTIL COLL %nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE %nonassoc '[' ']' '(' ')' -%right IDENT TEXT NUMBER '{' '}' +%right IDENT SPACE TEXT NUMBER '{' '}' %left '-' %left '|' '/' %left '&' @@ -220,6 +224,15 @@ elems : elem { $$ = cons($1, nil); } ; elem : TEXT { $$ = string_own($1); } + | SPACE { if ($1[0] == ' ' && $1[1] == 0) + { val spaces = list(oneplus_s, + list(set_s, chr(' '), + chr('\t'), nao), + nao); + $$ = cons(regex_compile(spaces), spaces); + free($1); } + else + { $$ = string_own($1); }} | var { $$ = $1; } | list { $$ = $1; } | regex { $$ = cons(regex_compile(rest($1)), @@ -423,7 +436,8 @@ o_elems : o_elem { $$ = cons($1, nil); } ; o_elem : TEXT { $$ = string_own($1); } - | var { $$ = $1; } + | SPACE { $$ = string_own($1); } + | o_var { $$ = $1; } | rep_elem 
{ $$ = $1; } ; @@ -483,6 +497,23 @@ var : IDENT { $$ = list(var_s, intern(string_own($1), nil), yybadtoken(yychar, lit("variable spec")); } ; +o_var : IDENT { $$ = list(var_s, intern(string_own($1), nil), + nao); } + | IDENT o_elem { $$ = list(var_s, intern(string_own($1), nil), + $2, nao); } + | '{' IDENT '}' { $$ = list(var_s, intern(string_own($2), nil), + nao); } + | '{' IDENT '}' o_elem { $$ = list(var_s, intern(string_own($2), nil), + $4, nao); } + | '{' IDENT exprs '}' { $$ = list(var_s, intern(string_own($2), nil), + nil, $3, nao); } + | '{' IDENT exprs '}' o_elem { $$ = list(var_s, + intern(string_own($2), nil), + $5, $3, nao); } + | IDENT error { $$ = nil; + yybadtoken(yychar, lit("variable spec")); } + ; + var_op : '*' { $$ = list(t, nao); } ; @@ -632,7 +663,6 @@ litchars : LITCHAR { $$ = cons(chr($1), nil); } | LITCHAR litchars { $$ = cons(chr($1), $2); } ; - %% val repeat_rep_helper(val sym, val main, val parts) -- cgit v1.2.3