From 88a0ca81d753a6393b06fdabb984aeff48dcaa3b Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Wed, 28 Sep 2011 08:19:58 -0700 Subject: * match.c (match_line): Logic restructured to allow for regex variables which also have nested variables. Previously this code was assuming that the cases were mutually exclusive, and the parser happened to work that way. Also, added support for a "double var" match which occurs when an unbound variable is followed by a regex variable. This case should be allowed because it makes sense. It's similar to a variable followed by a regex, except that the regex is also a variable binding. * parser.y (o_elems_transform): New function. (o_elems_opt, o_elems_opt2, quasilit): Transform o_elems with new function. This is needed because subst_vars doesn't deal with the nested var syntax for consecutive variables. (var): New syntax case '{' IDENT exprs '}' elem. This allows consecutive variables to be nested in all cases. --- ChangeLog | 19 +++++++++++ match.c | 111 ++++++++++++++++++++++++++++++++++++++++++-------------------- parser.y | 34 +++++++++++++++++-- 3 files changed, 126 insertions(+), 38 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7fc9e6af..91978a74 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2011-09-28 Kaz Kylheku + + * match.c (match_line): Logic restructured to allow for + regex variables which also have nested variables. + Previously this code was assuming that the cases were + mutually exclusive, and the parser happened to work that way. + Also, added support for a "double var" match which occurs + when an unbound variable is followed by a regex variable. + This case should be allowed because it makes sense. + It's similar to a variable followed by a regex, except + that the regex is also a variable binding. + + * parser.y (o_elems_transform): New function. + (o_elems_opt, o_elems_opt2, quasilit): Transform o_elems with new + function. 
This is needed because subst_vars doesn't + deal with the nested var syntax for consecutive variables. + (var): New syntax case '{' IDENT exprs '}' elem. This + allows consecutive variables to be nested in all cases. + 2011-09-27 Kaz Kylheku * parser.y ('{', '}'): Nope, still not right. diff --git a/match.c b/match.c index a00064ca..01d96bf5 100644 --- a/match.c +++ b/match.c @@ -305,6 +305,13 @@ static val match_line(val bindings, val specline, val dataline, val modifier = fourth(elem); val pair = assoc(bindings, sym); /* var exists already? */ + if (gt(length(modifier), one)) { + sem_error(spec_lineno, lit("multiple modifiers on variable ~s"), + sym, nao); + } + + modifier = car(modifier); + if (pair) { /* If the variable already has a binding, we replace it with its value, and treat it as a string match. @@ -336,37 +343,41 @@ static val match_line(val bindings, val specline, val dataline, specline = cons(cdr(pair), rest(specline)); } continue; - } else if (pat == nil) { /* match to end of line or with regex */ - if (gt(length(modifier), one)) { - sem_error(spec_lineno, lit("multiple modifiers on variable ~s"), - sym, nao); + } else if (consp(modifier)) { /* regex variable */ + val past = match_regex(dataline, car(modifier), pos); + if (nullp(past)) { + LOG_MISMATCH("var positive regex"); + return nil; } - - modifier = car(modifier); - - if (consp(modifier)) { - val past = match_regex(dataline, car(modifier), pos); - if (nullp(past)) { - LOG_MISMATCH("var positive regex"); - return nil; - } - LOG_MATCH("var positive regex", past); - bindings = acons_new(bindings, sym, sub_str(dataline, pos, past)); - pos = past; - } else if (nump(modifier)) { - val past = plus(pos, modifier); - if (length_str_lt(dataline, past) || lt(past, pos)) - { - LOG_MISMATCH("count based var"); - return nil; - } - LOG_MATCH("count based var", past); - bindings = acons_new(bindings, sym, trim_str(sub_str(dataline, pos, past))); - pos = past; - } else { - bindings = acons_new(bindings, sym, 
sub_str(dataline, pos, nil)); - pos = length_str(dataline); + LOG_MATCH("var positive regex", past); + bindings = acons_new(bindings, sym, sub_str(dataline, pos, past)); + pos = past; + /* This may have another variable attached */ + if (pat) { + specline = cons(pat, rest(specline)); + continue; } + } else if (nump(modifier)) { /* fixed field */ + val past = plus(pos, modifier); + if (length_str_lt(dataline, past) || lt(past, pos)) + { + LOG_MISMATCH("count based var"); + return nil; + } + LOG_MATCH("count based var", past); + bindings = acons_new(bindings, sym, trim_str(sub_str(dataline, pos, past))); + pos = past; + /* This may have another variable attached */ + if (pat) { + specline = cons(pat, rest(specline)); + continue; + } + } else if (modifier) { + sem_error(spec_lineno, lit("invalid modifier ~s on variable ~s"), + modifier, sym, nao); + } else if (pat == nil) { /* no modifier, no elem -> to end of line */ + bindings = acons_new(bindings, sym, sub_str(dataline, pos, nil)); + pos = length_str(dataline); } else if (type(pat) == STR) { val find = search_str(dataline, pat, pos, modifier); if (!find) { @@ -388,26 +399,55 @@ static val match_line(val bindings, val specline, val dataline, bindings = acons_new(bindings, sym, sub_str(dataline, pos, fpos)); pos = plus(fpos, flen); } else if (consp(pat) && first(pat) == var_s) { - /* Unbound var followed by var: the following one must be bound. */ + /* Unbound var followed by var: the following one must either + be bound, or must specify a regex. */ val second_sym = second(pat); val next_pat = third(pat); + val next_modifier = fourth(pat); val pair = assoc(bindings, second_sym); /* var exists already? 
*/ - if (!pair) - sem_error(spec_lineno, lit("consecutive unbound variables"), nao); + if (gt(length(next_modifier), one)) { + sem_error(spec_lineno, lit("multiple modifiers on variable ~s"), + second_sym, nao); + } + + next_modifier = car(next_modifier); + if (!pair && consp(next_modifier)) { + val find = search_regex(dataline, first(next_modifier), pos, modifier); + val fpos = car(find); + val flen = cdr(find); + + if (!find) { + LOG_MISMATCH("double var regex"); + return nil; + } + + /* Text from here to start of regex match goes to this + variable. */ + bindings = acons_new(bindings, sym, sub_str(dataline, pos, fpos)); + /* Text from start of regex match to end goes to the + second variable */ + bindings = acons_new(bindings, second_sym, sub_str(dataline, fpos, plus(fpos, flen))); + LOG_MATCH("double var regex (first var)", fpos); + pos = fpos; + LOG_MATCH("double var regex (second var)", plus(fpos, flen)); + pos = plus(fpos, flen); + specline = cons(next_pat, rest(specline)); + continue; + } else if (!pair) { + sem_error(spec_lineno, lit("consecutive unbound variables"), nao); + } else { /* Re-generate a new spec with an edited version of the element we just processed, and repeat. 
*/ - { val new_elem = list(var_s, sym, cdr(pair), modifier, nao); if (next_pat) specline = cons(new_elem, cons(next_pat, rest(specline))); else specline = cons(new_elem, rest(specline)); + continue; } - - continue; } else if (consp(pat) && (consp(first(pat)) || stringp(first(pat)))) { cons_bind (find, len, search_str(dataline, pat, pos, modifier)); if (!find) { @@ -608,6 +648,7 @@ static val subst_vars(val spec, val bindings, val filter) spec = cons(filter_string(filter, cdr(pair)), rest(spec)); continue; } + /* TODO: handle unbound variable */ } else if (first(elem) == quasi_s) { val nested = subst_vars(rest(elem), bindings, filter); list_collect_append(iter, nested); diff --git a/parser.y b/parser.y index 479b9993..4d213caa 100644 --- a/parser.y +++ b/parser.y @@ -42,6 +42,7 @@ int yylex(void); void yyerror(const char *); val repeat_rep_helper(val sym, val main, val parts); +val o_elems_transform(val output_form); val define_transform(val define_form); val lit_char_helper(val litchars); @@ -357,11 +358,12 @@ out_clauses_opt : out_clauses { $$ = $1; } o_line : o_elems_opt '\n' { $$ = $1; } ; -o_elems_opt : o_elems { $$ = cons(num(lineno - 1), $1); } +o_elems_opt : o_elems { $$ = cons(num(lineno - 1), + o_elems_transform($1)); } | { $$ = nil; } ; -o_elems_opt2 : o_elems { $$ = $1; } +o_elems_opt2 : o_elems { $$ = o_elems_transform($1); } | { $$ = null_list; } ; @@ -405,6 +407,8 @@ var : IDENT { $$ = list(var_s, intern(string_own($1), nil), $4, nao); } | '{' IDENT exprs '}' { $$ = list(var_s, intern(string_own($2), nil), nil, $3, nao); } + | '{' IDENT exprs '}' elem { $$ = list(var_s, intern(string_own($2), nil), + $5, $3, nao); } | var_op IDENT { $$ = list(var_s, intern(string_own($2), nil), nil, $1, nao); } | var_op IDENT elem { $$ = list(var_s, intern(string_own($2), nil), @@ -544,7 +548,7 @@ chrlit : '\'' '\'' { $$ = nil; ; quasilit : '`' '`' { $$ = null_string; } - | '`' quasi_items '`' { $$ = cons(quasi_s, $2); } + | '`' quasi_items '`' { $$ = cons(quasi_s, 
o_elems_transform($2)); } | '`' error { $$ = nil; yybadtoken(yychar, lit("string literal")); } ; @@ -595,6 +599,30 @@ val repeat_rep_helper(val sym, val main, val parts) last_parts, empty_parts, nao); } +val o_elems_transform(val o_elems) /* Walks the output-element list o_elems and collects into o_elems_out a version in which each chain of nested var elements is unrolled into consecutive, stand-alone var elements. */ +{ + list_collect_decl(o_elems_out, ptail); + val iter; + + for (iter = o_elems; iter; iter = cdr(iter)) { + val elem = car(iter); + + while (consp(elem) && first(elem) == var_s) { /* elem is a var form: peel it off, then keep going with whatever is nested inside it */ + val sym = second(elem); + val pat = third(elem); /* the element nested in this var, or nil */ + val modifiers = fourth(elem); + + list_collect(ptail, list(first(elem), sym, nil, modifiers, nao)); /* emit a copy of the var whose nested-element slot (third) is nil */ + elem = pat; /* descend; the nested element may itself be a var */ + } + + if (elem) /* leftover non-var element (original element or innermost nested one) is emitted unchanged; nil is dropped */ + list_collect(ptail, elem); + } + + return o_elems_out; +} + val define_transform(val define_form) { val sym = first(define_form); -- cgit v1.2.3