diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | match.c | 68 | ||||
-rw-r--r-- | txr.1 | 72 |
3 files changed, 124 insertions, 23 deletions
@@ -1,5 +1,12 @@ 2011-10-08 Kaz Kylheku <kaz@kylheku.com> + * match.c (vars_to_bindings): New function. + (match_line): keyword argument :vars implemented for coll. + + * txr.1: Documented :vars. + +2011-10-08 Kaz Kylheku <kaz@kylheku.com> + * match.c (vars_k): New symbol variable. (match_files): Implemented :vars in collect. (match_init): New symbol variable initialized. @@ -285,6 +285,30 @@ static val dest_bind(val linenum, val bindings, val pattern, val value) return t; } +static val eval_form(val lineno, val form, val bindings); + +static val vars_to_bindings(val lineno, val vars, val bindings) +{ + val iter; + list_collect_decl (fixed_vars, tail); + + if (!consp(vars)) + sem_error(lineno, lit("not a valid variable list: ~a"), vars, nao); + + for (iter = vars; iter; iter = cdr(iter)) { + val item = car(iter); + if (bindable(item)) { + list_collect (tail, cons(item, nil)); + } else if (consp(item) && bindable(first(item))) { + list_collect (tail, cons(first(item), + cdr(eval_form(lineno, second(item), bindings)))); + } else { + sem_error(lineno, lit("not a variable spec: ~a"), item, nao); + } + } + return fixed_vars; +} + static val match_line(val bindings, val specline, val dataline, val pos, val spec_lineno, val data_lineno, val file) @@ -563,6 +587,7 @@ static val match_line(val bindings, val specline, val dataline, val mintimes = getplist(args, mintimes_k); val maxtimes = getplist(args, maxtimes_k); val chars = getplist(args, chars_k); + val vars = getplist(args, vars_k); cnum cmax = nump(gap) ? c_num(gap) : (nump(max) ? c_num(max) : 0); cnum cmin = nump(gap) ? c_num(gap) : (nump(min) ? c_num(min) : 0); cnum mincounter = cmin, maxcounter = 0; @@ -574,6 +599,8 @@ static val match_line(val bindings, val specline, val dataline, cnum timescounter = 0, charscounter = 0; val iter; + vars = vars_to_bindings(spec_lineno, vars, bindings); + if (((times || maxtimes) && ctimax == 0) || (chars && cchars == 0)) break; @@ -616,13 +643,30 @@ static val match_line(val bindings, val specline, val dataline, bindings, eq_f, nil); LOG_MATCH("coll", new_pos); + for (iter = vars; iter; iter = cdr(iter)) { + cons_bind (var, dfl, car(iter)); + val exists = assoc(new_bindings, var); + + if (!exists) { + if (!dfl) + sem_error(spec_lineno, lit("coll failed to bind ~a"), + var, nao); + else + strictly_new_bindings = acons(strictly_new_bindings, + var, dfl); + } + } + for (iter = strictly_new_bindings; iter; iter = cdr(iter)) { val binding = car(iter); - val existing = assoc(bindings_coll, car(binding)); + val vars_binding = assoc(vars, car(binding)); - bindings_coll = acons_new(bindings_coll, car(binding), - cons(cdr(binding), cdr(existing))); + if (!vars || vars_binding) { + val existing = assoc(bindings_coll, car(binding)); + bindings_coll = acons_new(bindings_coll, car(binding), + cons(cdr(binding), cdr(existing))); + } } } @@ -1654,23 +1698,7 @@ repeat_spec_same_data: if (gap && (max || min)) sem_error(spec_linenum, lit("collect: cannot mix :gap with :mingap or :maxgap"), nao); - if (vars) { - list_collect_decl (fixed_vars, tail); - - if (!consp(vars)) - sem_error(spec_linenum, lit("collect: invalid argument to :vars"), nao); - for (iter = vars; iter; iter = cdr(iter)) { - val item = car(iter); - if (bindable(item)) { - list_collect (tail, cons(item, nil)); - } else if (consp(item) && bindable(first(item))) { - list_collect (tail, cons(first(item), second(item))); - } else { - sem_error(spec_linenum, lit("not a variable spec: ~a"), item, nao); - } - } - vars = fixed_vars; - } + vars = vars_to_bindings(spec_linenum, vars, bindings); if ((times && ctimes == 0) || (lines && clines == 0)) { if ((spec = rest(spec)) == nil) @@ -1701,10 +1701,76 @@ other supported keywords are :times, :mintimes, :maxtimes and lines. The shorthand :times N means the same thing as :mintimes N :maxtimes N. These specify how many matches should be collected. If there are fewer than mintimes matches, the collect fails. If maxtimes matches are collected, -collect stops collecting immediately. +collect stops collecting immediately. Example: + + @(collect :times 3) + @a @b + @(end) + +This will collect a match for "@a @b" exactly three times. If three +matches are not found, it will fail. + +The :lines parameter specifies the upper bound on how many lines +should be scanned by collect, measuring from the starting position. +The extent of the collect body is not counted. Example: + + @(collect :lines 2) + foo: @a + bar: @b + baz: @c + @(end) + +The above collect will look for a match only twice: at the current position, +and one line down. + +There is one more keyword, :vars, discussed in the following section. + +.SS Specifying Variables in Collect + +Normally, any variable for which a new binding occurs in a collect is +collected. A collect clause may be sloppy: it can neglect to collect some +variables on some iterations, or bind some variables which behave like +local temporaries, but end up collated into lists. + +The :vars keyword allows the query writer to tame the collect body. + +The argument to :vars is a list of variable specs. A variable spec is either a +symbol, or a (<symbol> <expression>) pair, where the expression specifies a +default value. + +When a :vars list is specified, it means that only the given variables can +emerge from the successful collect. Any newly introduced bindings for other +variables do not propagate. + +Furthermore, for any variable which is not specified with a default value, the +collect body, whenever it matches successfully, must bind that variable. If it +neglects to bind the variable, an exception of type query_error is thrown. + +For any variable which has a default value, if the collect body neglects to +bind that variable, the behavior is as if the collect did bind that variable to that default value. + +The default values are expressions, and so can be quasiliterals. + +Example: + + @(collect :vars (a b (c "foo"))) + @a @c + @(end) + +Here, if the body "@a @c" matches, an error will be thrown because one of the +mandatory variables is b, and the body neglects to produce a binding for b. + +Example: + + @(collect :vars (a (c "foo"))) + @a @b + @(end) + +Here, if "@a @b" matches, only a will be collected, but not b, because b is not +in the variable list. Furthermore, because there is no binding for c in the +body, a binding is created with the value "foo", exactly as if c matched +such a piece of text. -Finally, the :lines parameter specifies the upper bound on how many lines -should be scanned by the collect. .SS The Coll Directive |