summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--match.c234
-rw-r--r--tests/010/span-var.txr13
-rw-r--r--txr.153
3 files changed, 278 insertions, 22 deletions
diff --git a/match.c b/match.c
index 1f10ab78..dd908cf1 100644
--- a/match.c
+++ b/match.c
@@ -607,7 +607,7 @@ static val search_match_binding_var(match_line_ctx *c, val sym,
return nil;
}
-static val h_var(match_line_ctx *c)
+static val h_var_compat(match_line_ctx *c)
{
val elem = pop(&c->specline);
val sym = second(elem);
@@ -821,6 +821,229 @@ static val h_var(match_line_ctx *c)
return next_spec_k;
}
+static val h_var(match_line_ctx *c)
+{ /* Matches the variable element at the head of c->specline against
+ c->dataline starting at c->pos. Returns nil on a mismatch, otherwise
+ repeat_spec_k or next_spec_k. */
+ val elem = pop(&c->specline);
+ val sym = second(elem);
+ val next = first(c->specline);
+ val modifiers = third(elem);
+ val modifier = first(modifiers);
+ val pair = if2(sym, tx_lookup_var(sym, c->bindings));
+
+ if (sym == t)
+ sem_error(elem, lit("t is not a bindable symbol"), nao);
+
+ if (gt(length_list(modifiers), one)) {
+ sem_error(elem, lit("multiple modifiers on variable ~s"),
+ sym, nao);
+ }
+
+ if (bindable(modifier)) {
+ val mpair = tx_lookup_var_ubc(modifier, c->bindings, elem);
+ modifier = cdr(mpair);
+ }
+
+ if (pair && !consp(modifier)) {
+ /* Except in two cases, if the variable already has a binding, we replace
+ it with its value, and treat it as a string match. The spec looks
+ like ((var <sym>) <next> ...) and it must be transformed into
+ (<sym-substituted> <next> ...).
+ The special cases are:
+ - if the variable is a fixed-size field match, then it has to match
+ that much text;
+ - if the variable is a function-spanning match, the function has
+ to be called every time; hence the !consp(modifier) check above,
+ and the use of dest_bind in the function spanning case. */
+ if (integerp(modifier)) {
+ val past = plus(c->pos, modifier);
+
+ if (length_str_lt(c->dataline, past) || lt(past, c->pos))
+ {
+ LOG_MISMATCH("fixed field size");
+ return nil;
+ }
+
+ if (!tree_find(trim_str(sub_str(c->dataline, c->pos, past)),
+ cdr(pair), equal_f))
+ {
+ LOG_MISMATCH("fixed field contents");
+ return nil;
+ }
+
+ LOG_MATCH("fixed field", past);
+ c->pos = past;
+ c->specline = rest(c->specline);
+ } else {
+ c->specline = rlcp(cons(cdr(pair), c->specline), c->specline);
+ }
+ return repeat_spec_k;
+ } else if (consp(modifier) || regexp(modifier)) { /* var bound over text matched by form */
+ cons_bind (new_bindings, new_pos,
+ match_line(ml_specline(*c, cons(modifier, nil))));
+
+ if (!new_pos) {
+ LOG_MISMATCH("var spanning form");
+ return nil;
+ }
+
+ new_pos = minus(new_pos, c->base);
+
+
+ LOG_MATCH("var spanning form", new_pos);
+
+ c->bindings = dest_bind(c->specline, new_bindings, sym,
+ sub_str(c->dataline, c->pos, new_pos), equal_f);
+ if (c->bindings == t) { /* a t result from dest_bind is treated as a mismatch */
+ LOG_MISMATCH("function span mismatch");
+ return nil;
+ }
+
+ c->pos = new_pos;
+ /* This may have another variable attached */
+ if (next) {
+ c->specline = rlcp(cons(next, rest(c->specline)), c->specline);
+ return repeat_spec_k;
+ }
+ } else if (integerp(modifier)) { /* fixed field */
+ val past = plus(c->pos, modifier);
+ if (length_str_lt(c->dataline, past) || lt(past, c->pos))
+ {
+ LOG_MISMATCH("count based var");
+ return nil;
+ }
+ LOG_MATCH("count based var", past);
+ if (sym)
+ c->bindings = acons(sym, trim_str(sub_str(c->dataline, c->pos, past)), c->bindings);
+ c->pos = past;
+ /* This may have another variable attached */
+ if (next) {
+ c->specline = rlcp(cons(next, rest(c->specline)), c->specline);
+ return repeat_spec_k;
+ }
+ } else if (modifier && modifier != t) {
+ sem_error(elem, lit("invalid modifier ~s on variable ~s"),
+ modifier, sym, nao);
+ } else if (next == nil) { /* no modifier, no elem -> to end of line */
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, nil), c->bindings);
+ c->pos = length_str(c->dataline);
+ } else if (type(next) == STR) {
+ val find = search_str(c->dataline, next, c->pos, modifier);
+ if (!find) {
+ LOG_MISMATCH("var delimiting string");
+ return nil;
+ }
+ LOG_MATCH("var delimiting string", find);
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, find), c->bindings);
+ c->pos = plus(find, length_str(next));
+ } else if (regexp(next)) {
+ val find = search_regex(c->dataline, next, c->pos, modifier);
+ val fpos = car(find);
+ val flen = cdr(find);
+ if (!find) {
+ LOG_MISMATCH("var delimiting regex");
+ return nil;
+ }
+ LOG_MATCH("var delimiting regex", fpos);
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos), c->bindings);
+ c->pos = if3(flen == t, t, plus(fpos, flen));
+ } else if (consp(next)) {
+ val op = first(next);
+
+ if (op == var_s) {
+ /* Unbound var followed by var: the following one must either
+ be bound, or must specify a regex. */
+ val second_sym = second(next);
+ val next_modifiers = third(next);
+ val next_modifier = first(next_modifiers);
+ val pair = if2(second_sym, tx_lookup_var(second_sym, c->bindings));
+
+ if (gt(length_list(next_modifiers), one)) {
+ sem_error(elem, lit("multiple modifiers on variable ~s"),
+ second_sym, nao);
+ }
+
+ if (!pair && regexp(next_modifier)) {
+ val find = search_regex(c->dataline, next_modifier, c->pos, modifier);
+ val fpos = car(find);
+ val flen = cdr(find);
+
+ if (!find) {
+ LOG_MISMATCH("double var regex");
+ return nil;
+ }
+
+ /* Text from here to start of regex match goes to this
+ variable. */
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos),
+ c->bindings);
+ /* Text from start of regex match to end goes to the
+ second variable */
+ if (second_sym)
+ c->bindings = acons(second_sym,
+ sub_str(c->dataline, fpos, plus(fpos, flen)),
+ c->bindings);
+ LOG_MATCH("double var regex (first var)", fpos);
+ c->pos = fpos;
+ LOG_MATCH("double var regex (second var)", plus(fpos, flen));
+ c->pos = plus(fpos, flen);
+ return next_spec_k;
+ } else if (!pair) {
+ sem_error(elem, lit("consecutive unbound variables"), nao);
+ } else {
+ /* Re-generate a new spec in which the next variable
+ is replaced by its value, and repeat. */
+ val r = rest(c->specline);
+ c->specline = rlcp(cons(elem, rlcp(cons(cdr(pair), r), r)), r);
+ return repeat_spec_k;
+ }
+ } else if (op == text_s) {
+ val text_only_spec = rlcp(cons(next, nil), next);
+ val find = search_match(c, modifier, text_only_spec);
+ val fpos = car(find);
+ if (!find) {
+ LOG_MISMATCH("var delimiting text compound");
+ return nil;
+ }
+ LOG_MATCH("var delimiting text compound", fpos);
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos), c->bindings);
+ c->pos = fpos;
+ return repeat_spec_k;
+ } else if (consp(op) || stringp(op)) {
+ cons_bind (find, len, search_str_tree(c->dataline, next, c->pos, modifier));
+ if (!find) {
+ LOG_MISMATCH("string");
+ return nil;
+ }
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, find), c->bindings);
+ c->pos = plus(find, len);
+ } else {
+ val find = if3(opt_compat && opt_compat <= 172, /* compat 172 or lower: plain search_match is used */
+ search_match(c, modifier, c->specline),
+ search_match_binding_var(c, sym, modifier, c->specline));
+ val fpos = car(find);
+ if (!find) {
+ LOG_MISMATCH("var delimiting spec");
+ return nil;
+ }
+ LOG_MATCH("var delimiting spec", fpos);
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos), c->bindings);
+ c->pos = fpos;
+ return repeat_spec_k;
+ }
+ } else {
+ sem_error(elem, lit("variable followed by invalid element: ~s"), next, nao);
+ }
+
+ return next_spec_k;
+}
+
static val h_skip(match_line_ctx *c)
{
val self = lit("skip");
@@ -2339,7 +2562,10 @@ static val v_var(match_files_ctx *c)
if (ret == next_spec_k) {
c->data = fc.data;
- c->bindings = acons(varsym, ldiff(data, fc.data), fc.bindings);
+ c->bindings = dest_bind(specline, fc.bindings, varsym,
+ ldiff(data, fc.data), equal_f);
+ if (c->bindings == t)
+ ret = nil;
}
return ret;
@@ -5098,6 +5324,8 @@ void match_init(void)
void match_compat_fixup(int compat_ver)
{
- if (compat_ver <= 272)
+ if (compat_ver <= 272) {
sethash(v_directive_table, var_s, cptr(coerce(mem_t *, v_var_compat)));
+ sethash(h_directive_table, var_s, cptr(coerce(mem_t *, h_var_compat)));
+ }
}
diff --git a/tests/010/span-var.txr b/tests/010/span-var.txr
index 5f5faa6c..b7142b79 100644
--- a/tests/010/span-var.txr
+++ b/tests/010/span-var.txr
@@ -13,3 +13,16 @@
@{w (fun2 x "a")}@y
@(require (equal w "a"))
@(require (equal y "b"))
+@(next :list '("a" "a" "a" "b" "c"))
+@(bind d ("d"))
+@(cases)
+@ {d (fun "x" "a")}
+@(or)
+@ (require "get here")
+@(end)
+@(next :string "ab")
+@(cases)
+@ {d (fun2 "x" "a")}
+@(or)
+@ (require "get here")
+@(end)
diff --git a/txr.1 b/txr.1
index 794bff7f..76d69b38 100644
--- a/txr.1
+++ b/txr.1
@@ -2481,18 +2481,27 @@ from a regular expression, function or character count, rather than from
trailing material (which is regarded as a "negative" match, since the
variable is bound to material which is
.B skipped
-in order to match the trailing material). In the
+in order to match the trailing material).
+
+The positive match syntax is processed without considering any following
+syntax, and therefore may be followed by an unbound variable.
+
+In the
.mono
-.meti <> / regex /
+.meti >> @{ bident <> / regex /}
.onom
form, the match
extends over all characters from the current position which match
the regular expression
.metn regex .
(See the Regular Expressions section below.)
+If the variable already has a value, then the regular expression is
+ignored; the variable's value is used to match and extract text, which
+may be different from what the regular expression would match.
+
In the
.mono
-.meti >> ( fun >> [ args ...])
+.meti >> @{ bident >> ( fun >> [ args ...])}
.onom
form, the match extends over lines or characters which
are matched by the call to the function, if the call
@@ -2522,9 +2531,13 @@ syntax. Then the variable indicated by
.meta bident
is bound to the list of lines matched by the function call.
Pattern functions are described in the Functions section below.
+The function is invoked even if the variable already has a value.
+The text matched by the function must match the variable.
In the
-.meta number
+.mono
+.meti >> @{ bident << number }
+.onom
form, the match processes a field of text which
consists of the specified number of characters, which must be a nonnegative
number. If the data line doesn't have that many characters starting at the
@@ -2532,10 +2545,9 @@ current position, the match fails. A match for zero characters produces an
empty string. The text which is actually bound to the variable
is all text within the specified field, but excluding leading and
trailing whitespace. If the field contains only spaces, then an empty
-string is extracted.
-
-This syntax is processed without considering any following syntax.
-A positive match may be directly followed by an unbound variable.
+string is extracted. This fixed-field extraction takes place whether or not the
+variable already has a binding. If it already has a binding, then it must match
+the extracted text.
The
.mono
@@ -86669,20 +86681,23 @@ is given an argument which is equal or lower. For instance
.code "-C 103"
selects the behaviors described below for version 105, but not those for 102.
.IP 272
-\*(TX 273 introduce a new feature into the pattern language: a pattern variable
-of the form
+The compatibility version value 272 restores old behaviors in the pattern
+language with regard to the regex and function cases of positive match variables.
+In \*(TX 273, several semantic improvements took place in this area, which
+can break existing code. Pattern variables of the form
.mono
.meti >> @{ bident >> ( fun >> [ args ...])}
.onom
-matches multiple lines, if it appears as the only element of a query line,
-and if
-.meta fun
-has a binding as a vertical pattern function. Prior to 273, this situation was
-not given any special treatment; the vertical function
-.meta fun
-was called such that only one line of input is visible, and if it produced
-a match, the variable was bound to that line. A compatibility value of 272
-or lower restores this behavior.
+can now invoke a vertical function against the full input, and the variable
+is consequently bound to multiple lines. Previously this syntax invoked
+only horizontal functions or else vertical functions in a single-line
+horizontal mode. That behavior is restored by 272 or lower compatibility.
+Secondly, the function is now always invoked, whether or not the variable
+has a binding. The variable is then matched against the text spanned
+by the function to either give it a new binding or match the existing binding.
+The old behavior, restored by 272 or lower compatibility, is that the
+function is not invoked when the variable has a binding; the
+variable's value is instead used to match text.
.IP 265
Until \*(TX 265, the
.code with-resources