From f9bcbd884bb1cd60144f13df3de00516b731827c Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 27 Dec 2021 07:36:01 -0800 Subject: txr: do not ignore regex in positive match. * match.c (h_var): Refactor the logic here a bit. Without regard for whether the variable has a value, we dispatch the regex, fixed field and function cases. These handle the binding against the existing value. Then before all other cases, we check for the existing value and convert that to a literal text match. The effect of this is that now the regular expression is processed even if the variable has a value. * tests/010/span-var.txr: Last two test cases hardened a bit so they cannot fall through to a successful exit, if they invoke the wrong case. This is not related to this change. New test cases for regex span. * txr.1: Updated documentation and compatibility notes. --- match.c | 56 +++++++++++++++----------------------------------- tests/010/span-var.txr | 11 ++++++++++ txr.1 | 17 ++++++++++----- 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/match.c b/match.c index dd908cf1..4dd19afc 100644 --- a/match.c +++ b/match.c @@ -843,41 +843,8 @@ static val h_var(match_line_ctx *c) modifier = cdr(mpair); } - if (pair && !consp(modifier)) { - /* Except in two cass, if the variable already has a binding, we replace - it with its value, and treat it as a string match. And if the spec looks - like ((var ) ...) and it must be transformed into - ( ...). - The special cases are: - - if the variable is a fix sized field match, then it has to match - that much text; - - if the variable is a function-spanning match, the function has - to be called every time; hence he !consp(modifier) check above, - and the use of dest_bind in the function spanning case. 
*/ - if (integerp(modifier)) { - val past = plus(c->pos, modifier); - - if (length_str_lt(c->dataline, past) || lt(past, c->pos)) - { - LOG_MISMATCH("fixed field size"); - return nil; - } - - if (!tree_find(trim_str(sub_str(c->dataline, c->pos, past)), - cdr(pair), equal_f)) - { - LOG_MISMATCH("fixed field contents"); - return nil; - } - - LOG_MATCH("fixed field", past); - c->pos = past; - c->specline = rest(c->specline); - } else { - c->specline = rlcp(cons(cdr(pair), c->specline), c->specline); - } - return repeat_spec_k; - } else if (consp(modifier) || regexp(modifier)) { /* var bound over text matched by form */ + if (consp(modifier) || regexp(modifier)) { + /* var bound over text matched by regex or function */ cons_bind (new_bindings, new_pos, match_line(ml_specline(*c, cons(modifier, nil)))); @@ -888,13 +855,12 @@ static val h_var(match_line_ctx *c) new_pos = minus(new_pos, c->base); - LOG_MATCH("var spanning form", new_pos); c->bindings = dest_bind(c->specline, new_bindings, sym, sub_str(c->dataline, c->pos, new_pos), equal_f); if (c->bindings == t) { - LOG_MISMATCH("function span mismatch"); + LOG_MISMATCH("span mismatch"); return nil; } @@ -912,8 +878,15 @@ static val h_var(match_line_ctx *c) return nil; } LOG_MATCH("count based var", past); - if (sym) - c->bindings = acons(sym, trim_str(sub_str(c->dataline, c->pos, past)), c->bindings); + + c->bindings = dest_bind(c->specline, c->bindings, sym, + trim_str(sub_str(c->dataline, c->pos, past)), + equal_f); + if (c->bindings == t) { + LOG_MISMATCH("count based mismatch"); + return nil; + } + c->pos = past; /* This may have another variable attached */ if (next) { @@ -923,6 +896,11 @@ static val h_var(match_line_ctx *c) } else if (modifier && modifier != t) { sem_error(elem, lit("invalid modifier ~s on variable ~s"), modifier, sym, nao); + } else if ((pair = if2(sym, tx_lookup_var(sym, c->bindings)))) { + /* Variable is not of the above types and has an existing binding, + * Just substitute its value into the 
spec stream and match. */ + c->specline = rlcp(cons(cdr(pair), c->specline), c->specline); + return repeat_spec_k; } else if (next == nil) { /* no modifier, no elem -> to end of line */ if (sym) c->bindings = acons(sym, sub_str(c->dataline, c->pos, nil), c->bindings); diff --git a/tests/010/span-var.txr b/tests/010/span-var.txr index b7142b79..036acc6a 100644 --- a/tests/010/span-var.txr +++ b/tests/010/span-var.txr @@ -17,12 +17,23 @@ @(bind d ("d")) @(cases) @ {d (fun "x" "a")} +@ {require (not "get here")} @(or) @ (require "get here") @(end) @(next :string "ab") @(cases) @ {d (fun2 "x" "a")} +@ {require (not "get here")} @(or) @ (require "get here") @(end) +@(bind n "123") +@(next :string "123456") +@(cases) +@ {n /\d+/} +@ {require (not "get here")} +@(or) +@ {m /\d+/} +@ (require (equal m "123456")) +@(end) diff --git a/txr.1 b/txr.1 index 76d69b38..8c40ac13 100644 --- a/txr.1 +++ b/txr.1 @@ -2495,9 +2495,8 @@ extends over all characters from the current position which match the regular expression .metn regex . (See the Regular Expressions section below.) -If the variable already has a value, then the regular expression is -ignored; the variable's value is used to match and extract text, which -may be different from what the regular expression would match. +If the variable already has a value, the text extracted by the regular +expression must exactly match the variable. In the .mono @@ -2547,7 +2546,7 @@ is all text within the specified field, but excluding leading and trailing whitespace. If the field contains only spaces, then an empty string is extracted. This fixed-field extraction takes place whether or not the variable already has a binding. If it already has a binding, then it must match -the extracted text. +the extracted, trimmed text. The .mono @@ -86697,7 +86696,15 @@ has a binding. The variable is then matched against the text spanned by the function to either give it a new binding or match the existing binding. 
The old behavior, restored by 272 or lower compatibility, is that the function is not invoked when the variable has a binding; the -variable's value is instead used to match text. +variable's value is instead used to match text. Lastly, a similar change +took place in positive match regular expression variables of the +.mono +.meti >> @{ bident <> / regex /} +.onom +form. +Prior to 273, when a variable of this form has an existing binding, the regex +is ignored, and the situation is treated as a match for the variable content. +This old behavior is also restored by 272 or lower compatibility. .IP 265 Until \*(TX 265, the .code with-resources -- cgit v1.2.3