diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2011-11-16 09:03:47 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2011-11-16 09:03:47 -0800 |
commit | b86a599bbfcd591f64f31ddfc9ab1a659d39a7c0 (patch) | |
tree | 1c54b7ee8e74507a1df9a1ce84798cc8c8979e71 | |
parent | 741212c072063115a72ed27619ce136bebc3d696 (diff) | |
download | txr-b86a599bbfcd591f64f31ddfc9ab1a659d39a7c0.tar.gz txr-b86a599bbfcd591f64f31ddfc9ab1a659d39a7c0.tar.bz2 txr-b86a599bbfcd591f64f31ddfc9ab1a659d39a7c0.zip |
Variable matches can span over function calls.
Function calls following variables have searching semantics.
* match.c (ml_specline_pos, search_form): New static functions.
(h_var): Handle functions and regexes in a common way.
* parser.y: Adjusted precedence of IDENT and ( so that
@var@(func) are parsed into a single var element.
* txr.1: Documented.
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | match.c | 78 | ||||
-rw-r--r-- | parser.y | 4 | ||||
-rw-r--r-- | txr.1 | 26 |
4 files changed, 88 insertions, 33 deletions
@@ -1,3 +1,16 @@ +2011-11-16 Kaz Kylheku <kaz@kylheku.com> + + Variable matches can span over function calls. + Function calls following variables have searching semantics. + + * match.c (ml_specline_pos, search_form): New static functions. + (h_var): Handle functions and regexes in a common way. + + * parser.y: Adjusted precedence of IDENT and ( so that + @var@(func) are parsed into a single var element. + + * txr.1: Documented. + 2011-11-15 Kaz Kylheku <kaz@kylheku.com> * txr.vim: Update for new character constant syntax. @@ -359,6 +359,15 @@ static match_line_ctx ml_specline(match_line_ctx c, val specline) return nc; } +static match_line_ctx ml_specline_pos(match_line_ctx c, val specline, val pos) +{ + match_line_ctx nc = c; + nc.specline = specline; + nc.pos = pos; + return nc; +} + + static match_line_ctx ml_bindings_specline(match_line_ctx c, val bindings, val specline) { @@ -391,21 +400,46 @@ typedef val (*h_match_func)(match_line_ctx c, match_line_ctx *cout); val elem_var = first(specline); \ val directive_var = first(elem_var) +static val search_form(match_line_ctx *c, val needle_form, val from_end) +{ + if (regexp(first(needle_form))) { + return search_regex(c->dataline, first(needle_form), c->pos, from_end); + } else { + val spec = cons(needle_form, nil); + val pos = from_end ? length_str(c->dataline) : c->pos; + val step = from_end ? num(-1) : num(1); + + rlcp(spec, needle_form); + + for (; (from_end && ge(pos, c->pos)) || length_str_gt(c->dataline, pos); + pos = plus(pos, step)) + { + cons_bind (new_bindings, new_pos, + match_line(ml_specline_pos(*c, spec, pos))); + if (new_pos) { + c->bindings = new_bindings; + return cons(pos, new_pos); + } + } + + return nil; + } +} + static val h_var(match_line_ctx c, match_line_ctx *cout) { val elem = first(c.specline); val sym = second(elem); val pat = third(elem); - val modifier = fourth(elem); + val modifiers = fourth(elem); + val modifier = first(modifiers); val pair = assoc(c.bindings, sym); /* var exists already? */ - if (gt(length(modifier), one)) { + if (gt(length(modifiers), one)) { sem_error(elem, lit("multiple modifiers on variable ~s"), sym, nao); } - modifier = car(modifier); - if (pair) { /* If the variable already has a binding, we replace it with its value, and treat it as a string match. @@ -441,15 +475,18 @@ static val h_var(match_line_ctx c, match_line_ctx *cout) rl(car(c.specline), loc); } goto repeat; - } else if (consp(modifier)) { /* regex variable */ - val past = match_regex(c.dataline, car(modifier), c.pos); - if (nullp(past)) { - LOG_MISMATCH("var positive regex"); + } else if (consp(modifier)) { /* var bound over text matched by form */ + cons_bind (new_bindings, new_pos, + match_line(ml_specline(c, modifiers))); + + if (!new_pos) { + LOG_MISMATCH("var spanning form"); return nil; } - LOG_MATCH("var positive regex", past); - c.bindings = acons(c.bindings, sym, sub_str(c.dataline, c.pos, past)); - c.pos = past; + + LOG_MATCH("var spanning form", new_pos); + c.bindings = acons(new_bindings, sym, sub_str(c.dataline, c.pos, new_pos)); + c.pos = new_pos; /* This may have another variable attached */ if (pat) { val loc = source_loc(c.specline); @@ -487,34 +524,33 @@ static val h_var(match_line_ctx c, match_line_ctx *cout) LOG_MATCH("var delimiting string", find); c.bindings = acons(c.bindings, sym, sub_str(c.dataline, c.pos, find)); c.pos = plus(find, length_str(pat)); - } else if (consp(pat) && regexp(first(pat))) { - val find = search_regex(c.dataline, first(pat), c.pos, modifier); + } else if (consp(pat) && first(pat) != var_s) { + val find = search_form(&c, pat, modifier); val fpos = car(find); val flen = cdr(find); if (!find) { - LOG_MISMATCH("var delimiting regex"); + LOG_MISMATCH("var delimiting form"); return nil; } - LOG_MATCH("var delimiting regex", fpos); + LOG_MATCH("var delimiting form", fpos); c.bindings = acons(c.bindings, sym, sub_str(c.dataline, c.pos, fpos)); c.pos = plus(fpos, flen); - } else if (consp(pat) && first(pat) == var_s) { + } else if (consp(pat)) { /* Unbound var followed by var: the following one must either be bound, or must specify a regex. */ val second_sym = second(pat); val next_pat = third(pat); - val next_modifier = fourth(pat); + val next_modifiers = fourth(pat); + val next_modifier = first(fourth(pat)); val pair = assoc(c.bindings, second_sym); /* var exists already? */ - if (gt(length(next_modifier), one)) { + if (gt(length(next_modifiers), one)) { sem_error(elem, lit("multiple modifiers on variable ~s"), second_sym, nao); } - next_modifier = car(next_modifier); - if (!pair && consp(next_modifier)) { - val find = search_regex(c.dataline, first(next_modifier), c.pos, modifier); + val find = search_form(&c, next_modifier, modifier); val fpos = car(find); val flen = cdr(find); @@ -92,8 +92,8 @@ static val parsed_spec; %nonassoc LOW /* used for precedence assertion */ %nonassoc ALL SOME NONE MAYBE CASES CHOOSE AND OR END COLLECT UNTIL COLL %nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE -%nonassoc '[' ']' '(' ')' -%right IDENT SPACE TEXT NUMBER '{' '}' +%nonassoc '[' ']' +%right IDENT SPACE TEXT NUMBER '{' '}' '(' ')' %left '-' %left '|' '/' %left '&' @@ -555,6 +555,7 @@ ways: @*NAME @*{NAME} @{NAME /RE/} + @{NAME (FUN [ ARGS ... ])} @{NAME NUMBER} The forms with an * indicate a long match, see Longest Match below. @@ -580,8 +581,8 @@ everything from the current position to the end of the line. The extent of the matched text (the text bound to the variable) is determined by looking at what follows the variable. A variable may be followed by a piece -of text, a regular expression directive, another variable, or nothing (i.e. -occurs at the end of a line). +of text, a regular expression directive, a function call, a directive, another +variable, or nothing (i.e. occurs at the end of a line). If the variable is followed by nothing, the match extends from the current position in the data, to the end of the line. @@ -611,11 +612,10 @@ is " e f". This is found within the data "c d e f" at position 3 (counting from 0). So positions 0-2 ("c d") constitute the matching text which is bound to FOO. -If the variable is followed by a regular expression directive, -the extent is determined by finding the closest match for the -regular expression. (See Regular Expressions section below). - -To match successfully, +If the variable is followed by a regular expression directive or a function +call, the extent is determined by finding the closest match for the regular +expression or function call. (See Regular Expressions section below, and +FUNCTIONS.) .SS Special Symbols @@ -702,16 +702,22 @@ covers only the "b ", stopping at the first "cd" occurrence. The syntax variants @{NAME /RE/} + @{NAME (FUN [ARGS ...])} @{NAME NUMBER} specify a variable binding that is driven by a positive match derived -from a regular expression or character count, rather than from trailing -material (which may be regarded as a "negative" match, since the variable is -bound to material which is +from a regular expression, function or character count, rather than from +trailing material (which may be regarded as a "negative" match, since the +variable is bound to material which is .B skipped in order to match the trailing material). In the /RE/ form, the match extends over all characters from the current position which match the regular expression RE. (see Regular Expressions section below). +In the (FUN [ARGS ...]) form, the match extends over characters which +are matched by the call to the function, if the call +succeeds. Thus @{x (y z w)} is just like @(y z w), except that the region of +text skipped over by @(y z w) is also bound to the variable x. +See FUNCTIONS below. In the NUMBER form, the match processes a field of text which consists of the specified number of characters, which must be nonnegative |