summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2011-11-16 09:03:47 -0800
committerKaz Kylheku <kaz@kylheku.com>2011-11-16 09:03:47 -0800
commitb86a599bbfcd591f64f31ddfc9ab1a659d39a7c0 (patch)
tree1c54b7ee8e74507a1df9a1ce84798cc8c8979e71
parent741212c072063115a72ed27619ce136bebc3d696 (diff)
downloadtxr-b86a599bbfcd591f64f31ddfc9ab1a659d39a7c0.tar.gz
txr-b86a599bbfcd591f64f31ddfc9ab1a659d39a7c0.tar.bz2
txr-b86a599bbfcd591f64f31ddfc9ab1a659d39a7c0.zip
Variable matches can span over function calls.
Function calls following variables have searching semantics. * match.c (ml_specline_pos, search_form): New static functions. (h_var): Handle functions and regexes in a common way. * parser.y: Adjusted precedence of IDENT and ( so that @var@(func) are parsed into a single var element. * txr.1: Documented.
-rw-r--r--ChangeLog13
-rw-r--r--match.c78
-rw-r--r--parser.y4
-rw-r--r--txr.126
4 files changed, 88 insertions, 33 deletions
diff --git a/ChangeLog b/ChangeLog
index 819027e6..8440671d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2011-11-16 Kaz Kylheku <kaz@kylheku.com>
+
+ Variable matches can span over function calls.
+ Function calls following variables have searching semantics.
+
+ * match.c (ml_specline_pos, search_form): New static functions.
+ (h_var): Handle functions and regexes in a common way.
+
+ * parser.y: Adjusted precedence of IDENT and ( so that
+ @var@(func) are parsed into a single var element.
+
+ * txr.1: Documented.
+
2011-11-15 Kaz Kylheku <kaz@kylheku.com>
* txr.vim: Update for new character constant syntax.
diff --git a/match.c b/match.c
index 6650bac1..3b501e55 100644
--- a/match.c
+++ b/match.c
@@ -359,6 +359,15 @@ static match_line_ctx ml_specline(match_line_ctx c, val specline)
return nc;
}
+static match_line_ctx ml_specline_pos(match_line_ctx c, val specline, val pos)
+{
+ match_line_ctx nc = c;
+ nc.specline = specline;
+ nc.pos = pos;
+ return nc;
+}
+
+
static match_line_ctx ml_bindings_specline(match_line_ctx c, val bindings,
val specline)
{
@@ -391,21 +400,46 @@ typedef val (*h_match_func)(match_line_ctx c, match_line_ctx *cout);
val elem_var = first(specline); \
val directive_var = first(elem_var)
+static val search_form(match_line_ctx *c, val needle_form, val from_end)
+{
+ if (regexp(first(needle_form))) {
+ return search_regex(c->dataline, first(needle_form), c->pos, from_end);
+ } else {
+ val spec = cons(needle_form, nil);
+ val pos = from_end ? length_str(c->dataline) : c->pos;
+ val step = from_end ? num(-1) : num(1);
+
+ rlcp(spec, needle_form);
+
+ for (; (from_end && ge(pos, c->pos)) || length_str_gt(c->dataline, pos);
+ pos = plus(pos, step))
+ {
+ cons_bind (new_bindings, new_pos,
+ match_line(ml_specline_pos(*c, spec, pos)));
+ if (new_pos) {
+ c->bindings = new_bindings;
+ return cons(pos, new_pos);
+ }
+ }
+
+ return nil;
+ }
+}
+
static val h_var(match_line_ctx c, match_line_ctx *cout)
{
val elem = first(c.specline);
val sym = second(elem);
val pat = third(elem);
- val modifier = fourth(elem);
+ val modifiers = fourth(elem);
+ val modifier = first(modifiers);
val pair = assoc(c.bindings, sym); /* var exists already? */
- if (gt(length(modifier), one)) {
+ if (gt(length(modifiers), one)) {
sem_error(elem, lit("multiple modifiers on variable ~s"),
sym, nao);
}
- modifier = car(modifier);
-
if (pair) {
/* If the variable already has a binding, we replace
it with its value, and treat it as a string match.
@@ -441,15 +475,18 @@ static val h_var(match_line_ctx c, match_line_ctx *cout)
rl(car(c.specline), loc);
}
goto repeat;
- } else if (consp(modifier)) { /* regex variable */
- val past = match_regex(c.dataline, car(modifier), c.pos);
- if (nullp(past)) {
- LOG_MISMATCH("var positive regex");
+ } else if (consp(modifier)) { /* var bound over text matched by form */
+ cons_bind (new_bindings, new_pos,
+ match_line(ml_specline(c, modifiers)));
+
+ if (!new_pos) {
+ LOG_MISMATCH("var spanning form");
return nil;
}
- LOG_MATCH("var positive regex", past);
- c.bindings = acons(c.bindings, sym, sub_str(c.dataline, c.pos, past));
- c.pos = past;
+
+ LOG_MATCH("var spanning form", new_pos);
+ c.bindings = acons(new_bindings, sym, sub_str(c.dataline, c.pos, new_pos));
+ c.pos = new_pos;
/* This may have another variable attached */
if (pat) {
val loc = source_loc(c.specline);
@@ -487,34 +524,33 @@ static val h_var(match_line_ctx c, match_line_ctx *cout)
LOG_MATCH("var delimiting string", find);
c.bindings = acons(c.bindings, sym, sub_str(c.dataline, c.pos, find));
c.pos = plus(find, length_str(pat));
- } else if (consp(pat) && regexp(first(pat))) {
- val find = search_regex(c.dataline, first(pat), c.pos, modifier);
+ } else if (consp(pat) && first(pat) != var_s) {
+ val find = search_form(&c, pat, modifier);
val fpos = car(find);
val flen = cdr(find);
if (!find) {
- LOG_MISMATCH("var delimiting regex");
+ LOG_MISMATCH("var delimiting form");
return nil;
}
- LOG_MATCH("var delimiting regex", fpos);
+ LOG_MATCH("var delimiting form", fpos);
c.bindings = acons(c.bindings, sym, sub_str(c.dataline, c.pos, fpos));
c.pos = plus(fpos, flen);
- } else if (consp(pat) && first(pat) == var_s) {
+ } else if (consp(pat)) {
/* Unbound var followed by var: the following one must either
be bound, or must specify a regex. */
val second_sym = second(pat);
val next_pat = third(pat);
- val next_modifier = fourth(pat);
+ val next_modifiers = fourth(pat);
+ val next_modifier = first(fourth(pat));
val pair = assoc(c.bindings, second_sym); /* var exists already? */
- if (gt(length(next_modifier), one)) {
+ if (gt(length(next_modifiers), one)) {
sem_error(elem, lit("multiple modifiers on variable ~s"),
second_sym, nao);
}
- next_modifier = car(next_modifier);
-
if (!pair && consp(next_modifier)) {
- val find = search_regex(c.dataline, first(next_modifier), c.pos, modifier);
+ val find = search_form(&c, next_modifier, modifier);
val fpos = car(find);
val flen = cdr(find);
diff --git a/parser.y b/parser.y
index d2ebffd7..846763d0 100644
--- a/parser.y
+++ b/parser.y
@@ -92,8 +92,8 @@ static val parsed_spec;
%nonassoc LOW /* used for precedence assertion */
%nonassoc ALL SOME NONE MAYBE CASES CHOOSE AND OR END COLLECT UNTIL COLL
%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE
-%nonassoc '[' ']' '(' ')'
-%right IDENT SPACE TEXT NUMBER '{' '}'
+%nonassoc '[' ']'
+%right IDENT SPACE TEXT NUMBER '{' '}' '(' ')'
%left '-'
%left '|' '/'
%left '&'
diff --git a/txr.1 b/txr.1
index 32de2aa5..8d5c0f5a 100644
--- a/txr.1
+++ b/txr.1
@@ -555,6 +555,7 @@ ways:
@*NAME
@*{NAME}
@{NAME /RE/}
+ @{NAME (FUN [ ARGS ... ])}
@{NAME NUMBER}
The forms with an * indicate a long match, see Longest Match below.
@@ -580,8 +581,8 @@ everything from the current position to the end of the line.
The extent of the matched text (the text bound to the variable) is determined
by looking at what follows the variable. A variable may be followed by a piece
-of text, a regular expression directive, another variable, or nothing (i.e.
-occurs at the end of a line).
+of text, a regular expression directive, a function call, a directive, another
+variable, or nothing (i.e. occurs at the end of a line).
If the variable is followed by nothing, the
match extends from the current position in the data, to the end of the line.
@@ -611,11 +612,10 @@ is " e f". This is found within the data "c d e f" at position 3
(counting from 0). So positions 0-2 ("c d") constitute the matching
text which is bound to FOO.
-If the variable is followed by a regular expression directive,
-the extent is determined by finding the closest match for the
-regular expression. (See Regular Expressions section below).
-
-To match successfully,
+If the variable is followed by a regular expression directive or a function
+call, the extent is determined by finding the closest match for the regular
+expression or function call. (See Regular Expressions section below, and
+FUNCTIONS.)
.SS Special Symbols
@@ -702,16 +702,22 @@ covers only the "b ", stopping at the first "cd" occurrence.
The syntax variants
@{NAME /RE/}
+ @{NAME (FUN [ARGS ...])}
@{NAME NUMBER}
specify a variable binding that is driven by a positive match derived
-from a regular expression or character count, rather than from trailing
-material (which may be regarded as a "negative" match, since the variable is
-bound to material which is
+from a regular expression, function or character count, rather than from
+trailing material (which may be regarded as a "negative" match, since the
+variable is bound to material which is
.B skipped
in order to match the trailing material). In the /RE/ form, the match
extends over all characters from the current position which match
the regular expression RE. (see Regular Expressions section below).
+In the (FUN [ARGS ...]) form, the match extends over characters which
+are matched by the call to the function, if the call
+succeeds. Thus @{x (y z w)} is just like @(y z w), except that the region of
+text skipped over by @(y z w) is also bound to the variable x.
+See FUNCTIONS below.
In the NUMBER form, the match processes a field of text which
consists of the specified number of characters, which must be nonnegative