summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2011-10-03 09:43:30 -0700
committerKaz Kylheku <kaz@kylheku.com>2011-10-03 09:43:30 -0700
commit53b4090be0ed161aadf9a7c36ebc896f3e131a1f (patch)
tree30e7554d93d4f0a45a64ab813092b30565791ecd
parentd1a8b58b39e131df4a9fdc748cda449c0447bd19 (diff)
downloadtxr-53b4090be0ed161aadf9a7c36ebc896f3e131a1f.tar.gz
txr-53b4090be0ed161aadf9a7c36ebc896f3e131a1f.tar.bz2
txr-53b4090be0ed161aadf9a7c36ebc896f3e131a1f.zip
Implemented new last clause for collect and coll.
Bugfix in cases inside coll: was not collecting bindings. Bugfix for until inside coll: was not seeing bindings from main clause. * lib.c (ldiff): New function. * lib.h (ldiff): Declared. * match.c (match_line): Implemented last clause. Fixed cases handling by moving misplaced termination check. (match_files): Implemented last clause. * parser.y (until_last): New nonterminal symbol. (collect_clause): Refactored syntax to support until and last. (elem): Likewise. * txr.1: Updated.
-rw-r--r--ChangeLog21
-rw-r--r--lib.c12
-rw-r--r--lib.h1
-rw-r--r--match.c55
-rw-r--r--parser.y19
-rw-r--r--txr.163
6 files changed, 138 insertions, 33 deletions
diff --git a/ChangeLog b/ChangeLog
index c2522163..2a377448 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2011-10-03 Kaz Kylheku <kaz@kylheku.com>
+
+ Implemented new last clause for collect and coll.
+ Bugfix in cases inside coll: was not collecting bindings.
+ Bugfix for until inside coll: was not seeing bindings
+ from main clause.
+
+ * lib.c (ldiff): New function.
+
+ * lib.h (ldiff): Declared.
+
+ * match.c (match_line): Implemented last clause. Fixed cases
+ handling by moving misplaced termination check.
+ (match_files): Implemented last clause.
+
+ * parser.y (until_last): New nonterminal symbol.
+ (collect_clause): Refactored syntax to support until and last.
+ (elem): Likewise.
+
+ * txr.1: Updated.
+
2011-10-02 Kaz Kylheku <kaz@kylheku.com>
* parser.y (rep_elem): Bugfix: forgotten o_elems_transform on
diff --git a/lib.c b/lib.c
index 932fecde..29e661d2 100644
--- a/lib.c
+++ b/lib.c
@@ -354,6 +354,18 @@ val nappend2(val list1, val list2)
return list1;
}
+val ldiff(val list1, val list2)
+{
+ list_collect_decl (out, tail);
+
+ while (list1 && list1 != list2) {
+ list_collect (tail, car(list1));
+ list1 = cdr(list1);
+ }
+
+ return out;
+}
+
static val flatten_helper(val env, val item)
{
return flatten(item);
diff --git a/lib.h b/lib.h
index ae5bb79e..cf397751 100644
--- a/lib.h
+++ b/lib.h
@@ -270,6 +270,7 @@ val nreverse(val in);
val reverse(val in);
val append2(val list1, val list2);
val nappend2(val list1, val list2);
+val ldiff(val list1, val list2);
val flatten(val list);
val memq(val obj, val list);
val memqual(val obj, val list);
diff --git a/match.c b/match.c
index 7c73e0eb..592685f3 100644
--- a/match.c
+++ b/match.c
@@ -475,9 +475,10 @@ static val match_line(val bindings, val specline, val dataline,
pos = past;
} else if (directive == coll_s) {
val coll_specline = second(elem);
- val until_specline = third(elem);
+ val until_last_specline = third(elem);
val args = fourth(elem);
val bindings_coll = nil;
+ val last_bindings = nil;
val max = getplist(args, maxgap_k);
val min = getplist(args, mingap_k);
val gap = getplist(args, gap_k);
@@ -508,17 +509,22 @@ static val match_line(val bindings, val specline, val dataline,
match_line(bindings, coll_specline, dataline, pos,
spec_lineno, data_lineno, file));
- if (until_specline) {
- cons_bind (until_bindings, until_pos,
- match_line(bindings, until_specline, dataline, pos,
+ if (until_last_specline) {
+ cons_bind (sym, spec, until_last_specline);
+ cons_bind (until_last_bindings, until_pos,
+ match_line(new_bindings, spec,
+ dataline, pos,
spec_lineno, data_lineno, file));
if (until_pos) {
- (void) until_bindings;
- LOG_MATCH("until", until_pos);
+ LOG_MATCH("until/last", until_pos);
+ if (sym == last_s) {
+ last_bindings = ldiff(until_last_bindings, new_bindings);
+ pos = until_pos;
+ }
break;
} else {
- LOG_MISMATCH("until");
+ LOG_MISMATCH("until/last");
}
}
@@ -563,7 +569,7 @@ next_coll:
for (iter = bindings_coll; iter; iter = cdr(iter)) {
val pair = car(iter);
val rev = cons(car(pair), nreverse(cdr(pair)));
- bindings = cons(rev, bindings);
+ bindings = nappend2(last_bindings, cons(rev, bindings));
}
} else if (directive == all_s || directive == some_s ||
directive == none_s || directive == maybe_s ||
@@ -595,8 +601,6 @@ next_coll:
some_match = t;
if (gt(new_pos, max_pos))
max_pos = new_pos;
- if (directive == cases_s || directive == none_s)
- break;
if (directive == choose_s) {
val binding = choose_sym ? assoc(new_bindings, choose_sym) : nil;
val value = cdr(binding);
@@ -615,6 +619,8 @@ next_coll:
} else {
bindings = new_bindings;
}
+ if (directive == cases_s || directive == none_s)
+ break;
} else {
all_match = nil;
if (directive == all_s)
@@ -1477,9 +1483,10 @@ repeat_spec_same_data:
goto repeat_spec_same_data;
} else if (sym == collect_s) {
val coll_spec = second(first_spec);
- val until_spec = third(first_spec);
+ val until_last_spec = third(first_spec);
val args = fourth(first_spec);
val bindings_coll = nil;
+ val last_bindings = nil;
val max = getplist(args, maxgap_k);
val min = getplist(args, mingap_k);
val gap = getplist(args, gap_k);
@@ -1521,15 +1528,29 @@ repeat_spec_same_data:
match_files(coll_spec, files, bindings,
data, num(data_lineno)));
- /* Until clause sees un-collated bindings from collect. */
- if (until_spec)
+ /* Until/last clause sees un-collated bindings from collect. */
+ if (until_last_spec)
{
- cons_bind (discarded_bindings, success,
- match_files(until_spec, files, new_bindings,
+ cons_bind (sym, spec, until_last_spec);
+ cons_bind (until_last_bindings, success,
+ match_files(spec, files, new_bindings,
data, num(data_lineno)));
if (success) {
- (void) discarded_bindings;
+ debuglf(spec_linenum, lit("until/last matched ~a:~a"),
+ first(files), num(data_lineno), nao);
+ /* Until discards bindings and position, last keeps them. */
+ if (sym == last_s) {
+ last_bindings = ldiff(until_last_bindings, new_bindings);
+
+ if (success == t) {
+ data = t;
+ } else {
+ cons_bind (new_data, new_line, success);
+ data = new_data;
+ data_lineno = c_num(new_line);
+ }
+ }
break;
}
}
@@ -1603,6 +1624,8 @@ repeat_spec_same_data:
bindings = cons(rev, bindings);
}
+ bindings = nappend2(last_bindings, bindings);
+
if ((spec = rest(spec)) == nil)
break;
diff --git a/parser.y b/parser.y
index d767be50..24caac87 100644
--- a/parser.y
+++ b/parser.y
@@ -67,7 +67,7 @@ static val parsed_spec;
%type <val> spec clauses clauses_opt clause
%type <val> all_clause some_clause none_clause maybe_clause
-%type <val> cases_clause choose_clause collect_clause
+%type <val> cases_clause choose_clause collect_clause until_last
%type <val> clause_parts additional_parts
%type <val> output_clause define_clause try_clause catch_clauses_opt
%type <val> line elems_opt elems clause_parts_h additional_parts_h
@@ -179,18 +179,24 @@ collect_clause : COLLECT exprs_opt ')' newl
$5, nil, $2,
nao); }
| COLLECT exprs_opt ')'
- newl clauses
- UNTIL newl clauses END newl { $$ = list(collect_s, $5,
- $8, $2, nao); }
+ newl clauses until_last
+ newl clauses END newl { $$ = list(collect_s, $5,
+ cons($6, $8), $2, nao); }
| COLLECT exprs_opt ')'
newl error { $$ = nil;
- if (yychar == UNTIL || yychar == END)
+ if (yychar == UNTIL ||
+ yychar == END ||
+ yychar == LAST)
yyerror("empty collect");
else
yybadtoken(yychar,
lit("collect clause")); }
;
+until_last : UNTIL { $$ = until_s; }
+ | LAST { $$ = last_s; }
+ ;
+
clause_parts : clauses additional_parts { $$ = cons($1, $2); }
;
@@ -219,7 +225,8 @@ elem : TEXT { $$ = string_own($1); }
rest($1)); }
| COLL exprs_opt ')' elems END { $$ = list(coll_s, $4, nil, $2, nao); }
| COLL exprs_opt ')' elems
- UNTIL elems END { $$ = list(coll_s, $4, $6, $2, nao); }
+ until_last elems END { $$ = list(coll_s, $4, cons($5, $6),
+ $2, nao); }
| COLL error { $$ = nil;
yybadtoken(yychar, lit("coll clause")); }
| ALL clause_parts_h { $$ = list(all_s, t, $2, nao); }
diff --git a/txr.1 b/txr.1
index 8ce65daf..f3bd2cd5 100644
--- a/txr.1
+++ b/txr.1
@@ -1469,7 +1469,7 @@ The syntax of the collect directive is:
... lines of subquery
@(end)
-or with an until clause:
+or with an until or last clause:
@(collect)
... lines of subquery: main clause
@@ -1477,6 +1477,11 @@ or with an until clause:
... lines of subquery: until clause
@(end)
+ @(collect)
+ ... lines of subquery: main clause
+ @(last)
+ ... lines of subquery: last clause
+ @(end)
The subquery is matched repeatedly, starting at the current line.
If it fails to match, it is tried starting at the subsequent line.
@@ -1485,18 +1490,25 @@ entire extent of matched data, if there is one. Thus, the collected regions do
not overlap.
The collect as a whole always succeeds, even if the subquery does not match at
-any position, and even if the until clause does not match. That is to say, a
-query will never fail for the reason that a collect didn't collect anything.
+any position, and even if the until/last clause does not match. That is to say,
+a query will never fail for the reason that a collect didn't collect anything.
-If no until clause is specified, the collect is unbounded. It consumes the entire data file. If any query material follows such the collect clause, it will
-fail if it tries to match anything in the current file; but of course, it
+If no until/last clause is specified, the collect is unbounded. It consumes the
+entire data file. If any query material follows such a collect clause, it
+will fail if it tries to match anything in the current file; but of course, it
is possible to continue matching in another file by means of @(next).
-If an until clause is specified, the collection stops when that clause matches
-at the current position. When an until clause matches at a position,
-no bindings are collected at that position, even if the main clause
-matches at that position also. Moreover, the position is not advanced.
-The remainder of the query begins matching at that position.
+If an until/last clause is specified, the collection stops when that clause
+matches at the current position.
+
+If it is an until clause, no bindings are collected at that position, even if
+the main clause matches at that position also. Moreover, the position is not
+advanced. The remainder of the query begins matching at that position.
+
+If it is a last clause that matches, the behavior is different. Any bindings
+captured by the main clause are thrown away, just like with the until clause.
+However, the bindings in the last clause itself survive, and the position
+is advanced to skip over that material.
Example:
@@ -1504,7 +1516,9 @@ Example:
@a
@(until)
42
+ @b
@(end)
+ @c
Data: 1
2
@@ -1516,8 +1530,22 @@ Example:
Output: a[0]="1"
a[1]="2"
a[2]="3"
+ c="42"
+
+The line 42 is not collected, even though it matches @a. Furthermore,
+the until does not advance the position, so variable c takes 42.
-The line 42 is not collected, even though it matches @a.
+If the @(until) is changed to @(last) the output will be different:
+
+ Output: a[0]="1"
+ a[1]="2"
+ a[2]="3"
+ b="5"
+ c="6"
+
+The 42 is not collected into the a list, just like before. But now
+the binding captured by @b emerges. Furthermore, the position advances
+so variable c now takes 6.
The binding variables within the clause of a collect are treated specially.
The multiple matches for each variable are collected into lists,
@@ -1646,6 +1674,8 @@ follows the last character of the match, if such a position exists.
If not bounded by an until clause, it will exhaust the entire line. If the
until clause matches, then the collection stops at that position,
and any bindings from that iteration are discarded.
+Like collect, coll also supports a last clause, which propagates variable
+bindings and advances the position.
Coll clauses nest, and variables bound within a coll are available to within
the rest of the coll clause, including the until clause, and appear as single
@@ -1728,6 +1758,17 @@ Note that the @(end) is followed by a semicolon. That's because
when the @(until) clause meets a match, the matching material
is not consumed.
+Instead of regular expression hacks, this problem can be nicely
+solved with cases:
+
+ pattern: @(coll)@(cases)@a @(or)@a@(end)@(end)
+ data: 1 2 3 4 5
+ result: a[0]="1"
+ a[1]="2"
+ a[2]="3"
+ a[3]="4"
+ a[4]="5"
+
.SS Coll Keyword Parameters
The @(coll) directive takes most of the same parameters as @(collect).