summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 13
-rw-r--r-- match.c 98
-rw-r--r-- parser.l 4
-rw-r--r-- parser.y 6
-rw-r--r-- txr.1 9
5 files changed, 92 insertions, 38 deletions
diff --git a/ChangeLog b/ChangeLog
index adb6e2d1..3d0554d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2011-09-29 Kaz Kylheku <kaz@kylheku.com>
+
+ * match.c (chars_k): New variable.
+ (match_line): Keyword arguments in coll implemented.
+ (match_init): chars_k variable initialized.
+
+ * parser.l (COLL): Lexical syntax changed to allow for
+ argument material.
+
+ * parser.y (elem): Coll syntax rewritten for arguments.
+
+ * txr.1: Updated.
+
2011-09-28 Kaz Kylheku <kaz@kylheku.com>
* match.c (mingap_k, maxgap_k, gap_k, times_k, lines_k): New
diff --git a/match.c b/match.c
index 53177865..68f05600 100644
--- a/match.c
+++ b/match.c
@@ -47,7 +47,7 @@
int output_produced;
-val mingap_k, maxgap_k, gap_k, times_k, lines_k;
+val mingap_k, maxgap_k, gap_k, times_k, lines_k, chars_k;
static void debugf(val fmt, ...)
{
@@ -475,53 +475,84 @@ static val match_line(val bindings, val specline, val dataline,
} else if (directive == coll_s) {
val coll_specline = second(elem);
val until_specline = third(elem);
+ val args = fourth(elem);
val bindings_coll = nil;
+ val max = getplist(args, maxgap_k);
+ val min = getplist(args, mingap_k);
+ val gap = getplist(args, gap_k);
+ val times = getplist(args, times_k);
+ val chars = getplist(args, chars_k);
+ cnum cmax = nump(gap) ? c_num(gap) : (nump(max) ? c_num(max) : 0);
+ cnum cmin = nump(gap) ? c_num(gap) : (nump(min) ? c_num(min) : 0);
+ cnum mincounter = cmin, maxcounter = 0;
+ cnum timescounter = 0, charscounter = 0;
+ cnum ctimes = nump(times) ? c_num(times) : 0;
+ cnum cchars = nump(chars) ? c_num(chars) : 0;
val iter;
+ if ((times && ctimes == 0) || (chars && cchars == 0))
+ break;
+
for (;;) {
- cons_bind (new_bindings, new_pos,
- match_line(bindings, coll_specline, dataline, pos,
- spec_lineno, data_lineno, file));
+ if ((gap || min) && mincounter < cmin)
+ goto next_coll;
+
+ if (chars && charscounter++ >= cchars)
+ break;
- if (until_specline) {
- cons_bind (until_bindings, until_pos,
- match_line(bindings, until_specline, dataline, pos,
+ {
+ cons_bind (new_bindings, new_pos,
+ match_line(bindings, coll_specline, dataline, pos,
spec_lineno, data_lineno, file));
- if (until_pos) {
- (void) until_bindings;
- LOG_MATCH("until", until_pos);
- break;
- } else {
- LOG_MISMATCH("until");
+ if (until_specline) {
+ cons_bind (until_bindings, until_pos,
+ match_line(bindings, until_specline, dataline, pos,
+ spec_lineno, data_lineno, file));
+
+ if (until_pos) {
+ (void) until_bindings;
+ LOG_MATCH("until", until_pos);
+ break;
+ } else {
+ LOG_MISMATCH("until");
+ }
}
- }
- if (new_pos) {
- LOG_MATCH("coll", new_pos);
+ if (new_pos) {
+ LOG_MATCH("coll", new_pos);
- for (iter = new_bindings; iter && iter != bindings;
- iter = cdr(iter))
- {
- val binding = car(iter);
- val existing = assoc(bindings_coll, car(binding));
+ for (iter = new_bindings; iter && iter != bindings;
+ iter = cdr(iter))
+ {
+ val binding = car(iter);
+ val existing = assoc(bindings_coll, car(binding));
- bindings_coll = acons_new(bindings_coll, car(binding),
- cons(cdr(binding), cdr(existing)));
+ bindings_coll = acons_new(bindings_coll, car(binding),
+ cons(cdr(binding), cdr(existing)));
+ }
}
- }
- if (new_pos && !equal(new_pos, pos)) {
- pos = new_pos;
- bug_unless (length_str_ge(dataline, pos));
- } else {
- pos = plus(pos, one);
- }
+ if (new_pos && !equal(new_pos, pos)) {
+ pos = new_pos;
+ bug_unless (length_str_ge(dataline, pos));
- if (length_str_le(dataline, pos))
- break;
- }
+ if (times && ++timescounter >= ctimes)
+ break;
+ mincounter = 0;
+ maxcounter = 0;
+ } else {
+next_coll:
+ mincounter++;
+ if ((gap || max) && ++maxcounter > cmax)
+ break;
+ pos = plus(pos, one);
+ }
+ if (length_str_le(dataline, pos))
+ break;
+ }
+ }
if (!bindings_coll)
debuglf(spec_lineno, lit("nothing was collected"), nao);
@@ -1960,4 +1991,5 @@ void match_init(void)
gap_k = intern(lit("gap"), keyword_package);
times_k = intern(lit("times"), keyword_package);
lines_k = intern(lit("lines"), keyword_package);
+ chars_k = intern(lit("chars"), keyword_package);
}
diff --git a/parser.l b/parser.l
index 9f40dec8..991df970 100644
--- a/parser.l
+++ b/parser.l
@@ -275,8 +275,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return COLLECT;
}
-<SPECIAL>\({WS}coll{WS}\) {
- yy_pop_state();
+<SPECIAL>\({WS}coll/{ID_END} {
+ yy_push_state(NESTED);
return COLL;
}
diff --git a/parser.y b/parser.y
index 0972c100..17ad883a 100644
--- a/parser.y
+++ b/parser.y
@@ -201,9 +201,9 @@ elem : TEXT { $$ = string_own($1); }
| list { $$ = $1; }
| regex { $$ = cons(regex_compile(rest($1)),
rest($1)); }
- | COLL elems END { $$ = list(coll_s, $2, nao); }
- | COLL elems
- UNTIL elems END { $$ = list(coll_s, $2, $4, nao); }
+ | COLL exprs_opt ')' elems END { $$ = list(coll_s, $4, nil, $2, nao); }
+ | COLL exprs_opt ')' elems
+ UNTIL elems END { $$ = list(coll_s, $4, $6, $2, nao); }
| COLL error { $$ = nil;
yybadtoken(yychar, lit("coll clause")); }
;
diff --git a/txr.1 b/txr.1
index 1caadf71..0a5bcad1 100644
--- a/txr.1
+++ b/txr.1
@@ -1643,6 +1643,15 @@ Note that the @(end) is followed by a semicolon. That's because
when the @(until) clause meets a match, the matching material
is not consumed.
+.SS Coll Keyword Parameters
+
+The @(coll) directive takes most of the same parameters as @(collect).
+See the section Collect Keyword Parameters above.
+So, for instance, @(coll :gap 0) means that the collected matches must be
+consecutive, and @(coll :times 2) means that (at most) two matches
+will be collected. The :lines keyword does not exist, but there is
+an analogous :chars keyword.
+
.SS The Flatten Directive.
The flatten directive can be used to convert variables to one dimensional