summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog18
-rw-r--r--match.c132
-rw-r--r--match.h1
-rw-r--r--parser.l4
-rw-r--r--parser.y45
-rw-r--r--txr.138
-rw-r--r--txr.c1
7 files changed, 177 insertions, 62 deletions
diff --git a/ChangeLog b/ChangeLog
index 868df28e..adb6e2d1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
2011-09-28 Kaz Kylheku <kaz@kylheku.com>
+ * match.c (mingap_k, maxgap_k, gap_k, times_k, lines_k): New
+ symbol variables.
+ (match_lines): Keyword arguments in collect implemented.
+ (match_init): New function.
+
+ * match.h (match_init): Declared.
+
+ * parser.l (COLLECT): Lexical syntax changed for COLLECT to
+ allow for argument material.
+
+ * parser.y (%union): obj renamed to val.
+ (exprs_opt): New nonterminal.
+ (collect_clause): Rewritten for arguments.
+
+ * txr.c (main): Call to match_init introduced.
+
+2011-09-28 Kaz Kylheku <kaz@kylheku.com>
+
* match.c (match_line): Bugfix in double var. Do not
prepend the next_pat to the specline if it is nil.
diff --git a/match.c b/match.c
index 81d7df95..53177865 100644
--- a/match.c
+++ b/match.c
@@ -47,6 +47,8 @@
int output_produced;
+val mingap_k, maxgap_k, gap_k, times_k, lines_k;
+
static void debugf(val fmt, ...)
{
if (opt_loglevel >= 2) {
@@ -1306,71 +1308,110 @@ repeat_spec_same_data:
} else if (sym == collect_s) {
val coll_spec = second(first_spec);
val until_spec = third(first_spec);
+ val args = fourth(first_spec);
val bindings_coll = nil;
+ val max = getplist(args, maxgap_k);
+ val min = getplist(args, mingap_k);
+ val gap = getplist(args, gap_k);
+ val times = getplist(args, times_k);
+ val lines = getplist(args, lines_k);
+ cnum cmax = nump(gap) ? c_num(gap) : (nump(max) ? c_num(max) : 0);
+ cnum cmin = nump(gap) ? c_num(gap) : (nump(min) ? c_num(min) : 0);
+ cnum mincounter = cmin, maxcounter = 0;
+ cnum timescounter = 0, linescounter = 0;
+ cnum ctimes = nump(times) ? c_num(times) : 0;
+ cnum clines = nump(lines) ? c_num(lines) : 0;
val iter;
+ if (gap && (max || min))
+ sem_error(spec_linenum, lit("collect: cannot mix :gap with :mingap or :maxgap"), nao);
+
+ if ((times && ctimes == 0) || (lines && clines == 0)) {
+ if ((spec = rest(spec)) == nil)
+ break;
+
+ goto repeat_spec_same_data;
+ }
+
uw_block_begin(nil, result);
result = t;
while (data) {
- cons_bind (new_bindings, success,
- match_files(coll_spec, files, bindings,
- data, num(data_lineno)));
+ if ((gap || min) && mincounter < cmin)
+ goto next_collect;
+
+ if (lines && linescounter++ >= clines)
+ break;
- /* Until clause sees un-collated bindings from collect. */
- if (until_spec)
{
- cons_bind (discarded_bindings, success,
- match_files(until_spec, files, new_bindings,
+ cons_bind (new_bindings, success,
+ match_files(coll_spec, files, bindings,
data, num(data_lineno)));
- if (success) {
- (void) discarded_bindings;
- break;
+ /* Until clause sees un-collated bindings from collect. */
+ if (until_spec)
+ {
+ cons_bind (discarded_bindings, success,
+ match_files(until_spec, files, new_bindings,
+ data, num(data_lineno)));
+
+ if (success) {
+ (void) discarded_bindings;
+ break;
+ }
}
- }
- if (success) {
- debuglf(spec_linenum, lit("collect matched ~a:~a"),
- first(files), num(data_lineno), nao);
+ if (success) {
+ debuglf(spec_linenum, lit("collect matched ~a:~a"),
+ first(files), num(data_lineno), nao);
- for (iter = new_bindings; iter && iter != bindings;
- iter = cdr(iter))
- {
- val binding = car(iter);
- val existing = assoc(bindings_coll, car(binding));
+ for (iter = new_bindings; iter && iter != bindings;
+ iter = cdr(iter))
+ {
+ val binding = car(iter);
+ val existing = assoc(bindings_coll, car(binding));
- bindings_coll = acons_new(bindings_coll, car(binding),
- cons(cdr(binding), cdr(existing)));
+ bindings_coll = acons_new(bindings_coll, car(binding),
+ cons(cdr(binding), cdr(existing)));
+ }
}
- }
- if (success) {
- if (consp(success)) {
- cons_bind (new_data, new_line, success);
- cnum new_lineno = c_num(new_line);
+ if (success) {
+ if (consp(success)) {
+ cons_bind (new_data, new_line, success);
+ cnum new_lineno = c_num(new_line);
- bug_unless (new_lineno >= data_lineno);
+ bug_unless (new_lineno >= data_lineno);
- if (new_lineno == data_lineno) {
- new_data = cdr(new_data);
- new_lineno++;
- }
+ if (new_lineno == data_lineno) {
+ new_data = cdr(new_data);
+ new_lineno++;
+ }
- debuglf(spec_linenum, lit("collect advancing from line ~a to ~a"),
- num(data_lineno), num(new_lineno), nao);
+ debuglf(spec_linenum, lit("collect advancing from line ~a to ~a"),
+ num(data_lineno), num(new_lineno), nao);
- data = new_data;
- data_lineno = new_lineno;
- *car_l(success) = nil;
+ data = new_data;
+ data_lineno = new_lineno;
+ *car_l(success) = nil;
+
+ if (times && ++timescounter >= ctimes)
+ break;
+ } else {
+ debuglf(spec_linenum, lit("collect consumed entire file"), nao);
+ data = nil;
+ }
+ mincounter = 0;
+ maxcounter = 0;
} else {
- debuglf(spec_linenum, lit("collect consumed entire file"), nao);
- data = nil;
+ next_collect:
+ mincounter++;
+ if ((gap || max) && ++maxcounter > cmax)
+ break;
+ data_lineno++;
+ data = rest(data);
}
- } else {
- data = rest(data);
- data_lineno++;
}
}
@@ -1911,3 +1952,12 @@ int extract(val spec, val files, val predefined_bindings)
return success ? 0 : EXIT_FAILURE;
}
+
+void match_init(void)
+{
+ mingap_k = intern(lit("mingap"), keyword_package);
+ maxgap_k = intern(lit("maxgap"), keyword_package);
+ gap_k = intern(lit("gap"), keyword_package);
+ times_k = intern(lit("times"), keyword_package);
+ lines_k = intern(lit("lines"), keyword_package);
+}
diff --git a/match.h b/match.h
index 7d49c48d..f343e654 100644
--- a/match.h
+++ b/match.h
@@ -24,4 +24,5 @@
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
+void match_init(void);
int extract(val spec, val filenames, val bindings);
diff --git a/parser.l b/parser.l
index 0fd39c5b..9f40dec8 100644
--- a/parser.l
+++ b/parser.l
@@ -270,8 +270,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return END;
}
-<SPECIAL>\({WS}collect{WS}\) {
- yy_pop_state();
+<SPECIAL>\({WS}collect/{ID_END} {
+ yy_push_state(NESTED);
return COLLECT;
}
diff --git a/parser.y b/parser.y
index 4d213caa..0972c100 100644
--- a/parser.y
+++ b/parser.y
@@ -52,7 +52,7 @@ static val parsed_spec;
%union {
wchar_t *lexeme;
- union obj *obj;
+ union obj *val;
wchar_t chr;
cnum num;
}
@@ -63,17 +63,17 @@ static val parsed_spec;
%token <num> NUMBER
%token <chr> REGCHAR LITCHAR
-%type <obj> spec clauses clauses_opt clause
-%type <obj> all_clause some_clause none_clause maybe_clause
-%type <obj> cases_clause collect_clause clause_parts additional_parts
-%type <obj> output_clause define_clause try_clause catch_clauses_opt
-%type <obj> line elems_opt elems elem var var_op
-%type <obj> list exprs expr out_clauses out_clauses_opt out_clause
-%type <obj> repeat_clause repeat_parts_opt o_line
-%type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt
-%type <obj> regex regexpr regbranch
-%type <obj> regterm regclass regclassterm regrange
-%type <obj> strlit chrlit quasilit quasi_items quasi_item litchars
+%type <val> spec clauses clauses_opt clause
+%type <val> all_clause some_clause none_clause maybe_clause
+%type <val> cases_clause collect_clause clause_parts additional_parts
+%type <val> output_clause define_clause try_clause catch_clauses_opt
+%type <val> line elems_opt elems elem var var_op
+%type <val> list exprs exprs_opt expr out_clauses out_clauses_opt out_clause
+%type <val> repeat_clause repeat_parts_opt o_line
+%type <val> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt
+%type <val> regex regexpr regbranch
+%type <val> regterm regclass regclassterm regrange
+%type <val> strlit chrlit quasilit quasi_items quasi_item litchars
%type <chr> regchar
%nonassoc LOW /* used for precedence assertion */
%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL
@@ -158,12 +158,16 @@ cases_clause : CASES newl clause_parts { $$ = cons(cases_s, $3); }
yyerror("empty cases clause"); }
;
-collect_clause : COLLECT newl clauses END newl { $$ = list(collect_s,
- $3, nao); }
- | COLLECT newl clauses
- UNTIL newl clauses END newl { $$ = list(collect_s, $3,
- $6, nao); }
- | COLLECT newl error { $$ = nil;
+collect_clause : COLLECT exprs_opt ')' newl
+ clauses END newl { $$ = list(collect_s,
+ $5, nil, $2,
+ nao); }
+ | COLLECT exprs_opt ')'
+ newl clauses
+ UNTIL newl clauses END newl { $$ = list(collect_s, $5,
+ $8, $2, nao); }
+ | COLLECT exprs_opt ')'
+ newl error { $$ = nil;
if (yychar == UNTIL || yychar == END)
yyerror("empty collect");
else
@@ -444,6 +448,10 @@ exprs : expr { $$ = cons($1, nil); }
| expr '.' expr { $$ = cons($1, $3); }
;
+exprs_opt : exprs { $$ = $1; }
+ | /* empty */ { $$ = nil; }
+ ;
+
expr : IDENT { $$ = intern(string_own($1), nil); }
| KEYWORD { $$ = intern(string_own($1),
keyword_package); }
@@ -685,4 +693,3 @@ val get_spec(void)
{
return parsed_spec;
}
-
diff --git a/txr.1 b/txr.1
index 90efb34e..1caadf71 100644
--- a/txr.1
+++ b/txr.1
@@ -1493,6 +1493,44 @@ established in the main clause. This is true even in the terminating
case when the until clause matches, and the bindings of the main clause
are discarded.
+.SS Collect Keyword Parameters
+
+By default, collect searches the rest of the input indefinitely,
+or until the @(until) clause matches. It skips arbitrary amounts of
+nonmatching material before the first match, and between matches.
+
+Within the @(collect) syntax, it is possible to specify some useful keyword
+parameters for additional control of the behavior. For instance:
+
+ @(collect :maxgap 5)
+
+means that the collect will terminate if it does not find a match within five
+lines of the starting position, or if more than five lines are skipped since
+the most recent successful match. A :maxgap of 0 means that the collected
+regions must be adjacent. For instance:
+
+ @(collect :maxgap 0)
+ M @a
+ @(end)
+
+means: from here, collect consecutive lines of the form "M ...". This will not
+search for the first such line, nor will it skip lines which do not match this
+form.
+
+Other keywords are :mingap and :gap. The :mingap keyword specifies a minimum
+gap between matches, but has no effect on the distance to the first match. The
+:gap keyword specifies :mingap and :maxgap at the same time, and can only be
+used if these other two are not used. Thus:
+
+ @(collect :gap 1)
+ @a
+ @(end)
+
+means collect every other line starting with the current line. Two other
+keywords are :lines and :times. The :lines parameter specifies the upper bound
+on how many lines should be scanned by the collect, and :times specifies the
+upper bound on how many times the collect can match.
+
.SS The Coll Directive
The coll directive is a kind of miniature version of the collect directive.
diff --git a/txr.c b/txr.c
index ac7db17c..e66cc0ae 100644
--- a/txr.c
+++ b/txr.c
@@ -145,6 +145,7 @@ int main(int argc, char **argv)
val stack_bottom = nil;
progname = argv[0] ? utf8_dup_from(argv[0]) : progname;
init(progname, oom_realloc_handler, &stack_bottom);
+ match_init();
return txr_main(argc, argv);
}