diff options
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | match.c | 132 | ||||
-rw-r--r-- | match.h | 1 | ||||
-rw-r--r-- | parser.l | 4 | ||||
-rw-r--r-- | parser.y | 45 | ||||
-rw-r--r-- | txr.1 | 38 | ||||
-rw-r--r-- | txr.c | 1 |
7 files changed, 177 insertions, 62 deletions
@@ -1,5 +1,23 @@ 2011-09-28 Kaz Kylheku <kaz@kylheku.com> + * match.c (mingap_k, maxgap_k, gap_k, times_k, lines_k): New + symbol variables. + (match_lines): Keyword arguments in collect implemented. + (match_init): New function. + + * match.h (match_init): Declared. + + * parser.l (COLLECT): Lexical syntax changed for COLLECT to + allow for argument material. + + * parser.y (%union): obj renamed to val. + (exprs_opt): New nonterminal. + (collect_clause): Rewritten for arguments. + + * txr.c (main): Call to match_init introduced. + +2011-09-28 Kaz Kylheku <kaz@kylheku.com> + * match.c (match_line): Bugfix in double var. Do not prepend the next_pat to the specline if it is nil. @@ -47,6 +47,8 @@ int output_produced; +val mingap_k, maxgap_k, gap_k, times_k, lines_k; + static void debugf(val fmt, ...) { if (opt_loglevel >= 2) { @@ -1306,71 +1308,110 @@ repeat_spec_same_data: } else if (sym == collect_s) { val coll_spec = second(first_spec); val until_spec = third(first_spec); + val args = fourth(first_spec); val bindings_coll = nil; + val max = getplist(args, maxgap_k); + val min = getplist(args, mingap_k); + val gap = getplist(args, gap_k); + val times = getplist(args, times_k); + val lines = getplist(args, lines_k); + cnum cmax = nump(gap) ? c_num(gap) : (nump(max) ? c_num(max) : 0); + cnum cmin = nump(gap) ? c_num(gap) : (nump(min) ? c_num(min) : 0); + cnum mincounter = cmin, maxcounter = 0; + cnum timescounter = 0, linescounter = 0; + cnum ctimes = nump(times) ? c_num(times) : 0; + cnum clines = nump(lines) ? c_num(lines) : 0; val iter; + if (gap && (max || min)) + sem_error(spec_linenum, lit("collect: cannot mix :gap with :mingap or :maxgap"), nao); + + if ((times && ctimes == 0) || (lines && clines == 0)) { + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } + uw_block_begin(nil, result); result = t; while (data) { - cons_bind (new_bindings, success, - match_files(coll_spec, files, bindings, - data, num(data_lineno))); + if ((gap || min) && mincounter < cmin) + goto next_collect; + + if (lines && linescounter++ >= clines) + break; - /* Until clause sees un-collated bindings from collect. */ - if (until_spec) { - cons_bind (discarded_bindings, success, - match_files(until_spec, files, new_bindings, + cons_bind (new_bindings, success, + match_files(coll_spec, files, bindings, data, num(data_lineno))); - if (success) { - (void) discarded_bindings; - break; + /* Until clause sees un-collated bindings from collect. */ + if (until_spec) + { + cons_bind (discarded_bindings, success, + match_files(until_spec, files, new_bindings, + data, num(data_lineno))); + + if (success) { + (void) discarded_bindings; + break; + } } - } - if (success) { - debuglf(spec_linenum, lit("collect matched ~a:~a"), - first(files), num(data_lineno), nao); + if (success) { + debuglf(spec_linenum, lit("collect matched ~a:~a"), + first(files), num(data_lineno), nao); - for (iter = new_bindings; iter && iter != bindings; - iter = cdr(iter)) - { - val binding = car(iter); - val existing = assoc(bindings_coll, car(binding)); + for (iter = new_bindings; iter && iter != bindings; + iter = cdr(iter)) + { + val binding = car(iter); + val existing = assoc(bindings_coll, car(binding)); - bindings_coll = acons_new(bindings_coll, car(binding), - cons(cdr(binding), cdr(existing))); + bindings_coll = acons_new(bindings_coll, car(binding), + cons(cdr(binding), cdr(existing))); + } } - } - if (success) { - if (consp(success)) { - cons_bind (new_data, new_line, success); - cnum new_lineno = c_num(new_line); + if (success) { + if (consp(success)) { + cons_bind (new_data, new_line, success); + cnum new_lineno = c_num(new_line); - bug_unless (new_lineno >= data_lineno); + bug_unless (new_lineno >= data_lineno); - if (new_lineno == data_lineno) { - new_data = cdr(new_data); - new_lineno++; - } + if (new_lineno == data_lineno) { + new_data = cdr(new_data); + new_lineno++; + } - debuglf(spec_linenum, lit("collect advancing from line ~a to ~a"), - num(data_lineno), num(new_lineno), nao); + debuglf(spec_linenum, lit("collect advancing from line ~a to ~a"), + num(data_lineno), num(new_lineno), nao); - data = new_data; - data_lineno = new_lineno; - *car_l(success) = nil; + data = new_data; + data_lineno = new_lineno; + *car_l(success) = nil; + + if (times && ++timescounter >= ctimes) + break; + } else { + debuglf(spec_linenum, lit("collect consumed entire file"), nao); + data = nil; + } + mincounter = 0; + maxcounter = 0; } else { - debuglf(spec_linenum, lit("collect consumed entire file"), nao); - data = nil; + next_collect: + mincounter++; + if ((gap || max) && ++maxcounter > cmax) + break; + data_lineno++; + data = rest(data); } - } else { - data = rest(data); - data_lineno++; } } @@ -1911,3 +1952,12 @@ int extract(val spec, val files, val predefined_bindings) return success ? 0 : EXIT_FAILURE; } + +void match_init(void) +{ + mingap_k = intern(lit("mingap"), keyword_package); + maxgap_k = intern(lit("maxgap"), keyword_package); + gap_k = intern(lit("gap"), keyword_package); + times_k = intern(lit("times"), keyword_package); + lines_k = intern(lit("lines"), keyword_package); +} @@ -24,4 +24,5 @@ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ +void match_init(void); int extract(val spec, val filenames, val bindings); @@ -270,8 +270,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return END; } -<SPECIAL>\({WS}collect{WS}\) { - yy_pop_state(); +<SPECIAL>\({WS}collect/{ID_END} { + yy_push_state(NESTED); return COLLECT; } @@ -52,7 +52,7 @@ static val parsed_spec; %union { wchar_t *lexeme; - union obj *obj; + union obj *val; wchar_t chr; cnum num; } @@ -63,17 +63,17 @@ static val parsed_spec; %token <num> NUMBER %token <chr> REGCHAR LITCHAR -%type <obj> spec clauses clauses_opt clause -%type <obj> all_clause some_clause none_clause maybe_clause -%type <obj> cases_clause collect_clause clause_parts additional_parts -%type <obj> output_clause define_clause try_clause catch_clauses_opt -%type <obj> line elems_opt elems elem var var_op -%type <obj> list exprs expr out_clauses out_clauses_opt out_clause -%type <obj> repeat_clause repeat_parts_opt o_line -%type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt -%type <obj> regex regexpr regbranch -%type <obj> regterm regclass regclassterm regrange -%type <obj> strlit chrlit quasilit quasi_items quasi_item litchars +%type <val> spec clauses clauses_opt clause +%type <val> all_clause some_clause none_clause maybe_clause +%type <val> cases_clause collect_clause clause_parts additional_parts +%type <val> output_clause define_clause try_clause catch_clauses_opt +%type <val> line elems_opt elems elem var var_op +%type <val> list exprs exprs_opt expr out_clauses out_clauses_opt out_clause +%type <val> repeat_clause repeat_parts_opt o_line +%type <val> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt +%type <val> regex regexpr regbranch +%type <val> regterm regclass regclassterm regrange +%type <val> strlit chrlit quasilit quasi_items quasi_item litchars %type <chr> regchar %nonassoc LOW /* used for precedence assertion */ %nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL @@ -158,12 +158,16 @@ cases_clause : CASES newl clause_parts { $$ = cons(cases_s, $3); } yyerror("empty cases clause"); } ; -collect_clause : COLLECT newl clauses END newl { $$ = list(collect_s, - $3, nao); } - | COLLECT newl clauses - UNTIL newl clauses END newl { $$ = list(collect_s, $3, - $6, nao); } - | COLLECT newl error { $$ = nil; +collect_clause : COLLECT exprs_opt ')' newl + clauses END newl { $$ = list(collect_s, + $5, nil, $2, + nao); } + | COLLECT exprs_opt ')' + newl clauses + UNTIL newl clauses END newl { $$ = list(collect_s, $5, + $8, $2, nao); } + | COLLECT exprs_opt ')' + newl error { $$ = nil; if (yychar == UNTIL || yychar == END) yyerror("empty collect"); else @@ -444,6 +448,10 @@ exprs : expr { $$ = cons($1, nil); } | expr '.' expr { $$ = cons($1, $3); } ; +exprs_opt : exprs { $$ = $1; } + | /* empty */ { $$ = nil; } + ; + expr : IDENT { $$ = intern(string_own($1), nil); } | KEYWORD { $$ = intern(string_own($1), keyword_package); } @@ -685,4 +693,3 @@ val get_spec(void) { return parsed_spec; } - @@ -1493,6 +1493,44 @@ established in the main clause. This is true even in the terminating case when the until clause matches, and the bindings of the main clause are discarded. +.SS Collect Keyword Parameters + +By default, collect searches the rest of the input indefinitely, +or until the @(until) clause matches. It skips arbitrary amounts of +nonmatching material before the first match, and between matches. + +Within the @(collect) syntax, it is possible to specify some useful keyword +parameters for additional control of the behavior. For instance + + @(collect :maxgap 5) + +means that the collect will terminate if it does not find a match within five +lines of the starting position, or if more than five lines are skipped since +any successful match. A :maxgap of 0 means that the collected regions must be +adjacent. For instance: + + @(collect :maxgap 0) + M @a + @(end) + +means: from here, collect consecutive lines of the form "M ...". This will not +search for the first such line, nor will it skip lines which do not match this +form. + +Other keywords are :mingap, and :gap. The :mingap keyword specifies a minimum +gap between matches, but has no effect on the distance to the first match. The +:gap keyword specifies :mingap and :maxgap at the same time, and can only be +used if these other two are not used. Thus: + + @(collect :gap 1) + @a + @(end) + +means collect every other line starting with the current line. Two other +keywords are :lines and :times. The :lines parameter specifies the upper bound +on how many lines should be scanned by the collect, and :times specifies the +upper bound on how many times the collect can match. + .SS The Coll Directive The coll directive is a kind of miniature version of the collect directive. @@ -145,6 +145,7 @@ int main(int argc, char **argv) val stack_bottom = nil; progname = argv[0] ? utf8_dup_from(argv[0]) : progname; init(progname, oom_realloc_handler, &stack_bottom); + match_init(); return txr_main(argc, argv); } |