diff options
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | match.c | 98 | ||||
-rw-r--r-- | parser.l | 4 | ||||
-rw-r--r-- | parser.y | 6 | ||||
-rw-r--r-- | txr.1 | 9 |
5 files changed, 92 insertions, 38 deletions
@@ -1,3 +1,16 @@ +2011-09-29 Kaz Kylheku <kaz@kylheku.com> + + * match.c (chars_k): New variable. + (match_line): Keyword arguments in coll implemented. + (match_init): chars_k variable initialized. + + * parser.l (COLL): Lexical syntax changed to allow for + argument material. + + * parser.y (elem): Coll syntax rewritten for arguments. + + * txr.1: Updated. + 2011-09-28 Kaz Kylheku <kaz@kylheku.com> * match.c (mingap_k, maxgap_k, gap_k, times_k, lines_k): New @@ -47,7 +47,7 @@ int output_produced; -val mingap_k, maxgap_k, gap_k, times_k, lines_k; +val mingap_k, maxgap_k, gap_k, times_k, lines_k, chars_k; static void debugf(val fmt, ...) { @@ -475,53 +475,84 @@ static val match_line(val bindings, val specline, val dataline, } else if (directive == coll_s) { val coll_specline = second(elem); val until_specline = third(elem); + val args = fourth(elem); val bindings_coll = nil; + val max = getplist(args, maxgap_k); + val min = getplist(args, mingap_k); + val gap = getplist(args, gap_k); + val times = getplist(args, times_k); + val chars = getplist(args, chars_k); + cnum cmax = nump(gap) ? c_num(gap) : (nump(max) ? c_num(max) : 0); + cnum cmin = nump(gap) ? c_num(gap) : (nump(min) ? c_num(min) : 0); + cnum mincounter = cmin, maxcounter = 0; + cnum timescounter = 0, charscounter = 0; + cnum ctimes = nump(times) ? c_num(times) : 0; + cnum cchars = nump(chars) ? c_num(chars) : 0; val iter; + if ((times && ctimes == 0) || (chars && cchars == 0)) + break; + for (;;) { - cons_bind (new_bindings, new_pos, - match_line(bindings, coll_specline, dataline, pos, - spec_lineno, data_lineno, file)); + if ((gap || min) && mincounter < cmin) + goto next_coll; + + if (chars && charscounter++ >= cchars) + break; - if (until_specline) { - cons_bind (until_bindings, until_pos, - match_line(bindings, until_specline, dataline, pos, + { + cons_bind (new_bindings, new_pos, + match_line(bindings, coll_specline, dataline, pos, spec_lineno, data_lineno, file)); - if (until_pos) { - (void) until_bindings; - LOG_MATCH("until", until_pos); - break; - } else { - LOG_MISMATCH("until"); + if (until_specline) { + cons_bind (until_bindings, until_pos, + match_line(bindings, until_specline, dataline, pos, + spec_lineno, data_lineno, file)); + + if (until_pos) { + (void) until_bindings; + LOG_MATCH("until", until_pos); + break; + } else { + LOG_MISMATCH("until"); + } } - } - if (new_pos) { - LOG_MATCH("coll", new_pos); + if (new_pos) { + LOG_MATCH("coll", new_pos); - for (iter = new_bindings; iter && iter != bindings; - iter = cdr(iter)) - { - val binding = car(iter); - val existing = assoc(bindings_coll, car(binding)); + for (iter = new_bindings; iter && iter != bindings; + iter = cdr(iter)) + { + val binding = car(iter); + val existing = assoc(bindings_coll, car(binding)); - bindings_coll = acons_new(bindings_coll, car(binding), - cons(cdr(binding), cdr(existing))); + bindings_coll = acons_new(bindings_coll, car(binding), + cons(cdr(binding), cdr(existing))); + } } - } - if (new_pos && !equal(new_pos, pos)) { - pos = new_pos; - bug_unless (length_str_ge(dataline, pos)); - } else { - pos = plus(pos, one); - } + if (new_pos && !equal(new_pos, pos)) { + pos = new_pos; + bug_unless (length_str_ge(dataline, pos)); - if (length_str_le(dataline, pos)) - break; - } + if (times && ++timescounter >= ctimes) + break; + mincounter = 0; + maxcounter = 0; + } else { +next_coll: + mincounter++; + if ((gap || max) && ++maxcounter > cmax) + break; + pos = plus(pos, one); + } + if (length_str_le(dataline, pos)) + break; + } + } if (!bindings_coll) debuglf(spec_lineno, lit("nothing was collected"), nao); @@ -1960,4 +1991,5 @@ void match_init(void) gap_k = intern(lit("gap"), keyword_package); times_k = intern(lit("times"), keyword_package); lines_k = intern(lit("lines"), keyword_package); + chars_k = intern(lit("chars"), keyword_package); } @@ -275,8 +275,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return COLLECT; } -<SPECIAL>\({WS}coll{WS}\) { - yy_pop_state(); +<SPECIAL>\({WS}coll/{ID_END} { + yy_push_state(NESTED); return COLL; } @@ -201,9 +201,9 @@ elem : TEXT { $$ = string_own($1); } | list { $$ = $1; } | regex { $$ = cons(regex_compile(rest($1)), rest($1)); } - | COLL elems END { $$ = list(coll_s, $2, nao); } - | COLL elems - UNTIL elems END { $$ = list(coll_s, $2, $4, nao); } + | COLL exprs_opt ')' elems END { $$ = list(coll_s, $4, nil, $2, nao); } + | COLL exprs_opt ')' elems + UNTIL elems END { $$ = list(coll_s, $4, $6, $2, nao); } | COLL error { $$ = nil; yybadtoken(yychar, lit("coll clause")); } ; @@ -1643,6 +1643,15 @@ Note that the @(end) is followed by a semicolon. That's because when the @(until) clause meets a match, the matching material is not consumed. +.SS Coll Keyword Parameters + +The @(coll) directive takes most of the same parameters as @(collect). +See the section Collect Keyword Parameters above. +So for instance @(coll :gap 0) means that the collects must be +consecutive, and @(coll :times 2) means that (at most) two matches +will be collected. The :lines keyword does not exist, but there is +an analogous :chars keyword. + .SS The Flatten Directive. The flatten directive can be used to convert variables to one dimensional |