diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2011-10-07 21:19:04 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2011-10-07 21:19:04 -0700 |
commit | 81c5eee132546c90d878065722f52e70b27c359f (patch) | |
tree | 41e8b45cc3e000898b5c532c7a0fd4282159ff46 | |
parent | f5af9dd56254dd90f5985eb3e1dcbab042d72431 (diff) | |
download | txr-81c5eee132546c90d878065722f52e70b27c359f.tar.gz txr-81c5eee132546c90d878065722f52e70b27c359f.tar.bz2 txr-81c5eee132546c90d878065722f52e70b27c359f.zip |
* lib.c (eol_s): New symbol variable.
(obj_init): New variable initialized.
* lib.h (eol_s): Declared.
* match.c (match_line): Implemented horizontal skip and
new eol directive.
(match_lines): Vertical skip defers to horizontal skip if
there is trailing material.
* txr.1: Updated.
* lib.c (eol_s): New symbol variable.
(obj_init): New variable initialized.
* lib.h (eol_s): Declared.
* match.c (match_line): Implemented horizontal skip and
new eol directive.
(match_lines): Vertical skip defers to horizontal skip if
there is trailing material.
* txr.1: Updated.
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | lib.c | 4 | ||||
-rw-r--r-- | lib.h | 3 | ||||
-rw-r--r-- | match.c | 67 | ||||
-rw-r--r-- | txr.1 | 32 |
5 files changed, 108 insertions, 12 deletions
@@ -1,5 +1,19 @@ 2011-10-07 Kaz Kylheku <kaz@kylheku.com> + * lib.c (eol_s): New symbol variable. + (obj_init): New variable initialized. + + * lib.h (eol_s): Declared. + + * match.c (match_line): Implemented horizontal skip as and + new eol directive. + (match_lines): Vertical skip defers to horizontal skip if + there is trailing material. + + * txr.1: Updated. + +2011-10-07 Kaz Kylheku <kaz@kylheku.com> + * lib.c (flatten_helper): Function removed. (flatten): Recurse directly, using func_n1. @@ -60,7 +60,8 @@ val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s; val define_s, output_s, single_s, first_s, last_s, empty_s; val repeat_s, rep_s, flatten_s, forget_s; val local_s, merge_s, bind_s, cat_s; -val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s, eof_s; +val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s; +val eof_s, eol_s; val error_s, type_error_s, internal_error_s; val numeric_error_s, range_error_s; val query_error_s, file_error_s, process_error_s; @@ -2212,6 +2213,7 @@ static void obj_init(void) defex_s = intern(lit("defex"), user_package); deffilter_s = intern(lit("deffilter"), user_package); eof_s = intern(lit("eof"), user_package); + eol_s = intern(lit("eol"), user_package); error_s = intern(lit("error"), user_package); type_error_s = intern(lit("type_error"), user_package); internal_error_s = intern(lit("internal_error"), user_package); @@ -227,7 +227,8 @@ extern val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s; extern val define_s, output_s, single_s, first_s, last_s, empty_s; extern val repeat_s, rep_s, flatten_s, forget_s; extern val local_s, merge_s, bind_s, cat_s; -extern val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s, eof_s; +extern val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s; +extern val eof_s, eol_s; extern val error_s, type_error_s, internal_error_s; extern val numeric_error_s, range_error_s; extern val query_error_s, file_error_s, process_error_s; 
@@ -486,6 +486,54 @@ static val match_line(val bindings, val specline, val dataline, } LOG_MATCH("regex", past); pos = past; + } else if (directive == skip_s) { + val max = second(elem); + val min = third(elem); + cnum cmax = nump(max) ? c_num(max) : 0; + cnum cmin = nump(min) ? c_num(min) : 0; + + if (!rest(specline)) + break; + + { + cnum reps_max = 0, reps_min = 0; + + while (length_str_gt(dataline, pos) && min && reps_min < cmin) { + pos = plus(pos, one); + reps_min++; + } + + if (min) { + if (reps_min != cmin) { + debuglf(spec_lineno, + lit("skipped only ~a/~a chars to ~a:~a:~a"), + num(reps_min), num(cmin), + file, data_lineno, pos, nao); + return nil; + } + + debuglf(spec_lineno, lit("skipped ~a chars to ~a:~a:~a"), + num(reps_min), file, data_lineno, pos, nao); + } + + while (!max || reps_max++ < cmax) { + val result = match_line(bindings, rest(specline), dataline, pos, + spec_lineno, data_lineno, file); + + if (result) { + LOG_MATCH("skip", pos); + return result; + } + + if (length_str_le(dataline, pos)) + break; + + pos = plus(pos, one); + } + } + + LOG_MISMATCH("skip"); + return nil; } else if (directive == coll_s) { val coll_specline = second(elem); val until_last_specline = third(elem); @@ -682,6 +730,13 @@ next_coll: } LOG_MATCH("trailer", new_pos); return cons(bindings, pos); + } else if (directive == eol_s) { + if (length_str_le(dataline, pos)) { + LOG_MATCH("eol", pos); + return cons(bindings, t); + } + LOG_MISMATCH("eol"); + return nil; } else if (consp(directive) || stringp(directive)) { cons_bind (find, len, search_str_tree(dataline, elem, pos, nil)); val newpos; @@ -1187,16 +1242,14 @@ repeat_spec_same_data: if (consp(first_spec)) { val sym = first(first_spec); - if (sym == skip_s) { - val max = first(rest(first_spec)); - val min = second(rest(first_spec)); + + if (sym == skip_s && rest(specline) == nil) { + val args = rest(first_spec); + val max = first(args); + val min = second(args); cnum cmax = nump(max) ? 
c_num(max) : 0; cnum cmin = nump(min) ? c_num(min) : 0; - if (rest(specline)) - sem_error(spec_linenum, - lit("unexpected material after skip directive"), nao); - if ((spec = rest(spec)) == nil) break; @@ -906,6 +906,10 @@ A summary of the available directives follows: Explicitly match the end of file. Fails if unmatched data remains in the input stream. +.IP @(eol) +Explicitly match the end of line. Fails if the the current position is not the +end of a line. Also Fails if no data remains (there is no current line). + .IP @(next) Continue matching in another file. @@ -915,9 +919,9 @@ Blocks may be referenced by @(accept) and @(fail) directives. Blocks are discussed in the section BLOCKS below. .IP @(skip) -Treat the remaining query as a subquery unit, and search the lines of -the input file until that subquery matches somewhere. -A skip is also an anonymous block. +Treat the remaining query as a subquery unit, and search the lines (or +characters) of the input file until that subquery matches somewhere. A skip is +also an anonymous block. .IP @(trailer) Treat the remaining query or subquery as a match for a trailing context. That @@ -1137,6 +1141,17 @@ the query is understood to be processed there. Of course, the remainder of the query can itself contain skip directives. Each such directive performs a recursive subsearch. +Skip comes in vertical and horizontal flavors. For instance, skip and match the +last line: + + @(skip) + @last + @(eof) + +Skip and match the last character of the line: + + @(skip)@{last 1}@(eol) + The skip directive has an optional numeric argument. The value of this argument limits the range of lines scanned for a match. Judicious use of this feature can improve the performance of queries. @@ -1194,6 +1209,17 @@ is a noop, because it means: "the remainder of the query must match starting on the very next line", or, more briefly, "skip exactly zero lines", which is the behavior if the skip directive is omitted altogether. 
+Here is a trick for grabbing the fourth line from the bottom of the input: + + @(skip) + @fourth_from_bottom + @(skip 1 3) + @(eof) + +Last three space-separated tokens of the line: + + @(skip)@a @b @c@(eol) + .SS The Trailer Directive The trailer directive introduces a trailing portion of a query or subquery |