summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog14
-rw-r--r--lib.c4
-rw-r--r--lib.h3
-rw-r--r--match.c67
-rw-r--r--txr.132
5 files changed, 108 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 11e76f2b..021132ca 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
2011-10-07 Kaz Kylheku <kaz@kylheku.com>
+ * lib.c (eol_s): New symbol variable.
+ (obj_init): New variable initialized.
+
+ * lib.h (eol_s): Declared.
+
+ * match.c (match_line): Implemented horizontal skip and
+ new eol directive.
+ (match_lines): Vertical skip defers to horizontal skip if
+ there is trailing material.
+
+ * txr.1: Updated.
+
+2011-10-07 Kaz Kylheku <kaz@kylheku.com>
+
* lib.c (flatten_helper): Function removed.
(flatten): Recurse directly, using func_n1.
diff --git a/lib.c b/lib.c
index 02c08171..e873a206 100644
--- a/lib.c
+++ b/lib.c
@@ -60,7 +60,8 @@ val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s;
val define_s, output_s, single_s, first_s, last_s, empty_s;
val repeat_s, rep_s, flatten_s, forget_s;
val local_s, merge_s, bind_s, cat_s;
-val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s, eof_s;
+val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s;
+val eof_s, eol_s;
val error_s, type_error_s, internal_error_s;
val numeric_error_s, range_error_s;
val query_error_s, file_error_s, process_error_s;
@@ -2212,6 +2213,7 @@ static void obj_init(void)
defex_s = intern(lit("defex"), user_package);
deffilter_s = intern(lit("deffilter"), user_package);
eof_s = intern(lit("eof"), user_package);
+ eol_s = intern(lit("eol"), user_package);
error_s = intern(lit("error"), user_package);
type_error_s = intern(lit("type_error"), user_package);
internal_error_s = intern(lit("internal_error"), user_package);
diff --git a/lib.h b/lib.h
index 38717fe9..dda8dfbd 100644
--- a/lib.h
+++ b/lib.h
@@ -227,7 +227,8 @@ extern val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s;
extern val define_s, output_s, single_s, first_s, last_s, empty_s;
extern val repeat_s, rep_s, flatten_s, forget_s;
extern val local_s, merge_s, bind_s, cat_s;
-extern val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s, eof_s;
+extern val try_s, catch_s, finally_s, throw_s, defex_s, deffilter_s;
+extern val eof_s, eol_s;
extern val error_s, type_error_s, internal_error_s;
extern val numeric_error_s, range_error_s;
extern val query_error_s, file_error_s, process_error_s;
diff --git a/match.c b/match.c
index 19b527b5..b310afcb 100644
--- a/match.c
+++ b/match.c
@@ -486,6 +486,54 @@ static val match_line(val bindings, val specline, val dataline,
}
LOG_MATCH("regex", past);
pos = past;
+ } else if (directive == skip_s) {
+ val max = second(elem);
+ val min = third(elem);
+ cnum cmax = nump(max) ? c_num(max) : 0;
+ cnum cmin = nump(min) ? c_num(min) : 0;
+
+ if (!rest(specline))
+ break;
+
+ {
+ cnum reps_max = 0, reps_min = 0;
+
+ while (length_str_gt(dataline, pos) && min && reps_min < cmin) {
+ pos = plus(pos, one);
+ reps_min++;
+ }
+
+ if (min) {
+ if (reps_min != cmin) {
+ debuglf(spec_lineno,
+ lit("skipped only ~a/~a chars to ~a:~a:~a"),
+ num(reps_min), num(cmin),
+ file, data_lineno, pos, nao);
+ return nil;
+ }
+
+ debuglf(spec_lineno, lit("skipped ~a chars to ~a:~a:~a"),
+ num(reps_min), file, data_lineno, pos, nao);
+ }
+
+ while (!max || reps_max++ < cmax) {
+ val result = match_line(bindings, rest(specline), dataline, pos,
+ spec_lineno, data_lineno, file);
+
+ if (result) {
+ LOG_MATCH("skip", pos);
+ return result;
+ }
+
+ if (length_str_le(dataline, pos))
+ break;
+
+ pos = plus(pos, one);
+ }
+ }
+
+ LOG_MISMATCH("skip");
+ return nil;
} else if (directive == coll_s) {
val coll_specline = second(elem);
val until_last_specline = third(elem);
@@ -682,6 +730,13 @@ next_coll:
}
LOG_MATCH("trailer", new_pos);
return cons(bindings, pos);
+ } else if (directive == eol_s) {
+ if (length_str_le(dataline, pos)) {
+ LOG_MATCH("eol", pos);
+ return cons(bindings, t);
+ }
+ LOG_MISMATCH("eol");
+ return nil;
} else if (consp(directive) || stringp(directive)) {
cons_bind (find, len, search_str_tree(dataline, elem, pos, nil));
val newpos;
@@ -1187,16 +1242,14 @@ repeat_spec_same_data:
if (consp(first_spec)) {
val sym = first(first_spec);
- if (sym == skip_s) {
- val max = first(rest(first_spec));
- val min = second(rest(first_spec));
+
+ if (sym == skip_s && rest(specline) == nil) {
+ val args = rest(first_spec);
+ val max = first(args);
+ val min = second(args);
cnum cmax = nump(max) ? c_num(max) : 0;
cnum cmin = nump(min) ? c_num(min) : 0;
- if (rest(specline))
- sem_error(spec_linenum,
- lit("unexpected material after skip directive"), nao);
-
if ((spec = rest(spec)) == nil)
break;
diff --git a/txr.1 b/txr.1
index b4546852..deda5f7f 100644
--- a/txr.1
+++ b/txr.1
@@ -906,6 +906,10 @@ A summary of the available directives follows:
Explicitly match the end of file. Fails if unmatched data remains in
the input stream.
+.IP @(eol)
+Explicitly match the end of line. Fails if the current position is not the
+end of a line. Also fails if no data remains (there is no current line).
+
.IP @(next)
Continue matching in another file.
@@ -915,9 +919,9 @@ Blocks may be referenced by @(accept) and @(fail) directives.
Blocks are discussed in the section BLOCKS below.
.IP @(skip)
-Treat the remaining query as a subquery unit, and search the lines of
-the input file until that subquery matches somewhere.
-A skip is also an anonymous block.
+Treat the remaining query as a subquery unit, and search the lines (or
+characters) of the input file until that subquery matches somewhere. A skip is
+also an anonymous block.
.IP @(trailer)
Treat the remaining query or subquery as a match for a trailing context. That
@@ -1137,6 +1141,17 @@ the query is understood to be processed there.
Of course, the remainder of the query can itself contain skip directives.
Each such directive performs a recursive subsearch.
+Skip comes in vertical and horizontal flavors. For instance, skip and match the
+last line:
+
+ @(skip)
+ @last
+ @(eof)
+
+Skip and match the last character of the line:
+
+ @(skip)@{last 1}@(eol)
+
The skip directive has an optional numeric argument. The value of this
argument limits the range of lines scanned for a match. Judicious use
of this feature can improve the performance of queries.
@@ -1194,6 +1209,17 @@ is a noop, because it means: "the remainder of the query must match starting on
the very next line", or, more briefly, "skip exactly zero lines", which is the
behavior if the skip directive is omitted altogether.
+Here is a trick for grabbing the fourth line from the bottom of the input:
+
+ @(skip)
+ @fourth_from_bottom
+ @(skip 1 3)
+ @(eof)
+
+Last three space-separated tokens of the line:
+
+ @(skip)@a @b @c@(eol)
+
.SS The Trailer Directive
The trailer directive introduces a trailing portion of a query or subquery