summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-07-31 17:28:44 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-07-31 17:36:57 -0700
commitbf1cc0077168d7c1efa903afb5aa782d071533b6 (patch)
treee5925c12772ee9597cbde438e32f36aa1bdf1bd2
parentf8010dc6f95b967ffc6b653b33300e4b4d850c14 (diff)
downloadtxr-bf1cc0077168d7c1efa903afb5aa782d071533b6.tar.gz
txr-bf1cc0077168d7c1efa903afb5aa782d071533b6.tar.bz2
txr-bf1cc0077168d7c1efa903afb5aa782d071533b6.zip
txr-012 2009-09-28txr-012
-rw-r--r--ChangeLog41
-rw-r--r--extract.l2
-rw-r--r--extract.y82
-rw-r--r--lib.c120
-rw-r--r--lib.h6
-rw-r--r--tests/003/query-1.expected2
-rw-r--r--tests/003/query-1.txr18
-rw-r--r--txr.1147
8 files changed, 300 insertions, 118 deletions
diff --git a/ChangeLog b/ChangeLog
index b5279410..33225a61 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,44 @@
+2009-09-28 Kaz Kylheku <kkylheku@gmail.com>
+
+ Version 012
+
+ Semantics change of @(until) in @(collect) and @(coll).
+
+ Minor fixes.
+
+ * extract.y (match_line, match_files): The until clauses
+ continue to be processed after the main clauses of the collect
+ or coll (to see the bindings), but are processed before
+ the collection occurs, so that the until will veto
+ the bindings of the last iteration. Moreover, the data
+ position stays where it is when this happens, and no
+ arrangement is made to match the until material again.
+
+ * txr.1: Tried to document the change.
+
+
+2009-09-27 Kaz Kylheku <kkylheku@gmail.com>
+
+ * txr.1: following proofread, fixed various escaping problems and
+ instances of missing text.
+
+2009-09-26 Kaz Kylheku <kkylheku@gmail.com>
+
+ * lib.c (equal): Bugfixes: wrong fallthrough of FUN case.
+ VEC case must return nil, not break.
+
+2009-09-26 Kaz Kylheku <kkylheku@gmail.com>
+
+ Preparation for some sorting support.
+
+ * extract.y (merge): Renamed to weird_merge.
+ (map_leaf_lists): New function.
+ (match_file): Follow weird_merge rename.
+ * lib.c (all_satisfy, none_satisfy, string_lt, do_bind2other,
+ bind2other, merge, do_sort, sort): New functions.
+ * lib.h (all_satisfy, none_satisfy, string_lt,
+ bind2other, sort): Declared.
+
2009-09-25 Kaz Kylheku <kkylheku@gmail.com>
Version 011
diff --git a/extract.l b/extract.l
index 4c15476d..e5baccad 100644
--- a/extract.l
+++ b/extract.l
@@ -40,7 +40,7 @@
#define YY_NO_UNPUT
-const char *version = "011";
+const char *version = "012";
const char *progname = "txr";
const char *spec_file = "stdin";
long lineno = 1;
diff --git a/extract.y b/extract.y
index 594b341e..8c53a428 100644
--- a/extract.y
+++ b/extract.y
@@ -505,7 +505,7 @@ obj_t *depth(obj_t *obj)
return plus(dep, one);
}
-obj_t *merge(obj_t *left, obj_t *right)
+obj_t *weird_merge(obj_t *left, obj_t *right)
{
obj_t *left_depth = depth(left);
obj_t *right_depth = depth(right);
@@ -523,6 +523,15 @@ obj_t *merge(obj_t *left, obj_t *right)
return append2(left, right);
}
+obj_t *map_leaf_lists(obj_t *func, obj_t *list) /* apply func to every list that has no list elements, recursing through nesting */
+{
+ if (atom(list)) /* atoms are handed back unchanged */
+ return list;
+ if (none_satisfy(list, func_n1(listp), nil)) /* no element passes listp: treat this as a leaf list */
+ return funcall1(func, list);
+ return mapcar(bind2(func_n2(map_leaf_lists), func), list); /* otherwise recurse into each element; presumably bind2 fixes func as the first argument -- confirm against do_bind2 */
+}
+
obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value)
{
if (nullp(pattern))
@@ -734,6 +743,20 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
match_line(bindings, coll_specline, dataline, pos,
spec_lineno, data_lineno, file));
+ if (until_specline) {
+ cons_bind (until_bindings, until_pos,
+ match_line(bindings, until_specline, dataline, pos,
+ spec_lineno, data_lineno, file));
+
+ if (until_pos) {
+ (void) until_bindings;
+ LOG_MATCH("until", until_pos);
+ break;
+ } else {
+ LOG_MISMATCH("until");
+ }
+ }
+
if (new_pos) {
LOG_MATCH("coll", new_pos);
@@ -748,27 +771,6 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
}
}
- if (until_specline) {
- cons_bind (until_bindings, until_pos,
- match_line(bindings, until_specline, dataline, pos,
- spec_lineno, data_lineno, file));
-
- (void) until_bindings;
- if (until_pos) {
- /* The until specline matched. Special behavior:
- We throw away its bindings, and run it again.
- We run it again by incorporating it into the
- surrouding specline, just behind the collect
- item, which will be popped off. */
- LOG_MATCH("until", until_pos);
- (void) new_bindings;
- specline = cons(first(specline),
- append2(until_specline, rest(specline)));
- break;
- }
- LOG_MISMATCH("until");
- }
-
if (new_pos && !equal(new_pos, pos)) {
pos = new_pos;
assert (c_num(pos) <= c_num(length_str(dataline)));
@@ -1362,6 +1364,19 @@ repeat_spec_same_data:
match_files(coll_spec, files, bindings,
data, num(data_lineno)));
+ /* Until clause sees un-collated bindings from collect. */
+ if (until_spec)
+ {
+ cons_bind (discarded_bindings, success,
+ match_files(until_spec, files, new_bindings,
+ data, num(data_lineno)));
+
+ if (success) {
+ (void) discarded_bindings;
+ break;
+ }
+ }
+
if (success) {
yyerrorlf(2, spec_lineno, "collect matched %s:%ld",
c_str(first(files)), data_lineno);
@@ -1377,27 +1392,6 @@ repeat_spec_same_data:
}
}
- /* Until clause sees un-collated bindings from collect. */
- if (until_spec)
- {
- cons_bind (discarded_bindings, success,
- match_files(until_spec, files, new_bindings,
- data, num(data_lineno)));
-
- if (success) {
- /* The until spec matched. Special behavior:
- We throw away its bindings, and run it again.
- We run it again by incorporating it into the
- surrouding spec, just behind the topmost one.
- When we bail out of this loop, the first(spec)
- will be popped, exposing the until_spec,
- and then the main loop is repeated. */
- (void) discarded_bindings;
- spec = cons(first(spec), append2(until_spec, rest(spec)));
- break;
- }
- }
-
if (success) {
if (consp(success)) {
yyerrorlf(2, spec_lineno,
@@ -1490,7 +1484,7 @@ repeat_spec_same_data:
c_str(symbol_name(sym)));
if (merged)
- merged = merge(merged, cdr(other_lookup));
+ merged = weird_merge(merged, cdr(other_lookup));
else
merged = cdr(other_lookup);
}
diff --git a/lib.c b/lib.c
index 348b54f3..e1dbc273 100644
--- a/lib.c
+++ b/lib.c
@@ -300,6 +300,32 @@ obj_t *some_satisfy(obj_t *list, obj_t *pred, obj_t *key)
return nil;
}
+obj_t *all_satisfy(obj_t *list, obj_t *pred, obj_t *key) /* t if pred accepts key(item) for every item, else nil */
+{
+ if (!key) /* a nil key means: test the items themselves */
+ key = identity_f;
+
+ for (; list; list = cdr(list)) {
+ if (!funcall1(pred, funcall1(key, car(list)))) /* first rejection decides the result */
+ return nil;
+ }
+
+ return t; /* also reached when list is empty */
+}
+
+obj_t *none_satisfy(obj_t *list, obj_t *pred, obj_t *key) /* t if pred rejects key(item) for every item, else nil */
+{
+ if (!key) /* a nil key means: test the items themselves */
+ key = identity_f;
+
+ for (; list; list = cdr(list)) {
+ if (funcall1(pred, funcall1(key, car(list)))) /* first acceptance decides the result */
+ return nil;
+ }
+
+ return t; /* also reached when list is empty */
+}
+
obj_t *flatten(obj_t *list)
{
if (atom(list))
@@ -364,6 +390,7 @@ obj_t *equal(obj_t *left, obj_t *right)
}
return nil;
}
+ return nil;
case VEC:
if (right->t.type == VEC) {
long i, fill;
@@ -376,12 +403,13 @@ obj_t *equal(obj_t *left, obj_t *right)
}
return t;
}
- break;
+ return nil;
case STREAM:
return nil; /* Different stream objects never equal. */
case COBJ:
if (right->t.type == COBJ)
return left->co.ops->equal(left, right);
+ return nil;
}
assert (0 && "notreached");
@@ -741,6 +769,12 @@ obj_t *trim_str(obj_t *str)
}
}
+obj_t *string_lt(obj_t *astr, obj_t *bstr) /* t if astr orders before bstr under strcmp, else nil */
+{
+ int cmp = strcmp(c_str(astr), c_str(bstr));
+ return cmp < 0 ? t : nil; /* strcmp guarantees only the sign of its result, never exactly -1 */
+}
+
obj_t *chr(int ch)
{
obj_t *obj = make_obj();
@@ -998,6 +1032,17 @@ obj_t *bind2(obj_t *fun2, obj_t *arg)
return func_f1(cons(fun2, arg), do_bind2);
}
+obj_t *do_bind2other(obj_t *fcons, obj_t *arg1) /* call car(fcons) with arg1 first and cdr(fcons) second */
+{
+ return funcall2(car(fcons), arg1, cdr(fcons));
+}
+
+obj_t *bind2other(obj_t *fun2, obj_t *arg2) /* wrap fun2 as a one-argument function with arg2 fixed as its second argument */
+{
+ return func_f1(cons(fun2, arg2), do_bind2other); /* the cons carries fun2 and arg2 to do_bind2other */
+}
+
+
static obj_t *do_chain(obj_t *fun1_list, obj_t *arg)
{
for (; fun1_list; fun1_list = cdr(fun1_list))
@@ -1346,6 +1391,79 @@ obj_t *mappend(obj_t *fun, obj_t *list)
return out;
}
+obj_t *merge(obj_t *list1, obj_t *list2, obj_t *lessfun, obj_t *keyfun) /* merge two lists by lessfun applied to keyfun(item); rewrites the cdr fields of the input cells */
+{
+ list_collect_decl (out, ptail);
+
+ while (list1 && list2) {
+ obj_t *el1 = funcall1(keyfun, first(list1)); /* keyfun is called unconditionally, so it must not be nil here */
+ obj_t *el2 = funcall1(keyfun, first(list2));
+
+ if (funcall2(lessfun, el1, el2)) { /* when lessfun is false (including ties) the list2 cell is taken */
+ obj_t *next = cdr(list1);
+ *cdr_l(list1) = nil; /* detach the head cell before handing it to the collector */
+ list_collect_append(ptail, list1);
+ list1 = next;
+ } else {
+ obj_t *next = cdr(list2);
+ *cdr_l(list2) = nil; /* detach the head cell before handing it to the collector */
+ list_collect_append(ptail, list2);
+ list2 = next;
+ }
+ }
+
+ if (list1) /* one input is exhausted: hand over whatever remains of the other */
+ list_collect_append(ptail, list1);
+ else
+ list_collect_append(ptail, list2);
+
+ return out;
+}
+
+static obj_t *do_sort(obj_t *list, obj_t *lessfun, obj_t *keyfun) /* recursive merge sort that relinks the cells of list; keyfun must be non-nil */
+{
+ if (list == nil) /* empty list */
+ return nil;
+ if (!cdr(list)) /* single element */
+ return list;
+ if (!cdr(cdr(list))) { /* exactly two elements: handled without recursion */
+ if (funcall2(lessfun, funcall1(keyfun, first(list)),
+ funcall1(keyfun, second(list))))
+ {
+ return list;
+ } else {
+ obj_t *cons2 = cdr(list); /* swap the two cells in place; also taken when the keys tie */
+ *cdr_l(cons2) = list;
+ *cdr_l(list) = nil;
+ return cons2;
+ }
+ }
+
+ {
+ obj_t *bisect, *iter;
+ obj_t *list2;
+
+ for (iter = cdr(cdr(list)), bisect = list; iter; /* iter moves two cells per step, bisect one, so bisect stops near the middle */
+ bisect = cdr(bisect), iter = cdr(cdr(iter)))
+ ; /* empty */
+
+ list2 = cdr(bisect); /* second half starts after bisect */
+ *cdr_l(bisect) = nil; /* cut the list into two separate halves */
+
+ return merge(sort(list, lessfun, keyfun), /* recursion goes through the public sort entry point */
+ sort(list2, lessfun, keyfun),
+ lessfun, keyfun);
+ }
+}
+
+obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun) /* sort list by lessfun over keyfun(item); returns the head of the reordered list */
+{
+ if (!keyfun) /* a nil keyfun means: compare the items themselves */
+ keyfun = identity_f;
+
+ return do_sort(list, lessfun, keyfun); /* do_sort relinks the original cells, so the argument list is modified */
+}
+
static void obj_init(void)
{
int gc_save = gc_state(0);
diff --git a/lib.h b/lib.h
index 026efb97..09574e1b 100644
--- a/lib.h
+++ b/lib.h
@@ -196,6 +196,8 @@ obj_t *flatten(obj_t *list);
obj_t *memq(obj_t *obj, obj_t *list);
obj_t *tree_find(obj_t *obj, obj_t *tree);
obj_t *some_satisfy(obj_t *list, obj_t *pred, obj_t *key);
+obj_t *all_satisfy(obj_t *list, obj_t *pred, obj_t *key);
+obj_t *none_satisfy(obj_t *list, obj_t *pred, obj_t *key);
long c_num(obj_t *num);
obj_t *nump(obj_t *num);
obj_t *equal(obj_t *left, obj_t *right);
@@ -235,6 +237,7 @@ obj_t *search_str_tree(obj_t *haystack, obj_t *tree, obj_t *start_num,
obj_t *sub_str(obj_t *str_in, obj_t *from_num, obj_t *to_num);
obj_t *cat_str(obj_t *list, obj_t *sep);
obj_t *trim_str(obj_t *str);
+obj_t *string_lt(obj_t *astr, obj_t *bstr);
obj_t *chr(int ch);
int c_chr(obj_t *chr);
obj_t *sym_name(obj_t *sym);
@@ -259,6 +262,7 @@ obj_t *funcall1(obj_t *fun, obj_t *arg);
obj_t *funcall2(obj_t *fun, obj_t *arg1, obj_t *arg2);
obj_t *reduce_left(obj_t *fun, obj_t *list, obj_t *init, obj_t *key);
obj_t *bind2(obj_t *fun2, obj_t *arg);
+obj_t *bind2other(obj_t *fun2, obj_t *arg2);
obj_t *chain(obj_t *fun1_list);
obj_t *vector(obj_t *alloc);
obj_t *vec_get_fill(obj_t *vec);
@@ -280,6 +284,8 @@ obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value);
obj_t *alist_remove(obj_t *list, obj_t *keys);
obj_t *mapcar(obj_t *fun, obj_t *list);
obj_t *mappend(obj_t *fun, obj_t *list);
+obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun);
+
void obj_print(obj_t *obj, FILE *);
void init(const char *progname, void *(*oom_realloc)(void *, size_t));
void dump(obj_t *obj, FILE *);
diff --git a/tests/003/query-1.expected b/tests/003/query-1.expected
new file mode 100644
index 00000000..dd18efe8
--- /dev/null
+++ b/tests/003/query-1.expected
@@ -0,0 +1,2 @@
+local fun!
+global fun!
diff --git a/tests/003/query-1.txr b/tests/003/query-1.txr
new file mode 100644
index 00000000..4f56151e
--- /dev/null
+++ b/tests/003/query-1.txr
@@ -0,0 +1,18 @@
+@(define which)
+@ (fun)
+@(end)
+@(define fun)
+@ (output)
+global fun!
+@ (end)
+@(end)
+@(define caller)
+@ (define fun)
+@ (output)
+local fun!
+@ (end)
+@ (end)
+@ (which)
+@(end)
+@(caller)
+@(which)
diff --git a/txr.1 b/txr.1
index 79a5eeaa..4bf67a7c 100644
--- a/txr.1
+++ b/txr.1
@@ -21,7 +21,7 @@
.\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
.\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-.TH txr 1 2009-09-09 "txr v. 011" "Text Extraction Utility"
+.TH txr 1 2009-09-09 "txr v. 012" "Text Extraction Utility"
.SH NAME
txr \- text extractor
.SH SYNOPSIS
@@ -233,11 +233,12 @@ lines in the data, leading to spurious mismatches.
.SS Text
-character for character. Text which occurs at the beginning of a line matches
-the beginning of a line. Text which starts in the middle of a line, other than
-following a variable, must match exactly at the current position, where the
-previous match left off. Moreover, if the text is the last element in the line,
-its match is anchored to the end of the line.
+Query material which is not escaped by the special character @ is
+literal text, which matches input character for character. Text which occurs at
+the beginning of a line matches the beginning of a line. Text which starts in
+the middle of a line, other than following a variable, must match exactly at
+the current position, where the previous match left off. Moreover, if the text
+is the last element in the line, its match is anchored to the end of the line.
The semantics of text matching next to a variable is discussed in the following
section.
@@ -286,34 +287,34 @@ Control characters may be embedded directly in a query (with the exception of
newline characters). An alternative to embedding is to use escape syntax.
The following escapes are supported:
-.IP @\\a
+.IP @\ea
Alert character (ASCII 7, BEL).
-.IP @\\b
+.IP @\eb
Backspace (ASCII 8, BS).
-.IP @\\t
+.IP @\et
Horizontal tab (ASCII 9, HT).
-.IP @\\n
+.IP @\en
Line feed (ASCII 10, LF). Serves as abstract newline on POSIX systems.
-.IP @\\v
+.IP @\ev
Vertical tab (ASCII 11, VT).
-.IP @\\f
+.IP @\ef
Form feed (ASCII 12, FF). This character clears the screen on many
kinds of terminals, or ejects a page of text from a line printer.
-.IP @\\r
+.IP @\er
Carriage return (ASCII 13, CR).
-.IP @\\e
+.IP @\ee
Escape (ASCII 27, ESC)
-.IP @\\x<hex>
-A @\\x followed by a sequence of hex digits is interpreted as a hexadecimal
-numeric character code. For instance @\\x41 is the ASCII character A.
-.IP @\\<octal>
-A @\\ followed by a sequence of octal digits (0 through 7) is interpreted
-as an octal character code. For instance @\\010 is character 8, same as @\\b.
+.IP @\exHEX
+A @\ex followed by a sequence of hex digits is interpreted as a hexadecimal
+numeric character code. For instance @\ex41 is the ASCII character A.
+.IP @\eOCTAL
+A @\e followed by a sequence of octal digits (0 through 7) is interpreted
+as an octal character code. For instance @\e010 is character 8, same as @\eb.
.PP
-Note that if a newline is embedded into a query line with @\\n, this
+Note that if a newline is embedded into a query line with @\en, this
does not split the line into two; it's embedded into the line and
-thus cannot match anything. However, @\\n may be useful in the @(cat)
+thus cannot match anything. However, @\en may be useful in the @(cat)
directive and in @(output).
.SS Variables
@@ -505,8 +506,8 @@ or lowercase letter; the class [0-9a-f] means match a digit or
a lowercase letter, the class [^0-9] means match a non-digit, et cetera.
A ] or - can be used within a character class, but must be escaped
with a backslash. Two backslashes code for one backslash. So
-for instance [\[\-] means match a [ or - character, [^^] means match
-any character other than ^, and [\^\\] means match either a ^ or a
+for instance [\e[\e-] means match a [ or - character, [^^] means match
+any character other than ^, and [\e^\e\e] means match either a ^ or a
backslash.
.IP (RE)
If RE is a regular expression, then so is (RE).
@@ -531,8 +532,8 @@ a backslash to suppress its meaning and denote the character itself.
Furthermore, all of the same escapes are as described in the section Special
Characters in Text above---the difference is that in regular expressions, the @
-character is not required, so for example a tab is coded as \\t rather
-than @\\t.
+character is not required, so for example a tab is coded as \et rather
+than @\et.
Any escaped character which does not fall into the above escaping conventions,
or any unescaped character which is not a regular expression operator, denotes
@@ -808,17 +809,17 @@ be written instead:
These directives combine multiple subqueries, which are applied at the same position in parallel. The syntax of all three follows this example:
@(some)
- <subquery1>
+ subquery1
.
.
.
@(and)
- <subquery2>
+ subquery2
.
.
.
@(and)
- <subquery3>
+ subquery3
.
.
.
@@ -895,13 +896,13 @@ The syntax of the collect directive is:
or with an until clause:
@(collect)
- ... lines of subquery
+ ... lines of subquery: main clause
@(until)
- ... lines of subquery
+ ... lines of subquery: until clause
@(end)
-The the subquery is matched repeatedly, starting at the current line.
+The subquery is matched repeatedly, starting at the current line.
If it fails to match, it is tried starting at the subsequent line.
If it matches successfully, it is tried at the line following the
entire extent of matched data, if there is one. Thus, the collected regions do
@@ -916,10 +917,10 @@ fail if it tries to match anything in the current file; but of course, it
is possible to continue matching in another file by means of @(next).
If an until clause is specified, the collection stops when that clause matches
-at the current position (and that last position is also collected, if it
-matches). If the collection is stopped by a match in the until clause,
-any variables bound in that clause also emerge out of the overall collect
-clause (but these bindings are single values, not lists).
+at the current position. When an until clause matches at a position,
+no bindings are collected at that position, even if the main clause
+matches at that position also. Moreover, the position is not advanced.
+The remainder of the query begins matching at that position.
Example:
@@ -939,7 +940,8 @@ Example:
Output: a[0]="1"
a[1]="2"
a[2]="3"
- a[3]="42"
+
+The line 42 is not collected, even though it matches @a.
The binding variables within the clause of a collect are treated specially.
The multiple matches for each variable are collected into lists,
@@ -981,8 +983,9 @@ a two dimensional list is a list of lists of strings, etc.
It is important to note that the variables which are bound within the main
clause of a collect---i.e. the variables which are subject to
-collection---appear as normal one-value bindings. The collation into lists
-happens outside of the collect. So for instance in the query:
+collection---appear, within the collect, as normal one-value bindings. The
+collation into lists happens outside of the collect. So for instance in the
+query:
@(collect)
@x=@x
@@ -994,17 +997,10 @@ iteration, and these values are collected. What finally comes out of the
collect clause is a list variable called x which holds each value that
was ever instantiated under that name within the collect clause.
-If the collect stops before exhausting the data file---that is to say,
-it is terminated by a successful match in the until clause---then
-the material consumed by the until clause is considered consumed.
-The current position in the data set which now faces any further
-query material is located beyond the last line which matches
-the until clause. This is true even if the until clause and collect
-clause both match simultaneously, and the clause matches a different
-number of lines. If this last collect matches a greater number of lines
-than the terminating until, then some of the material covered by this last
-collect will be again matched by query lines which follow the collect
-directive.
+Also note that the until clause has visibility over the bindings
+established in the main clause. This is true even in the terminating
+case when the until clause matches, and the bindings of the main clause
+are discarded.
.SS The Coll Directive
@@ -1034,8 +1030,8 @@ position. Whenever a match occurs, it continues at the character position which
follows the last character of the match, if such a position exists.
If not bounded by an until clause, it will exhaust the entire line. If the
-until clause matches, then the remainder of the data line following the extent
-consumed by the until clause is available for more matching.
+until clause matches, then the collection stops at that position,
+and any bindings from that iteration are discarded.
Coll clauses nest, and variables bound within a coll are available to within
the rest of the coll clause, including the until clause, and appear as single
@@ -1096,7 +1092,7 @@ or may not be terminated by a semicolon. We must exclude
the semicolon from being a valid character inside an item, and
add an until clause which recognizes a semicolon:
- pattern: @(coll)@{a /[^ ;]+/}@(until);@(end)
+ pattern: @(coll)@{a /[^ ;]+/}@(until);@(end);
data: 1 2 3 4 5;
result: a[0]="1"
@@ -1105,7 +1101,7 @@ add an until clause which recognizes a semicolon:
a[3]="4"
a[4]="5"
- data: 1 2 3 4 5
+ data: 1 2 3 4 5;
result: a[0]="1"
a[1]="2"
a[2]="3"
@@ -1114,6 +1110,10 @@ add an until clause which recognizes a semicolon:
Semicolon or not, the items are collected properly.
+Note that the @(end) is followed by a semicolon. That's because
+when the @(until) clause meets a match, the matching material
+is not consumed.
+
.SS The Flatten Directive.
The flatten directive can be used to convert variables to one dimensional
@@ -1240,7 +1240,7 @@ followed by a symbol: the forms (.) (. X) and (X .) are invalid.
Blocks are sections of a query which are denoted by a name. Blocks denoted by
the name nil are understood as anonymous.
-The @(block <name>) directive introduces a named block, except when the name is
+The @(block NAME) directive introduces a named block, except when the name is
the word nil. The @(block) directive introduces an unnamed block, equivalent
to @(block nil).
@@ -1278,14 +1278,14 @@ to its matching @(end).
Blocks may nest, and nested blocks may have the same names as blocks in
which they are nested. For instance:
-@(block)
-@(block)
-...
+ @(block)
+ @(block)
+ ...
is a nesting of two anonymous blocks, and
-@(block foo)
-@(block foo)
+ @(block foo)
+ @(block foo)
is a nesting of two named blocks which happen to have the same name.
When a nested block has the same name as an outer block, it creates
@@ -1295,12 +1295,12 @@ inner block, and not to the outer one.
A more complicated example of nesting is:
-@(skip)
-abc
-@(block)
-@(some)
-@(block foo)
-@(end)
+ @(skip)
+ abc
+ @(block)
+ @(some)
+ @(block foo)
+ @(end)
Here, the @(skip) introduces an anonymous block. The explicit anonymous
@(block) is nested within skip's anonymous block and shadows it.
@@ -1314,9 +1314,9 @@ normally. However, a block serves as a termination point for @(fail) and
The precise meaning of these directives is:
-.IP @(fail <name>)
+.IP @(fail\ NAME)
-Immediately terminate the enclosing query block called <name>, as if that block failed to match anything. If more than one block by that name encloses
+Immediately terminate the enclosing query block called NAME, as if that block failed to match anything. If more than one block by that name encloses
the directive, the inner-most block is terminated. No bindings
emerge from a failed block.
@@ -1338,9 +1338,9 @@ collect normally does not fail, even if it matches and collects nothing!
To prematurely terminate a collect by means of its anonymous block, without
failing it, use @(accept).
-.IP @(accept <name>)
+.IP @(accept\ NAME)
-Immediately terminate the enclosing query block called <name>, as if that block
+Immediately terminate the enclosing query block called NAME, as if that block
successfully matched. If more than one block by that name encloses the
directive, the inner-most block is terminated. Any bindings established within
that block until this point emerge from that block.
@@ -1373,7 +1373,7 @@ Example: alternative way to @(until) termination:
This query will collect entire lines into a list called LINE. However,
if the line --- is matched (by the embedded @(maybe)), the collection
is terminated. Only the lines up to, and not including the --- line,
-are collected. The effect is similar to:
+are collected. The effect is identical to:
@(collect)
@LINE
@@ -1381,6 +1381,9 @@ are collected. The effect is similar to:
---
@(end)
+The difference (not relevant in these examples) is that the until clause has
+visibility into the bindings set up by the main clause.
+
However, the following example has a different meaning:
@(collect)
@@ -1399,7 +1402,7 @@ action of collecting the last @LINE binding into the list is not performed.
.SS Data Extent of Terminated Blocks
-A data block may have matched some material prior to being terminated by
+A query block may have matched some material prior to being terminated by
accept. In that case, it is deemed to have only matched that material,
and not any material which follows. This may matter, depending on the context
in which the block occurs.