summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2011-11-06 17:23:55 -0800
committerKaz Kylheku <kaz@kylheku.com>2011-11-06 17:23:55 -0800
commit687fd6ab7031aa573cbcd1b3ae624eb02530a25c (patch)
tree34bc63f8a0d81c8d7a0aeefe24919673f7a07c10
parentd435df363ff8e2ac5303cb837c9ff2604920ae56 (diff)
downloadtxr-687fd6ab7031aa573cbcd1b3ae624eb02530a25c.tar.gz
txr-687fd6ab7031aa573cbcd1b3ae624eb02530a25c.tar.bz2
txr-687fd6ab7031aa573cbcd1b3ae624eb02530a25c.zip
Task #11581
* match.c (gather_s): New keyword variable. (v_gather): New function. (syms_init): gather_s initialized. (dir_tables_init): v_gather entered into table. * match.h (gather_s): Declared. * parser.l: GATHER token scanning added. * parser.y: GATHER token added. gather_clause nonterminal added. * txr.1: New directive documented. * txr.vim: gather keyword introduced.
-rw-r--r--ChangeLog19
-rw-r--r--match.c64
-rw-r--r--match.h2
-rw-r--r--parser.l4
-rw-r--r--parser.y20
-rw-r--r--txr.165
-rw-r--r--txr.vim2
7 files changed, 171 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index bc176237..aa7c7418 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2011-11-06 Kaz Kylheku <kaz@kylheku.com>
+
+ Task #11581
+
+ * match.c (gather_s): New keyword variable.
+ (v_gather): New function.
+ (syms_init): gather_s initialized.
+ (dir_tables_init): v_gather entered into table.
+
+ * match.h (gather_s): Declared.
+
+ * parser.l: GATHER token scanning added.
+
+ * parser.y: GATHER token added. gather_clause nonterminal added.
+
+ * txr.1: New directive documented.
+
+ * txr.vim: gather keyword introduced.
+
2011-11-05 Kaz Kylheku <kaz@kylheku.com>
* lib.c (env): Fixed inappropriate cut-and-pasted error messages.
diff --git a/match.c b/match.c
index e283e338..066dbb23 100644
--- a/match.c
+++ b/match.c
@@ -51,7 +51,8 @@ int output_produced;
val decline_k, next_spec_k, repeat_spec_k;
val mingap_k, maxgap_k, gap_k, mintimes_k, maxtimes_k, times_k;
val lines_k, chars_k;
-val choose_s, longest_k, shortest_k, greedy_k;
+val choose_s, gather_s;
+val longest_k, shortest_k, greedy_k;
val vars_k;
val append_k, into_k, var_k, list_k, string_k, env_k;
@@ -1920,6 +1921,65 @@ static val v_parallel(match_files_ctx *c)
}
}
+static val v_gather(match_files_ctx *c)
+{
+ spec_bind (specline, spec_linenum, first_spec, c->spec);
+ val specs = copy_list(second(first_spec));
+
+ while (specs && c->data) {
+ list_collect_decl (new_specs, ptail);
+ val max_line = zero;
+ val max_data = nil;
+ val iter, next;
+
+ for (iter = specs, next = cdr(iter); iter != nil; iter = next, next = cdr(iter)) {
+ val nested_spec = first(iter);
+ cons_bind (new_bindings, success,
+ match_files(mf_spec(*c, nested_spec)));
+
+ if (!success) {
+ *cdr_l(iter) = nil;
+ list_collect_nconc(ptail, iter);
+ } else if (success == t) {
+ c->bindings = new_bindings;
+ max_data = t;
+ } else if (consp(success) && max_data != t) {
+ c->bindings = new_bindings;
+ cons_bind (new_data, new_line, success);
+ if (gt(new_line, max_line)) {
+ max_line = new_line;
+ max_data = new_data;
+ }
+ }
+ }
+
+ list_collect_terminate (ptail, nil);
+ specs = new_specs;
+
+ if (consp(max_data)) {
+ debuglf(spec_linenum, lit("gather advancing from line ~a to ~a"),
+ c->data_lineno, max_line, nao);
+ c->data_lineno = max_line;
+ c->data = max_data;
+ } else if (max_data == t) {
+ debuglf(spec_linenum, lit("gather consumed entire file"), nao);
+ c->data = nil;
+ } else {
+ c->data_lineno = plus(c->data_lineno, num(1));
+ c->data = rest(c->data);
+ debuglf(spec_linenum, lit("gather advancing by one line to ~a"), c->data_lineno, nao);
+ }
+ }
+
+ if (specs) {
+ debuglf(spec_linenum, lit("gather failed to match some specs:"), nao);
+ debuglf(spec_linenum, lit("~s"), specs, nao);
+ return nil;
+ }
+
+ return next_spec_k;
+}
+
static val v_collect(match_files_ctx *c)
{
spec_bind (specline, spec_linenum, first_spec, c->spec);
@@ -2877,6 +2937,7 @@ static void syms_init(void)
lines_k = intern(lit("lines"), keyword_package);
chars_k = intern(lit("chars"), keyword_package);
choose_s = intern(lit("choose"), user_package);
+ gather_s = intern(lit("gather"), user_package);
longest_k = intern(lit("longest"), keyword_package);
shortest_k = intern(lit("shortest"), keyword_package);
greedy_k = intern(lit("greedy"), keyword_package);
@@ -2911,6 +2972,7 @@ static void dir_tables_init(void)
sethash(v_directive_table, maybe_s, cptr((mem_t *) v_parallel));
sethash(v_directive_table, cases_s, cptr((mem_t *) v_parallel));
sethash(v_directive_table, choose_s, cptr((mem_t *) v_parallel));
+ sethash(v_directive_table, gather_s, cptr((mem_t *) v_gather));
sethash(v_directive_table, collect_s, cptr((mem_t *) v_collect));
sethash(v_directive_table, flatten_s, cptr((mem_t *) v_flatten));
sethash(v_directive_table, forget_s, cptr((mem_t *) v_forget_local));
diff --git a/match.h b/match.h
index 00eb1f1d..1e15dd2d 100644
--- a/match.h
+++ b/match.h
@@ -27,4 +27,4 @@
void match_init(void);
val match_funcall(val name, val arg, val other_args);
int extract(val spec, val filenames, val bindings);
-extern val choose_s;
+extern val choose_s, gather_s;
diff --git a/parser.l b/parser.l
index af6da051..2a36a05b 100644
--- a/parser.l
+++ b/parser.l
@@ -240,6 +240,10 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return CHOOSE;
}
+<SPECIAL>\({WS}gather/{ID_END} {
+ yy_push_state(NESTED);
+ return GATHER;
+ }
<SPECIAL>\({WS}and{WS}\) {
yy_pop_state();
diff --git a/parser.y b/parser.y
index 1437e982..115d9fbd 100644
--- a/parser.y
+++ b/parser.y
@@ -59,7 +59,7 @@ static val parsed_spec;
}
%token <lexeme> SPACE TEXT IDENT KEYWORD METAVAR
-%token <lexeme> ALL SOME NONE MAYBE CASES CHOOSE
+%token <lexeme> ALL SOME NONE MAYBE CASES CHOOSE GATHER
%token <lexeme> AND OR END COLLECT
%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE
%token <lexeme> TRY CATCH FINALLY
@@ -72,7 +72,7 @@ static val parsed_spec;
%type <val> spec clauses clauses_opt clause
%type <val> all_clause some_clause none_clause maybe_clause
-%type <val> cases_clause choose_clause collect_clause until_last
+%type <val> cases_clause choose_clause gather_clause collect_clause until_last
%type <val> clause_parts additional_parts
%type <val> output_clause define_clause try_clause catch_clauses_opt
%type <val> line elems_opt elems clause_parts_h additional_parts_h
@@ -123,6 +123,7 @@ clause : all_clause { $$ = list(num(lineno - 1), $1, nao); }
| cases_clause { $$ = list(num(lineno - 1), $1, nao); }
| choose_clause { $$ = list(num(lineno - 1), $1, nao); }
| collect_clause { $$ = list(num(lineno - 1), $1, nao); }
+ | gather_clause { $$ = list(num(lineno - 1), $1, nao); }
| define_clause { $$ = list(num(lineno - 1),
define_transform($1), nao); }
| try_clause { $$ = list(num(lineno - 1), $1, nao); }
@@ -184,6 +185,21 @@ choose_clause : CHOOSE exprs_opt ')'
yyerror("empty choose clause"); }
;
+gather_clause : GATHER exprs_opt ')'
+ newl clause_parts { $$ = list(gather_s,
+ append2(mapcar(curry_12_1(func_n2(cons), nil),
+ first($5)), rest($5)),
+ $2, nao); }
+ | GATHER exprs_opt ')'
+ newl error { $$ = nil;
+ yybadtoken(yychar,
+ lit("gather clause")); }
+ | GATHER exprs_opt ')'
+ newl END newl { $$ = nil;
+ yyerror("empty gather clause"); }
+ ;
+
+
collect_clause : COLLECT exprs_opt ')' newl
clauses END newl { $$ = list(collect_s,
$5, nil, $2,
diff --git a/txr.1 b/txr.1
index 1523f3a4..ba6093ff 100644
--- a/txr.1
+++ b/txr.1
@@ -1012,6 +1012,11 @@ is the one which maximizes or minimizes the length of a particular variable.
.IP @(define\ NAME\ (\ ARGUMENTS\ ...))
Introduces a function. Functions are discussed in the FUNCTIONS section below.
+.IP @(gather)
+Searches text for matches for multiple clauses which may occur in arbitrary
+order. For convenience, lines of the first clause are treated as separate
+clauses.
+
.IP @(collect)
Search the data for multiple matches of a clause. Collect the
bindings in the clause into lists, which are output as array variables.
@@ -1601,6 +1606,66 @@ but the other one matches five lines, then the overall clause is considered to
have made a five line match at its position. If more directives follow, they
begin matching five lines down from that position.
+.SS The Gather Directive
+
+Sometimes text is structured as items that can appear in an arbitrary order.
+When multiple matches need to be extracted, there is a combinatorial explosion
+of possible orders, making it impractical to write pattern matches for all
+the possible orders.
+
+The gather directive is for these situations. It specifies multiple clauses
+which all have to match somewhere in the data, but in any order.
+
+For further convenience, the lines of the first clause of the gather directive
+are implicitly treated as separate clauses.
+
+The syntax follow this pattern
+
+ @(gather)
+ one-line-query1
+ one-line-query2
+ .
+ .
+ .
+ one-line-queryN
+ @(and)
+ multi
+ line
+ query1
+ .
+ .
+ .
+ @(and)
+ multi
+ line
+ query2
+ .
+ .
+ .
+ @(end)
+
+Of course the multi-line clauses are optional.
+
+How gather works is that the text is searched for matches for the single line
+and multi-line queries. The clauses are applied in the order in which they appear.
+Whenever one of the clauses matches, any bindings it produces are retained and
+it is removed from further consideration. Multiple clauses can match at the
+same text position. The position advances by the longest match from among the
+clauses which matched. If no clauses match, the position advances by one line.
+The search stops when all clauses are eliminated, and then the cumulative
+bindings are produced. If the data runs out, but unmatched clauses remain, the
+directive fails.
+
+Example: extract several environment variables, which do not appear in a particular
+order:
+
+ @(next :env)
+ @(gather)
+ USER=@USER
+ HOME=@HOME
+ SHELL=@SHELL
+ @(end)
+
.SS The Collect Directive
The syntax of the collect directive is:
diff --git a/txr.vim b/txr.vim
index 35573484..f101a0ae 100644
--- a/txr.vim
+++ b/txr.vim
@@ -18,7 +18,7 @@ syn spell toplevel
syn keyword txr_keyword contained skip trailer freeform block accept fail
syn keyword txr_keyword contained next some all none and or
-syn keyword txr_keyword contained maybe cases choose collect until last end
+syn keyword txr_keyword contained maybe cases choose gather collect until last end
syn keyword txr_keyword contained flatten forget local merge bind set cat output
syn keyword txr_keyword contained repeat rep first last single empty
syn keyword txr_keyword contained define try catch finally throw