diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2011-11-06 17:23:55 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2011-11-06 17:23:55 -0800 |
commit | 687fd6ab7031aa573cbcd1b3ae624eb02530a25c (patch) | |
tree | 34bc63f8a0d81c8d7a0aeefe24919673f7a07c10 | |
parent | d435df363ff8e2ac5303cb837c9ff2604920ae56 (diff) | |
download | txr-687fd6ab7031aa573cbcd1b3ae624eb02530a25c.tar.gz txr-687fd6ab7031aa573cbcd1b3ae624eb02530a25c.tar.bz2 txr-687fd6ab7031aa573cbcd1b3ae624eb02530a25c.zip |
Task #11581
* match.c (gather_s): New keyword variable.
(v_gather): New function.
(syms_init): gather_s initialized.
(dir_tables_init): v_gather entered into table.
* match.h (gather_s): Declared.
* parser.l: GATHER token scanning added.
* parser.y: GATHER token added. gather_clause nonterminal added.
* txr.1: New directive documented.
* txr.vim: gather keyword introduced.
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | match.c | 64 | ||||
-rw-r--r-- | match.h | 2 | ||||
-rw-r--r-- | parser.l | 4 | ||||
-rw-r--r-- | parser.y | 20 | ||||
-rw-r--r-- | txr.1 | 65 | ||||
-rw-r--r-- | txr.vim | 2 |
7 files changed, 171 insertions, 5 deletions
@@ -1,3 +1,22 @@ +2011-11-06 Kaz Kylheku <kaz@kylheku.com> + + Task #11581 + + * match.c (gather_s): New keyword variable. + (v_gather): New function. + (syms_init): gather_s initialized. + (dir_tables_init): v_gather entered into table. + + * match.h (gather_s): Declared. + + * parser.l: GATHER token scanning added. + + * parser.y: GATHER token added. gather_clause nonterminal added. + + * txr.1: New directive documented. + + * txr.vim: gather keyword introduced. + 2011-11-05 Kaz Kylheku <kaz@kylheku.com> * lib.c (env): Fixed inappropriate cut-and-pasted error messages. @@ -51,7 +51,8 @@ int output_produced; val decline_k, next_spec_k, repeat_spec_k; val mingap_k, maxgap_k, gap_k, mintimes_k, maxtimes_k, times_k; val lines_k, chars_k; -val choose_s, longest_k, shortest_k, greedy_k; +val choose_s, gather_s; +val longest_k, shortest_k, greedy_k; val vars_k; val append_k, into_k, var_k, list_k, string_k, env_k; @@ -1920,6 +1921,65 @@ static val v_parallel(match_files_ctx *c) } } +static val v_gather(match_files_ctx *c) +{ + spec_bind (specline, spec_linenum, first_spec, c->spec); + val specs = copy_list(second(first_spec)); + + while (specs && c->data) { + list_collect_decl (new_specs, ptail); + val max_line = zero; + val max_data = nil; + val iter, next; + + for (iter = specs, next = cdr(iter); iter != nil; iter = next, next = cdr(iter)) { + val nested_spec = first(iter); + cons_bind (new_bindings, success, + match_files(mf_spec(*c, nested_spec))); + + if (!success) { + *cdr_l(iter) = nil; + list_collect_nconc(ptail, iter); + } else if (success == t) { + c->bindings = new_bindings; + max_data = t; + } else if (consp(success) && max_data != t) { + c->bindings = new_bindings; + cons_bind (new_data, new_line, success); + if (gt(new_line, max_line)) { + max_line = new_line; + max_data = new_data; + } + } + } + + list_collect_terminate (ptail, nil); + specs = new_specs; + + if (consp(max_data)) { + debuglf(spec_linenum, lit("gather advancing from line ~a to ~a"), + c->data_lineno, max_line, nao); + c->data_lineno = max_line; + c->data = max_data; + } else if (max_data == t) { + debuglf(spec_linenum, lit("gather consumed entire file"), nao); + c->data = nil; + } else { + c->data_lineno = plus(c->data_lineno, num(1)); + c->data = rest(c->data); + debuglf(spec_linenum, lit("gather advancing by one line to ~a"), c->data_lineno, nao); + } + } + + if (specs) { + debuglf(spec_linenum, lit("gather failed to match some specs:"), nao); + debuglf(spec_linenum, lit("~s"), specs, nao); + return nil; + } + + return next_spec_k; +} + static val v_collect(match_files_ctx *c) { spec_bind (specline, spec_linenum, first_spec, c->spec); @@ -2877,6 +2937,7 @@ static void syms_init(void) lines_k = intern(lit("lines"), keyword_package); chars_k = intern(lit("chars"), keyword_package); choose_s = intern(lit("choose"), user_package); + gather_s = intern(lit("gather"), user_package); longest_k = intern(lit("longest"), keyword_package); shortest_k = intern(lit("shortest"), keyword_package); greedy_k = intern(lit("greedy"), keyword_package); @@ -2911,6 +2972,7 @@ static void dir_tables_init(void) sethash(v_directive_table, maybe_s, cptr((mem_t *) v_parallel)); sethash(v_directive_table, cases_s, cptr((mem_t *) v_parallel)); sethash(v_directive_table, choose_s, cptr((mem_t *) v_parallel)); + sethash(v_directive_table, gather_s, cptr((mem_t *) v_gather)); sethash(v_directive_table, collect_s, cptr((mem_t *) v_collect)); sethash(v_directive_table, flatten_s, cptr((mem_t *) v_flatten)); sethash(v_directive_table, forget_s, cptr((mem_t *) v_forget_local)); @@ -27,4 +27,4 @@ void match_init(void); val match_funcall(val name, val arg, val other_args); int extract(val spec, val filenames, val bindings); -extern val choose_s; +extern val choose_s, gather_s; @@ -240,6 +240,10 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return CHOOSE; } +<SPECIAL>\({WS}gather/{ID_END} { + yy_push_state(NESTED); + return GATHER; + } <SPECIAL>\({WS}and{WS}\) { yy_pop_state(); @@ -59,7 +59,7 @@ static val parsed_spec; } %token <lexeme> SPACE TEXT IDENT KEYWORD METAVAR -%token <lexeme> ALL SOME NONE MAYBE CASES CHOOSE +%token <lexeme> ALL SOME NONE MAYBE CASES CHOOSE GATHER %token <lexeme> AND OR END COLLECT %token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token <lexeme> TRY CATCH FINALLY @@ -72,7 +72,7 @@ static val parsed_spec; %type <val> spec clauses clauses_opt clause %type <val> all_clause some_clause none_clause maybe_clause -%type <val> cases_clause choose_clause collect_clause until_last +%type <val> cases_clause choose_clause gather_clause collect_clause until_last %type <val> clause_parts additional_parts %type <val> output_clause define_clause try_clause catch_clauses_opt %type <val> line elems_opt elems clause_parts_h additional_parts_h @@ -123,6 +123,7 @@ clause : all_clause { $$ = list(num(lineno - 1), $1, nao); } | cases_clause { $$ = list(num(lineno - 1), $1, nao); } | choose_clause { $$ = list(num(lineno - 1), $1, nao); } | collect_clause { $$ = list(num(lineno - 1), $1, nao); } + | gather_clause { $$ = list(num(lineno - 1), $1, nao); } | define_clause { $$ = list(num(lineno - 1), define_transform($1), nao); } | try_clause { $$ = list(num(lineno - 1), $1, nao); } @@ -184,6 +185,21 @@ choose_clause : CHOOSE exprs_opt ')' yyerror("empty choose clause"); } ; +gather_clause : GATHER exprs_opt ')' + newl clause_parts { $$ = list(gather_s, + append2(mapcar(curry_12_1(func_n2(cons), nil), + first($5)), rest($5)), + $2, nao); } + | GATHER exprs_opt ')' + newl error { $$ = nil; + yybadtoken(yychar, + lit("gather clause")); } + | GATHER exprs_opt ')' + newl END newl { $$ = nil; + yyerror("empty gather clause"); } + ; + + collect_clause : COLLECT exprs_opt ')' newl clauses END newl { $$ = list(collect_s, $5, nil, $2, @@ -1012,6 +1012,11 @@ is the one which maximizes or minimizes the length of a particular variable. .IP @(define\ NAME\ (\ ARGUMENTS\ ...)) Introduces a function. Functions are discussed in the FUNCTIONS section below. +.IP @(gather) +Searches text for matches for multiple clauses which may occur in arbitrary +order. For convenience, lines of the first clause are treated as separate +clauses. + .IP @(collect) Search the data for multiple matches of a clause. Collect the bindings in the clause into lists, which are output as array variables. @@ -1601,6 +1606,66 @@ but the other one matches five lines, then the overall clause is considered to have made a five line match at its position. If more directives follow, they begin matching five lines down from that position. +.SS The Gather Directive + +Sometimes text is structured as items that can appear in an arbitrary order. +When multiple matches need to be extracted, there is a combinatorial explosion +of possible orders, making it impractical to write pattern matches for all +the possible orders. + +The gather directive is for these situations. It specifies multiple clauses +which all have to match somewhere in the data, but in any order. + +For further convenience, the lines of the first clause of the gather directive +are implicitly treated as separate clauses. + +The syntax follow this pattern + + @(gather) + one-line-query1 + one-line-query2 + . + . + . + one-line-queryN + @(and) + multi + line + query1 + . + . + . + @(and) + multi + line + query2 + . + . + . + @(end) + +Of course the multi-line clauses are optional. + +How gather works is that the text is searched for matches for the single line +and multi-line queries. The clauses are applied in the order in which they appear. +Whenever one of the clauses matches, any bindings it produces are retained and +it is removed from further consideration. Multiple clauses can match at the +same text position. The position advances by the longest match from among the +clauses which matched. If no clauses match, the position advances by one line. +The search stops when all clauses are eliminated, and then the cumulative +bindings are produced. If the data runs out, but unmatched clauses remain, the +directive fails. + +Example: extract several environment variables, which do not appear in a particular +order: + + @(next :env) + @(gather) + USER=@USER + HOME=@HOME + SHELL=@SHELL + @(end) + .SS The Collect Directive The syntax of the collect directive is: @@ -18,7 +18,7 @@ syn spell toplevel syn keyword txr_keyword contained skip trailer freeform block accept fail syn keyword txr_keyword contained next some all none and or -syn keyword txr_keyword contained maybe cases choose collect until last end +syn keyword txr_keyword contained maybe cases choose gather collect until last end syn keyword txr_keyword contained flatten forget local merge bind set cat output syn keyword txr_keyword contained repeat rep first last single empty syn keyword txr_keyword contained define try catch finally throw |