diff options
-rw-r--r-- | ChangeLog | 20 | ||||
-rw-r--r-- | match.c | 27 | ||||
-rw-r--r-- | parser.l | 6 | ||||
-rw-r--r-- | parser.y | 41 | ||||
-rw-r--r-- | txr.1 | 101 |
5 files changed, 156 insertions, 39 deletions
@@ -1,5 +1,25 @@ 2012-03-13 Kaz Kylheku <kaz@kylheku.com> + Change: @(block) requires @(end) from now on. + Blocks no longer extend to the end of the surrounding + scope. + + * match.c (v_block): Rewrite for new syntax. + + * parser.l (BLOCK): New token type handled. + + * parser.y (BLOCK): New token. + (block_clause): New nonterminal grammar symbol. + (clause): Collateral fix: replaced a bunch of + list(X, nao) forms with cons(X, nil). + Introduced block_clause as a constituent of clause. + + * txr.1: Revamped documentation of block, and + wrote about using blocks for reducing nested + skips and reducing backtracking in general. + +2012-03-13 Kaz Kylheku <kaz@kylheku.com> + * parser.l (ID_END): Bugfix: ID_END was defined incorrectly for the current way in which an identifier token is recognized. As a result @(collect-ing) was being interpreted as @(collect -ing). @@ -2110,20 +2110,35 @@ static val v_block(match_files_ctx *c) { spec_bind (specline, first_spec, c->spec); - val name = first(rest(first_spec)); + val args = rest(first_spec); + val name = first(args); + val spec = second(args); if (rest(specline)) sem_error(specline, lit("unexpected material after block directive"), nao); - if ((c->spec = rest(c->spec)) != nil) { uw_block_begin(name, result); - result = match_files(*c); + result = match_files(mf_spec(*c, spec)); uw_block_end; - return result; - } - return next_spec_k; + { + cons_bind (new_bindings, success, result); + + if (!success) { + return nil; + } else if (success == t) { + c->data = nil; + } else { + cons_bind (new_data, new_line, success); + c->data = new_data; + c->data_lineno = new_line; + } + + c->bindings = new_bindings; + return next_spec_k; + } + } } static val v_accept_fail(match_files_ctx *c) @@ -257,6 +257,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return CASES; } +<SPECIAL>\({WS}block/{ID_END} { + yy_push_state(NESTED); + yylval.lineno = lineno; + return BLOCK; +} + <SPECIAL>\({WS}choose/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; @@ -67,7 +67,7 @@ static val parsed_spec; } %token <lexeme> SPACE TEXT IDENT KEYWORD METAVAR -%token <lineno> ALL SOME NONE MAYBE CASES CHOOSE GATHER +%token <lineno> ALL SOME NONE MAYBE CASES BLOCK CHOOSE GATHER %token <lineno> AND OR END COLLECT %token <lineno> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY %token <lineno> MOD MODLAST DEFINE TRY CATCH FINALLY @@ -80,7 +80,7 @@ static val parsed_spec; %token <chr> METAPAR METABKT SPLICE %type <val> spec clauses clauses_opt clause -%type <val> all_clause some_clause none_clause maybe_clause +%type <val> all_clause some_clause none_clause maybe_clause block_clause %type <val> cases_clause choose_clause gather_clause collect_clause until_last %type <val> clause_parts additional_parts gather_parts additional_gather_parts %type <val> output_clause define_clause try_clause catch_clauses_opt @@ -128,19 +128,20 @@ clauses_opt : clauses { $$ = $1; } | /* empty */ { $$ = nil; } ; -clause : all_clause { $$ = list($1, nao); rlcp($$, $1); } - | some_clause { $$ = list($1, nao); rlcp($$, $1); } - | none_clause { $$ = list($1, nao); rlcp($$, $1); } - | maybe_clause { $$ = list($1, nao); rlcp($$, $1); } - | cases_clause { $$ = list($1, nao); rlcp($$, $1); } - | choose_clause { $$ = list($1, nao); rlcp($$, $1); } - | collect_clause { $$ = list($1, nao); rlcp($$, $1); } - | gather_clause { $$ = list($1, nao); rlcp($$, $1); } +clause : all_clause { $$ = cons($1, nil); rlcp($$, $1); } + | some_clause { $$ = cons($1, nil); rlcp($$, $1); } + | none_clause { $$ = cons($1, nil); rlcp($$, $1); } + | maybe_clause { $$ = cons($1, nil); rlcp($$, $1); } + | cases_clause { $$ = cons($1, nil); rlcp($$, $1); } + | block_clause { $$ = cons($1, nil); rlcp($$, $1); } + | choose_clause { $$ = cons($1, nil); rlcp($$, $1); } + | collect_clause { $$ = cons($1, nil); rlcp($$, $1); } + | gather_clause { $$ = cons($1, nil); rlcp($$, $1); } | define_clause { $$ = list(define_transform($1), nao); rlcp(car($$), $1); rlcp($$, $1); } - | try_clause { $$ = list($1, nao); rlcp($$, $1); } - | output_clause { $$ = list($1, nao); rlcp($$, $1); } + | try_clause { $$ = cons($1, nil); rlcp($$, $1); } + | output_clause { $$ = cons($1, nil); rlcp($$, $1); } | line { $$ = $1; } | repeat_clause { $$ = nil; yyerror("repeat outside of output"); } @@ -196,6 +197,22 @@ cases_clause : CASES newl clause_parts { $$ = list(cases_s, $3, nao); yyerror("empty cases clause"); } ; +block_clause : BLOCK exprs_opt ')' + newl clauses_opt + END newl { val name = first($2); + if (gt(length($2), one)) + yyerror("block: takes zero or no arguments"); + if (name && !bindable(name)) + yyerrorf(lit("block: ~s is not a bindable symbol"), + name, nao); + $$ = list(block_s, name, $5, nao); + rl($$, num($1)); } + | BLOCK exprs_opt ')' + newl error { $$ = nil; + yybadtoken(yychar, + lit("block clause")); } + ; + choose_clause : CHOOSE exprs_opt ')' newl clause_parts { $$ = list(choose_s, $5, $2, nao); rl($$, num($1)); } @@ -1136,8 +1136,9 @@ end of a line. Also Fails if no data remains (there is no current line). Continue matching in another file or other data source. .IP @(block) -The remaining query is treated as an anonymous or named block. -Blocks may be referenced by @(accept) and @(fail) directives. +Groups to gether a sequence of directives into a logical name block, +which can be explicitly terminated from within using +the @(accept) and @(fail) directives. Blocks are discussed in the section BLOCKS below. .IP @(skip) @@ -1524,6 +1525,65 @@ will keep scanning even though it has found the correct match, then backtrack to the last good match once it runs out of data. The regular skip with explicit @(eof) will stop when the @(eof) matches. +.SS Reducing Backtracking with Blocks + +Skip can consider considerable CPU time when multiple skips are nested. +Consider: + + @(skip) + A + @(skip) + B + @(skip) + C + +This is actually nesting: the second a third skips occur within the body of the +first one, and thus this creates nested iteration. TXR is searching for the +combination of skips which find match the pattern of lines A, B and C, with +backtracking behavior. The outermost skip marches through the data until it +finds A, followed by a pattern match for the second skip. The second skip +iterates within to find B, followed by the third skip, and the third skip +iterates to find C. If there is only one line A, and one B, then this is +reasonably fast. But suppose there are many lines matching A and B, +giving rise to a large number combinations of skips which match A and B, and +yet no match for C, triggering backtracking. The nested stepping which tries +the combinations of A and B can give rise to a considerable running time. + +One way to deal with the problem is to unravel the nesting with the help of +blocks. For example: + + @(block) + @ (skip) + A + @(end) + @(block) + @ (skip) + B + @(end) + @(skip) + C + +Now the scope of each skip is just the remainder of the block in which it +occurs. The first skip finds A, and then the block ends. Control passes to the +next block, and backtracking will take place to a block which completed (unless +all these blocks are enclosed in some larger construct which backtracks, +causing the blocks to be re-executed. + +Of course, this rewrite is not equivalent, and cannot be used for instance +in backreferencing situations such as: + + @; + @; Find some three lines which are the same. + @; + @(skip) + @line + @(skip) + @line + @(skip) + @line + +This example depends on the nested search-within-search semantics. + .SS The Trailer Directive The trailer directive introduces a trailing portion of a query or subquery @@ -2627,28 +2687,36 @@ to @(block nil). The @(skip) and @(collect) directives introduce implicit anonymous blocks, as do function bodies. +Blocks are useful for terminating parts of a pattern matchin search +prematurely, and escaping to a higher level. This makes blocks not only +useful for simplifying the semantics of certain pattern matches, +but also an optimization tool. + +Judicious use of blocks and escapes can reduce or eliminate the amount +backtracking that TXR performs. + .SS Block Scope The names of blocks are in a distinct namespace from the variable binding space. So @(block foo) has no interaction with the variable @foo. A block extends from the @(block ...) directive which introduces it, -to the end of the subquery in which that directive is contained. For instance: +until the matching @(end), and may be empty. For instance: @(some) abc @(block foo) xyz @(end) + @(end) Here, the block foo occurs in a @(some) clause, and so it extends to the @(end) -which terminates that clause. After that @(end), the name foo is not -associated with a block (is not "in scope"). A block which is not contained in -any subquery extends to the end of the overall query. Blocks are never -terminated by @(end). +which terminates the block. After that @(end), the name foo is not +associated with a block (is not "in scope"). The second @(end) terminates +the @(some) block. The implicit anonymous block introduced by @(skip) has the same scope -as the @(skip): they extends over all of the material which follows the skip, +as the @(skip): it extends over all of the material which follows the skip, to the end of the containing subquery. .SS Block Nesting @@ -2659,11 +2727,15 @@ which they are nested. For instance: @(block) @(block) ... + @(end) + @(end) is a nesting of two anonymous blocks, and @(block foo) @(block foo) + @(end) + @(end) is a nesting of two named blocks which happen to have the same name. When a nested block has the same name as an outer block, it creates @@ -2671,19 +2743,6 @@ a block scope in which the outer block is "shadowed"; that is to say, directives which refer to that block name within the nested block refer to the inner block, and not to the outer one. -A more complicated example of nesting is: - - @(skip) - abc - @(block) - @(some) - @(block foo) - @(end) - -Here, the @(skip) introduces an anonymous block. The explicit anonymous -@(block) is nested within skip's anonymous block and shadows it. -The foo block is nested within both of these. - .SS Block Semantics A block normally does nothing. The query material in the block is evaluated |