From 916b0f4a081b26ebb4b58afbcb651fc74dbe23cc Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Wed, 19 Feb 2014 00:38:06 -0800 Subject: Fixing a long-running issue in the TXR pattern language: premature opening of files, prior to directives that actually need data. The documentation basically lied that this is the case: namely, the text "A file isn't opened until the query demands material from that file, and then the contents are read on demand, not all at once." This is now a fact. * match.c (non_matching_directive_table): New global variable. (open_data_source): New static function. Contains an almost verbatim migration of the source-opening logic that used to be in match_files. The useless assignment to c->nil is gone, and c->data == t is explicitly tested for. Instead of assuming that only the @(next) directive does not need to have a data source open, the table of non-matching directives is consulted. Opening the data source is now skipped for numerous directives. (match_files): Call open_data_source within the loop. This means that even after processing numerous non-matching directives, we will still correctly set up the data lazy list. (dir_tables_init): Initialize non_matching_directive_table, protect from GC and populate with numerous directives. * txr.1: Improved documentation for @(next :args), and removed a description of the hack that a single @(next) at the top of the query suppressed the opening of the data source. --- ChangeLog | 27 +++++++++++++++++++++++ match.c | 76 +++++++++++++++++++++++++++++++++++++++++++++------------------ txr.1 | 14 ++++++------ 3 files changed, 89 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3ac4b107..80823cbe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +2014-02-19 Kaz Kylheku + + Fixing a long-running issue in the TXR pattern language: premature + opening of files, prior to directives that actually need data. + The documentation basically lied that this is the case: namely, + the text "A file isn't opened until the query demands material from + that file, and then the contents are read on demand, not all at + once." This is now a fact. + + * match.c (non_matching_directive_table): New global variable. + (open_data_source): New static function. Contains an almost + verbatim migration of the source-opening logic that used + to be in match_files. The useless assignment to c->nil is gone, + and c->data == t is explicitly tested for. Instead of assuming + that only the @(next) directive does not need to have a data source + open, the table of non-matching directives is consulted. Opening + the data source is now skipped for numerous directives. + (match_files): Call open_data_source within the loop. This + means that even after processing numerous non-matching directives, + we will still correctly set up the data lazy list. + (dir_tables_init): Initialize non_matching_directive_table, + protect from GC and populate with numerous directives. + + * txr.1: Improved documentation for @(next :args), and removed + a description of the hack that a single @(next) at the top of the + query suppressed the opening of the data source. + 2014-02-18 Kaz Kylheku * eval.c (env_hash): new function. diff --git a/match.c b/match.c index 73452c8c..0da93ca8 100644 --- a/match.c +++ b/match.c @@ -68,6 +68,7 @@ val filter_s; val noval_s; static val h_directive_table, v_directive_table; +static val non_matching_directive_table; static void debuglf(val form, val fmt, ...) { @@ -3650,27 +3651,28 @@ static val h_do(match_line_ctx *c) return next_spec_k; } -static val match_files(match_files_ctx c) +static void open_data_source(match_files_ctx *c) { - debug_enter; - - gc_hint(c.data); + /* c->data == t is set up by the top level call to match_files. + * It indicates that we have not yet opened any data source. + */ - if (listp(c.data)) { /* recursive call with lazy list */ - ; /* no special initialization */ - } else if (c.files) { /* c.data == t: toplevel call with file list */ - val source_spec = first(c.files); + if (c->data == t && c->files) { + val source_spec = first(c->files); val name = consp(source_spec) ? cdr(source_spec) : source_spec; if (stringp(name)) { fpip_t fp = (errno = 0, complex_open(name, nil, nil)); - spec_bind (specline, first_spec, c.spec); + spec_bind (specline, first_spec, c->spec); - if (consp(first_spec) && eq(first(first_spec), next_s) && !rest(specline)) { + if (consp(first_spec) && (gethash(non_matching_directive_table, + first(first_spec)))) + { debuglf(first_spec, lit("not opening source ~a " - "since query starts with next directive"), name, nao); + "since query starts with non-matching " + "directive."), name, nao); } else { - val spec = first(c.spec); + val spec = first(c->spec); debuglf(spec, lit("opening data source ~a"), name, nao); if (complex_open_failed(fp)) { @@ -3686,20 +3688,25 @@ static val match_files(match_files_ctx c) debug_return (nil); } - c.files = cons(name, cdr(c.files)); /* Get rid of cons and nothrow */ + c->files = cons(name, cdr(c->files)); /* Get rid of cons and nothrow */ - if ((c.data = complex_snarf(fp, name)) != nil) - c.data_lineno = num(1); + if ((c->data = complex_snarf(fp, name)) != nil) + c->data_lineno = num(1); } } else if (streamp(name)) { - if ((c.data = lazy_stream_cons(name))) - c.data_lineno = num(1); + if ((c->data = lazy_stream_cons(name))) + c->data_lineno = num(1); } else { - c.data = nil; + c->data = nil; } - } else { /* toplevel call with no data or file list */ - c.data = nil; } +} + +static val match_files(match_files_ctx c) +{ + debug_enter; + + gc_hint(c.data); for (; c.spec; c.spec = rest(c.spec), c.data = rest(c.data), @@ -3708,6 +3715,8 @@ repeat_spec_same_data: { spec_bind (specline, first_spec, c.spec); + open_data_source(&c); + debug_check(first_spec, c.bindings, c.data, c.data_lineno, nil, nil); if (consp(first_spec) && !rest(specline)) { @@ -3898,8 +3907,10 @@ static void dir_tables_init(void) { h_directive_table = make_hash(nil, nil, nil); v_directive_table = make_hash(nil, nil, nil); + non_matching_directive_table = make_hash(nil, nil, nil); - protect(&h_directive_table, &v_directive_table, (val *) 0); + protect(&h_directive_table, &v_directive_table, + &non_matching_directive_table, (val *) 0); sethash(v_directive_table, skip_s, cptr((mem_t *) v_skip)); sethash(v_directive_table, fuzz_s, cptr((mem_t *) v_fuzz)); @@ -3963,6 +3974,29 @@ static void dir_tables_init(void) sethash(h_directive_table, eol_s, cptr((mem_t *) h_eol)); sethash(h_directive_table, do_s, cptr((mem_t *) h_do)); sethash(h_directive_table, require_s, cptr((mem_t *) hv_trampoline)); + + sethash(non_matching_directive_table, block_s, t); + sethash(non_matching_directive_table, accept_s, t); + sethash(non_matching_directive_table, fail_s, t); + sethash(non_matching_directive_table, next_s, t); + sethash(non_matching_directive_table, forget_s, t); + sethash(non_matching_directive_table, local_s, t); + sethash(non_matching_directive_table, merge_s, t); + sethash(non_matching_directive_table, bind_s, t); + sethash(non_matching_directive_table, rebind_s, t); + sethash(non_matching_directive_table, set_s, t); + sethash(non_matching_directive_table, cat_s, t); + sethash(non_matching_directive_table, output_s, t); + sethash(non_matching_directive_table, define_s, t); + sethash(non_matching_directive_table, try_s, t); + sethash(non_matching_directive_table, defex_s, t); + sethash(non_matching_directive_table, throw_s, t); + sethash(non_matching_directive_table, deffilter_s, t); + sethash(non_matching_directive_table, filter_s, t); + sethash(non_matching_directive_table, require_s, t); + sethash(non_matching_directive_table, do_s, t); + sethash(non_matching_directive_table, load_s, t); + sethash(non_matching_directive_table, close_s, t); } void match_init(void) diff --git a/txr.1 b/txr.1 index f9c50347..90d45953 100644 --- a/txr.1 +++ b/txr.1 @@ -1461,13 +1461,13 @@ match failure. The variant @(next :args) means that the remaining command line arguments are to be treated as a data source. For this purpose, each argument is considered to be a line of text. If an argument is currently being processed as an input -source, that argument is included. Note that if the first entry in the argument -list is not intended to name an input source, then the query should begin with -@(next :args) or some other form of next directive, to prevent an attempt to -open the input source named by that argument. If the very first directive of a query is any variant of the next directive, then -.B TXR -avoids opening the first input source, but it does open the input source for -any other directive, even one which does not consume any data. +source, that argument is included at the front of the list. As the arguments +are matched, they are consumed. This means that if a @(next) directive without +arguments is executed in the scope of @(next :args), it opens the file named +by the first unconsumed argument. + +To process arguments, and then continue with the original file and argument +list, wrap the argument processing in a @(block). The variant @(next :env) means that the list of process environment variables is treated as a source of data. It looks like a text file stream -- cgit v1.2.3