diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2011-11-23 10:46:32 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2011-11-23 10:46:32 -0800 |
commit | c1202a71a068c13a17b69348a6d7736b8855be0c (patch) | |
tree | eb2121adb2e75b6d2a0838d152a8c5b6c161ac1f | |
parent | e1174f5ea6ff0a51738830e10a92819135a22b32 (diff) | |
download | txr-c1202a71a068c13a17b69348a6d7736b8855be0c.tar.gz txr-c1202a71a068c13a17b69348a6d7736b8855be0c.tar.bz2 txr-c1202a71a068c13a17b69348a6d7736b8855be0c.zip |
Semantics change. If a variable is followed by a mixture
of text and regular expressions, that whole mixture is
considered to follow the variable and used for matching.
The earlier semantics change whereby a single unescaped
space denotes the regular expression / +/ broke the
simple case @a word. It caused the @a to be followed
not by the text " word" but by just the regular expression
element.
With this change @a word means that a is followed by
the regex / +/ and "word".
* match.c (text_s): New symbol variable.
(h_text): New function.
(syms_init): Initialize new symbol variable.
(dir_tables_init): Hook h_text into horizontal directives table.
* match.h (text_s): Declared.
* parser.y (text, texts): New nonterminals.
(elem): TEXT, SPACE and regex are now handled under texts
grammar production. All texts are run together and produce
an item which looks like (text items ...).
* txr.1, RELNOTES: Updated.
* txr.c (remove_hash_bang_line): Updated to find #! buried
in (text ...) syntax.
-rw-r--r-- | ChangeLog | 32 | ||||
-rw-r--r-- | RELNOTES | 23 | ||||
-rw-r--r-- | match.c | 19 | ||||
-rw-r--r-- | match.h | 2 | ||||
-rw-r--r-- | parser.y | 16 | ||||
-rw-r--r-- | txr.1 | 50 | ||||
-rw-r--r-- | txr.c | 14 |
7 files changed, 119 insertions, 37 deletions
@@ -1,3 +1,35 @@ +2011-11-23 Kaz Kylheku <kaz@kylheku.com> + + Semantics change. If a variable is followed by a mixture + of text and regular expressions, that whole mixture is + considered to follow the variable and used for matching. + + The earlier semantics change whereby a single unescaped + space denotes the regular expression / +/ broke the + simple case @a word. It caused the @a to be followed + not by the text " word" but by just the regular expression + element. + + With this change @a word means that a is followed by + the regex / +/ and "word". + + * match.c (text_s): New symbol variable. + (h_text): New function. + (syms_init): Initialize new symbol variable. + (dir_tables_init): Hook h_text into horizontal directives table. + + * match.h (text_s): Declared. + + * parser.y (text, texts): New nonterminals. + (elem): TEXT, SPACE and regex are now handled under texts + grammar production. All texts are run together and produce + an item which looks like (text items ...). + + * txr.1, RELNOTES: Updated. + + * txr.c (remove_hash_bang_line): Updated to find #! buried + in (text ...) syntax. + 2011-11-22 Kaz Kylheku <kaz@kylheku.com> * configure: Fix environ test case for C++. @@ -1,14 +1,21 @@ - (future release) - TXR 043 - 201?-??-?? + (future release) + TXR 043 + 201?-??-?? Bugs - Buggy @(eol) directive fixed. - (current release) - TXR 042 - 2011-11-20 + - Semantics change for text and regular expressions in "negative match": + - a variable is considered to be followed by a run of text which + consists of any mixture of regexes and literal text + - thus @foo bar behaves properly once again; it is not treated + as foo followed by the regex / +/, ignoring the text bar. 
+ + + (current release) + TXR 042 + 2011-11-20 @@ -79,8 +86,8 @@ - TXR 041 - 2011-10-30 + TXR 041 + 2011-10-30 Features @@ -54,7 +54,7 @@ int opt_arraydims = 1; val decline_k, next_spec_k, repeat_spec_k; val mingap_k, maxgap_k, gap_k, mintimes_k, maxtimes_k, times_k; val lines_k, chars_k; -val choose_s, gather_s; +val text_s, choose_s, gather_s; val longest_k, shortest_k, greedy_k; val vars_k, resolve_k; val append_k, into_k, var_k, list_k, string_k, env_k; @@ -400,6 +400,21 @@ typedef val (*h_match_func)(match_line_ctx c, match_line_ctx *cout); val elem_var = first(specline); \ val directive_var = first(elem_var) +static val h_text(match_line_ctx c, match_line_ctx *cout) +{ + val elem = first(c.specline); + val texts = rest(elem); + val new_pos = cdr(match_line(ml_specline(c, texts))); + + if (new_pos) { + c.pos = new_pos; + *cout = c; + return next_spec_k; + } + + return nil; +} + static val search_form(match_line_ctx *c, val needle_form, val from_end) { if (regexp(first(needle_form))) { @@ -3250,6 +3265,7 @@ static void syms_init(void) times_k = intern(lit("times"), keyword_package); lines_k = intern(lit("lines"), keyword_package); chars_k = intern(lit("chars"), keyword_package); + text_s = intern(lit("text"), user_package); choose_s = intern(lit("choose"), user_package); gather_s = intern(lit("gather"), user_package); longest_k = intern(lit("longest"), keyword_package); @@ -3306,6 +3322,7 @@ static void dir_tables_init(void) sethash(v_directive_table, filter_s, cptr((mem_t *) v_filter)); sethash(v_directive_table, eof_s, cptr((mem_t *) v_eof)); + sethash(h_directive_table, text_s, cptr((mem_t *) h_text)); sethash(h_directive_table, var_s, cptr((mem_t *) h_var)); sethash(h_directive_table, skip_s, cptr((mem_t *) h_skip)); sethash(h_directive_table, coll_s, cptr((mem_t *) h_coll)); @@ -27,4 +27,4 @@ void match_init(void); val match_funcall(val name, val arg, val other_args); int extract(val spec, val filenames, val bindings); -extern val choose_s, gather_s; 
+extern val text_s, choose_s, gather_s; @@ -79,7 +79,7 @@ static val parsed_spec; %type <val> clause_parts additional_parts %type <val> output_clause define_clause try_clause catch_clauses_opt %type <val> line elems_opt elems clause_parts_h additional_parts_h -%type <val> elem var var_op meta_expr +%type <val> text texts elem var var_op meta_expr %type <val> list exprs exprs_opt expr out_clauses out_clauses_opt out_clause %type <val> repeat_clause repeat_parts_opt o_line %type <val> o_elems_opt o_elems_opt2 o_elems o_elem o_var rep_elem rep_parts_opt @@ -267,7 +267,8 @@ elems : elem { $$ = cons($1, nil); yyerror("rep outside of output"); } ; -elem : TEXT { $$ = rl(string_own($1), num(lineno)); } + +text : TEXT { $$ = rl(string_own($1), num(lineno)); } | SPACE { if ($1[0] == ' ' && $1[1] == 0) { val spaces = list(oneplus_s, chr(' '), nao); @@ -276,11 +277,18 @@ elem : TEXT { $$ = rl(string_own($1), num(lineno)); } free($1); } else { $$ = rl(string_own($1), num(lineno)); }} - | var { $$ = rl($1, num(lineno)); } - | list { $$ = $1; } | regex { $$ = cons(regex_compile(rest($1)), rest($1)); rl($$, num(lineno)); } + ; + +texts : text %prec LOW { $$ = rl(cons($1, nil), $1); } + | text texts { $$ = rl(cons($1, $2), $2); } + ; + +elem : texts { $$ = rl(cons(text_s, $1), $1); } + | var { $$ = rl($1, num(lineno)); } + | list { $$ = $1; } | COLL exprs_opt ')' elems END { $$ = list(coll_s, $4, nil, $2, nao); rl($$, num($1)); } | COLL exprs_opt ')' elems @@ -604,13 +604,21 @@ current position in the data, to the end of the line. Example: .SS Variable Followed by Text -If the variable is followed by text (all non-directive material extending to -the end of the line, or to the start of another directive), then the extent of -the negative match is determined by searching for the first occurrence of that text -within the line, starting at the current position. 
The variable matches -everything between the current position and the matching position (not -including the matching position). Any whitespace which follows the -variable (and is not enclosed inside braces that surround the variable +For the purposes of determining the negative match, text is defined as a +sequence of literal text and regular expressions, not divided by a directive. +So for instance in this example: + + @a:@/foo/bcd e@(maybe)f@(end) + +the variable @a is considered to be followed by ":@/foo/bcd e". + +If a variable is followed by text, then the extent of the negative match is +determined by searching for the first occurrence of that text within the line, +starting at the current position. + +The variable matches everything between the current position and the matching +position (not including the matching position). Any whitespace which follows +the variable (and is not enclosed inside braces that surround the variable name) is part of the text. For example: pattern: "a b @FOO e f" @@ -624,12 +632,12 @@ is " e f". This is found within the data "c d e f" at position 3 (counting from 0). So positions 0-2 ("c d") constitute the matching text which is bound to FOO. -.SS Variable Followed by a Regular Expression, Function Call or Directive +.SS Variable Followed by a Function Call or Directive -If the variable is followed by a regular expression, function -call, or a directive, the extent is determined by scanning the text -for the first position where a match occurs for the regular expression, call or -directive. (See Regular Expressions section below, and FUNCTIONS.) +If the variable is followed by a function call, or a directive, the extent is +determined by scanning the text for the first position where a match occurs for +the call or directive. (For a description of functions, +see FUNCTIONS.) Note that the given variable and the function or directive are considered in isolation.
This means, for instance, that @var@(skip)text is a degenerate @@ -695,12 +703,18 @@ nested list, then each character string in the list is tried in turn to produce a match. The first match is taken. An unbound variable may be followed by another unbound variable which specifies -a regular expression match. This is a special case called a "double variable -match". What happens is that the text is searched using the regular -expression. If the search fails, than neither variable is bound: it is a -matching failure. If the search succeeds, than the first variable is bound to -the text which is skipped by the regular expression search. The second -variable is bound to the text matched by the regular expression. +a regular expression or function call match. This is a special case called a +"double variable match". What happens is that the text is searched using the +regular expression or function. If the search fails, then neither variable is +bound: it is a matching failure. If the search succeeds, then the first +variable is bound to the text which is skipped by the search. The second +variable is bound to the text matched by the regular expression or function. +Examples: + + pattern: "@foo@{bar /abc/}" + data: "xyz@#abc" + result: foo="xyz@#", bar="abc" + .SS Consecutive Variables Via Directive @@ -130,11 +130,15 @@ static val remove_hash_bang_line(val spec) { val shbang = string(L"#!"); val firstline = first(spec); - - if (stringp(first(firstline))) { - val twochars = sub_str(first(firstline), zero, two); - if (equal(twochars, shbang)) - return rest(spec); + val firstelem = first(firstline); + + if (consp(firstelem) && first(firstelem) == text_s) { + val item = second(firstelem); + if (stringp(item)) { + val twochars = sub_str(item, zero, two); + if (equal(twochars, shbang)) + return rest(spec); + } } return spec; |