summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog32
-rw-r--r--RELNOTES23
-rw-r--r--match.c19
-rw-r--r--match.h2
-rw-r--r--parser.y16
-rw-r--r--txr.150
-rw-r--r--txr.c14
7 files changed, 119 insertions, 37 deletions
diff --git a/ChangeLog b/ChangeLog
index ad2864d0..e51f06ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2011-11-23 Kaz Kylheku <kaz@kylheku.com>
+
+ Semantics change. If a variable is followed by a mixture
+ of text and regular expressions, that whole mixture is
+ considered to follow the variable and used for matching.
+
+ The earlier semantics change whereby a single unescaped
+ space denotes the regular expression / +/ broke the
+ simple case @a word. It caused the @a to be followed
+ not by the text " word" but by just the regular expression
+ element.
+
+ With this change @a word means that a is followed by
+ the regex / +/ and "word".
+
+ * match.c (text_s): New symbol variable.
+ (h_text): New function.
+ (syms_init): Initialize new symbol variable.
+ (dir_tables_init): Hook h_text into horizontal directives table.
+
+ * match.h (text_s): Declared.
+
+ * parser.y (text, texts): New nonterminals.
+ (elem): TEXT, SPACE and regex are now handled under texts
+ grammar production. All texts are run together and produce
+ an item which looks like (text items ...).
+
+ * txr.1, RELNOTES: Updated.
+
+ * txr.c (remove_hash_bang_line): Updated to find #! buried
+ in (text ...) syntax.
+
2011-11-22 Kaz Kylheku <kaz@kylheku.com>
* configure: Fix environ test case for C++.
diff --git a/RELNOTES b/RELNOTES
index c214d239..9f9c16a1 100644
--- a/RELNOTES
+++ b/RELNOTES
@@ -1,14 +1,21 @@
- (future release)
- TXR 043
- 201?-??-??
+ (future release)
+ TXR 043
+ 201?-??-??
Bugs
- Buggy @(eol) directive fixed.
- (current release)
- TXR 042
- 2011-11-20
+ - Semantics change for text and regular expressions in "negative match":
+ - a variable is considered to be followed by a run of text which
+ consists of any mixture of regexes and literal text
+ - thus @foo bar behaves properly once again; it is not treated
+ as foo followed by the regex / +/, ignoring the text bar.
+
+
+ (current release)
+ TXR 042
+ 2011-11-20
@@ -79,8 +86,8 @@
- TXR 041
- 2011-10-30
+ TXR 041
+ 2011-10-30
Features
diff --git a/match.c b/match.c
index 52f79624..9f49657d 100644
--- a/match.c
+++ b/match.c
@@ -54,7 +54,7 @@ int opt_arraydims = 1;
val decline_k, next_spec_k, repeat_spec_k;
val mingap_k, maxgap_k, gap_k, mintimes_k, maxtimes_k, times_k;
val lines_k, chars_k;
-val choose_s, gather_s;
+val text_s, choose_s, gather_s;
val longest_k, shortest_k, greedy_k;
val vars_k, resolve_k;
val append_k, into_k, var_k, list_k, string_k, env_k;
@@ -400,6 +400,21 @@ typedef val (*h_match_func)(match_line_ctx c, match_line_ctx *cout);
val elem_var = first(specline); \
val directive_var = first(elem_var)
+static val h_text(match_line_ctx c, match_line_ctx *cout) /* horizontal-directive handler for a (text items ...) element; hooked in under text_s */
+{
+ val elem = first(c.specline); /* the element at the head of the current spec line */
+ val texts = rest(elem); /* everything after the element's first item */
+ val new_pos = cdr(match_line(ml_specline(c, texts))); /* match the items as a spec line of their own (presumably ml_specline yields c with its specline replaced -- confirm); keep the cdr of the result */
+
+ if (new_pos) { /* a non-nil cdr is used as the new position */
+ c.pos = new_pos;
+ *cout = c; /* pass the updated context back through cout */
+ return next_spec_k;
+ }
+
+ return nil; /* no position came back from match_line */
+}
+
static val search_form(match_line_ctx *c, val needle_form, val from_end)
{
if (regexp(first(needle_form))) {
@@ -3250,6 +3265,7 @@ static void syms_init(void)
times_k = intern(lit("times"), keyword_package);
lines_k = intern(lit("lines"), keyword_package);
chars_k = intern(lit("chars"), keyword_package);
+ text_s = intern(lit("text"), user_package);
choose_s = intern(lit("choose"), user_package);
gather_s = intern(lit("gather"), user_package);
longest_k = intern(lit("longest"), keyword_package);
@@ -3306,6 +3322,7 @@ static void dir_tables_init(void)
sethash(v_directive_table, filter_s, cptr((mem_t *) v_filter));
sethash(v_directive_table, eof_s, cptr((mem_t *) v_eof));
+ sethash(h_directive_table, text_s, cptr((mem_t *) h_text));
sethash(h_directive_table, var_s, cptr((mem_t *) h_var));
sethash(h_directive_table, skip_s, cptr((mem_t *) h_skip));
sethash(h_directive_table, coll_s, cptr((mem_t *) h_coll));
diff --git a/match.h b/match.h
index 1e15dd2d..e939208f 100644
--- a/match.h
+++ b/match.h
@@ -27,4 +27,4 @@
void match_init(void);
val match_funcall(val name, val arg, val other_args);
int extract(val spec, val filenames, val bindings);
-extern val choose_s, gather_s;
+extern val text_s, choose_s, gather_s;
diff --git a/parser.y b/parser.y
index ccef7e2c..8b8744bf 100644
--- a/parser.y
+++ b/parser.y
@@ -79,7 +79,7 @@ static val parsed_spec;
%type <val> clause_parts additional_parts
%type <val> output_clause define_clause try_clause catch_clauses_opt
%type <val> line elems_opt elems clause_parts_h additional_parts_h
-%type <val> elem var var_op meta_expr
+%type <val> text texts elem var var_op meta_expr
%type <val> list exprs exprs_opt expr out_clauses out_clauses_opt out_clause
%type <val> repeat_clause repeat_parts_opt o_line
%type <val> o_elems_opt o_elems_opt2 o_elems o_elem o_var rep_elem rep_parts_opt
@@ -267,7 +267,8 @@ elems : elem { $$ = cons($1, nil);
yyerror("rep outside of output"); }
;
-elem : TEXT { $$ = rl(string_own($1), num(lineno)); }
+
+text : TEXT { $$ = rl(string_own($1), num(lineno)); }
| SPACE { if ($1[0] == ' ' && $1[1] == 0)
{ val spaces = list(oneplus_s,
chr(' '), nao);
@@ -276,11 +277,18 @@ elem : TEXT { $$ = rl(string_own($1), num(lineno)); }
free($1); }
else
{ $$ = rl(string_own($1), num(lineno)); }}
- | var { $$ = rl($1, num(lineno)); }
- | list { $$ = $1; }
| regex { $$ = cons(regex_compile(rest($1)),
rest($1));
rl($$, num(lineno)); }
+ ;
+
+texts : text %prec LOW { $$ = rl(cons($1, nil), $1); }
+ | text texts { $$ = rl(cons($1, $2), $2); }
+ ;
+
+elem : texts { $$ = rl(cons(text_s, $1), $1); }
+ | var { $$ = rl($1, num(lineno)); }
+ | list { $$ = $1; }
| COLL exprs_opt ')' elems END { $$ = list(coll_s, $4, nil, $2, nao);
rl($$, num($1)); }
| COLL exprs_opt ')' elems
diff --git a/txr.1 b/txr.1
index 04d34f60..88b4cfee 100644
--- a/txr.1
+++ b/txr.1
@@ -604,13 +604,21 @@ current position in the data, to the end of the line. Example:
.SS Variable Followed by Text
-If the variable is followed by text (all non-directive material extending to
-the end of the line, or to the start of another directive), then the extent of
-the negative match is determined by searching for the first occurrence of that text
-within the line, starting at the current position. The variable matches
-everything between the current position and the matching position (not
-including the matching position). Any whitespace which follows the
-variable (and is not enclosed inside braces that surround the variable
+For the purposes of determining the negative match, text is defined as a
+sequence of literal text and regular expressions, not divided by a directive.
+So for instance in this example:
+
+ @a:@/foo/bcd e@(maybe)f@(end)
+
+the variable @a is considered to be followed by ":@/foo/bcd e".
+
+If a variable is followed by text, then the extent of the negative match is
+determined by searching for the first occurrence of that text within the line,
+starting at the current position.
+
+The variable matches everything between the current position and the matching
+position (not including the matching position). Any whitespace which follows
+the variable (and is not enclosed inside braces that surround the variable
name) is part of the text. For example:
pattern: "a b @FOO e f"
@@ -624,12 +632,12 @@ is " e f". This is found within the data "c d e f" at position 3
(counting from 0). So positions 0-2 ("c d") constitute the matching
text which is bound to FOO.
-.SS Variable Followed by a Regular Expression, Function Call or Directive
+.SS Variable Followed by a Function Call or Directive
-If the variable is followed by a regular expression, function
-call, or a directive, the extent is determined by scanning the text
-for the first position where a match occurs for the regular expression, call or
-directive. (See Regular Expressions section below, and FUNCTIONS.)
+If the variable is followed by a function call, or a directive, the extent is
+determined by scanning the text for the first position where a match occurs for
+the call or directive. (For a description of functions,
+see FUNCTIONS.)
Note that the given variable and the function or directive are considered
in isolation. This means, for instance, that @var@(skip)text is a degenerate
@@ -695,12 +703,18 @@ nested list, then each character string in the list is tried in turn to produce
a match. The first match is taken.
An unbound variable may be followed by another unbound variable which specifies
-a regular expression match. This is a special case called a "double variable
-match". What happens is that the text is searched using the regular
-expression. If the search fails, than neither variable is bound: it is a
-matching failure. If the search succeeds, than the first variable is bound to
-the text which is skipped by the regular expression search. The second
-variable is bound to the text matched by the regular expression.
+a regular expression or function call match. This is a special case called a
+"double variable match". What happens is that the text is searched using the
+regular expression or function. If the search fails, then neither variable is
+bound: it is a matching failure. If the search succeeds, then the first
+variable is bound to the text which is skipped by the search. The second
+variable is bound to the text matched by the regular expression or function.
+Examples:
+
+ pattern: "@foo@{bar /abc/}"
+ data: "xyz@#abc"
+ result: foo="xyz@#", bar="abc"
+
.SS Consecutive Variables Via Directive
diff --git a/txr.c b/txr.c
index 66ee31ac..21e30f87 100644
--- a/txr.c
+++ b/txr.c
@@ -130,11 +130,15 @@ static val remove_hash_bang_line(val spec)
{
val shbang = string(L"#!");
val firstline = first(spec);
-
- if (stringp(first(firstline))) {
- val twochars = sub_str(first(firstline), zero, two);
- if (equal(twochars, shbang))
- return rest(spec);
+ val firstelem = first(firstline);
+
+ if (consp(firstelem) && first(firstelem) == text_s) {
+ val item = second(firstelem);
+ if (stringp(item)) {
+ val twochars = sub_str(item, zero, two);
+ if (equal(twochars, shbang))
+ return rest(spec);
+ }
}
return spec;