Semantics change. If a variable is followed by a mixture of text and regular expressions, that whole mixture is considered to follow the variable and used for matching.

The earlier semantics change whereby a single unescaped space denotes the regular expression / +/ broke the simple case @a word. It caused the @a to be followed not by the text " word" but by just the regular expression element.

With this change @a word means that a is followed by the regex / +/ and "word".

* match.c (text_s): New symbol variable. (h_text): New function. (syms_init): Initialize new symbol variable. (dir_tables_init): Hook h_text into horizontal directives table.
* match.h (text_s): Declared.
* parser.y (text, texts): New nonterminals. (elem): TEXT, SPACE and regex are now handled under texts grammar production. All texts are run together and produce an item which looks like (text items ...).
* txr.1, RELNOTES: Updated.
* txr.c (remove_hash_bang_line): Updated to find #! buried in (text ...) syntax.
author: Kaz Kylheku <kaz@kylheku.com> 2011-11-23 10:46:32 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2011-11-23 10:46:32 -0800
commit: c1202a71a068c13a17b69348a6d7736b8855be0c (patch)
tree: eb2121adb2e75b6d2a0838d152a8c5b6c161ac1f
parent: e1174f5ea6ff0a51738830e10a92819135a22b32 (diff)
download: txr-c1202a71a068c13a17b69348a6d7736b8855be0c.tar.gz
txr-c1202a71a068c13a17b69348a6d7736b8855be0c.tar.bz2
txr-c1202a71a068c13a17b69348a6d7736b8855be0c.zip
7 files changed, 119 insertions, 37 deletions
diff --git a/ChangeLog b/ChangeLog
index ad2864d0..e51f06ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2011-11-23  Kaz Kylheku  <kaz@kylheku.com>
+
+	Semantics change. If a variable is followed by a mixture
+	of text and regular expressions, that whole mixture is
+	considered to follow the variable and used for matching.
+
+	The earlier semantics change whereby a single unescaped
+	space denotes the regular expression / +/ broke the
+	simple case   @a word.   It caused the @a to be followed
+	not by the text " word"  but by just the regular expression
+	element.
+
+	With this change @a word means that a is followed by 
+	the regex / +/ and "word".
+
+	* match.c (text_s): New symbol variable.
+	(h_text): New function.
+	(syms_init): Initialize new symbol variable.
+	(dir_tables_init): Hook h_text into horizontal directives table.
+
+	* match.h (text_s): Declared.
+
+	* parser.y (text, texts): New nonterminals.
+	(elem): TEXT, SPACE and regex are now handled under texts
+	grammar production. All texts are run together and produce
+	an item which looks like (text items ...).
+
+	* txr.1, RELNOTES: Updated.
+
+	* txr.c (remove_hash_bang_line): Updated to find #! buried
+	in (text ...) syntax.
+
 2011-11-22  Kaz Kylheku  <kaz@kylheku.com>
 
 	* configure: Fix environ test case for C++.
diff --git a/RELNOTES b/RELNOTES
index c214d239..9f9c16a1 100644
--- a/RELNOTES
+++ b/RELNOTES
@@ -1,14 +1,21 @@
-				(future release)
-				    TXR 043
-				   201?-??-??
+                                (future release)
+                                    TXR 043
+                                   201?-??-??
 
   Bugs
 
   - Buggy @(eol) directive fixed.
 
-			       (current release)
-				    TXR 042
-				   2011-11-20
+  - Semantics change for text and regular expressions in "negative match":
+    - a variable is considered to be followed by a run of text which
+      consists of any mixture of regexes and literal text
+    - thus @foo bar behaves properly once again; it is not treated
+      as foo followed by the regex / +/, ignoring the text bar.
+
+
+                               (current release)
+                                    TXR 042
+                                   2011-11-20
 
 
 
@@ -79,8 +86,8 @@
 
 
 
-				    TXR 041
-				   2011-10-30
+                                    TXR 041
+                                   2011-10-30
 
 
   Features
diff --git a/match.c b/match.c
index 52f79624..9f49657d 100644
--- a/match.c
+++ b/match.c
@@ -54,7 +54,7 @@ int opt_arraydims = 1;
 val decline_k, next_spec_k, repeat_spec_k;
 val mingap_k, maxgap_k, gap_k, mintimes_k, maxtimes_k, times_k;
 val lines_k, chars_k;
-val choose_s, gather_s;
+val text_s, choose_s, gather_s;
 val longest_k, shortest_k, greedy_k;
 val vars_k, resolve_k;
 val append_k, into_k, var_k, list_k, string_k, env_k;
@@ -400,6 +400,21 @@ typedef val (*h_match_func)(match_line_ctx c, match_line_ctx *cout);
   val elem_var = first(specline);                       \
   val directive_var = first(elem_var)
 
+static val h_text(match_line_ctx c, match_line_ctx *cout)
+{
+  val elem = first(c.specline);
+  val texts = rest(elem);
+  val new_pos = cdr(match_line(ml_specline(c, texts)));
+
+  if (new_pos) {
+    c.pos = new_pos;
+    *cout = c;
+    return next_spec_k;
+  }
+
+  return nil;
+}
+
 static val search_form(match_line_ctx *c, val needle_form, val from_end)
 {
   if (regexp(first(needle_form))) {
@@ -3250,6 +3265,7 @@ static void syms_init(void)
   times_k = intern(lit("times"), keyword_package);
   lines_k = intern(lit("lines"), keyword_package);
   chars_k = intern(lit("chars"), keyword_package);
+  text_s = intern(lit("text"), user_package);
   choose_s = intern(lit("choose"), user_package);
   gather_s = intern(lit("gather"), user_package);
   longest_k = intern(lit("longest"), keyword_package);
@@ -3306,6 +3322,7 @@ static void dir_tables_init(void)
   sethash(v_directive_table, filter_s, cptr((mem_t *) v_filter));
   sethash(v_directive_table, eof_s, cptr((mem_t *) v_eof));
 
+  sethash(h_directive_table, text_s, cptr((mem_t *) h_text));
   sethash(h_directive_table, var_s, cptr((mem_t *) h_var));
   sethash(h_directive_table, skip_s, cptr((mem_t *) h_skip));
   sethash(h_directive_table, coll_s, cptr((mem_t *) h_coll));
diff --git a/match.h b/match.h
index 1e15dd2d..e939208f 100644
--- a/match.h
+++ b/match.h
@@ -27,4 +27,4 @@
 void match_init(void);
 val match_funcall(val name, val arg, val other_args);
 int extract(val spec, val filenames, val bindings);
-extern val choose_s, gather_s;
+extern val text_s, choose_s, gather_s;
diff --git a/parser.y b/parser.y
index ccef7e2c..8b8744bf 100644
--- a/parser.y
+++ b/parser.y
@@ -79,7 +79,7 @@ static val parsed_spec;
 %type <val> clause_parts additional_parts
 %type <val> output_clause define_clause try_clause catch_clauses_opt
 %type <val> line elems_opt elems clause_parts_h additional_parts_h
-%type <val> elem var var_op meta_expr
+%type <val> text texts elem var var_op meta_expr
 %type <val> list exprs exprs_opt expr out_clauses out_clauses_opt out_clause
 %type <val> repeat_clause repeat_parts_opt o_line
 %type <val> o_elems_opt o_elems_opt2 o_elems o_elem o_var rep_elem rep_parts_opt
@@ -267,7 +267,8 @@ elems : elem                    { $$ = cons($1, nil);
                                   yyerror("rep outside of output"); }
       ;
 
-elem : TEXT                     { $$ = rl(string_own($1), num(lineno)); }
+
+text : TEXT                     { $$ = rl(string_own($1), num(lineno)); }
      | SPACE                    { if ($1[0] == ' ' && $1[1] == 0)
                                   { val spaces = list(oneplus_s, 
                                                       chr(' '), nao);
@@ -276,11 +277,18 @@ elem : TEXT                     { $$ = rl(string_own($1), num(lineno)); }
                                     free($1); }
                                   else
                                   { $$ = rl(string_own($1), num(lineno)); }}
-     | var                      { $$ = rl($1, num(lineno)); }
-     | list                     { $$ = $1; }
      | regex                    { $$ = cons(regex_compile(rest($1)),
                                             rest($1));
                                   rl($$, num(lineno)); }
+     ;
+
+texts : text %prec LOW          { $$ = rl(cons($1, nil), $1); }
+      | text texts              { $$ = rl(cons($1, $2), $2); }
+      ;
+
+elem : texts                    { $$ = rl(cons(text_s, $1), $1); }
+     | var                      { $$ = rl($1, num(lineno)); }
+     | list                     { $$ = $1; }
      | COLL exprs_opt ')' elems END     { $$ = list(coll_s, $4, nil, $2, nao);
                                           rl($$, num($1)); }
      | COLL exprs_opt ')' elems
diff --git a/txr.1 b/txr.1
index 04d34f60..88b4cfee 100644
--- a/txr.1
+++ b/txr.1
@@ -604,13 +604,21 @@ current position in the data, to the end of the line.  Example:
 
 .SS Variable Followed by Text
 
-If the variable is followed by text (all non-directive material extending to
-the end of the line, or to the start of another directive), then the extent of
-the negative match is determined by searching for the first occurrence of that text
-within the line, starting at the current position. The variable matches
-everything between the current position and the matching position (not
-including the matching position). Any whitespace which follows the
-variable (and is not enclosed inside braces that surround the variable
+For the purposes of determining the negative match, text is defined as a
+sequence of literal text and regular expressions, not divided by a directive.
+So for instance in this example:
+
+  @a:@/foo/bcd e@(maybe)f@(end)
+
+the variable @a is considered to be followed by ":@/foo/bcd e".
+
+If a variable is followed by text, then the extent of the negative match is
+determined by searching for the first occurrence of that text within the line,
+starting at the current position. 
+
+The variable matches everything between the current position and the matching
+position (not including the matching position). Any whitespace which follows
+the variable (and is not enclosed inside braces that surround the variable
 name) is part of the text. For example:
 
   pattern:      "a b @FOO e f"
@@ -624,12 +632,12 @@ is " e f". This is found within the data "c d e f" at position 3
 (counting from 0).  So positions 0-2 ("c d") constitute the matching
 text which is bound to FOO.
 
-.SS Variable Followed by a Regular Expression, Function Call or Directive
+.SS Variable Followed by a Function Call or Directive
 
-If the variable is followed by a regular expression, function
-call, or a directive, the extent is determined by scanning the text
-for the first position where a match occurs for the regular expression, call or
-directive. (See Regular Expressions section below, and FUNCTIONS.)
+If the variable is followed by a function call, or a directive, the extent is
+determined by scanning the text for the first position where a match occurs for
+the regular expression, call or directive. (For a description of functions,
+see FUNCTIONS.)
 
 Note that the given variable and the function or directive are considered
 in isolation. This means, for instance, that @var@(skip)text is a degenerate
@@ -695,12 +703,18 @@ nested list, then each character string in the list is tried in turn to produce
 a match. The first match is taken.
 
 An unbound variable may be followed by another unbound variable which specifies
-a regular expression match. This is a special case called a "double variable
-match".  What happens is that the text is searched using the regular
-expression.  If the search fails, than neither variable is bound: it is a
-matching failure.  If the search succeeds, than the first variable is bound to
-the text which is skipped by the regular expression search.  The second
-variable is bound to the text matched by the regular expression.
+a regular expression or function call match. This is a special case called a
+"double variable match".  What happens is that the text is searched using the
+regular expression or function.  If the search fails, than neither variable is
+bound: it is a matching failure.  If the search succeeds, than the first
+variable is bound to the text which is skipped by the search.  The second
+variable is bound to the text matched by the regular expression or function.
+Examples:
+
+  pattern:      "@foo@{bar /abc/}"
+  data:         "xyz@#abc"
+  result:       foo="xyz@#", BAR="abc"
+
 
 .SS Consecutive Variables Via Directive
 
diff --git a/txr.c b/txr.c
index 66ee31ac..21e30f87 100644
--- a/txr.c
+++ b/txr.c
@@ -130,11 +130,15 @@ static val remove_hash_bang_line(val spec)
   {
     val shbang = string(L"#!");
     val firstline = first(spec);
-
-    if (stringp(first(firstline))) {
-      val twochars = sub_str(first(firstline), zero, two);
-      if (equal(twochars, shbang))
-        return rest(spec);
+    val firstelem = first(firstline);
+
+    if (consp(firstelem) && first(firstelem) == text_s) {
+      val item = second(firstelem);
+      if (stringp(item)) {
+        val twochars = sub_str(item, zero, two);
+        if (equal(twochars, shbang))
+          return rest(spec);
+      }
     }
 
     return spec;
author	Kaz Kylheku <kaz@kylheku.com>	2011-11-23 10:46:32 -0800
committer	Kaz Kylheku <kaz@kylheku.com>	2011-11-23 10:46:32 -0800
commit	c1202a71a068c13a17b69348a6d7736b8855be0c (patch)
tree	eb2121adb2e75b6d2a0838d152a8c5b6c161ac1f
parent	e1174f5ea6ff0a51738830e10a92819135a22b32 (diff)
download	txr-c1202a71a068c13a17b69348a6d7736b8855be0c.tar.gz txr-c1202a71a068c13a17b69348a6d7736b8855be0c.tar.bz2 txr-c1202a71a068c13a17b69348a6d7736b8855be0c.zip