From 4ce8dde9cafa8a77a0c623043fded0a751ad4b02 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Sun, 25 Sep 2011 10:28:41 -0700
Subject: Filtering feature for variable substitution in output.

* filter.c, filter.h: New files.

* Makefile (OBJS): filter.o added.

* gc.c (mark_obj): Mark new alloc field of string objets.

* hash.c (struct hash): New member, userdata.
(hash_mark): Mark new userdata member of hash.
(make_hash): Initialize userdata.
(get_hash_userdata, set_hash_userdata, hashp): New functions.

* hash.h (get_hash_userdata, set_hash_userdata, hashp): New functions
declared.

* lib.c (getplist, string_extend, cobjp): New functions.
(string_own, string, string_utf8): Initialize new alloc field to nil.
(mkstring, mkustring): Initialize new alloc field to actual size.
(length_str): When length is computed and cached, also compute
and cache alloc.
(init): Call filter_init.

* lib.h (string string): New member, alloc.
(num_fast): Macro converted to inline function.
(getplist, string_extend, cobjp): New functions declared.

* match.c (match_line): Follows change of modifier s-exp syntax.
(format_field): New parameter, filter.
New modifier syntax parsed. Filter retrieved, and applied.
(subst_vars): New parameter, filter. Filter is either applied
in this function or passed to format_field, as needed.
(eval_form): Pass nil to new parameter of subst_vars.
(do_output_line): New parameter, filter. Passed down to subst_vars.
(do_output): New parameter, filter. Passed down to do_output_line.
(match_files): Pass nil filter to subst_vars in cat directive.
Output directive refactored to parse keywords, extract the
filter and pass down to do_output.

* parser.y (regex): Generate (sys:regex regex syntax ...)
instead of (regex syntax ...).
(elem, expr): Updated w.r.t. regex syntax change.
(var): Cases '{' IDENT regex '}' and '{' IDENT NUMBER '}'
are removed. new syntax '{' IDENT exprs '}' to handle these
more generally and allow for keywords.

* txr.1: Updated.
---
 ChangeLog |  50 +++++++++++++++++++
 Makefile  |   2 +-
 filter.c  | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 filter.h  |  36 ++++++++++++++
 gc.c      |   3 +-
 hash.c    |  31 ++++++++++++
 hash.h    |   4 ++
 lib.c     |  78 +++++++++++++++++++++++++++++-
 lib.h     |  10 +++-
 match.c   | 122 +++++++++++++++++++++++++++++++++-------------
 parser.y  |  15 +++---
 txr.1     |  37 +++++++++++++-
 12 files changed, 505 insertions(+), 46 deletions(-)
 create mode 100644 filter.c
 create mode 100644 filter.h

diff --git a/ChangeLog b/ChangeLog
index 3a01570b..b22ba7c5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,53 @@
+2011-09-25  Kaz Kylheku  <kaz@kylheku.com>
+
+	Filtering feature for variable substitution in output.
+
+	* filter.c, filter.h: New files.
+
+	* Makefile (OBJS): filter.o added.
+
+	* gc.c (mark_obj): Mark new alloc field of string objets.
+
+	* hash.c (struct hash): New member, userdata.
+	(hash_mark): Mark new userdata member of hash.
+	(make_hash): Initialize userdata.
+	(get_hash_userdata, set_hash_userdata, hashp): New functions.
+
+	* hash.h (get_hash_userdata, set_hash_userdata, hashp): New functions
+	declared.
+
+	* lib.c (getplist, string_extend, cobjp): New functions.
+	(string_own, string, string_utf8): Initialize new alloc field to nil.
+	(mkstring, mkustring): Initialize new alloc field to actual size.
+	(length_str): When length is computed and cached, also compute
+	and cache alloc.
+	(init): Call filter_init.
+
+	* lib.h (string string): New member, alloc.
+	(num_fast): Macro converted to inline function.
+	(getplist, string_extend, cobjp): New functions declared.
+
+	* match.c (match_line): Follows change of modifier s-exp syntax.
+	(format_field): New parameter, filter.
+	New modifier syntax parsed. Filter retrieved, and applied.
+	(subst_vars): New parameter, filter. Filter is either applied
+	in this function or passed to format_field, as needed.
+	(eval_form): Pass nil to new parameter of subst_vars.
+	(do_output_line): New parameter, filter. Passed down to subst_vars.
+	(do_output): New parameter, filter. Passed down to do_output_line.
+	(match_files): Pass nil filter to subst_vars in cat directive.
+	Output directive refactored to parse keywords, extract the
+	filter and pass down to do_output.
+
+	* parser.y (regex): Generate (sys:regex regex syntax ...)
+	instead of (regex syntax ...).
+	(elem, expr): Updated w.r.t. regex syntax change.
+	(var): Cases '{' IDENT regex '}' and '{' IDENT NUMBER '}'
+	are removed. new syntax '{' IDENT exprs '}' to handle these
+	more generally and allow for keywords.
+
+	* txr.1: Updated.
+
 2011-09-23  Kaz Kylheku  <kaz@kylheku.com>
 
 	Numeric constants become real constants.
diff --git a/Makefile b/Makefile
index 7f842d5f..b555b657 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ CFLAGS := $(filter-out -Wmissing-prototypes -Wstrict-prototypes,$(CFLAGS))
 endif
 
 OBJS := txr.o lex.yy.o y.tab.o match.o lib.o regex.o gc.o unwind.o stream.o
-OBJS += hash.o utf8.o
+OBJS += hash.o utf8.o filter.o
 
 PROG := ./txr
 
diff --git a/filter.c b/filter.c
new file mode 100644
index 00000000..6b253473
--- /dev/null
+++ b/filter.c
@@ -0,0 +1,163 @@
+/* Copyright 2011
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *   3. The name of the author may not be used to endorse or promote
+ *      products derived from this software without specific prior
+ *      written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <stddef.h>
+#include <setjmp.h>
+#include "config.h"
+#include "lib.h"
+#include "hash.h"
+#include "unwind.h"
+#include "filter.h"
+
+static val make_trie(void)
+{
+  return make_hash(nil, nil);
+}
+
+static val trie_add(val trie, val key, val value)
+{
+  val node, i, len = length_str(key);
+
+  for (node = trie, i = zero; lt(i, len); i = plus(i, one)) {
+    val ch = chr_str(key, i);
+    val newnode_p;
+    val *loc = gethash_l(node, ch, &newnode_p);
+    if (newnode_p)
+      *loc = make_hash(nil, nil);
+    node = *loc;
+  }
+
+  set_hash_userdata(node, value);
+  return node;
+}
+
+val trie_lookup_begin(val trie)
+{
+  return trie;
+}
+
+val trie_value_at(val node)
+{
+  return get_hash_userdata(node);
+}
+
+val trie_lookup_feed_char(val node, val ch)
+{
+  return gethash(node, ch);
+}
+
+val get_filter_trie(val sym)
+{
+  return gethash(filters, sym);
+}
+
+struct filter_pair {
+  wchar_t *key, *value;
+};
+
+static val build_filter(struct filter_pair *pair)
+{
+  int i;
+  val trie = make_trie();
+
+  for (i = 0; pair[i].key; i++)
+    trie_add(trie, static_str(pair[i].key), static_str(pair[i].value));
+
+  return trie;
+}
+
+static struct filter_pair to_html_table[] = {
+  { L"<", L"&lt;" },
+  { L">", L"&gt;" },
+  { L"&", L"&amp;" },
+  { L"\"", L"&quot;" },
+  { 0, 0 }
+};
+
+static val trie_filter_string(val filter, val str)
+{
+  val len = length_str(str);
+  val i;
+  val out = string(L"");
+
+  for (i = zero; lt(i, len); ) {
+    val node = trie_lookup_begin(filter);
+    val match = nil;
+    val subst;
+    val j;
+
+    for (j = i; lt(j, len); j = plus(j, one)) {
+      val ch = chr_str(str, j);
+      val nnode = trie_lookup_feed_char(node, ch);
+      val nsubst;
+
+      if (!nnode)
+        break;
+
+      if ((nsubst = trie_value_at(nnode))) {
+        match = j;
+        subst = nsubst;
+      }
+
+      node = nnode;
+    }
+
+    if (match) {
+      string_extend(out, subst);
+      i = plus(match, one);
+    } else {
+      string_extend(out, chr_str(str, i));
+      i = plus(i, one);
+    }
+  }
+
+  return out;
+}
+
+val filters;
+val filter_k, to_html_k;
+
+val filter_string(val filter, val str)
+{
+  val type = typeof(filter);
+
+  if (type == null)
+    return str;
+  if (type == hash_s)
+    return trie_filter_string(filter, str);
+  else if (type == fun_s)
+    return funcall1(filter, str);
+  return str;
+  uw_throwf(error_s, lit("filter_string: invalid filter ~a"), filter, nao);
+}
+
+void filter_init(void)
+{
+  filters = make_hash(nil, nil);
+  filter_k = intern(lit("filter"), keyword_package);
+  to_html_k = intern(lit("to_html"), keyword_package);
+  sethash(filters, to_html_k, build_filter(to_html_table));
+}
diff --git a/filter.h b/filter.h
new file mode 100644
index 00000000..f8d86632
--- /dev/null
+++ b/filter.h
@@ -0,0 +1,36 @@
+/* Copyright 2011
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *   3. The name of the author may not be used to endorse or promote
+ *      products derived from this software without specific prior
+ *      written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+extern val filters;
+extern val filter_k, to_html_k;
+
+val trie_lookup_begin(val trie);
+val trie_value_at(val node);
+val trie_lookup_feed_char(val node, val ch);
+val get_filter_trie(val sym);
+val filter_string(val trie, val str);
+
+void filter_init(void);
diff --git a/gc.c b/gc.c
index ff6ad3ec..00073425 100644
--- a/gc.c
+++ b/gc.c
@@ -234,7 +234,8 @@ tail_call:
     mark_obj(obj->c.car);
     mark_obj_tail(obj->c.cdr);
   case STR:
-    mark_obj_tail(obj->st.len);
+    mark_obj(obj->st.len);
+    mark_obj_tail(obj->st.alloc);
   case CHR:
   case NUM:
   case LIT:
diff --git a/hash.c b/hash.c
index f26e83c2..dcd28011 100644
--- a/hash.c
+++ b/hash.c
@@ -51,6 +51,7 @@ struct hash {
   val table;
   cnum modulus;
   cnum count;
+  val userdata;
 };
 
 /*
@@ -141,6 +142,8 @@ static void hash_mark(val hash)
   struct hash *h = (struct hash *) hash->co.handle;
   cnum i;
 
+  gc_mark(h->userdata);
+
   switch (h->flags) {
   case hash_weak_none:
     /* If the hash is not weak, we can simply mark the table
@@ -235,6 +238,7 @@ val make_hash(val weak_keys, val weak_vals)
   h->modulus = c_num(mod);
   h->count = 0;
   h->table = table;
+  h->userdata = nil;
 
   return hash;
 }
@@ -258,6 +262,14 @@ val gethash(val hash, val key)
   return cdr(found);
 }
 
+val gethash_f(val hash, val key, val *found)
+{
+  struct hash *h = (struct hash *) hash->co.handle;
+  val chain = *vecref_l(h->table, num(ll_hash(key) % h->modulus));
+  *found = assoc(chain, key);
+  return cdr(*found);
+}
+
 val sethash(val hash, val key, val value)
 {
   val new_p;
@@ -275,6 +287,25 @@ val remhash(val hash, val key)
   return nil;
 }
 
+val get_hash_userdata(val hash)
+{
+  struct hash *h = (struct hash *) hash->co.handle;
+  return h->userdata;
+}
+
+val set_hash_userdata(val hash, val data)
+{
+  struct hash *h = (struct hash *) hash->co.handle;
+  val olddata = h->userdata;
+  h->userdata = data;
+  return olddata;
+}
+
+val hashp(val obj)
+{
+  return typeof(obj) == hash_s ? t : nil;
+}
+
 /*
  * Called from garbage collector. Hash module must process all weak tables
  * that were visited during the marking phase, maintained in the list
diff --git a/hash.h b/hash.h
index cd293ce9..b5bd200a 100644
--- a/hash.h
+++ b/hash.h
@@ -28,8 +28,12 @@ val hash_obj(val);
 val make_hash(val weak_keys, val weak_vals);
 val *gethash_l(val hash, val key, val *new_p);
 val gethash(val hash, val key);
+val gethash_f(val hash, val key, val *found);
 val sethash(val hash, val key, val value);
 val remhash(val hash, val key);
+val get_hash_userdata(val hash);
+val set_hash_userdata(val hash, val data);
+val hashp(val obj);
 void hash_process_weak(void);
 
 void hash_init(void);
diff --git a/lib.c b/lib.c
index 62881137..9aa95d84 100644
--- a/lib.c
+++ b/lib.c
@@ -41,6 +41,7 @@
 #include "unwind.h"
 #include "stream.h"
 #include "utf8.h"
+#include "filter.h"
 
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define min(a, b) ((a) < (b) ? (a) : (b))
@@ -654,6 +655,17 @@ val length(val list)
   return num(len);
 }
 
+val getplist(val list, val key)
+{
+  for (; list; list = cdr(cdr(list))) {
+    val ind = first(list);
+    if (eq(ind, key))
+      return second(list);
+  }
+
+  return nil;
+}
+
 val num(cnum n)
 {
   numeric_assert (n >= NUM_MIN && n <= NUM_MAX);
@@ -747,6 +759,7 @@ val string_own(wchar_t *str)
   obj->st.type = STR;
   obj->st.str = str;
   obj->st.len = nil;
+  obj->st.alloc = nil;
   return obj;
 }
 
@@ -756,6 +769,7 @@ val string(const wchar_t *str)
   obj->st.type = STR;
   obj->st.str = (wchar_t *) chk_strdup(str);
   obj->st.len = nil;
+  obj->st.alloc = nil;
   return obj;
 }
 
@@ -765,6 +779,7 @@ val string_utf8(const char *str)
   obj->st.type = STR;
   obj->st.str = utf8_dup_from(str);
   obj->st.len = nil;
+  obj->st.alloc = nil;
   return obj;
 }
 
@@ -775,6 +790,7 @@ val mkstring(val len, val ch)
   val s = string_own(str);
   wmemset(str, c_chr(ch), nchar);
   s->st.len = len;
+  s->st.alloc = plus(len, one);
   return s;
 }
 
@@ -785,6 +801,7 @@ val mkustring(val len)
   val s = string_own(str);
   str[l] = 0;
   s->st.len = len;
+  s->st.alloc = plus(len, one);
   return s;
 }
 
@@ -799,6 +816,52 @@ val copy_str(val str)
   return string(c_str(str));
 }
 
+val string_extend(val str, val tail)
+{
+  type_check(str, STR);
+  {
+    cnum len = c_num(length_str(str));
+    cnum alloc = c_num(str->st.alloc);
+    val needed;
+    val room = zero;
+
+    if (stringp(tail))
+      needed = length_str(tail);
+    else if (chrp(tail))
+      needed = one;
+    else
+      uw_throwf(error_s, lit("string_extend: tail ~s bad type"), str, nao);
+
+    room = num(alloc - len - 1);
+
+    while (gt(needed, room) && alloc < NUM_MAX) {
+      if (alloc > NUM_MAX / 2) {
+        alloc = NUM_MAX;
+      } else {
+        alloc *= 2;
+      }
+      room = num(alloc - len - 1);
+    }
+
+    if (gt(needed, room))
+      uw_throwf(error_s, lit("string_extend: overflow"), nao);
+
+    str->st.str = (wchar_t *) chk_realloc((mem_t *) str->st.str,
+                                          alloc * sizeof *str->st.str);
+    str->st.alloc = num(alloc);
+    str->st.len = plus(str->st.len, needed);
+
+    if (stringp(tail)) {
+      wmemcpy(str->st.str + len, c_str(tail), c_num(needed) + 1);
+    } else {
+      str->st.str[len] = c_chr(tail);
+      str->st.str[len + 1] = 0;
+    }
+  }
+
+  return str;
+}
+
 val stringp(val str)
 {
   switch (tag(str)) {
@@ -834,8 +897,10 @@ val length_str(val str)
       return length_str(str->ls.prefix);
     }
 
-    if (!str->st.len)
+    if (!str->st.len) {
       str->st.len = num(wcslen(str->st.str));
+      str->st.alloc = plus(str->st.len, one);
+    }
     return str->st.len;
   }
 }
@@ -1711,6 +1776,16 @@ val cobj(mem_t *handle, val cls_sym, struct cobj_ops *ops)
   return obj;
 }
 
+val cobjp(val obj)
+{
+  if (!obj) {
+    return nil;
+  } else {
+    type_t ty = type(obj);
+    return (ty == COBJ) ? t : nil;
+  }
+}
+
 mem_t *cobj_handle(val cobj, val cls_sym)
 {
   class_check(cobj, cls_sym);
@@ -2222,6 +2297,7 @@ void init(const wchar_t *pn, mem_t *(*oom)(mem_t *, size_t),
   obj_init();
   uw_init();
   stream_init();
+  filter_init();
 
   gc_state(gc_save);
 }
diff --git a/lib.h b/lib.h
index 7b69cbbf..f72111e4 100644
--- a/lib.h
+++ b/lib.h
@@ -68,6 +68,7 @@ struct string {
   type_t type;
   wchar_t *str;
   val len;
+  val alloc;
 };
 
 struct sym {
@@ -201,6 +202,11 @@ INLINE wchar_t *litptr(val obj)
  return (wchar_t *) ((cnum) obj & ~TAG_MASK);
 }
 
+INLINE val num_fast(cnum n)
+{
+  return (val) ((n << TAG_SHIFT) | TAG_NUM);
+}
+
 #define lit_noex(strlit) ((obj_t *) ((cnum) (L ## strlit) | TAG_LIT))
 #define lit(strlit) lit_noex(strlit)
 
@@ -277,6 +283,7 @@ val atom(val obj);
 val listp(val obj);
 val proper_listp(val obj);
 val length(val list);
+val getplist(val list, val key);
 val num(cnum val);
 cnum c_num(val num);
 val nump(val num);
@@ -298,6 +305,7 @@ val mkstring(val len, val ch);
 val mkustring(val len); /* must initialize immediately with init_str! */
 val init_str(val str, const wchar_t *);
 val copy_str(val str);
+val string_extend(val str, val tail);
 val stringp(val str);
 val lazy_stringp(val str);
 val length_str(val str);
@@ -357,6 +365,7 @@ val length_str_ge(val str, val len);
 val length_str_lt(val str, val len);
 val length_str_le(val str, val len);
 val cobj(mem_t *handle, val cls_sym, struct cobj_ops *ops);
+val cobjp(val obj);
 mem_t *cobj_handle(val cobj, val cls_sym);
 val assoc(val list, val key);
 val acons_new(val list, val key, val value);
@@ -428,7 +437,6 @@ val match(val spec, val data);
   obj_t *CAR = car(c_o_n_s ## CAR ## CDR);      \
   obj_t *CDR = cdr(c_o_n_s ## CAR ## CDR)
 
-#define num_fast(n) ((val) ((n << TAG_SHIFT) | TAG_NUM))
 #define zero num_fast(0)
 #define one num_fast(1)
 #define two num_fast(2)
diff --git a/match.c b/match.c
index d833f9df..e92d9687 100644
--- a/match.c
+++ b/match.c
@@ -42,6 +42,7 @@
 #include "parser.h"
 #include "txr.h"
 #include "utf8.h"
+#include "filter.h"
 #include "match.h"
 
 int output_produced;
@@ -336,6 +337,13 @@ static val match_line(val bindings, val specline, val dataline,
             }
             continue;
           } else if (pat == nil) { /* match to end of line or with regex */
+            if (gt(length(modifier), one)) {
+              sem_error(spec_lineno, lit("multiple modifiers on variable ~s"),
+                        sym, nao);
+            }
+
+            modifier = car(modifier);
+
             if (consp(modifier)) {
               val past = match_regex(dataline, car(modifier), pos);
               if (nullp(past)) {
@@ -519,14 +527,45 @@ static val match_line(val bindings, val specline, val dataline,
   return cons(bindings, pos);
 }
 
-static val format_field(val string_or_list, val spec)
+static val format_field(val string_or_list, val modifier, val filter)
 {
+  val n = zero;
+  val plist = nil;
+
   if (!stringp(string_or_list))
     return string_or_list;
 
+  for (; modifier; pop(&modifier)) {
+    val item = first(modifier);
+    if (nump(item))
+      n = item;
+    if (regexp(item))
+      uw_throw(query_error_s, lit("format_field: regex modifier in output"));
+    if (keywordp(item)) {
+      plist = modifier;
+      break;
+    }
+  }
+
   {
-    val right = lt(spec, zero);
-    val width = if3(lt(spec, zero), neg(spec), spec);
+    val filter_sym = getplist(plist, filter_k);
+
+    if (filter_sym) {
+      filter = get_filter_trie(filter_sym);
+
+      if (!filter) {
+        uw_throwf(query_error_s, lit("format_field: filter ~s not known"),
+                  filter_sym, nao);
+      }
+
+      string_or_list = filter_string(filter, cat_str(list(string_or_list, nao),
+                                                     nil));
+    }
+  }
+
+  {
+    val right = lt(n, zero);
+    val width = if3(lt(n, zero), neg(n), n);
     val diff = minus(width, length_str(string_or_list));
 
     if (le(diff, zero))
@@ -545,7 +584,7 @@ static val format_field(val string_or_list, val spec)
   }
 }
 
-static val subst_vars(val spec, val bindings)
+static val subst_vars(val spec, val bindings, val filter)
 {
   list_collect_decl(out, iter);
 
@@ -556,25 +595,25 @@ static val subst_vars(val spec, val bindings)
       if (first(elem) == var_s) {
         val sym = second(elem);
         val pat = third(elem);
-        val modifier = fourth(elem);
+        val modifiers = fourth(elem);
         val pair = assoc(bindings, sym);
 
         if (pair) {
           if (pat)
-            spec = cons(cdr(pair), cons(pat, rest(spec)));
-          else if (nump(modifier))
-            spec = cons(format_field(cdr(pair), modifier), rest(spec));
+            spec = cons(filter_string(filter, cdr(pair)), cons(pat, rest(spec)));
+          else if (modifiers)
+            spec = cons(format_field(cdr(pair), modifiers, filter), rest(spec));
           else
-            spec = cons(cdr(pair), rest(spec));
+            spec = cons(filter_string(filter, cdr(pair)), rest(spec));
           continue;
         }
       } else if (first(elem) == quasi_s) {
-        val nested = subst_vars(rest(elem), bindings);
+        val nested = subst_vars(rest(elem), bindings, filter);
         list_collect_append(iter, nested);
         spec = cdr(spec);
         continue;
       } else {
-        val nested = subst_vars(elem, bindings);
+        val nested = subst_vars(elem, bindings, filter);
         list_collect_append(iter, nested);
         spec = cdr(spec);
         continue;
@@ -596,7 +635,7 @@ static val eval_form(val form, val bindings)
     return assoc(bindings, form);
   } else if (consp(form)) {
     if (car(form) == quasi_s) {
-      return cons(t, cat_str(subst_vars(rest(form), bindings), nil));
+      return cons(t, cat_str(subst_vars(rest(form), bindings, nil), nil));
     } else if (regexp(car(form))) {
       return cons(t, form);
     } else {
@@ -740,7 +779,7 @@ static val extract_bindings(val bindings, val output_spec)
 }
 
 static void do_output_line(val bindings, val specline,
-                           val spec_lineno, val out)
+                           val spec_lineno, val filter, val out)
 {
   for (; specline; specline = rest(specline)) {
     val elem = first(specline);
@@ -751,7 +790,8 @@ static void do_output_line(val bindings, val specline,
         val directive = first(elem);
 
         if (directive == var_s) {
-          val str = cat_str(subst_vars(cons(elem, nil), bindings), nil);
+          val str = cat_str(subst_vars(cons(elem, nil),
+                                       bindings, filter), nil);
           if (str == nil)
             sem_error(spec_lineno, lit("bad substitution: ~a"),
                       second(elem), nao);
@@ -770,10 +810,10 @@ static void do_output_line(val bindings, val specline,
                                                  nao)));
 
           if (equal(max_depth, zero) && empty_clauses) {
-            do_output_line(bindings, empty_clauses, spec_lineno, out);
+            do_output_line(bindings, empty_clauses, spec_lineno, filter, out);
           } else if (equal(max_depth, one) && single_clauses) {
             val bind_a = mapcar(func_n1(bind_car), bind_cp);
-            do_output_line(bind_a, single_clauses, spec_lineno, out);
+            do_output_line(bind_a, single_clauses, spec_lineno, filter, out);
           } else if (!zerop(max_depth)) {
             cnum i;
 
@@ -782,11 +822,11 @@ static void do_output_line(val bindings, val specline,
               val bind_d = mapcar(func_n1(bind_cdr), bind_cp);
 
               if (i == 0 && first_clauses) {
-                do_output_line(bind_a, first_clauses, spec_lineno, out);
+                do_output_line(bind_a, first_clauses, spec_lineno, filter, out);
               } else if (i == c_num(max_depth) - 1 && last_clauses) {
-                do_output_line(bind_a, last_clauses, spec_lineno, out);
+                do_output_line(bind_a, last_clauses, spec_lineno, filter, out);
               } else {
-                do_output_line(bind_a, main_clauses, spec_lineno, out);
+                do_output_line(bind_a, main_clauses, spec_lineno, filter, out);
               }
 
               bind_cp = bind_d;
@@ -810,7 +850,7 @@ static void do_output_line(val bindings, val specline,
   }
 }
 
-static void do_output(val bindings, val specs, val out)
+static void do_output(val bindings, val specs, val filter, val out)
 {
   if (equal(specs, null_list))
     return;
@@ -836,10 +876,10 @@ static void do_output(val bindings, val specs, val out)
                                                nao)));
 
         if (equal(max_depth, zero) && empty_clauses) {
-          do_output(bind_cp, empty_clauses, out);
+          do_output(bind_cp, empty_clauses, filter, out);
         } else if (equal(max_depth, one) && single_clauses) {
           val bind_a = mapcar(func_n1(bind_car), bind_cp);
-          do_output(bind_a, single_clauses, out);
+          do_output(bind_a, single_clauses, filter, out);
         } else if (!zerop(max_depth)) {
           cnum i;
 
@@ -848,11 +888,11 @@ static void do_output(val bindings, val specs, val out)
             val bind_d = mapcar(func_n1(bind_cdr), bind_cp);
 
             if (i == 0 && first_clauses) {
-              do_output(bind_a, first_clauses, out);
+              do_output(bind_a, first_clauses, filter, out);
             } else if (i == c_num(max_depth) - 1 && last_clauses) {
-              do_output(bind_a, last_clauses, out);
+              do_output(bind_a, last_clauses, filter, out);
             } else {
-              do_output(bind_a, main_clauses, out);
+              do_output(bind_a, main_clauses, filter, out);
             }
 
             bind_cp = bind_d;
@@ -862,7 +902,7 @@ static void do_output(val bindings, val specs, val out)
       }
     }
 
-    do_output_line(bindings, specline, spec_lineno, out);
+    do_output_line(bindings, specline, spec_lineno, filter, out);
     put_char(out, chr('\n'));
   }
 }
@@ -1393,7 +1433,7 @@ repeat_spec_same_data:
             val sep = nil;
 
             if (rest(specline)) {
-              val sub = subst_vars(rest(specline), bindings);
+              val sub = subst_vars(rest(specline), bindings, nil);
               sep = cat_str(sub, nil);
             }
 
@@ -1410,14 +1450,14 @@ repeat_spec_same_data:
         val specs = second(first_spec);
         val dest_spec = third(first_spec);
         val nothrow = nil;
-        val dest;
+        val dest = lit("-");
+        val filter = nil;
         fpip_t fp;
 
         if (eq(first(dest_spec), nothrow_k)) {
           if (rest(dest_spec))
             sem_error(spec_linenum, lit("material after :nothrow in output"), nao);
-          dest = string(L"-");
-        } else {
+        } else if (!keywordp(first(dest_spec))) {
           val form = first(dest_spec);
           val val = eval_form(form, bindings);
 
@@ -1425,8 +1465,24 @@ repeat_spec_same_data:
             sem_error(spec_linenum,
                       lit("output: unbound variable in form ~a"), form, nao);
 
-          nothrow = eq(second(dest_spec), nothrow_k);
-          dest = or2(cdr(val), string(L"-"));
+          dest = or2(cdr(val), dest);
+          pop(&dest_spec);
+        }
+
+        if (eq(first(dest_spec), nothrow_k)) {
+          nothrow = t;
+          pop(&dest_spec);
+        }
+
+        if (keywordp(first(dest_spec))) {
+          val filter_sym = getplist(dest_spec, filter_k);
+
+          if (filter_sym) {
+            filter = get_filter_trie(filter_sym);
+
+            if (!filter)
+              sem_error(spec_linenum, lit("unknown filter ~s"), filter_sym, nao);
+          }
         }
 
         fp = (errno = 0, complex_open(dest, t));
@@ -1446,7 +1502,7 @@ repeat_spec_same_data:
           }
         } else {
           val stream = complex_stream(fp, dest);
-          do_output(bindings, specs, stream);
+          do_output(bindings, specs, filter, stream);
           close_stream(stream, t);
         }
 
diff --git a/parser.y b/parser.y
index d2bcf4ad..e6e66d1d 100644
--- a/parser.y
+++ b/parser.y
@@ -194,7 +194,8 @@ elems : elem                    { $$ = cons($1, nil); }
 elem : TEXT                     { $$ = string_own($1); }
      | var                      { $$ = $1; }
      | list                     { $$ = $1; }
-     | regex                    { $$ = cons(regex_compile($1), $1); }
+     | regex                    { $$ = cons(regex_compile(rest($1)),
+                                            rest($1)); }
      | COLL elems END           { $$ = list(coll_s, $2, nao); }
      | COLL elems
        UNTIL elems END          { $$ = list(coll_s, $2, $4, nao); }
@@ -400,11 +401,8 @@ var : IDENT                     { $$ = list(var_s, intern(string_own($1), nil),
                                             nao); }
     | '{' IDENT '}' elem        { $$ = list(var_s, intern(string_own($2), nil),
                                             $4, nao); }
-    | '{' IDENT regex '}'       { $$ = list(var_s, intern(string_own($2), nil),
-                                            nil, cons(regex_compile($3), $3),
-                                            nao); }
-    | '{' IDENT NUMBER '}'      { $$ = list(var_s, intern(string_own($2), nil),
-                                            nil, num($3), nao); }
+    | '{' IDENT exprs '}'       { $$ = list(var_s, intern(string_own($2), nil),
+                                            nil, $3, nao); }
     | var_op IDENT              { $$ = list(var_s, intern(string_own($2), nil),
                                             nil, $1, nao); }
     | var_op IDENT elem         { $$ = list(var_s, intern(string_own($2), nil),
@@ -445,13 +443,14 @@ expr : IDENT                    { $$ = intern(string_own($1), nil); }
                                               keyword_package); }
      | NUMBER                   { $$ = num($1); }
      | list                     { $$ = $1; }
-     | regex                    { $$ = cons(regex_compile($1), $1); }
+     | regex                    { $$ = cons(regex_compile(rest($1)),
+                                            rest($1)); }
      | chrlit                   { $$ = $1; }
      | strlit                   { $$ = $1; }
      | quasilit                 { $$ = $1; }
      ;
 
-regex : '/' regexpr '/'         { $$ = $2; end_of_regex(); }
+regex : '/' regexpr '/'         { $$ = cons(regex_s, $2); end_of_regex(); }
       | '/' error               { $$ = nil;
                                   yybadtoken(yychar, lit("regex"));
                                   end_of_regex(); }
diff --git a/txr.1 b/txr.1
index 2721edab..83c99412 100644
--- a/txr.1
+++ b/txr.1
@@ -2274,7 +2274,7 @@ usual printing of the variable bindings or the word false.
 
 The syntax of the @(output) directive is:
 
-  @(output [ DESTINATION ] [ :nothrow ])
+  @(output [ DESTINATION ] [ :nothrow ] [ { keyword value } * ])
   .
   . one or more output directives or lines
   .
@@ -2295,6 +2295,12 @@ asynchronously, a failing command will not throw an immediate exception that
 can be suppressed with :nothrow.   This is for synchronous errors, like
 trying to open a destination file, but not having permissions, etc.
 
+The keyword value list is used for passing additional options.
+Currently, the only keyword supported is the :filter keyword.
+This specifies a filter to be applied to the variable substitutions occuring
+within the output clause.
+
+
 .SS Output Text
 
 Text in an output clause is not matched against anything, but is output
@@ -2318,6 +2324,9 @@ field. If the text is shorter than the field, then it is left-adjusted within
 that field, if the width is specified as a positive number, and right-adjusted
 if the width is specified as negative.
 
+An output variable may specify a filter which overrides any filter established
+for the output clause. The syntax for this is @(NAME :filter <filtername>}.
+
 .SS The Repeat Directive
 
 The repeat directive is generates repeated text from a ``boilerplate'',
@@ -2465,6 +2474,32 @@ spaces each one, except the last which has no space.
 If the list has exactly one item, then the @(last) applies to it
 instead of the main clause: it is produced with no trailing space.
 
+.SS Output Filtering
+
+Often it is necessary to transform the output to preserve its meaning
+under the convention of a given data format. For instance, if a piece of
+text contains the characters < or >, then if that text is being
+substituted into HTML, these should be replaced by &lt; and &gt;.
+This is what filtering is for.  Filtering is applied to the contents of output
+variables, not to any template text.
+.B txr
+implements named filters. Currently, the only built-in filter available is
+called :to_html and user-defined filters are not possible.
+
+To escape HTML characters in all variable substitutions occuring in
+an output clause, specify :filter :to_html in the directive:
+
+  @(output :filter :to_html)
+  ...
+  @(end)
+
+To filter an individual variable, add the syntax to the variable spec:
+
+  @(output)
+  @{x :filter :to_html}
+  @(end)
+
+
 .SH EXCEPTIONS
 
 .SS Introduction
-- 
cgit v1.2.3