8 files changed, 1011 insertions, 149 deletions
diff --git a/ChangeLog b/ChangeLog
index 5a8a5776..2461f007 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,103 @@
+2009-10-03  Kaz Kylheku  <kkylheku@gmail.com>
+
+	Version 014
+
+	New cases directive.
+
+	New define directive: user-defined dynamically scoped functions.
+
+	String literals in bind and function calls.
+
+	EOF in the middle of a line handled properly.
+
+	* extract.l (version): Bump to 014.
+	* txr.1: Bump version to 014.
+
+
+2009-10-02  Kaz Kylheku  <kkylheku@gmail.com>
+
+	New cases directive.
+
+	* extract.l (yybadtoken): Add case for CASES.
+	(grammar): Tokenize cases directive.
+	* extract.y (CASES): New token kind.
+	(cases_clause): New grammar symbol.
+	(grammar): Implement new grammar cases.
+	(match_files): Implement semantics for cases.
+	* lib.c (cases): New global.
+	(obj_init): Intern cases symbol.
+	* lib.h (cases): Declared.
+	* txr.1: Documented.
+
+2009-10-02  Kaz Kylheku  <kkylheku@gmail.com>
+
+	Support for string and character literals.
+
+	* extract.l (char_esc): Support \' and \" escapes.
+	(STRLIT, CHRLIT): New flex start conditions.
+	(grammar): New rules for tokenizing string literals.
+	* extract.y (LITCHAR): New token kind.
+	(strlit, chrlit, litchars): New grammar symbols.
+	(grammar): Implement string literal parsing.
+	(dump_var): Support character objects, treating
+	them as one-character strings.
+	(eval_form): New function.
+	(match_files): In bind directive, allow the right
+	hand side to be an arbitrary object.
+	* lib.c (mkustring, init_str): New functions.
+	(cat_str): Allow characters in the mix, treating
+	them as one-character strings.
+	* lib.h (mkustring, init_str): Declared.
+	(chrp, chr_str, chr_str_set): New function.
+	* txr.1: Documented.
+
+2009-10-02  Kaz Kylheku  <kkylheku@gmail.com>
+
+	Support for query-defined functions.
+
+	* extract.l (yybadtoken): New DEFINE case.
+	(NESTED): New flex start condition.  This allows for different lexing
+	rules in nested lists, so even though for instance @(collect) is a
+	special token @((collect)) isn't.
+	(grammar): Refactored with NESTED.
+	Tokenize define directive.
+	* extract.y (define_transform): New function.
+	(DEFINE): New token kind.
+	(define_clause): New grammar symbol.
+	(match_files): Implement define semantics, and function calls.
+	* lib.c (define): New global.
+	* lib.h (define): Declared.
+	(proper_listp, alist_remove1, copy_cons, copy_alist): New functions.
+	(obj_init): Intern define symbol.
+	(init): Call new function uw_init.
+	* unwind.c (toplevel_env): New static structure.
+	(uw_unwind_to_exit_point): Support new UW_ENV frame type.
+	(uw_init, uw_find_env, uw_push_env, uw_get_func, uw_set_func): New
+	functions.
+	* unwind.h (UW_ENV): New enumeration member in uw_frtype.
+	(uw_dynamic_env): New struct.
+	(uw_block_begin, uw_block_end): Renamed some variables.
+	(uw_env_begin, uw_env_end): New macros.
+	* txr.1: Documented.
+
+2009-10-02  Kaz Kylheku  <kkylheku@gmail.com>
+
+	Misc. bugfixes and improvements.
+
+	* extract.l (grammar): Newline in a directive
+	no longer an error. Why not allow it.
+	* extract.y (grammar): Productions for catching empty bodies in some
+	constructs now end with END newl, rather
+	than just END, so parsing can continue sanely.
+	(match_lines): In diagnostics, don't say "ignored" about material which
+	causes an error that fails the query!
+	* lib.c (mkstring): Initialize length since we know it!
+	(c_str): Take a symbol as an arg, so we don't have
+	to keep writing c_str(symbol_name(sym)).
+	(obj_print): Use isprint rather than isctrl to decide
+	whether to print a character as an escape.
+	(snarf_line): Properly handle EOF in the middle of line.
+
 2009-09-29  Kaz Kylheku  <kkylheku@gmail.com>
 
 	Version 013
diff --git a/extract.l b/extract.l
index 81dc91d9..ab041bb9 100644
--- a/extract.l
+++ b/extract.l
@@ -40,7 +40,7 @@
 
 #define YY_NO_UNPUT
 
-const char *version = "013";
+const char *version = "014";
 const char *progname = "txr";
 const char *spec_file = "stdin";
 long lineno = 1;
@@ -111,6 +111,7 @@ void yybadtoken(int tok, const char *context)
   case SOME:    problem = "\"some\""; break;
   case NONE:    problem = "\"none\""; break;
   case MAYBE:   problem = "\"maybe\""; break;
+  case CASES:   problem = "\"cases\""; break;
   case AND:     problem = "\"and\""; break;
   case OR:      problem = "\"or\""; break;
   case END:     problem = "\"end\""; break;
@@ -124,6 +125,7 @@ void yybadtoken(int tok, const char *context)
   case FIRST:   problem = "\"first\""; break;
   case LAST:    problem = "\"last\""; break;
   case EMPTY:   problem = "\"empty\""; break;
+  case DEFINE:  problem = "\"define\""; break;
   case NUMBER:  problem = "\"number\""; break;
   case REGCHAR: problem = "regular expression character"; break;
   }
@@ -151,6 +153,8 @@ static int char_esc(int letter)
   case 'f': return '\f';
   case 'r': return '\r';
   case 'e': return 27;
+  case '"': return '"';
+  case '\'': return '\'';
   }
 
   abort();
@@ -172,34 +176,40 @@ static int num_esc(char *num)
 %}
 
 TOK     [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
+ID_END  [^a-zA-Z0-9_]
+NUM_END [^0-9]
 WS      [\t ]*
-%x      SPECIAL REGEX REGCLASS
+HEX     [0-9A-Fa-f]
+OCT     [0-7]
+
+%x      SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT
 
 %%
 
-<SPECIAL>{TOK}  {
-                  long val;
-                  char *errp;
+<SPECIAL,NESTED>{TOK}   {
+                          long val;
+                          char *errp;
 
-                  errno = 0;
+                          errno = 0;
 
-                  val = strtol(yytext, &errp, 10);
+                          val = strtol(yytext, &errp, 10);
 
-                  if (nesting == 0)
-                    BEGIN(INITIAL);
+                          if (nesting == 0)
+                            BEGIN(INITIAL);
 
-                  if (*errp != 0) {
-                    /* not a number */
-                    yylval.lexeme = strdup(yytext);
-                    return IDENT;
-                  }
+                          if (*errp != 0) {
+                            /* not a number */
+                            yylval.lexeme = strdup(yytext);
+                            return IDENT;
+                          }
 
-                  if ((val == LONG_MAX || val == LONG_MIN) && errno == ERANGE)
-                    yyerror("numeric overflow in token");
+                          if ((val == LONG_MAX || val == LONG_MIN)
+                              && errno == ERANGE)
+                            yyerror("numeric overflow in token");
 
-                  yylval.num = val;
-                  return NUMBER;
-                }
+                          yylval.num = val;
+                          return NUMBER;
+                        }
 
 
 <SPECIAL>\({WS}all{WS}\)        {
@@ -222,12 +232,17 @@ WS      [\t ]*
                                   return MAYBE;
                                 }
 
+<SPECIAL>\({WS}cases{WS}\)      {
+                                  BEGIN(INITIAL);
+                                  return CASES;
+                                }
+
 <SPECIAL>\({WS}and{WS}\)        {
                                   BEGIN(INITIAL);
                                   return AND;
                                 }
 
-<SPECIAL>\({WS}or{WS}\)        {
+<SPECIAL>\({WS}or{WS}\)         {
                                   BEGIN(INITIAL);
                                   return OR;
                                 }
@@ -288,54 +303,74 @@ WS      [\t ]*
                                   return EMPTY;
                                 }
 
-<SPECIAL>\{|\(  {
-                  nesting++;
-                  if (yytext[0] == '{')
-                    closechar = '}';
-                  else
-                    closechar = ')';
-                  return yytext[0];
-                }
+<SPECIAL>\({WS}define/{ID_END}  {
+                                  nesting++;
+                                  closechar = ')';
+                                  BEGIN(NESTED);
+                                  return DEFINE;
+                                }
 
-<SPECIAL>\}|\)  {
-                  if (yytext[0] != closechar) {
-                    yyerror("paren mismatch");
-                    BEGIN(INITIAL);
-                  } else {
-                    if (--nesting == 0)
-                      BEGIN(INITIAL);
-                    return yytext[0];
-                  }
-                }
+<SPECIAL,NESTED>\{|\(   {
+                          nesting++;
+                          if (yytext[0] == '{')
+                            closechar = '}';
+                          else
+                            closechar = ')';
+                          BEGIN(NESTED);
+                          return yytext[0];
+                        }
 
-<SPECIAL>[\t ]+ {
-                  /* Eat whitespace in directive */
-                }
+<SPECIAL,NESTED>\}|\)   {
+                          if (yytext[0] != closechar) {
+                            yyerror("paren mismatch");
+                            BEGIN(INITIAL);
+                          } else {
+                            switch (--nesting) {
+                            case 1:
+                              BEGIN(SPECIAL);
+                              break;
+                            case 0:
+                              BEGIN(INITIAL);
+                              break;
+                            }
+
+                            return yytext[0];
+                          }
+                        }
+
+<SPECIAL,NESTED>[\t ]+  { /* Eat whitespace in directive */ }
+
+<SPECIAL,NESTED>\"      {
+                          BEGIN(STRLIT);
+                          return '"';
+                        }
+
+<SPECIAL,NESTED>\'      {
+                          BEGIN(CHRLIT);
+                          return '\'';
+                        }
 
 <SPECIAL>@      {
                   if (nesting == 0) {
                     BEGIN(INITIAL);
                     yylval.lexeme = strdup("@");
                     return TEXT;
-                  } else {
-                    yyerrorf(0, "bad character in directive: %c", yytext[0]);
                   }
                 }
 
-<SPECIAL>\n     {
-                  lineno++;
-                  yyerror("newline in directive");
-                }
+<SPECIAL,NESTED>\n      {
+                          lineno++;
+                        }
 
-<SPECIAL>[/]    {
-                  BEGIN(REGEX);
-                  return '/';
-                }
+<SPECIAL,NESTED>[/]     {
+                          BEGIN(REGEX);
+                          return '/';
+                        }
 
-<SPECIAL>\.     {
-                  yylval.chr = '.';
-                  return '.';
-                }
+<SPECIAL,NESTED>\.      {
+                          yylval.chr = '.';
+                          return '.';
+                        }
 
 <SPECIAL>[\\][abtnvfre] {
                           char lexeme[2];
@@ -346,24 +381,25 @@ WS      [\t ]*
                           return TEXT;
                         }
 
-<SPECIAL>[\\](x[0-9a-fA-F]+|[0-7]+)     {
-                                          char lexeme[2];
-                                          lexeme[0] = num_esc(yytext + 1);
-                                          lexeme[1] = 0;
-                                          yylval.lexeme = strdup(lexeme);
-                                          BEGIN(INITIAL);
-                                          return TEXT;
-                                        }
+<SPECIAL>[\\](x{HEX}+|{OCT}+)   {
+                                  char lexeme[2];
+                                  lexeme[0] = num_esc(yytext + 1);
+                                  lexeme[1] = 0;
+                                  yylval.lexeme = strdup(lexeme);
+                                  BEGIN(INITIAL);
+                                  return TEXT;
+                                }
 
-<SPECIAL>.      {
-                  yyerrorf(0, "bad character in directive: '%c'", yytext[0]);
-                }
+<SPECIAL,NESTED>.       {
+                          yyerrorf(0, "bad character in directive: '%c'",
+                                   yytext[0]);
+                        }
 
 <REGEX>[/]      {
                   if (nesting == 0)
                     BEGIN(INITIAL);
                   else
-                    BEGIN(SPECIAL);
+                    BEGIN(NESTED);
                   yylval.chr = '/';
                   return '/';
                 }
@@ -374,10 +410,10 @@ WS      [\t ]*
                           return REGCHAR;
                         }
 
-<REGEX>[\\](x[0-9a-fA-F]+|[0-9]+)       {
-                                          yylval.chr = num_esc(yytext + 1);
-                                          return REGCHAR;
-                                        }
+<REGEX>[\\](x{HEX}+|{OCT}+)     {
+                                  yylval.chr = num_esc(yytext + 1);
+                                  return REGCHAR;
+                                }
 
 <REGEX>\n       {
                   lineno++;
@@ -438,6 +474,48 @@ WS      [\t ]*
                    /* comment to end of line */
                 }
 
+<STRLIT>\"      {
+                  if (nesting == 0)
+                    BEGIN(INITIAL);
+                  else
+                    BEGIN(NESTED);
+                  return '"';
+                }
+
+<CHRLIT>\'      {
+                  if (nesting == 0)
+                    BEGIN(INITIAL);
+                  else
+                    BEGIN(NESTED);
+                  return '\'';
+                }
+
+<STRLIT,CHRLIT>[\\][abtnvfre] {
+                                yylval.chr = char_esc(yytext[1]);
+                                return LITCHAR;
+                              }
+
+<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+)     {
+                                          yylval.chr = num_esc(yytext + 1);
+                                          return LITCHAR;
+                                        }
+<STRLIT>\n              {
+                          yyerror("newline in string literal");
+                          lineno++;
+                          yylval.chr = yytext[0];
+                          return LITCHAR;
+                        }
+<CHRLIT>\n              {
+                          yyerror("newline in character literal");
+                          lineno++;
+                          yylval.chr = yytext[0];
+                          return LITCHAR;
+                        }
+<STRLIT,CHRLIT>.        {
+                          yylval.chr = yytext[0];
+                          return LITCHAR;
+                        }
+
 %%
 
 void help(void)
diff --git a/extract.y b/extract.y
index 54dbb747..5ac61b2b 100644
--- a/extract.y
+++ b/extract.y
@@ -45,6 +45,7 @@ int yylex(void);
 void yyerror(const char *);
 
 obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts);
+obj_t *define_transform(obj_t *define_form);
 
 static obj_t *parsed_spec;
 static int output_produced;
@@ -58,23 +59,23 @@ static int output_produced;
   long num;
 }
 
-%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL
-%token <lexeme> OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY
+%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT
+%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE
 %token <num> NUMBER
-%token <chr> REGCHAR
+%token <chr> REGCHAR LITCHAR
 
 %type <obj> spec clauses clause all_clause some_clause none_clause maybe_clause
-%type <obj> collect_clause clause_parts additional_parts output_clause
-%type <obj> line elems_opt elems elem var var_op list exprs expr
-%type <obj> out_clauses out_clauses_opt out_clause
+%type <obj> cases_clause collect_clause clause_parts additional_parts
+%type <obj> output_clause define_clause line elems_opt elems elem var var_op
+%type <obj> list exprs expr out_clauses out_clauses_opt out_clause
 %type <obj> repeat_clause repeat_parts_opt o_line
 %type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt
 %type <obj> regex regexpr regbranch
 %type <obj> regterm regclass regclassterm regrange
+%type <obj> strlit chrlit litchars
 %type <chr> regchar
-
-%nonassoc ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL
-%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY
+%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL
+%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE
 %nonassoc '{' '}' '[' ']' '(' ')'
 %right IDENT TEXT NUMBER
 %left '|' '/'
@@ -97,7 +98,10 @@ clause : all_clause             { $$ = list(num(lineno - 1), $1, nao); }
        | some_clause            { $$ = list(num(lineno - 1), $1, nao); }
        | none_clause            { $$ = list(num(lineno - 1), $1, nao); }
        | maybe_clause           { $$ = list(num(lineno - 1), $1, nao); }
+       | cases_clause           { $$ = list(num(lineno - 1), $1, nao); }
        | collect_clause         { $$ = list(num(lineno - 1), $1, nao); }
+       | define_clause          { $$ = list(num(lineno - 1),
+                                            define_transform($1), nao); }
        | output_clause          { $$ = list(num(lineno - 1), $1, nao); }
        | line                   { $$ = $1; }
        | repeat_clause          { $$ = nil;
@@ -108,7 +112,7 @@ all_clause : ALL newl clause_parts      { $$ = cons(all, $3); }
            | ALL newl error             { $$ = nil;
                                           yybadtoken(yychar,
                                                      "all clause"); }
-           | ALL newl END               { $$ = nil;
+           | ALL newl END newl          { $$ = nil;
                                           yyerror("empty all clause"); }
 
            ;
@@ -117,7 +121,7 @@ some_clause : SOME newl clause_parts    { $$ = cons(some, $3); }
             | SOME newl error           { $$ = nil;
                                           yybadtoken(yychar,
                                                      "some clause"); }
-            | SOME newl END             { $$ = nil;
+            | SOME newl END newl       { $$ = nil;
                                           yyerror("empty some clause"); }
             ;
 
@@ -125,7 +129,7 @@ none_clause : NONE newl clause_parts    { $$ = cons(none, $3); }
             | NONE newl error           { $$ = nil;
                                           yybadtoken(yychar,
                                                      "none clause"); }
-            | NONE newl END             { $$ = nil;
+            | NONE newl END newl        { $$ = nil;
                                           yyerror("empty none clause"); }
             ;
 
@@ -133,10 +137,18 @@ maybe_clause : MAYBE newl clause_parts  { $$ = cons(maybe, $3); }
              | MAYBE newl error         { $$ = nil;
                                           yybadtoken(yychar,
                                                      "maybe clause"); }
-             | MAYBE newl END           { $$ = nil;
+             | MAYBE newl END newl      { $$ = nil;
                                           yyerror("empty maybe clause"); }
              ;
 
+cases_clause : CASES newl clause_parts  { $$ = cons(cases, $3); }
+             | CASES newl error         { $$ = nil;
+                                          yybadtoken(yychar,
+                                                     "cases clause"); }
+             | CASES newl END newl      { $$ = nil;
+                                          yyerror("empty cases clause"); }
+             ;
+
 collect_clause : COLLECT newl clauses END newl  { $$ = list(collect, $3, nao); }
                | COLLECT newl clauses
                  UNTIL newl clauses END newl    { $$ = list(collect, $3,
@@ -181,6 +193,23 @@ elem : TEXT                     { $$ = string($1); }
                                   yybadtoken(yychar, "coll clause"); }
      ;
 
+define_clause : DEFINE exprs ')' newl
+                clauses
+                END newl        { $$ = list(define, $2, $5, nao); }
+              | DEFINE ')' newl
+                clauses
+                END newl        { $$ = list(define, nil, $4, nao); }
+              | DEFINE exprs ')' newl
+                END newl        { $$ = list(define, $2, nao); }
+              | DEFINE ')' newl
+                END newl        { $$ = list(define, nao); }
+              | DEFINE error    { yybadtoken(yychar, "list expression"); }
+              | DEFINE exprs ')' newl
+                error           { yybadtoken(yychar, "define"); }
+              | DEFINE ')' newl
+                error           { yybadtoken(yychar, "define"); }
+              ;
+
 output_clause : OUTPUT o_elems '\n'
                 out_clauses
                 END newl        { $$ = list(output, $4, $2, nao); }
@@ -209,8 +238,12 @@ out_clause : repeat_clause              { $$ = list(num(lineno - 1), $1, nao); }
                                           yyerror("match clause in output"); }
            | maybe_clause               { $$ = nil;
                                           yyerror("match clause in output"); }
+           | cases_clause               { $$ = nil;
+                                          yyerror("match clause in output"); }
            | collect_clause             { $$ = nil;
                                           yyerror("match clause in output"); }
+           | define_clause              { $$ = nil;
+                                          yyerror("match clause in output"); }
            | output_clause              { $$ = nil;
                                           yyerror("match clause in output"); }
            ;
@@ -324,6 +357,8 @@ expr : IDENT                    { $$ = intern(string($1)); }
      | NUMBER                   { $$ = num($1); }
      | list                     { $$ = $1; }
      | regex                    { $$ = cons(regex_compile($1), $1); }
+     | chrlit                   { $$ = $1; }
+     | strlit                   { $$ = $1; }
      ;
 
 regex : '/' regexpr '/'         { $$ = $2; }
@@ -384,6 +419,36 @@ newl : '\n'
                           yyerrok; }
      ;
 
+strlit : '"' '"'                { $$ = null_string; }
+       | '"' litchars '"'       {
+                                  if ($2) {
+                                    obj_t *len = length($2), *iter, *ix;
+                                    $$ = mkustring(len);
+                                    for (iter = $2, ix = zero;
+                                         iter;
+                                         iter = cdr(iter), ix = plus(ix, one))
+                                    {
+                                      chr_str_set($$, ix, car(iter));
+                                    }
+                                  } else {
+                                    $$ = nil;
+                                  }
+                                }
+       | '"' error              { yybadtoken(yychar, "string literal"); }
+       ;
+
+chrlit : '\'' '\''              { yyerror("empty character literal");
+                                  $$ = nil; }
+       | '\'' litchars '\''     { $$ = car($2);
+                                  if (cdr($2))
+                                    yyerror("multiple characters in "
+                                            "character literal"); }
+       | '\'' error              { yybadtoken(yychar, "character literal"); }
+       ;
+
+litchars : LITCHAR              { $$ = cons(chr($1), nil); }
+         | LITCHAR litchars     { $$ = cons(chr($1), $2); }
+         ;
 %%
 
 obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts)
@@ -415,6 +480,45 @@ obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts)
               last_parts, empty_parts, nao);
 }
 
+obj_t *define_transform(obj_t *define_form)
+{
+  obj_t *sym = first(define_form);
+  obj_t *args = second(define_form);
+
+  if (define_form == nil)
+    return nil;
+
+  assert (sym == define);
+
+  if (args == nil) {
+    yyerror("define requires arguments");
+    return define_form;
+  }
+
+  if (!consp(args) || !listp(cdr(args))) {
+    yyerror("bad define argument syntax");
+    return define_form;
+  } else {
+    obj_t *name = first(args);
+    obj_t *params = second(args);
+
+    if (!symbolp(name)) {
+      yyerror("function name must be a symbol");
+      return define_form;
+    }
+
+    if (!proper_listp(params)) {
+      yyerror("invalid function parameter list");
+      return define_form;
+    }
+
+    if (!all_satisfy(params, func_n1(symbolp), nil))
+      yyerror("function parameters must be symbols");
+  }
+
+  return define_form;
+}
+
 obj_t *get_spec(void)
 {
   return parsed_spec;
@@ -443,12 +547,19 @@ void dump_var(const char *name, char *pfx1, size_t len1,
   if (len1 >= 112 || len2 >= 112)
     abort();
 
-  if (stringp(value)) {
+  if (stringp(value) || chrp(value)) {
     fputs(name, stdout);
     fputs(pfx1, stdout);
     fputs(pfx2, stdout);
     putchar('=');
-    dump_shell_string(c_str(value));
+    if (stringp(value)) {
+      dump_shell_string(c_str(value));
+    } else {
+      char mini[2];
+      mini[0] = c_chr(value);
+      mini[1] = 0;
+      dump_shell_string(mini);
+    }
     putchar('\n');
   } else {
     obj_t *iter;
@@ -572,6 +683,13 @@ obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value)
   return bindings;
 }
 
+obj_t *eval_form(obj_t *form, obj_t *bindings)
+{
+  if (symbolp(form))
+    return assoc(bindings, form);
+  return cons(t, form);
+}
+
 obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
                   obj_t *pos, obj_t *spec_lineno, obj_t *data_lineno,
                   obj_t *file)
@@ -1192,7 +1310,7 @@ repeat_spec_same_data:
         long reps = 0;
 
         if (rest(specline))
-          yyerrorlf(1, spec_lineno, "material after skip directive ignored");
+          yyerrorlf(1, spec_lineno, "unexpected material after skip directive");
 
         if ((spec = rest(spec)) == nil)
           break;
@@ -1229,7 +1347,7 @@ repeat_spec_same_data:
         return nil;
       } else if (sym == trailer) {
         if (rest(specline))
-          yyerrorlf(1, spec_lineno, "material after trailer directive ignored");
+          yyerrorlf(1, spec_lineno, "unexpected material after trailer directive");
         if ((spec = rest(spec)) == nil)
           break;
 
@@ -1245,7 +1363,7 @@ repeat_spec_same_data:
       } else if (sym == block) {
         obj_t *name = first(rest(first_spec));
         if (rest(specline))
-          yyerrorlf(1, spec_lineno, "material after block directive ignored");
+          yyerrorlf(1, spec_lineno, "unexpected material after block directive");
         if ((spec = rest(spec)) == nil)
           break;
         uw_block_begin(name, result);
@@ -1256,7 +1374,7 @@ repeat_spec_same_data:
         obj_t *target = first(rest(first_spec));
 
         if (rest(specline))
-          yyerrorlf(1, spec_lineno, "material after %s ignored",
+          yyerrorlf(1, spec_lineno, "unexpected material after %s",
                     c_str(symbol_name(sym)));
 
         uw_block_return(target,
@@ -1302,7 +1420,9 @@ repeat_spec_same_data:
                         if3(data, cons(data, num(data_lineno)), t));
           return nil;
         }
-      } else if (sym == some || sym == all || sym == none || sym == maybe) {
+      } else if (sym == some || sym == all || sym == none || sym == maybe ||
+                 sym == cases)
+      {
         obj_t *specs;
         obj_t *all_match = t;
         obj_t *some_match = nil;
@@ -1331,6 +1451,8 @@ repeat_spec_same_data:
                 max_data = new_data;
               }
             }
+            if (sym == cases)
+              break;
           } else {
             all_match = nil;
           }
@@ -1341,8 +1463,8 @@ repeat_spec_same_data:
           return nil;
         }
 
-        if (sym == some && !some_match) {
-          yyerrorlf(2, spec_lineno, "some: no clauses matched");
+        if ((sym == some || sym == cases) && !some_match) {
+          yyerrorlf(2, spec_lineno, "some/cases: no clauses matched");
           return nil;
         }
 
@@ -1514,15 +1636,13 @@ repeat_spec_same_data:
       } else if (sym == bind) {
         obj_t *args = rest(first_spec);
         obj_t *pattern = first(args);
-        obj_t *var = second(args);
-        obj_t *lookup = assoc(bindings, var);
+        obj_t *form = second(args);
+        obj_t *val = eval_form(form, bindings);
 
-        if (!var || !symbolp(var))
-          yyerrorlf(1, spec_lineno, "bind: bad variable spec");
-        else if (!lookup)
-          yyerrorlf(1, spec_lineno, "bind: unbound source variable");
+        if (!val)
+          yyerrorlf(1, spec_lineno, "bind: unbound variable on right side");
 
-        bindings = dest_bind(bindings, pattern, cdr(lookup));
+        bindings = dest_bind(bindings, pattern, cdr(val));
 
         if (bindings == t)
           return nil;
@@ -1581,6 +1701,107 @@ repeat_spec_same_data:
           break;
 
         goto repeat_spec_same_data;
+      } else if (sym == define) {
+        obj_t *args = second(first_spec);
+        obj_t *body = third(first_spec);
+        obj_t *name = first(args);
+        obj_t *params = second(args);
+
+        if (rest(specline))
+          yyerrorlf(1, spec_lineno, "unexpected material after define");
+
+        uw_set_func(name, cons(params, body));
+
+        if ((spec = rest(spec)) == nil)
+          break;
+
+        goto repeat_spec_same_data;
+      } else {
+        obj_t *func = uw_get_func(sym);
+
+        if (func) {
+          obj_t *args = rest(first_spec);
+          obj_t *params = car(func);
+          obj_t *body = cdr(func);
+          obj_t *piter, *aiter;
+          obj_t *bindings_cp = copy_alist(bindings);
+
+          if (!equal(length(args), length(params))) {
+            yyerrorlf(1, spec_lineno, "function %s takes %ld argument(s)",
+                      c_str(sym), c_num(length(params)));
+            return nil;
+          }
+
+          for (piter = params, aiter = args; piter;
+               piter = cdr(piter), aiter = cdr(aiter))
+          {
+            obj_t *param = car(piter);
+            obj_t *arg = car(aiter);
+
+            if (symbolp(arg)) {
+              obj_t *existing = assoc(bindings, arg);
+              if (existing) {
+                bindings_cp = acons_new(bindings_cp,
+                                        param,
+                                        cdr(existing));
+              } else {
+                bindings_cp = alist_remove(bindings_cp, cons(param, nil));
+              }
+            } else {
+              bindings_cp = acons_new(bindings_cp, param, arg);
+            }
+          }
+
+          {
+            uw_block_begin(nil, result);
+            uw_env_begin;
+            result = match_files(body, files, bindings_cp,
+                                 data, num(data_lineno));
+            uw_env_end;
+            uw_block_end;
+
+            if (!result) {
+              yyerrorlf(2, spec_lineno, "function failed");
+              return nil;
+            }
+
+            {
+              cons_bind (new_bindings, success, result);
+
+              for (piter = params, aiter = args; piter;
+                   piter = cdr(piter), aiter = cdr(aiter))
+              {
+                obj_t *param = car(piter);
+                obj_t *arg = car(aiter);
+
+                if (symbolp(arg)) {
+                  obj_t *newbind = assoc(new_bindings, param);
+                  if (newbind) {
+                    bindings = dest_bind(bindings, arg, cdr(newbind));
+                    if (bindings == t)
+                      return nil;
+                  }
+                }
+              }
+
+              if (consp(success)) {
+                yyerrorlf(2, spec_lineno,
+                          "function matched; advancing from line %ld to %ld",
+                          data_lineno, c_num(cdr(success)));
+                data = car(success);
+                data_lineno = c_num(cdr(success));
+              } else {
+                yyerrorlf(2, spec_lineno, "function consumed entire file");
+                data = nil;
+              }
+            }
+          }
+
+          if ((spec = rest(spec)) == nil)
+            break;
+
+          goto repeat_spec_same_data;
+        }
       }
     }
 
diff --git a/lib.c b/lib.c
index 2a3ee7f7..bce4d088 100644
--- a/lib.c
+++ b/lib.c
@@ -32,8 +32,10 @@
 #include <limits.h>
 #include <stdarg.h>
 #include <dirent.h>
+#include <setjmp.h>
 #include "lib.h"
 #include "gc.h"
+#include "unwind.h"
 
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define min(a, b) ((a) < (b) ? (a) : (b))
@@ -44,8 +46,8 @@ obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t;
 obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus;
 obj_t *zeroplus, *optional, *compound, *or;
 obj_t *skip, *trailer, *block, *next, *fail, *accept;
-obj_t *all, *some, *none, *maybe, *collect, *until, *coll;
-obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep;
+obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll;
+obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep;
 obj_t *flattn, *forget, *mrge, *bind, *cat, *dir;
 
 obj_t *zero, *one, *two, *negone, *maxint, *minint;
@@ -506,6 +508,14 @@ obj_t *listp(obj_t *obj)
          ? t : nil;
 }
 
+obj_t *proper_listp(obj_t *obj)
+{
+  while (consp(obj))
+    obj = cdr(obj);
+
+  return (obj == nil) ? t : nil;
+}
+
 obj_t *length(obj_t *list)
 {
   long len = 0;
@@ -616,9 +626,25 @@ obj_t *string(char *str)
 obj_t *mkstring(obj_t *len, obj_t *ch)
 {
   char *str = chk_malloc(c_num(len) + 1);
+  obj_t *s = string(str);
   memset(str, c_chr(ch), c_num(len));
   str[c_num(len)] = 0;
-  return string(str);
+  s->st.len = len;
+  return s;
+}
+
+obj_t *mkustring(obj_t *len)
+{
+  char *str = chk_malloc(c_num(len) + 1);
+  obj_t *s = string(str);
+  s->st.len = len;
+  return s;
+}
+
+obj_t *init_str(obj_t *str, const char *data)
+{
+  memcpy(str->st.str, data, c_num(str->st.len));
+  return str;
 }
 
 obj_t *copy_str(obj_t *str)
@@ -639,10 +665,18 @@ obj_t *length_str(obj_t *str)
   return str->st.len;
 }
 
-const char *c_str(obj_t *str)
+const char *c_str(obj_t *obj)
 {
-  assert (str && str->t.type == STR);
-  return str->st.str;
+  assert (obj);
+
+  switch (obj->t.type) {
+  case STR:
+    return obj->st.str;
+  case SYM:
+    return c_str(symbol_name(obj));
+  default:
+    abort();
+  }
 }
 
 obj_t *search_str(obj_t *haystack, obj_t *needle, obj_t *start_num,
@@ -721,11 +755,19 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
     obj_t *item = car(iter);
     if (!item)
       continue;
-    if (!stringp(item))
-      return nil;
-    total += c_num(length_str(item));
-    if (len_sep && cdr(iter))
-      total += len_sep;
+    if (stringp(item)) {
+      total += c_num(length_str(item));
+      if (len_sep && cdr(iter))
+        total += len_sep;
+      continue;
+    }
+    if (chrp(item)) {
+      total += 1;
+      if (len_sep && cdr(iter))
+        total += len_sep;
+      continue;
+    }
+    return nil;
   }
 
   str = chk_malloc(total + 1);
@@ -735,9 +777,14 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
     long len;
     if (!item)
       continue;
-    len = c_num(length_str(item));
-    memcpy(ptr, c_str(item), len);
-    ptr += len;
+    if (stringp(item)) {
+      len = c_num(length_str(item));
+      memcpy(ptr, c_str(item), len);
+      ptr += len;
+    } else {
+      *ptr++ = c_chr(item);
+    }
+
     if (len_sep && cdr(iter)) {
       memcpy(ptr, c_str(sep), len_sep);
       ptr += len_sep;
@@ -784,12 +831,41 @@ obj_t *chr(int ch)
   return obj;
 }
 
+obj_t *chrp(obj_t *chr)
+{
+  return (chr && chr->st.type == CHR) ? t : nil;
+}
+
 int c_chr(obj_t *chr)
 {
   assert (chr && chr->t.type == CHR);
   return chr->ch.ch;
 }
 
+obj_t *chr_str(obj_t *str, obj_t *index)
+{
+  long l = c_num(length_str(str));
+  long i = c_num(index);
+  const char *s = c_str(str);
+
+  assert (i < l);
+
+  return chr(s[i]);
+}
+
+obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr)
+{
+  long l = c_num(length_str(str));
+  long i = c_num(index);
+  char *s = str->st.str;
+
+  assert (i < l);
+
+  s[i] = c_chr(chr);
+
+  return chr;
+}
+
 obj_t *sym_name(obj_t *sym)
 {
   assert (sym && sym->t.type == SYM);
@@ -1372,6 +1448,30 @@ obj_t *alist_remove(obj_t *list, obj_t *keys)
   return list;
 }
 
+obj_t *alist_remove1(obj_t *list, obj_t *key)
+{
+  obj_t **plist = &list;
+
+  while (*plist) {
+    if (eq(car(car(*plist)), key))
+      *plist = cdr(*plist);
+    else
+      plist = cdr_l(*plist);
+  }
+
+  return list;
+}
+
+obj_t *copy_cons(obj_t *c)
+{
+  return cons(car(c), cdr(c));
+}
+
+obj_t *copy_alist(obj_t *list)
+{
+  return mapcar(func_n1(copy_cons), list);
+}
+
 obj_t *mapcar(obj_t *fun, obj_t *list)
 {
   list_collect_decl (out, iter);
@@ -1512,9 +1612,11 @@ static void obj_init(void)
   some = intern(string(strdup("some")));
   none = intern(string(strdup("none")));
   maybe = intern(string(strdup("maybe")));
+  cases = intern(string(strdup("cases")));
   collect = intern(string(strdup("collect")));
   until = intern(string(strdup("until")));
   coll = intern(string(strdup("coll")));
+  define = intern(string(strdup("define")));
   output = intern(string(strdup("output")));
   single = intern(string(strdup("single")));
   frst = intern(string(strdup("first")));
@@ -1591,10 +1693,10 @@ void obj_print(obj_t *obj, FILE *out)
         case '\\': fputs("\\\\", out); break;
         case 27: fputs("\\e", out); break;
         default:
-          if (iscntrl(*ptr))
-            fprintf(out, "\\%03o", (int) *ptr);
-          else
+          if (isprint(*ptr))
             putc(*ptr, out);
+          else
+            fprintf(out, "\\%03o", (int) *ptr);
         }
       }
       putc('"', out);
@@ -1617,10 +1719,10 @@ void obj_print(obj_t *obj, FILE *out)
       case '\\': fputs("\\\\", out); break;
       case 27: fputs("\\e", out); break;
       default:
-        if (iscntrl(ch))
-          fprintf(out, "\\%03o", ch);
-        else
+        if (isprint(ch))
           putc(ch, out);
+        else
+          fprintf(out, "\\%03o", ch);
       }
       putc('\'', out);
     }
@@ -1683,6 +1785,7 @@ void init(const char *pn, void *(*oom)(void *, size_t),
           ? max(maybe_bottom_0, maybe_bottom_1)
           : min(maybe_bottom_0, maybe_bottom_1));
 
+  uw_init();
   obj_init();
 }
 
@@ -1721,7 +1824,7 @@ char *snarf_line(FILE *in)
       size = newsize;
     }
 
-    if (ch == '\n') {
+    if (ch == '\n' || ch == EOF) {
       buf[fill++] = 0;
       break;
     }
diff --git a/lib.h b/lib.h
index 36081ad8..20cb2a77 100644
--- a/lib.h
+++ b/lib.h
@@ -159,8 +159,8 @@ extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t;
 extern obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus;
 extern obj_t *zeroplus, *optional, *compound, *or;
 extern obj_t *skip, *trailer, *block, *next, *fail, *accept;
-extern obj_t *all, *some, *none, *maybe, *collect, *until, *coll;
-extern obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep;
+extern obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll;
+extern obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep;
 extern obj_t *flattn, *forget, *mrge, *bind, *cat, *dir;
 
 extern obj_t *zero, *one, *two, *negone, *maxint, *minint;
@@ -210,6 +210,7 @@ obj_t *consp(obj_t *obj);
 obj_t *nullp(obj_t *obj);
 obj_t *atom(obj_t *obj);
 obj_t *listp(obj_t *obj);
+obj_t *proper_listp(obj_t *obj);
 obj_t *length(obj_t *list);
 obj_t *num(long val);
 long c_num(obj_t *num);
@@ -226,6 +227,8 @@ obj_t *max2(obj_t *anum, obj_t *bnum);
 obj_t *min2(obj_t *anum, obj_t *bnum);
 obj_t *string(char *str);
 obj_t *mkstring(obj_t *len, obj_t *ch);
+obj_t *mkustring(obj_t *len); /* must initialize immediately with init_str! */
+obj_t *init_str(obj_t *str, const char *);
 obj_t *copy_str(obj_t *str);
 obj_t *stringp(obj_t *str);
 obj_t *length_str(obj_t *str);
@@ -239,7 +242,10 @@ obj_t *cat_str(obj_t *list, obj_t *sep);
 obj_t *trim_str(obj_t *str);
 obj_t *string_lt(obj_t *astr, obj_t *bstr);
 obj_t *chr(int ch);
+obj_t *chrp(obj_t *str);
 int c_chr(obj_t *chr);
+obj_t *chr_str(obj_t *str, obj_t *index);
+obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr);
 obj_t *sym_name(obj_t *sym);
 obj_t *make_sym(obj_t *name);
 obj_t *intern(obj_t *str);
@@ -282,6 +288,9 @@ void cobj_print_op(obj_t *, FILE *); /* Print function for struct cobj_ops */
 obj_t *assoc(obj_t *list, obj_t *key);
 obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value);
 obj_t *alist_remove(obj_t *list, obj_t *keys);
+obj_t *alist_remove1(obj_t *list, obj_t *key);
+obj_t *copy_cons(obj_t *cons);
+obj_t *copy_alist(obj_t *list);
 obj_t *mapcar(obj_t *fun, obj_t *list);
 obj_t *mappend(obj_t *fun, obj_t *list);
 obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun);
diff --git a/txr.1 b/txr.1
index 191def67..e1a67248 100644
--- a/txr.1
+++ b/txr.1
@@ -21,7 +21,7 @@
 .\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 .\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 
-.TH txr 1 2009-09-09 "txr v. 013" "Text Extraction Utility"
+.TH txr 1 2009-09-09 "txr v. 014" "Text Extraction Utility"
 .SH NAME
 txr \- text extractor
 .SH SYNOPSIS
@@ -601,8 +601,9 @@ The general syntax of a directive is:
   @EXPR
 
 where expr is a parenthesized list of subexpressions. A subexpression
-is an symbol, number, regular expression, or a parenthesized expression.
-So, examples of valid directives are:
+is an symbol, number, string literal, character literal, regular expression, or
+a parenthesized expression.  So, examples of syntactically valid directives
+are:
 
   @(banana)
 
@@ -610,11 +611,18 @@ So, examples of valid directives are:
 
   @(  a (b (c d) (e  ) ))
 
+  @("apple" 'b' 3)
+
   @(a /[a-z]*/ b)
 
 A symbol is lexically the same thing as a variable and the same rules
 apply. Tokens that look like numbers are treated as numbers.
 
+String and character literals are delimited by double and single quotes,
+respectively, and may not span multiple lines. Character literals must contain
+exactly one character. Character and numeric escapes may be used within
+literals to escape the quotes, and to denote control characters.
+
 Some directives are involved in structuring the overall syntax of the query.
 
 There are syntactic constraints that depend on the directive.  For instance the
@@ -630,7 +638,7 @@ Continue matching in another file.
 .IP @(block)
 The remaining query is treated as an anonymous or named block.
 Blocks may be referenced by @(accept) and @(fail) directives.
-Blocks are discussed in the section Blocks below.
+Blocks are discussed in the section BLOCKS below.
 
 .IP @(skip)
 Treat the remaining query as a subquery unit, and search the lines of
@@ -651,7 +659,15 @@ Match some clauses in parallel. Each one must match.
 Match some clauses in parallel. None must match.
 
 .IP @(maybe)
-Match some clauses in parallel. None must match.
+Match some clauses in parallel, which may or may not match.
+No failure occurs if none match.
+
+.IP @(cases)
+Match some clauses sequentially, stopping if one of them
+matches successfully.
+
+.IP @(define NAME ( ARGUMENTS ...))
+Introduces a function. Functions are discussed in the FUNCTIONS section below.
 
 .IP @(collect)
 Search the data for multiple matches of a clause. Collect the
@@ -671,17 +687,17 @@ Separator of clauses for @(some), @(all), and @(none).
 Equivalent to @(and). Choice is stylistic.
 
 .IP @(end)
-Required terminator for @(some), @(all), @(none), @(maybe), @(collect),
-@(output), and @(repeat).
+Required terminator for @(some), @(all), @(none), @(maybe), @(cases),
+@(collect), @(output), and @(repeat).
 
 .IP @(fail)
 Terminate the processing of a block, as if it were a failed match.
-Blocks are discussed in the section Blocks below.
+Blocks are discussed in the section BLOCKS below.
 
 .IP @(accept)
 Terminate the processing of a block, as if it were a successful match.
 What bindings emerge may depend on the kind of block: collect
-has special semantics.  Blocks are discussed in the section Blocks below.
+has special semantics.  Blocks are discussed in the section BLOCKS below.
 
 .IP @(flatten)
 Normalizes a set of specified variables to one-dimensional lists. Those
@@ -890,7 +906,7 @@ line "a dark". The @(some) clause combines the text line "it",
 and a @(none) clause which contains just one clause consisting of
 the line "was".
 
-The semantics of the some, all, none and maybe directives is:
+The semantics of the some, all, none, maybe and cases directives is:
 
 .IP @(all)
 Each of the clauses is matched at the current position. If any of the
@@ -914,12 +930,20 @@ The directive succeeds even if all of the clauses fail.
 Whatever bindings are found in any of the clauses are
 retained.
 
-When a @(some) or @(all) directive matches successfully, or a @(maybe)
-directive matches something, the query advances by the greatest number of lines
-matched in any of the subclauses. For instance if there are two subclauses, and
-one of them matches three lines, but the other one matches five lines, then the
-overall clause is considered to have made a five line match at its position. If
-more directives follow, they begin matching five lines down from that position.
+.IP @(cases)
+The clauses are matched, in order, at the current position.
+If any clause matches, the matching stops and the bindings
+collected from that clause are retained. Any remaining clauses
+after that one are not processed. If no clause matches, the
+directive fails, and produces no bindings.
+
+When a @(some), @(all), or @(cases) directive matches successfully, or a
+@(maybe) directive matches in at least one of its clauses, the query advances
+by the greatest number of lines matched in any of the subclauses. For instance
+if there are two subclauses, and one of them matches three lines, but the other
+one matches five lines, then the overall clause is considered to have made a
+five line match at its position. If more directives follow, they begin matching
+five lines down from that position.
 
 .SS The Collect Directive
 
@@ -1253,6 +1277,15 @@ to match B, or the bind fails. Matching means that either
 - A and B are lists and are either identical, or one is
   found as substructure within the other.
 
+The right hand side does not have to be a variable. It may be some other
+object, like a string, or list of strings, et cetera. For instance
+
+  @(bind A "ab\tc")
+
+will bind the string "ab\tc" (the letter a, b, a tab character, and c)
+to the variable A if A is unbound. If A is bound, this will fail unless
+A already contains an identical string.
+
 The left hand side of a bind can be a nested list pattern containing variables.
 The last item of a list at any nesting level can be preceded by a dot, which
 means that the variable matches the rest of the list from that position.
@@ -1280,7 +1313,8 @@ The @(block NAME) directive introduces a named block, except when the name is
 the word nil.  The @(block) directive introduces an unnamed block, equivalent
 to @(block nil).
 
-The @(skip) and @(collect) directives introduce implicit anonymous blocks.
+The @(skip) and @(collect) directives introduce implicit anonymous blocks,
+as do function bodies.
 
 .SS Block Scope
 
@@ -1384,12 +1418,12 @@ that block until this point emerge from that block.
 .IP @(accept)
 
 Immediately terminate the innermost enclosing anonymous block, as if
-that block successfully mached. Any bindings established within
+that block successfully matched. Any bindings established within
 that block until this point emerge from that block.
 
 If the implicit block introduced by @(skip) is terminated in this manner,
 this has the effect of causing the skip itself to succeed, as if
-all of the trailing material succesfully matched.
+all of the trailing material successfully matched.
 
 If the implicit block associated with a @(collect)  is terminated this way,
 then the collection stops. All bindings collected in the current iteration of
@@ -1544,6 +1578,253 @@ The second clause grabs four lines, which is the longest match.
 And so, the next line of input available for matching is 5, which goes
 to the @second variable.
 
+.SH FUNCTIONS
+
+.SS Introduction
+
+.B txr
+functions allow a query to be structured to avoid repetition.
+On a theoretical note, because
+.B txr
+functions support recursion, functions enable txr to match some
+kinds of patterns which exhibit self-embedding, or nesting,
+and thus cannot be matched by a regular language.
+
+Functions in
+.B txr
+are not exactly like functions in mathematics or functional languages, and are
+not like procedures in imperative programming languages. They are not exactly
+like macros either. What it means for a
+.B txr
+function to take arguments and produce a result is different from
+the conventional notion of a function.
+
+A
+.B txr
+function may have one or more parameters. When such a function is invoked, an
+argument must be specified for each parameter.  However, a special behavior is
+at play here. Namely, some or all of the argument expressions may be unbound
+variables.  In that case, the corresponding parameters behave like unbound
+variables also.  Thus
+.B txr
+function calls can transmit the "unbound" state from argument to parameter.
+
+It should be mentioned that functions have access to all bindings that are
+visible in the caller; functions may refer to variables which are not
+mentioned in their parameter list.
+
+With regard to returning,
+.B txr
+functions are also unconventional. If the function fails, then the function
+call is considered to have failed. The function call behaves like a kind of
+match; if the function fails, then the call is like a failed match.
+
+When a function call succeeds, then the bindings emanating from that function
+are processed specially. Firstly, any bindings for variables which do not
+correspond to one of the function's parameters are thrown away. Functions may
+internally bind arbitrary variables in order to get their job done, but only
+those variables which are named in the function argument list may propagate out
+of the function call.  Thus, a function with no arguments can only indicate
+matching success or failure, but not produce any bindings. Secondly,
+variables do not propagate out of the function directly, but undergo
+a renaming. For each parameter which went into the function as an unbound
+variable (because its corresponding argument was an unbound variable),
+if that parameter now has a value, that value is bound onto the corresponding
+argument.
+
+Example:
+
+  @(define collect_words (list))
+  @(coll)@{list /[^ \t]/}@(end)
+  @(end)
+
+The above function "collect_words" contains a query which collects words from a
+line (sequences of characters other than space or tab), into the list variable
+called "list".  This variable is named in the parameter list of the function,
+therefore, its value, if it has one, is permitted to escape from the function
+call.
+
+Suppose the input data is:
+
+  Fine summer day
+
+and the function is called like this:
+
+  @(collect_words wordlist)
+
+The result is:
+
+  wordlist[0]=Fine
+  wordlist[1]=summer
+  wordlist[1]=day
+
+How it works is that in the function call @(collect_words wordlist),
+"wordlist" is an unbound variable. The parameter corresponding to that
+unbound variable is the parameter "list". Therefore, that parameter
+is unbound over the body of the function.  The function body collects the
+words of "Fine summer day" into the variable "list", and then
+yields the that binding.   Then the function call completes by
+noticing that the function parameter "list" now has a binding, and
+that the corresponding argument "wordlist" has no binding. The binding
+is thus transferred to the "wordlist" variable.  After that, the
+bindings produced by the function are thrown away. The only enduring
+effects are:
+
+.IP -
+the function matched and consumed some input; and
+
+.IP -
+the function succeeded; and
+
+.IP -
+the wordlist variable now has a binding.
+.PP
+
+Another way to understand the parameter behavior is that function
+parameters behave like proxies which represent their arguments.  If an argument
+is an established value, such as a character string or bound variable, the
+parameter is a proxy for that value and behaves just like that value. If an
+argument is an unbound variable, the function parameter acts as a proxy
+representing that unbound variable. The effect of binding the proxy is
+that the variable becomes bound, an effect which is settled when the
+function goes out of scope.
+
+Within the function, both the original variable and the proxy are
+visible simultaneously, and are independent.  What if a function binds both of
+them? Suppose a function has a parameter called P, which is called
+with an argument A, and then in the function @A and @P are bound.  This is
+permitted, and they can even be bound to different values.  However, when the
+function terminates, the local binding of A simply disappears (because,
+remember, the symbol A is not a member of the list of parameters).
+Only the value bound to P emerges, and is bound to A, which still appears
+unbound at that point.
+
+.SS Definition Syntax
+
+A function definition begins with a @(define ...) directive which must be the
+only element in its line. The define must be followed by a symbol, which is the
+name of the function being defined. After the symbol, there is a parenthesized
+optional argument list. If there is no such list, or if the list is specified
+as () or the symbol "nil" then the function has no parameters. Examples of
+valid define syntax are:
+
+  @(define foo)
+  @(define bar ())
+  @(define match (a b c))
+
+The define directive may be followed directly by the @(end) directive,
+also on a line by itself, in which case the function has an empty body.
+Or it may be followed by one or more query lines and then @(end).
+What is between a @(define ...) and its matching @(end) constitutes the
+function body.
+
+Functions may be nested within function bodies. Such local functions have
+dynamic scope. They are visible in the function body in which they are defined,
+and in any functions invoked from that body.
+
+The body of a function is an anonymous block. (See BLOCKS above).
+
+The following trivial function b produces no bindings and has a body which
+simply matches the line "begin".
+
+ @(define b)
+ begin
+ @(end)
+
+Thus the call:
+
+ @(b)
+
+matches an input line "begin".
+
+.SS Call Syntax
+
+A function is invoked by compound directive whose first symbol is the name of
+that function. Additional elements in the directive are the arguments.
+Arguments may be symbols, or other objects like string and character
+literals.
+
+Example:
+
+ Query:         @(define pair (a b))
+                @a @b
+                @(end)
+                @(pair first second)
+                @(pair "ice" cream)
+
+ Data:          one two
+                ice milk
+
+ Output:        first="one"
+                second="two"
+                cream="milk"
+
+The first call to the function takes the line "one two". The parameter "a"
+takes "one" and parameter b takes "two". These are rebound to the arguments
+first and second. The second call to the function binds the a parameter
+to the word "ice", and the b is unbound, because the
+corresponding argument "cream" is unbound. Thus inside the function, @a
+is forced to match "ice". Then a space is matched and @b collects the text
+"milk". When the function returns, the unbound "cream" variable gets this value.
+
+If a symbol occurs multiple times in the argument list, it constrains
+both parameters to bind to the same value. That is to say, all parameters
+which, in the body of the function, bind a value, and which are all derived
+from the same argument symbol must bind to the same value. This is settled when
+the function terminates, not while it is matching. Example:
+
+ Query:         @(define pair (a b))
+                @a @b
+                @(end)
+                @(pair same same)
+
+ Data:          one two
+
+ Output:        [query fails, prints "false"]
+
+.SS Nested Functions
+
+Function definitions may appear in a function. Such definitions
+are visible in all functions which are invoked from the body
+(and not necessarily enclosed in the body). In other words, the
+scope is dynamic, not lexical.  Inner definitions shadow outer
+definitions. This means that a caller can redirect the function
+calls that take place in a callee, by defining local functions
+which capture the references.
+
+Example:
+
+  Query:        @(define which)
+                @  (fun)
+                @(end)
+                @(define fun)
+                @  (output)
+                toplevel fun!
+                @  (end)
+                @(end)
+                @(define callee)
+                @  (define fun)
+                @    (output)
+                local fun!
+                @    (end)
+                @  (end)
+                @  (which)
+                @(end)
+                @(callee)
+                @(which)
+
+   Output:      local fun!
+                toplevel fun!
+
+Here, the function "which" is defined which calls "fun".
+A toplevel definition of "fun" is introduced which
+outputs "toplevel fun!". Then, within the func
+The function "callee" provides its own local definition
+of "fun" which outputs "local fun!" before calling "which".
+When callee is invoked, it calls @(which), whose @(fun) call is routed to
+callee's local definition.  When @(which) is called directly from the top
+level, its @(fun) call goes to the toplevel definition.
+
 .SH OUTPUT
 
 A
diff --git a/unwind.c b/unwind.c
index c573c16d..eb1490d4 100644
--- a/unwind.c
+++ b/unwind.c
@@ -30,10 +30,12 @@
 #include <setjmp.h>
 #include <dirent.h>
 #include "lib.h"
+#include "gc.h"
 #include "unwind.h"
 
 static uw_frame_t *uw_stack;
 static uw_frame_t *uw_exit_point;
+static uw_frame_t toplevel_env;
 
 static void uw_unwind_to_exit_point()
 {
@@ -49,9 +51,16 @@ static void uw_unwind_to_exit_point()
   case UW_BLOCK:
     longjmp(uw_stack->bl.jb, 1);
     break;
+  case UW_ENV: /* env frame cannot be exit point */
+    abort();
+  default:
+    abort();
   }
+}
 
-  abort();
+void uw_init(void)
+{
+  protect(&toplevel_env.ev.func_bindings, 0);
 }
 
 void uw_push_block(uw_frame_t *fr, obj_t *tag)
@@ -63,6 +72,46 @@ void uw_push_block(uw_frame_t *fr, obj_t *tag)
   uw_stack = fr;
 }
 
+static uw_frame_t *uw_find_env(void)
+{
+  uw_frame_t *fr;
+
+  for (fr = uw_stack; fr != 0; fr = fr->uw.up) {
+    if (fr->uw.type == UW_ENV)
+      break;
+  }
+
+  return fr ? fr : &toplevel_env;
+}
+
+void uw_push_env(uw_frame_t *fr)
+{
+  uw_frame_t *prev_env = uw_find_env();
+  fr->ev.type = UW_ENV;
+
+  if (prev_env) {
+    fr->ev.func_bindings = copy_alist(prev_env->ev.func_bindings);
+  } else {
+    fr->ev.func_bindings = nil;
+  }
+
+  fr->ev.up = uw_stack;
+  uw_stack = fr;
+}
+
+obj_t *uw_get_func(obj_t *sym)
+{
+  uw_frame_t *env = uw_find_env();
+  return cdr(assoc(env->ev.func_bindings, sym));
+}
+
+obj_t *uw_set_func(obj_t *sym, obj_t *value)
+{
+  uw_frame_t *env = uw_find_env();
+  env->ev.func_bindings = acons_new(env->ev.func_bindings, sym, value);
+  return value;
+}
+
 void uw_pop_frame(uw_frame_t *fr)
 {
   assert (fr == uw_stack);
diff --git a/unwind.h b/unwind.h
index 8863fdff..1cd0792a 100644
--- a/unwind.h
+++ b/unwind.h
@@ -27,7 +27,7 @@
 typedef union uw_frame uw_frame_t;
 typedef enum uw_frtype uw_frtype_t;
 
-enum uw_frtype { UW_BLOCK };
+enum uw_frtype { UW_BLOCK, UW_ENV };
 
 struct uw_common {
   uw_frame_t *up;
@@ -42,25 +42,46 @@ struct uw_block {
   jmp_buf jb;
 };
 
+struct uw_dynamic_env {
+  uw_frame_t *up;
+  uw_frtype_t type;
+  obj_t *func_bindings;
+};
+
 union uw_frame {
   struct uw_common uw;
   struct uw_block bl;
+  struct uw_dynamic_env ev;
 };
 
+void uw_init(void);
 void uw_push_block(uw_frame_t *, obj_t *tag);
+void uw_push_env(uw_frame_t *);
+obj_t *uw_get_func(obj_t *sym);
+obj_t *uw_set_func(obj_t *sym, obj_t *value);
 obj_t *uw_block_return(obj_t *tag, obj_t *result);
 void uw_pop_frame(uw_frame_t *);
 
+
 #define uw_block_begin(TAG, RESULTVAR)  \
   obj_t *RESULTVAR = nil;               \
   {                                     \
-    uw_frame_t uw_fr;                   \
-    uw_push_block(&uw_fr, TAG);         \
-    if (setjmp(uw_fr.bl.jb)) {          \
-      RESULTVAR = uw_fr.bl.result;      \
+    uw_frame_t uw_blk;                  \
+    uw_push_block(&uw_blk, TAG);        \
+    if (setjmp(uw_blk.bl.jb)) {         \
+      RESULTVAR = uw_blk.bl.result;     \
     } else  {
 
 #define uw_block_end                    \
     }                                   \
-    uw_pop_frame(&uw_fr);               \
+    uw_pop_frame(&uw_blk);              \
+  }
+
+#define uw_env_begin                    \
+  {                                     \
+    uw_frame_t uw_env;                  \
+    uw_push_env(&uw_env)
+
+#define uw_env_end                      \
+    uw_pop_frame(&uw_env);              \
   }