summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog100
-rw-r--r--extract.l220
-rw-r--r--extract.y279
-rw-r--r--lib.c145
-rw-r--r--lib.h13
-rw-r--r--txr.1319
-rw-r--r--unwind.c51
-rw-r--r--unwind.h33
8 files changed, 1011 insertions, 149 deletions
diff --git a/ChangeLog b/ChangeLog
index 5a8a5776..2461f007 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,103 @@
+2009-10-03 Kaz Kylheku <kkylheku@gmail.com>
+
+ Version 014
+
+ New cases directive.
+
+ New define directive: user-defined dynamically scoped functions.
+
+ String literals in bind and function calls.
+
+ EOF in the middle of a line handled properly.
+
+ * extract.l (version): Bump to 014.
+ * txr.1: Bump version to 014.
+
+
+2009-10-02 Kaz Kylheku <kkylheku@gmail.com>
+
+ New cases directive.
+
+ * extract.l (yybadtoken): Add case for CASES.
+ (grammar): Tokenize cases directive.
+ * extract.y (CASES): New token kind.
+ (cases_clause): New grammar symbol.
+ (grammar): Implement new grammar cases.
+ (match_files): Implement semantics for cases.
+ * lib.c (cases): New global.
+ (obj_init): Intern cases symbol.
+ * lib.h (cases): Declared.
+ * txr.1: Documented.
+
+2009-10-02 Kaz Kylheku <kkylheku@gmail.com>
+
+ Support for string and character literals.
+
+ * extract.l (char_esc): Support \' and \" escapes.
+ (STRLIT, CHRLIT): New flex start conditions.
+ (grammar): New rules for tokenizing string literals.
+ * extract.y (LITCHAR): New token kind.
+ (strlit, chrlit, litchars): New grammar symbols.
+ (grammar): Implement string literal parsing.
+ (dump_var): Support character objects, treating
+ them as one-character strings.
+ (eval_form): New function.
+ (match_files): In bind directive, allow the right
+ hand side to be an arbitrary object.
+ * lib.c (mkustring, init_str): New functions.
+ (cat_str): Allow characters in the mix, treating
+ them as one-character strings.
+ * lib.h (mkustring, init_str): Declared.
+ (chrp, chr_str, chr_str_set): New function.
+ * txr.1: Documented.
+
+2009-10-02 Kaz Kylheku <kkylheku@gmail.com>
+
+ Support for query-defined functions.
+
+ * extract.l (yybadtoken): New DEFINE case.
+ (NESTED): New flex start condition. This allows for different lexing
+ rules in nested lists, so even though for instance @(collect) is a
+ special token @((collect)) isn't.
+ (grammar): Refactored with NESTED.
+ Tokenize define directive.
+ * extract.y (define_transform): New function.
+ (DEFINE): New token kind.
+ (define_clause): New grammar symbol.
+ (match_files): Implement define semantics, and function calls.
+ * lib.c (define): New global.
+ * lib.h (define): Declared.
+ (proper_listp, alist_remove1, copy_cons, copy_alist): New functions.
+ (obj_init): Intern define symbol.
+ (init): Call new function uw_init.
+ * unwind.c (toplevel_env): New static structure.
+ (uw_unwind_to_exit_point): Support new UW_ENV frame type.
+ (uw_init, uw_find_env, uw_push_env, uw_get_func, uw_set_func): New
+ functions.
+ * unwind.h (UW_ENV): New enumeration member in uw_frtype.
+ (uw_dynamic_env): New struct.
+ (uw_block_begin, uw_block_end): Renamed some variables.
+ (uw_env_begin, uw_env_end): New macros.
+ * txr.1: Documented.
+
+2009-10-02 Kaz Kylheku <kkylheku@gmail.com>
+
+ Misc. bugfixes and improvements.
+
+ * extract.l (grammar): Newline in a directive
+ no longer an error. Why not allow it.
+ * extract.y (grammar): Productions for catching empty bodies in some
+ constructs now end with END newl, rather
+ than just END, so parsing can continue sanely.
+ (match_lines): In diagnostics, don't say "ignored" about material which
+ causes an error that fails the query!
+ * lib.c (mkstring): Initialize length since we know it!
+ (c_str): Take a symbol as an arg, so we don't have
+ to keep writing c_str(symbol_name(sym)).
+ (obj_print): Use isprint rather than isctrl to decide
+ whether to print a character as an escape.
+ (snarf_line): Properly handle EOF in the middle of line.
+
2009-09-29 Kaz Kylheku <kkylheku@gmail.com>
Version 013
diff --git a/extract.l b/extract.l
index 81dc91d9..ab041bb9 100644
--- a/extract.l
+++ b/extract.l
@@ -40,7 +40,7 @@
#define YY_NO_UNPUT
-const char *version = "013";
+const char *version = "014";
const char *progname = "txr";
const char *spec_file = "stdin";
long lineno = 1;
@@ -111,6 +111,7 @@ void yybadtoken(int tok, const char *context)
case SOME: problem = "\"some\""; break;
case NONE: problem = "\"none\""; break;
case MAYBE: problem = "\"maybe\""; break;
+ case CASES: problem = "\"cases\""; break;
case AND: problem = "\"and\""; break;
case OR: problem = "\"or\""; break;
case END: problem = "\"end\""; break;
@@ -124,6 +125,7 @@ void yybadtoken(int tok, const char *context)
case FIRST: problem = "\"first\""; break;
case LAST: problem = "\"last\""; break;
case EMPTY: problem = "\"empty\""; break;
+ case DEFINE: problem = "\"define\""; break;
case NUMBER: problem = "\"number\""; break;
case REGCHAR: problem = "regular expression character"; break;
}
@@ -151,6 +153,8 @@ static int char_esc(int letter)
case 'f': return '\f';
case 'r': return '\r';
case 'e': return 27;
+ case '"': return '"';
+ case '\'': return '\'';
}
abort();
@@ -172,34 +176,40 @@ static int num_esc(char *num)
%}
TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
+ID_END [^a-zA-Z0-9_]
+NUM_END [^0-9]
WS [\t ]*
-%x SPECIAL REGEX REGCLASS
+HEX [0-9A-Fa-f]
+OCT [0-7]
+
+%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT
%%
-<SPECIAL>{TOK} {
- long val;
- char *errp;
+<SPECIAL,NESTED>{TOK} {
+ long val;
+ char *errp;
- errno = 0;
+ errno = 0;
- val = strtol(yytext, &errp, 10);
+ val = strtol(yytext, &errp, 10);
- if (nesting == 0)
- BEGIN(INITIAL);
+ if (nesting == 0)
+ BEGIN(INITIAL);
- if (*errp != 0) {
- /* not a number */
- yylval.lexeme = strdup(yytext);
- return IDENT;
- }
+ if (*errp != 0) {
+ /* not a number */
+ yylval.lexeme = strdup(yytext);
+ return IDENT;
+ }
- if ((val == LONG_MAX || val == LONG_MIN) && errno == ERANGE)
- yyerror("numeric overflow in token");
+ if ((val == LONG_MAX || val == LONG_MIN)
+ && errno == ERANGE)
+ yyerror("numeric overflow in token");
- yylval.num = val;
- return NUMBER;
- }
+ yylval.num = val;
+ return NUMBER;
+ }
<SPECIAL>\({WS}all{WS}\) {
@@ -222,12 +232,17 @@ WS [\t ]*
return MAYBE;
}
+<SPECIAL>\({WS}cases{WS}\) {
+ BEGIN(INITIAL);
+ return CASES;
+ }
+
<SPECIAL>\({WS}and{WS}\) {
BEGIN(INITIAL);
return AND;
}
-<SPECIAL>\({WS}or{WS}\) {
+<SPECIAL>\({WS}or{WS}\) {
BEGIN(INITIAL);
return OR;
}
@@ -288,54 +303,74 @@ WS [\t ]*
return EMPTY;
}
-<SPECIAL>\{|\( {
- nesting++;
- if (yytext[0] == '{')
- closechar = '}';
- else
- closechar = ')';
- return yytext[0];
- }
+<SPECIAL>\({WS}define/{ID_END} {
+ nesting++;
+ closechar = ')';
+ BEGIN(NESTED);
+ return DEFINE;
+ }
-<SPECIAL>\}|\) {
- if (yytext[0] != closechar) {
- yyerror("paren mismatch");
- BEGIN(INITIAL);
- } else {
- if (--nesting == 0)
- BEGIN(INITIAL);
- return yytext[0];
- }
- }
+<SPECIAL,NESTED>\{|\( {
+ nesting++;
+ if (yytext[0] == '{')
+ closechar = '}';
+ else
+ closechar = ')';
+ BEGIN(NESTED);
+ return yytext[0];
+ }
-<SPECIAL>[\t ]+ {
- /* Eat whitespace in directive */
- }
+<SPECIAL,NESTED>\}|\) {
+ if (yytext[0] != closechar) {
+ yyerror("paren mismatch");
+ BEGIN(INITIAL);
+ } else {
+ switch (--nesting) {
+ case 1:
+ BEGIN(SPECIAL);
+ break;
+ case 0:
+ BEGIN(INITIAL);
+ break;
+ }
+
+ return yytext[0];
+ }
+ }
+
+<SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ }
+
+<SPECIAL,NESTED>\" {
+ BEGIN(STRLIT);
+ return '"';
+ }
+
+<SPECIAL,NESTED>\' {
+ BEGIN(CHRLIT);
+ return '\'';
+ }
<SPECIAL>@ {
if (nesting == 0) {
BEGIN(INITIAL);
yylval.lexeme = strdup("@");
return TEXT;
- } else {
- yyerrorf(0, "bad character in directive: %c", yytext[0]);
}
}
-<SPECIAL>\n {
- lineno++;
- yyerror("newline in directive");
- }
+<SPECIAL,NESTED>\n {
+ lineno++;
+ }
-<SPECIAL>[/] {
- BEGIN(REGEX);
- return '/';
- }
+<SPECIAL,NESTED>[/] {
+ BEGIN(REGEX);
+ return '/';
+ }
-<SPECIAL>\. {
- yylval.chr = '.';
- return '.';
- }
+<SPECIAL,NESTED>\. {
+ yylval.chr = '.';
+ return '.';
+ }
<SPECIAL>[\\][abtnvfre] {
char lexeme[2];
@@ -346,24 +381,25 @@ WS [\t ]*
return TEXT;
}
-<SPECIAL>[\\](x[0-9a-fA-F]+|[0-7]+) {
- char lexeme[2];
- lexeme[0] = num_esc(yytext + 1);
- lexeme[1] = 0;
- yylval.lexeme = strdup(lexeme);
- BEGIN(INITIAL);
- return TEXT;
- }
+<SPECIAL>[\\](x{HEX}+|{OCT}+) {
+ char lexeme[2];
+ lexeme[0] = num_esc(yytext + 1);
+ lexeme[1] = 0;
+ yylval.lexeme = strdup(lexeme);
+ BEGIN(INITIAL);
+ return TEXT;
+ }
-<SPECIAL>. {
- yyerrorf(0, "bad character in directive: '%c'", yytext[0]);
- }
+<SPECIAL,NESTED>. {
+ yyerrorf(0, "bad character in directive: '%c'",
+ yytext[0]);
+ }
<REGEX>[/] {
if (nesting == 0)
BEGIN(INITIAL);
else
- BEGIN(SPECIAL);
+ BEGIN(NESTED);
yylval.chr = '/';
return '/';
}
@@ -374,10 +410,10 @@ WS [\t ]*
return REGCHAR;
}
-<REGEX>[\\](x[0-9a-fA-F]+|[0-9]+) {
- yylval.chr = num_esc(yytext + 1);
- return REGCHAR;
- }
+<REGEX>[\\](x{HEX}+|{OCT}+) {
+ yylval.chr = num_esc(yytext + 1);
+ return REGCHAR;
+ }
<REGEX>\n {
lineno++;
@@ -438,6 +474,48 @@ WS [\t ]*
/* comment to end of line */
}
+<STRLIT>\" {
+ if (nesting == 0)
+ BEGIN(INITIAL);
+ else
+ BEGIN(NESTED);
+ return '"';
+ }
+
+<CHRLIT>\' {
+ if (nesting == 0)
+ BEGIN(INITIAL);
+ else
+ BEGIN(NESTED);
+ return '\'';
+ }
+
+<STRLIT,CHRLIT>[\\][abtnvfre] {
+ yylval.chr = char_esc(yytext[1]);
+ return LITCHAR;
+ }
+
+<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) {
+ yylval.chr = num_esc(yytext + 1);
+ return LITCHAR;
+ }
+<STRLIT>\n {
+ yyerror("newline in string literal");
+ lineno++;
+ yylval.chr = yytext[0];
+ return LITCHAR;
+ }
+<CHRLIT>\n {
+ yyerror("newline in character literal");
+ lineno++;
+ yylval.chr = yytext[0];
+ return LITCHAR;
+ }
+<STRLIT,CHRLIT>. {
+ yylval.chr = yytext[0];
+ return LITCHAR;
+ }
+
%%
void help(void)
diff --git a/extract.y b/extract.y
index 54dbb747..5ac61b2b 100644
--- a/extract.y
+++ b/extract.y
@@ -45,6 +45,7 @@ int yylex(void);
void yyerror(const char *);
obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts);
+obj_t *define_transform(obj_t *define_form);
static obj_t *parsed_spec;
static int output_produced;
@@ -58,23 +59,23 @@ static int output_produced;
long num;
}
-%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL
-%token <lexeme> OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY
+%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT
+%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE
%token <num> NUMBER
-%token <chr> REGCHAR
+%token <chr> REGCHAR LITCHAR
%type <obj> spec clauses clause all_clause some_clause none_clause maybe_clause
-%type <obj> collect_clause clause_parts additional_parts output_clause
-%type <obj> line elems_opt elems elem var var_op list exprs expr
-%type <obj> out_clauses out_clauses_opt out_clause
+%type <obj> cases_clause collect_clause clause_parts additional_parts
+%type <obj> output_clause define_clause line elems_opt elems elem var var_op
+%type <obj> list exprs expr out_clauses out_clauses_opt out_clause
%type <obj> repeat_clause repeat_parts_opt o_line
%type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt
%type <obj> regex regexpr regbranch
%type <obj> regterm regclass regclassterm regrange
+%type <obj> strlit chrlit litchars
%type <chr> regchar
-
-%nonassoc ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL
-%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY
+%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL
+%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE
%nonassoc '{' '}' '[' ']' '(' ')'
%right IDENT TEXT NUMBER
%left '|' '/'
@@ -97,7 +98,10 @@ clause : all_clause { $$ = list(num(lineno - 1), $1, nao); }
| some_clause { $$ = list(num(lineno - 1), $1, nao); }
| none_clause { $$ = list(num(lineno - 1), $1, nao); }
| maybe_clause { $$ = list(num(lineno - 1), $1, nao); }
+ | cases_clause { $$ = list(num(lineno - 1), $1, nao); }
| collect_clause { $$ = list(num(lineno - 1), $1, nao); }
+ | define_clause { $$ = list(num(lineno - 1),
+ define_transform($1), nao); }
| output_clause { $$ = list(num(lineno - 1), $1, nao); }
| line { $$ = $1; }
| repeat_clause { $$ = nil;
@@ -108,7 +112,7 @@ all_clause : ALL newl clause_parts { $$ = cons(all, $3); }
| ALL newl error { $$ = nil;
yybadtoken(yychar,
"all clause"); }
- | ALL newl END { $$ = nil;
+ | ALL newl END newl { $$ = nil;
yyerror("empty all clause"); }
;
@@ -117,7 +121,7 @@ some_clause : SOME newl clause_parts { $$ = cons(some, $3); }
| SOME newl error { $$ = nil;
yybadtoken(yychar,
"some clause"); }
- | SOME newl END { $$ = nil;
+ | SOME newl END newl { $$ = nil;
yyerror("empty some clause"); }
;
@@ -125,7 +129,7 @@ none_clause : NONE newl clause_parts { $$ = cons(none, $3); }
| NONE newl error { $$ = nil;
yybadtoken(yychar,
"none clause"); }
- | NONE newl END { $$ = nil;
+ | NONE newl END newl { $$ = nil;
yyerror("empty none clause"); }
;
@@ -133,10 +137,18 @@ maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); }
| MAYBE newl error { $$ = nil;
yybadtoken(yychar,
"maybe clause"); }
- | MAYBE newl END { $$ = nil;
+ | MAYBE newl END newl { $$ = nil;
yyerror("empty maybe clause"); }
;
+cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); }
+ | CASES newl error { $$ = nil;
+ yybadtoken(yychar,
+ "cases clause"); }
+ | CASES newl END newl { $$ = nil;
+ yyerror("empty cases clause"); }
+ ;
+
collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); }
| COLLECT newl clauses
UNTIL newl clauses END newl { $$ = list(collect, $3,
@@ -181,6 +193,23 @@ elem : TEXT { $$ = string($1); }
yybadtoken(yychar, "coll clause"); }
;
+define_clause : DEFINE exprs ')' newl
+ clauses
+ END newl { $$ = list(define, $2, $5, nao); }
+ | DEFINE ')' newl
+ clauses
+ END newl { $$ = list(define, nil, $4, nao); }
+ | DEFINE exprs ')' newl
+ END newl { $$ = list(define, $2, nao); }
+ | DEFINE ')' newl
+ END newl { $$ = list(define, nao); }
+ | DEFINE error { yybadtoken(yychar, "list expression"); }
+ | DEFINE exprs ')' newl
+ error { yybadtoken(yychar, "define"); }
+ | DEFINE ')' newl
+ error { yybadtoken(yychar, "define"); }
+ ;
+
output_clause : OUTPUT o_elems '\n'
out_clauses
END newl { $$ = list(output, $4, $2, nao); }
@@ -209,8 +238,12 @@ out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); }
yyerror("match clause in output"); }
| maybe_clause { $$ = nil;
yyerror("match clause in output"); }
+ | cases_clause { $$ = nil;
+ yyerror("match clause in output"); }
| collect_clause { $$ = nil;
yyerror("match clause in output"); }
+ | define_clause { $$ = nil;
+ yyerror("match clause in output"); }
| output_clause { $$ = nil;
yyerror("match clause in output"); }
;
@@ -324,6 +357,8 @@ expr : IDENT { $$ = intern(string($1)); }
| NUMBER { $$ = num($1); }
| list { $$ = $1; }
| regex { $$ = cons(regex_compile($1), $1); }
+ | chrlit { $$ = $1; }
+ | strlit { $$ = $1; }
;
regex : '/' regexpr '/' { $$ = $2; }
@@ -384,6 +419,36 @@ newl : '\n'
yyerrok; }
;
+strlit : '"' '"' { $$ = null_string; }
+ | '"' litchars '"' {
+ if ($2) {
+ obj_t *len = length($2), *iter, *ix;
+ $$ = mkustring(len);
+ for (iter = $2, ix = zero;
+ iter;
+ iter = cdr(iter), ix = plus(ix, one))
+ {
+ chr_str_set($$, ix, car(iter));
+ }
+ } else {
+ $$ = nil;
+ }
+ }
+ | '"' error { yybadtoken(yychar, "string literal"); }
+ ;
+
+chrlit : '\'' '\'' { yyerror("empty character literal");
+ $$ = nil; }
+ | '\'' litchars '\'' { $$ = car($2);
+ if (cdr($2))
+ yyerror("multiple characters in "
+ "character literal"); }
+ | '\'' error { yybadtoken(yychar, "character literal"); }
+ ;
+
+litchars : LITCHAR { $$ = cons(chr($1), nil); }
+ | LITCHAR litchars { $$ = cons(chr($1), $2); }
+ ;
%%
obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts)
@@ -415,6 +480,45 @@ obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts)
last_parts, empty_parts, nao);
}
+obj_t *define_transform(obj_t *define_form)
+{
+ obj_t *sym = first(define_form);
+ obj_t *args = second(define_form);
+
+ if (define_form == nil)
+ return nil;
+
+ assert (sym == define);
+
+ if (args == nil) {
+ yyerror("define requires arguments");
+ return define_form;
+ }
+
+ if (!consp(args) || !listp(cdr(args))) {
+ yyerror("bad define argument syntax");
+ return define_form;
+ } else {
+ obj_t *name = first(args);
+ obj_t *params = second(args);
+
+ if (!symbolp(name)) {
+ yyerror("function name must be a symbol");
+ return define_form;
+ }
+
+ if (!proper_listp(params)) {
+ yyerror("invalid function parameter list");
+ return define_form;
+ }
+
+ if (!all_satisfy(params, func_n1(symbolp), nil))
+ yyerror("function parameters must be symbols");
+ }
+
+ return define_form;
+}
+
obj_t *get_spec(void)
{
return parsed_spec;
@@ -443,12 +547,19 @@ void dump_var(const char *name, char *pfx1, size_t len1,
if (len1 >= 112 || len2 >= 112)
abort();
- if (stringp(value)) {
+ if (stringp(value) || chrp(value)) {
fputs(name, stdout);
fputs(pfx1, stdout);
fputs(pfx2, stdout);
putchar('=');
- dump_shell_string(c_str(value));
+ if (stringp(value)) {
+ dump_shell_string(c_str(value));
+ } else {
+ char mini[2];
+ mini[0] = c_chr(value);
+ mini[1] = 0;
+ dump_shell_string(mini);
+ }
putchar('\n');
} else {
obj_t *iter;
@@ -572,6 +683,13 @@ obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value)
return bindings;
}
+obj_t *eval_form(obj_t *form, obj_t *bindings)
+{
+ if (symbolp(form))
+ return assoc(bindings, form);
+ return cons(t, form);
+}
+
obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline,
obj_t *pos, obj_t *spec_lineno, obj_t *data_lineno,
obj_t *file)
@@ -1192,7 +1310,7 @@ repeat_spec_same_data:
long reps = 0;
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after skip directive ignored");
+ yyerrorlf(1, spec_lineno, "unexpected material after skip directive");
if ((spec = rest(spec)) == nil)
break;
@@ -1229,7 +1347,7 @@ repeat_spec_same_data:
return nil;
} else if (sym == trailer) {
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after trailer directive ignored");
+ yyerrorlf(1, spec_lineno, "unexpected material after trailer directive");
if ((spec = rest(spec)) == nil)
break;
@@ -1245,7 +1363,7 @@ repeat_spec_same_data:
} else if (sym == block) {
obj_t *name = first(rest(first_spec));
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after block directive ignored");
+ yyerrorlf(1, spec_lineno, "unexpected material after block directive");
if ((spec = rest(spec)) == nil)
break;
uw_block_begin(name, result);
@@ -1256,7 +1374,7 @@ repeat_spec_same_data:
obj_t *target = first(rest(first_spec));
if (rest(specline))
- yyerrorlf(1, spec_lineno, "material after %s ignored",
+ yyerrorlf(1, spec_lineno, "unexpected material after %s",
c_str(symbol_name(sym)));
uw_block_return(target,
@@ -1302,7 +1420,9 @@ repeat_spec_same_data:
if3(data, cons(data, num(data_lineno)), t));
return nil;
}
- } else if (sym == some || sym == all || sym == none || sym == maybe) {
+ } else if (sym == some || sym == all || sym == none || sym == maybe ||
+ sym == cases)
+ {
obj_t *specs;
obj_t *all_match = t;
obj_t *some_match = nil;
@@ -1331,6 +1451,8 @@ repeat_spec_same_data:
max_data = new_data;
}
}
+ if (sym == cases)
+ break;
} else {
all_match = nil;
}
@@ -1341,8 +1463,8 @@ repeat_spec_same_data:
return nil;
}
- if (sym == some && !some_match) {
- yyerrorlf(2, spec_lineno, "some: no clauses matched");
+ if ((sym == some || sym == cases) && !some_match) {
+ yyerrorlf(2, spec_lineno, "some/cases: no clauses matched");
return nil;
}
@@ -1514,15 +1636,13 @@ repeat_spec_same_data:
} else if (sym == bind) {
obj_t *args = rest(first_spec);
obj_t *pattern = first(args);
- obj_t *var = second(args);
- obj_t *lookup = assoc(bindings, var);
+ obj_t *form = second(args);
+ obj_t *val = eval_form(form, bindings);
- if (!var || !symbolp(var))
- yyerrorlf(1, spec_lineno, "bind: bad variable spec");
- else if (!lookup)
- yyerrorlf(1, spec_lineno, "bind: unbound source variable");
+ if (!val)
+ yyerrorlf(1, spec_lineno, "bind: unbound variable on right side");
- bindings = dest_bind(bindings, pattern, cdr(lookup));
+ bindings = dest_bind(bindings, pattern, cdr(val));
if (bindings == t)
return nil;
@@ -1581,6 +1701,107 @@ repeat_spec_same_data:
break;
goto repeat_spec_same_data;
+ } else if (sym == define) {
+ obj_t *args = second(first_spec);
+ obj_t *body = third(first_spec);
+ obj_t *name = first(args);
+ obj_t *params = second(args);
+
+ if (rest(specline))
+ yyerrorlf(1, spec_lineno, "unexpected material after define");
+
+ uw_set_func(name, cons(params, body));
+
+ if ((spec = rest(spec)) == nil)
+ break;
+
+ goto repeat_spec_same_data;
+ } else {
+ obj_t *func = uw_get_func(sym);
+
+ if (func) {
+ obj_t *args = rest(first_spec);
+ obj_t *params = car(func);
+ obj_t *body = cdr(func);
+ obj_t *piter, *aiter;
+ obj_t *bindings_cp = copy_alist(bindings);
+
+ if (!equal(length(args), length(params))) {
+ yyerrorlf(1, spec_lineno, "function %s takes %ld argument(s)",
+ c_str(sym), c_num(length(params)));
+ return nil;
+ }
+
+ for (piter = params, aiter = args; piter;
+ piter = cdr(piter), aiter = cdr(aiter))
+ {
+ obj_t *param = car(piter);
+ obj_t *arg = car(aiter);
+
+ if (symbolp(arg)) {
+ obj_t *existing = assoc(bindings, arg);
+ if (existing) {
+ bindings_cp = acons_new(bindings_cp,
+ param,
+ cdr(existing));
+ } else {
+ bindings_cp = alist_remove(bindings_cp, cons(param, nil));
+ }
+ } else {
+ bindings_cp = acons_new(bindings_cp, param, arg);
+ }
+ }
+
+ {
+ uw_block_begin(nil, result);
+ uw_env_begin;
+ result = match_files(body, files, bindings_cp,
+ data, num(data_lineno));
+ uw_env_end;
+ uw_block_end;
+
+ if (!result) {
+ yyerrorlf(2, spec_lineno, "function failed");
+ return nil;
+ }
+
+ {
+ cons_bind (new_bindings, success, result);
+
+ for (piter = params, aiter = args; piter;
+ piter = cdr(piter), aiter = cdr(aiter))
+ {
+ obj_t *param = car(piter);
+ obj_t *arg = car(aiter);
+
+ if (symbolp(arg)) {
+ obj_t *newbind = assoc(new_bindings, param);
+ if (newbind) {
+ bindings = dest_bind(bindings, arg, cdr(newbind));
+ if (bindings == t)
+ return nil;
+ }
+ }
+ }
+
+ if (consp(success)) {
+ yyerrorlf(2, spec_lineno,
+ "function matched; advancing from line %ld to %ld",
+ data_lineno, c_num(cdr(success)));
+ data = car(success);
+ data_lineno = c_num(cdr(success));
+ } else {
+ yyerrorlf(2, spec_lineno, "function consumed entire file");
+ data = nil;
+ }
+ }
+ }
+
+ if ((spec = rest(spec)) == nil)
+ break;
+
+ goto repeat_spec_same_data;
+ }
}
}
diff --git a/lib.c b/lib.c
index 2a3ee7f7..bce4d088 100644
--- a/lib.c
+++ b/lib.c
@@ -32,8 +32,10 @@
#include <limits.h>
#include <stdarg.h>
#include <dirent.h>
+#include <setjmp.h>
#include "lib.h"
#include "gc.h"
+#include "unwind.h"
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -44,8 +46,8 @@ obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t;
obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus;
obj_t *zeroplus, *optional, *compound, *or;
obj_t *skip, *trailer, *block, *next, *fail, *accept;
-obj_t *all, *some, *none, *maybe, *collect, *until, *coll;
-obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep;
+obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll;
+obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep;
obj_t *flattn, *forget, *mrge, *bind, *cat, *dir;
obj_t *zero, *one, *two, *negone, *maxint, *minint;
@@ -506,6 +508,14 @@ obj_t *listp(obj_t *obj)
? t : nil;
}
+obj_t *proper_listp(obj_t *obj)
+{
+ while (consp(obj))
+ obj = cdr(obj);
+
+ return (obj == nil) ? t : nil;
+}
+
obj_t *length(obj_t *list)
{
long len = 0;
@@ -616,9 +626,25 @@ obj_t *string(char *str)
obj_t *mkstring(obj_t *len, obj_t *ch)
{
char *str = chk_malloc(c_num(len) + 1);
+ obj_t *s = string(str);
memset(str, c_chr(ch), c_num(len));
str[c_num(len)] = 0;
- return string(str);
+ s->st.len = len;
+ return s;
+}
+
+obj_t *mkustring(obj_t *len)
+{
+ char *str = chk_malloc(c_num(len) + 1);
+ obj_t *s = string(str);
+ s->st.len = len;
+ return s;
+}
+
+obj_t *init_str(obj_t *str, const char *data)
+{
+ memcpy(str->st.str, data, c_num(str->st.len));
+ return str;
}
obj_t *copy_str(obj_t *str)
@@ -639,10 +665,18 @@ obj_t *length_str(obj_t *str)
return str->st.len;
}
-const char *c_str(obj_t *str)
+const char *c_str(obj_t *obj)
{
- assert (str && str->t.type == STR);
- return str->st.str;
+ assert (obj);
+
+ switch (obj->t.type) {
+ case STR:
+ return obj->st.str;
+ case SYM:
+ return c_str(symbol_name(obj));
+ default:
+ abort();
+ }
}
obj_t *search_str(obj_t *haystack, obj_t *needle, obj_t *start_num,
@@ -721,11 +755,19 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
obj_t *item = car(iter);
if (!item)
continue;
- if (!stringp(item))
- return nil;
- total += c_num(length_str(item));
- if (len_sep && cdr(iter))
- total += len_sep;
+ if (stringp(item)) {
+ total += c_num(length_str(item));
+ if (len_sep && cdr(iter))
+ total += len_sep;
+ continue;
+ }
+ if (chrp(item)) {
+ total += 1;
+ if (len_sep && cdr(iter))
+ total += len_sep;
+ continue;
+ }
+ return nil;
}
str = chk_malloc(total + 1);
@@ -735,9 +777,14 @@ obj_t *cat_str(obj_t *list, obj_t *sep)
long len;
if (!item)
continue;
- len = c_num(length_str(item));
- memcpy(ptr, c_str(item), len);
- ptr += len;
+ if (stringp(item)) {
+ len = c_num(length_str(item));
+ memcpy(ptr, c_str(item), len);
+ ptr += len;
+ } else {
+ *ptr++ = c_chr(item);
+ }
+
if (len_sep && cdr(iter)) {
memcpy(ptr, c_str(sep), len_sep);
ptr += len_sep;
@@ -784,12 +831,41 @@ obj_t *chr(int ch)
return obj;
}
+obj_t *chrp(obj_t *chr)
+{
+ return (chr && chr->st.type == CHR) ? t : nil;
+}
+
int c_chr(obj_t *chr)
{
assert (chr && chr->t.type == CHR);
return chr->ch.ch;
}
+obj_t *chr_str(obj_t *str, obj_t *index)
+{
+ long l = c_num(length_str(str));
+ long i = c_num(index);
+ const char *s = c_str(str);
+
+ assert (i < l);
+
+ return chr(s[i]);
+}
+
+obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr)
+{
+ long l = c_num(length_str(str));
+ long i = c_num(index);
+ char *s = str->st.str;
+
+ assert (i < l);
+
+ s[i] = c_chr(chr);
+
+ return chr;
+}
+
obj_t *sym_name(obj_t *sym)
{
assert (sym && sym->t.type == SYM);
@@ -1372,6 +1448,30 @@ obj_t *alist_remove(obj_t *list, obj_t *keys)
return list;
}
+obj_t *alist_remove1(obj_t *list, obj_t *key)
+{
+ obj_t **plist = &list;
+
+ while (*plist) {
+ if (eq(car(car(*plist)), key))
+ *plist = cdr(*plist);
+ else
+ plist = cdr_l(*plist);
+ }
+
+ return list;
+}
+
+obj_t *copy_cons(obj_t *c)
+{
+ return cons(car(c), cdr(c));
+}
+
+obj_t *copy_alist(obj_t *list)
+{
+ return mapcar(func_n1(copy_cons), list);
+}
+
obj_t *mapcar(obj_t *fun, obj_t *list)
{
list_collect_decl (out, iter);
@@ -1512,9 +1612,11 @@ static void obj_init(void)
some = intern(string(strdup("some")));
none = intern(string(strdup("none")));
maybe = intern(string(strdup("maybe")));
+ cases = intern(string(strdup("cases")));
collect = intern(string(strdup("collect")));
until = intern(string(strdup("until")));
coll = intern(string(strdup("coll")));
+ define = intern(string(strdup("define")));
output = intern(string(strdup("output")));
single = intern(string(strdup("single")));
frst = intern(string(strdup("first")));
@@ -1591,10 +1693,10 @@ void obj_print(obj_t *obj, FILE *out)
case '\\': fputs("\\\\", out); break;
case 27: fputs("\\e", out); break;
default:
- if (iscntrl(*ptr))
- fprintf(out, "\\%03o", (int) *ptr);
- else
+ if (isprint(*ptr))
putc(*ptr, out);
+ else
+ fprintf(out, "\\%03o", (int) *ptr);
}
}
putc('"', out);
@@ -1617,10 +1719,10 @@ void obj_print(obj_t *obj, FILE *out)
case '\\': fputs("\\\\", out); break;
case 27: fputs("\\e", out); break;
default:
- if (iscntrl(ch))
- fprintf(out, "\\%03o", ch);
- else
+ if (isprint(ch))
putc(ch, out);
+ else
+ fprintf(out, "\\%03o", ch);
}
putc('\'', out);
}
@@ -1683,6 +1785,7 @@ void init(const char *pn, void *(*oom)(void *, size_t),
? max(maybe_bottom_0, maybe_bottom_1)
: min(maybe_bottom_0, maybe_bottom_1));
+ uw_init();
obj_init();
}
@@ -1721,7 +1824,7 @@ char *snarf_line(FILE *in)
size = newsize;
}
- if (ch == '\n') {
+ if (ch == '\n' || ch == EOF) {
buf[fill++] = 0;
break;
}
diff --git a/lib.h b/lib.h
index 36081ad8..20cb2a77 100644
--- a/lib.h
+++ b/lib.h
@@ -159,8 +159,8 @@ extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t;
extern obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus;
extern obj_t *zeroplus, *optional, *compound, *or;
extern obj_t *skip, *trailer, *block, *next, *fail, *accept;
-extern obj_t *all, *some, *none, *maybe, *collect, *until, *coll;
-extern obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep;
+extern obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll;
+extern obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep;
extern obj_t *flattn, *forget, *mrge, *bind, *cat, *dir;
extern obj_t *zero, *one, *two, *negone, *maxint, *minint;
@@ -210,6 +210,7 @@ obj_t *consp(obj_t *obj);
obj_t *nullp(obj_t *obj);
obj_t *atom(obj_t *obj);
obj_t *listp(obj_t *obj);
+obj_t *proper_listp(obj_t *obj);
obj_t *length(obj_t *list);
obj_t *num(long val);
long c_num(obj_t *num);
@@ -226,6 +227,8 @@ obj_t *max2(obj_t *anum, obj_t *bnum);
obj_t *min2(obj_t *anum, obj_t *bnum);
obj_t *string(char *str);
obj_t *mkstring(obj_t *len, obj_t *ch);
+obj_t *mkustring(obj_t *len); /* must initialize immediately with init_str! */
+obj_t *init_str(obj_t *str, const char *);
obj_t *copy_str(obj_t *str);
obj_t *stringp(obj_t *str);
obj_t *length_str(obj_t *str);
@@ -239,7 +242,10 @@ obj_t *cat_str(obj_t *list, obj_t *sep);
obj_t *trim_str(obj_t *str);
obj_t *string_lt(obj_t *astr, obj_t *bstr);
obj_t *chr(int ch);
+obj_t *chrp(obj_t *str);
int c_chr(obj_t *chr);
+obj_t *chr_str(obj_t *str, obj_t *index);
+obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr);
obj_t *sym_name(obj_t *sym);
obj_t *make_sym(obj_t *name);
obj_t *intern(obj_t *str);
@@ -282,6 +288,9 @@ void cobj_print_op(obj_t *, FILE *); /* Print function for struct cobj_ops */
obj_t *assoc(obj_t *list, obj_t *key);
obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value);
obj_t *alist_remove(obj_t *list, obj_t *keys);
+obj_t *alist_remove1(obj_t *list, obj_t *key);
+obj_t *copy_cons(obj_t *cons);
+obj_t *copy_alist(obj_t *list);
obj_t *mapcar(obj_t *fun, obj_t *list);
obj_t *mappend(obj_t *fun, obj_t *list);
obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun);
diff --git a/txr.1 b/txr.1
index 191def67..e1a67248 100644
--- a/txr.1
+++ b/txr.1
@@ -21,7 +21,7 @@
.\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
.\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-.TH txr 1 2009-09-09 "txr v. 013" "Text Extraction Utility"
+.TH txr 1 2009-09-09 "txr v. 014" "Text Extraction Utility"
.SH NAME
txr \- text extractor
.SH SYNOPSIS
@@ -601,8 +601,9 @@ The general syntax of a directive is:
@EXPR
where expr is a parenthesized list of subexpressions. A subexpression
-is an symbol, number, regular expression, or a parenthesized expression.
-So, examples of valid directives are:
+is an symbol, number, string literal, character literal, regular expression, or
+a parenthesized expression. So, examples of syntactically valid directives
+are:
@(banana)
@@ -610,11 +611,18 @@ So, examples of valid directives are:
@( a (b (c d) (e ) ))
+ @("apple" 'b' 3)
+
@(a /[a-z]*/ b)
A symbol is lexically the same thing as a variable and the same rules
apply. Tokens that look like numbers are treated as numbers.
+String and character literals are delimited by double and single quotes,
+respectively, and may not span multiple lines. Character literals must contain
+exactly one character. Character and numeric escapes may be used within
+literals to escape the quotes, and to denote control characters.
+
Some directives are involved in structuring the overall syntax of the query.
There are syntactic constraints that depend on the directive. For instance the
@@ -630,7 +638,7 @@ Continue matching in another file.
.IP @(block)
The remaining query is treated as an anonymous or named block.
Blocks may be referenced by @(accept) and @(fail) directives.
-Blocks are discussed in the section Blocks below.
+Blocks are discussed in the section BLOCKS below.
.IP @(skip)
Treat the remaining query as a subquery unit, and search the lines of
@@ -651,7 +659,15 @@ Match some clauses in parallel. Each one must match.
Match some clauses in parallel. None must match.
.IP @(maybe)
-Match some clauses in parallel. None must match.
+Match some clauses in parallel, which may or may not match.
+No failure occurs if none match.
+
+.IP @(cases)
+Match some clauses sequentially, stopping if one of them
+matches successfully.
+
+.IP @(define NAME ( ARGUMENTS ...))
+Introduces a function. Functions are discussed in the FUNCTIONS section below.
.IP @(collect)
Search the data for multiple matches of a clause. Collect the
@@ -671,17 +687,17 @@ Separator of clauses for @(some), @(all), and @(none).
Equivalent to @(and). Choice is stylistic.
.IP @(end)
-Required terminator for @(some), @(all), @(none), @(maybe), @(collect),
-@(output), and @(repeat).
+Required terminator for @(some), @(all), @(none), @(maybe), @(cases),
+@(collect), @(output), and @(repeat).
.IP @(fail)
Terminate the processing of a block, as if it were a failed match.
-Blocks are discussed in the section Blocks below.
+Blocks are discussed in the section BLOCKS below.
.IP @(accept)
Terminate the processing of a block, as if it were a successful match.
What bindings emerge may depend on the kind of block: collect
-has special semantics. Blocks are discussed in the section Blocks below.
+has special semantics. Blocks are discussed in the section BLOCKS below.
.IP @(flatten)
Normalizes a set of specified variables to one-dimensional lists. Those
@@ -890,7 +906,7 @@ line "a dark". The @(some) clause combines the text line "it",
and a @(none) clause which contains just one clause consisting of
the line "was".
-The semantics of the some, all, none and maybe directives is:
+The semantics of the some, all, none, maybe and cases directives is:
.IP @(all)
Each of the clauses is matched at the current position. If any of the
@@ -914,12 +930,20 @@ The directive succeeds even if all of the clauses fail.
Whatever bindings are found in any of the clauses are
retained.
-When a @(some) or @(all) directive matches successfully, or a @(maybe)
-directive matches something, the query advances by the greatest number of lines
-matched in any of the subclauses. For instance if there are two subclauses, and
-one of them matches three lines, but the other one matches five lines, then the
-overall clause is considered to have made a five line match at its position. If
-more directives follow, they begin matching five lines down from that position.
+.IP @(cases)
+The clauses are matched, in order, at the current position.
+If any clause matches, the matching stops and the bindings
+collected from that clause are retained. Any remaining clauses
+after that one are not processed. If no clause matches, the
+directive fails, and produces no bindings.
+
+When a @(some), @(all), or @(cases) directive matches successfully, or a
+@(maybe) directive matches in at least one of its clauses, the query advances
+by the greatest number of lines matched in any of the subclauses. For instance
+if there are two subclauses, and one of them matches three lines, but the other
+one matches five lines, then the overall clause is considered to have made a
+five line match at its position. If more directives follow, they begin matching
+five lines down from that position.
.SS The Collect Directive
@@ -1253,6 +1277,15 @@ to match B, or the bind fails. Matching means that either
- A and B are lists and are either identical, or one is
found as substructure within the other.
+The right hand side does not have to be a variable. It may be some other
+object, like a string, or list of strings, et cetera. For instance
+
+ @(bind A "ab\tc")
+
+will bind the string "ab\tc" (the letter a, b, a tab character, and c)
+to the variable A if A is unbound. If A is bound, this will fail unless
+A already contains an identical string.
+
The left hand side of a bind can be a nested list pattern containing variables.
The last item of a list at any nesting level can be preceded by a dot, which
means that the variable matches the rest of the list from that position.
@@ -1280,7 +1313,8 @@ The @(block NAME) directive introduces a named block, except when the name is
the word nil. The @(block) directive introduces an unnamed block, equivalent
to @(block nil).
-The @(skip) and @(collect) directives introduce implicit anonymous blocks.
+The @(skip) and @(collect) directives introduce implicit anonymous blocks,
+as do function bodies.
.SS Block Scope
@@ -1384,12 +1418,12 @@ that block until this point emerge from that block.
.IP @(accept)
Immediately terminate the innermost enclosing anonymous block, as if
-that block successfully mached. Any bindings established within
+that block successfully matched. Any bindings established within
that block until this point emerge from that block.
If the implicit block introduced by @(skip) is terminated in this manner,
this has the effect of causing the skip itself to succeed, as if
-all of the trailing material succesfully matched.
+all of the trailing material successfully matched.
If the implicit block associated with a @(collect) is terminated this way,
then the collection stops. All bindings collected in the current iteration of
@@ -1544,6 +1578,253 @@ The second clause grabs four lines, which is the longest match.
And so, the next line of input available for matching is 5, which goes
to the @second variable.
+.SH FUNCTIONS
+
+.SS Introduction
+
+.B txr
+functions allow a query to be structured to avoid repetition.
+On a theoretical note, because
+.B txr
+functions support recursion, functions enable txr to match some
+kinds of patterns which exhibit self-embedding, or nesting,
+and thus cannot be matched by a regular language.
+
+Functions in
+.B txr
+are not exactly like functions in mathematics or functional languages, and are
+not like procedures in imperative programming languages. They are not exactly
+like macros either. What it means for a
+.B txr
+function to take arguments and produce a result is different from
+the conventional notion of a function.
+
+A
+.B txr
+function may have one or more parameters. When such a function is invoked, an
+argument must be specified for each parameter. However, a special behavior is
+at play here. Namely, some or all of the argument expressions may be unbound
+variables. In that case, the corresponding parameters behave like unbound
+variables also. Thus
+.B txr
+function calls can transmit the "unbound" state from argument to parameter.
+
+It should be mentioned that functions have access to all bindings that are
+visible in the caller; functions may refer to variables which are not
+mentioned in their parameter list.
+
+With regard to returning,
+.B txr
+functions are also unconventional. If the function fails, then the function
+call is considered to have failed. The function call behaves like a kind of
+match; if the function fails, then the call is like a failed match.
+
+When a function call succeeds, then the bindings emanating from that function
+are processed specially. Firstly, any bindings for variables which do not
+correspond to one of the function's parameters are thrown away. Functions may
+internally bind arbitrary variables in order to get their job done, but only
+those variables which are named in the function argument list may propagate out
+of the function call. Thus, a function with no arguments can only indicate
+matching success or failure, but not produce any bindings. Secondly,
+variables do not propagate out of the function directly, but undergo
+a renaming. For each parameter which went into the function as an unbound
+variable (because its corresponding argument was an unbound variable),
+if that parameter now has a value, that value is bound onto the corresponding
+argument.
+
+Example:
+
+ @(define collect_words (list))
+ @(coll)@{list /[^ \t]/}@(end)
+ @(end)
+
+The above function "collect_words" contains a query which collects words from a
+line (sequences of characters other than space or tab), into the list variable
+called "list". This variable is named in the parameter list of the function,
+therefore, its value, if it has one, is permitted to escape from the function
+call.
+
+Suppose the input data is:
+
+ Fine summer day
+
+and the function is called like this:
+
+ @(collect_words wordlist)
+
+The result is:
+
+ wordlist[0]=Fine
+ wordlist[1]=summer
+ wordlist[1]=day
+
+How it works is that in the function call @(collect_words wordlist),
+"wordlist" is an unbound variable. The parameter corresponding to that
+unbound variable is the parameter "list". Therefore, that parameter
+is unbound over the body of the function. The function body collects the
+words of "Fine summer day" into the variable "list", and then
+yields the that binding. Then the function call completes by
+noticing that the function parameter "list" now has a binding, and
+that the corresponding argument "wordlist" has no binding. The binding
+is thus transferred to the "wordlist" variable. After that, the
+bindings produced by the function are thrown away. The only enduring
+effects are:
+
+.IP -
+the function matched and consumed some input; and
+
+.IP -
+the function succeeded; and
+
+.IP -
+the wordlist variable now has a binding.
+.PP
+
+Another way to understand the parameter behavior is that function
+parameters behave like proxies which represent their arguments. If an argument
+is an established value, such as a character string or bound variable, the
+parameter is a proxy for that value and behaves just like that value. If an
+argument is an unbound variable, the function parameter acts as a proxy
+representing that unbound variable. The effect of binding the proxy is
+that the variable becomes bound, an effect which is settled when the
+function goes out of scope.
+
+Within the function, both the original variable and the proxy are
+visible simultaneously, and are independent. What if a function binds both of
+them? Suppose a function has a parameter called P, which is called
+with an argument A, and then in the function @A and @P are bound. This is
+permitted, and they can even be bound to different values. However, when the
+function terminates, the local binding of A simply disappears (because,
+remember, the symbol A is not a member of the list of parameters).
+Only the value bound to P emerges, and is bound to A, which still appears
+unbound at that point.
+
+.SS Definition Syntax
+
+A function definition begins with a @(define ...) directive which must be the
+only element in its line. The define must be followed by a symbol, which is the
+name of the function being defined. After the symbol, there is a parenthesized
+optional argument list. If there is no such list, or if the list is specified
+as () or the symbol "nil" then the function has no parameters. Examples of
+valid define syntax are:
+
+ @(define foo)
+ @(define bar ())
+ @(define match (a b c))
+
+The define directive may be followed directly by the @(end) directive,
+also on a line by itself, in which case the function has an empty body.
+Or it may be followed by one or more query lines and then @(end).
+What is between a @(define ...) and its matching @(end) constitutes the
+function body.
+
+Functions may be nested within function bodies. Such local functions have
+dynamic scope. They are visible in the function body in which they are defined,
+and in any functions invoked from that body.
+
+The body of a function is an anonymous block. (See BLOCKS above).
+
+The following trivial function b produces no bindings and has a body which
+simply matches the line "begin".
+
+ @(define b)
+ begin
+ @(end)
+
+Thus the call:
+
+ @(b)
+
+matches an input line "begin".
+
+.SS Call Syntax
+
+A function is invoked by compound directive whose first symbol is the name of
+that function. Additional elements in the directive are the arguments.
+Arguments may be symbols, or other objects like string and character
+literals.
+
+Example:
+
+ Query: @(define pair (a b))
+ @a @b
+ @(end)
+ @(pair first second)
+ @(pair "ice" cream)
+
+ Data: one two
+ ice milk
+
+ Output: first="one"
+ second="two"
+ cream="milk"
+
+The first call to the function takes the line "one two". The parameter "a"
+takes "one" and parameter b takes "two". These are rebound to the arguments
+first and second. The second call to the function binds the a parameter
+to the word "ice", and the b is unbound, because the
+corresponding argument "cream" is unbound. Thus inside the function, @a
+is forced to match "ice". Then a space is matched and @b collects the text
+"milk". When the function returns, the unbound "cream" variable gets this value.
+
+If a symbol occurs multiple times in the argument list, it constrains
+both parameters to bind to the same value. That is to say, all parameters
+which, in the body of the function, bind a value, and which are all derived
+from the same argument symbol must bind to the same value. This is settled when
+the function terminates, not while it is matching. Example:
+
+ Query: @(define pair (a b))
+ @a @b
+ @(end)
+ @(pair same same)
+
+ Data: one two
+
+ Output: [query fails, prints "false"]
+
+.SS Nested Functions
+
+Function definitions may appear in a function. Such definitions
+are visible in all functions which are invoked from the body
+(and not necessarily enclosed in the body). In other words, the
+scope is dynamic, not lexical. Inner definitions shadow outer
+definitions. This means that a caller can redirect the function
+calls that take place in a callee, by defining local functions
+which capture the references.
+
+Example:
+
+ Query: @(define which)
+ @ (fun)
+ @(end)
+ @(define fun)
+ @ (output)
+ toplevel fun!
+ @ (end)
+ @(end)
+ @(define callee)
+ @ (define fun)
+ @ (output)
+ local fun!
+ @ (end)
+ @ (end)
+ @ (which)
+ @(end)
+ @(callee)
+ @(which)
+
+ Output: local fun!
+ toplevel fun!
+
+Here, the function "which" is defined which calls "fun".
+A toplevel definition of "fun" is introduced which
+outputs "toplevel fun!". Then, within the func
+The function "callee" provides its own local definition
+of "fun" which outputs "local fun!" before calling "which".
+When callee is invoked, it calls @(which), whose @(fun) call is routed to
+callee's local definition. When @(which) is called directly from the top
+level, its @(fun) call goes to the toplevel definition.
+
.SH OUTPUT
A
diff --git a/unwind.c b/unwind.c
index c573c16d..eb1490d4 100644
--- a/unwind.c
+++ b/unwind.c
@@ -30,10 +30,12 @@
#include <setjmp.h>
#include <dirent.h>
#include "lib.h"
+#include "gc.h"
#include "unwind.h"
static uw_frame_t *uw_stack;
static uw_frame_t *uw_exit_point;
+static uw_frame_t toplevel_env;
static void uw_unwind_to_exit_point()
{
@@ -49,9 +51,16 @@ static void uw_unwind_to_exit_point()
case UW_BLOCK:
longjmp(uw_stack->bl.jb, 1);
break;
+ case UW_ENV: /* env frame cannot be exit point */
+ abort();
+ default:
+ abort();
}
+}
- abort();
+void uw_init(void)
+{
+ protect(&toplevel_env.ev.func_bindings, 0);
}
void uw_push_block(uw_frame_t *fr, obj_t *tag)
@@ -63,6 +72,46 @@ void uw_push_block(uw_frame_t *fr, obj_t *tag)
uw_stack = fr;
}
+static uw_frame_t *uw_find_env(void)
+{
+ uw_frame_t *fr;
+
+ for (fr = uw_stack; fr != 0; fr = fr->uw.up) {
+ if (fr->uw.type == UW_ENV)
+ break;
+ }
+
+ return fr ? fr : &toplevel_env;
+}
+
+void uw_push_env(uw_frame_t *fr)
+{
+ uw_frame_t *prev_env = uw_find_env();
+ fr->ev.type = UW_ENV;
+
+ if (prev_env) {
+ fr->ev.func_bindings = copy_alist(prev_env->ev.func_bindings);
+ } else {
+ fr->ev.func_bindings = nil;
+ }
+
+ fr->ev.up = uw_stack;
+ uw_stack = fr;
+}
+
+obj_t *uw_get_func(obj_t *sym)
+{
+ uw_frame_t *env = uw_find_env();
+ return cdr(assoc(env->ev.func_bindings, sym));
+}
+
+obj_t *uw_set_func(obj_t *sym, obj_t *value)
+{
+ uw_frame_t *env = uw_find_env();
+ env->ev.func_bindings = acons_new(env->ev.func_bindings, sym, value);
+ return value;
+}
+
void uw_pop_frame(uw_frame_t *fr)
{
assert (fr == uw_stack);
diff --git a/unwind.h b/unwind.h
index 8863fdff..1cd0792a 100644
--- a/unwind.h
+++ b/unwind.h
@@ -27,7 +27,7 @@
typedef union uw_frame uw_frame_t;
typedef enum uw_frtype uw_frtype_t;
-enum uw_frtype { UW_BLOCK };
+enum uw_frtype { UW_BLOCK, UW_ENV };
struct uw_common {
uw_frame_t *up;
@@ -42,25 +42,46 @@ struct uw_block {
jmp_buf jb;
};
+struct uw_dynamic_env {
+ uw_frame_t *up;
+ uw_frtype_t type;
+ obj_t *func_bindings;
+};
+
union uw_frame {
struct uw_common uw;
struct uw_block bl;
+ struct uw_dynamic_env ev;
};
+void uw_init(void);
void uw_push_block(uw_frame_t *, obj_t *tag);
+void uw_push_env(uw_frame_t *);
+obj_t *uw_get_func(obj_t *sym);
+obj_t *uw_set_func(obj_t *sym, obj_t *value);
obj_t *uw_block_return(obj_t *tag, obj_t *result);
void uw_pop_frame(uw_frame_t *);
+
#define uw_block_begin(TAG, RESULTVAR) \
obj_t *RESULTVAR = nil; \
{ \
- uw_frame_t uw_fr; \
- uw_push_block(&uw_fr, TAG); \
- if (setjmp(uw_fr.bl.jb)) { \
- RESULTVAR = uw_fr.bl.result; \
+ uw_frame_t uw_blk; \
+ uw_push_block(&uw_blk, TAG); \
+ if (setjmp(uw_blk.bl.jb)) { \
+ RESULTVAR = uw_blk.bl.result; \
} else {
#define uw_block_end \
} \
- uw_pop_frame(&uw_fr); \
+ uw_pop_frame(&uw_blk); \
+ }
+
+#define uw_env_begin \
+ { \
+ uw_frame_t uw_env; \
+ uw_push_env(&uw_env)
+
+#define uw_env_end \
+ uw_pop_frame(&uw_env); \
}