diff options
-rw-r--r-- | ChangeLog | 100 | ||||
-rw-r--r-- | extract.l | 220 | ||||
-rw-r--r-- | extract.y | 279 | ||||
-rw-r--r-- | lib.c | 145 | ||||
-rw-r--r-- | lib.h | 13 | ||||
-rw-r--r-- | txr.1 | 319 | ||||
-rw-r--r-- | unwind.c | 51 | ||||
-rw-r--r-- | unwind.h | 33 |
8 files changed, 1011 insertions, 149 deletions
@@ -1,3 +1,103 @@ +2009-10-03 Kaz Kylheku <kkylheku@gmail.com> + + Version 014 + + New cases directive. + + New define directive: user-defined dynamically scoped functions. + + String literals in bind and function calls. + + EOF in the middle of a line handled properly. + + * extract.l (version): Bump to 014. + * txr.1: Bump version to 014. + + +2009-10-02 Kaz Kylheku <kkylheku@gmail.com> + + New cases directive. + + * extract.l (yybadtoken): Add case for CASES. + (grammar): Tokenize cases directive. + * extract.y (CASES): New token kind. + (cases_clause): New grammar symbol. + (grammar): Implement new grammar cases. + (match_files): Implement semantics for cases. + * lib.c (cases): New global. + (obj_init): Intern cases symbol. + * lib.h (cases): Declared. + * txr.1: Documented. + +2009-10-02 Kaz Kylheku <kkylheku@gmail.com> + + Support for string and character literals. + + * extract.l (char_esc): Support \' and \" escapes. + (STRLIT, CHRLIT): New flex start conditions. + (grammar): New rules for tokenizing string literals. + * extract.y (LITCHAR): New token kind. + (strlit, chrlit, litchars): New grammar symbols. + (grammar): Implement string literal parsing. + (dump_var): Support character objects, treating + them as one-character strings. + (eval_form): New function. + (match_files): In bind directive, allow the right + hand side to be an arbitrary object. + * lib.c (mkustring, init_str): New functions. + (cat_str): Allow characters in the mix, treating + them as one-character strings. + * lib.h (mkustring, init_str): Declared. + (chrp, chr_str, chr_str_set): New function. + * txr.1: Documented. + +2009-10-02 Kaz Kylheku <kkylheku@gmail.com> + + Support for query-defined functions. + + * extract.l (yybadtoken): New DEFINE case. + (NESTED): New flex start condition. This allows for different lexing + rules in nested lists, so even though for instance @(collect) is a + special token @((collect)) isn't. + (grammar): Refactored with NESTED. + Tokenize define directive. + * extract.y (define_transform): New function. + (DEFINE): New token kind. + (define_clause): New grammar symbol. + (match_files): Implement define semantics, and function calls. + * lib.c (define): New global. + * lib.h (define): Declared. + (proper_listp, alist_remove1, copy_cons, copy_alist): New functions. + (obj_init): Intern define symbol. + (init): Call new function uw_init. + * unwind.c (toplevel_env): New static structure. + (uw_unwind_to_exit_point): Support new UW_ENV frame type. + (uw_init, uw_find_env, uw_push_env, uw_get_func, uw_set_func): New + functions. + * unwind.h (UW_ENV): New enumeration member in uw_frtype. + (uw_dynamic_env): New struct. + (uw_block_begin, uw_block_end): Renamed some variables. + (uw_env_begin, uw_env_end): New macros. + * txr.1: Documented. + +2009-10-02 Kaz Kylheku <kkylheku@gmail.com> + + Misc. bugfixes and improvements. + + * extract.l (grammar): Newline in a directive + no longer an error. Why not allow it. + * extract.y (grammar): Productions for catching empty bodies in some + constructs now end with END newl, rather + than just END, so parsing can continue sanely. + (match_lines): In diagnostics, don't say "ignored" about material which + causes an error that fails the query! + * lib.c (mkstring): Initialize length since we know it! + (c_str): Take a symbol as an arg, so we don't have + to keep writing c_str(symbol_name(sym)). + (obj_print): Use isprint rather than isctrl to decide + whether to print a character as an escape. + (snarf_line): Properly handle EOF in the middle of line. + 2009-09-29 Kaz Kylheku <kkylheku@gmail.com> Version 013 @@ -40,7 +40,7 @@ #define YY_NO_UNPUT -const char *version = "013"; +const char *version = "014"; const char *progname = "txr"; const char *spec_file = "stdin"; long lineno = 1; @@ -111,6 +111,7 @@ void yybadtoken(int tok, const char *context) case SOME: problem = "\"some\""; break; case NONE: problem = "\"none\""; break; case MAYBE: problem = "\"maybe\""; break; + case CASES: problem = "\"cases\""; break; case AND: problem = "\"and\""; break; case OR: problem = "\"or\""; break; case END: problem = "\"end\""; break; @@ -124,6 +125,7 @@ void yybadtoken(int tok, const char *context) case FIRST: problem = "\"first\""; break; case LAST: problem = "\"last\""; break; case EMPTY: problem = "\"empty\""; break; + case DEFINE: problem = "\"define\""; break; case NUMBER: problem = "\"number\""; break; case REGCHAR: problem = "regular expression character"; break; } @@ -151,6 +153,8 @@ static int char_esc(int letter) case 'f': return '\f'; case 'r': return '\r'; case 'e': return 27; + case '"': return '"'; + case '\'': return '\''; } abort(); @@ -172,34 +176,40 @@ static int num_esc(char *num) %} TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ +ID_END [^a-zA-Z0-9_] +NUM_END [^0-9] WS [\t ]* -%x SPECIAL REGEX REGCLASS +HEX [0-9A-Fa-f] +OCT [0-7] + +%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT %% -<SPECIAL>{TOK} { - long val; - char *errp; +<SPECIAL,NESTED>{TOK} { + long val; + char *errp; - errno = 0; + errno = 0; - val = strtol(yytext, &errp, 10); + val = strtol(yytext, &errp, 10); - if (nesting == 0) - BEGIN(INITIAL); + if (nesting == 0) + BEGIN(INITIAL); - if (*errp != 0) { - /* not a number */ - yylval.lexeme = strdup(yytext); - return IDENT; - } + if (*errp != 0) { + /* not a number */ + yylval.lexeme = strdup(yytext); + return IDENT; + } - if ((val == LONG_MAX || val == LONG_MIN) && errno == ERANGE) - yyerror("numeric overflow in token"); + if ((val == LONG_MAX || val == LONG_MIN) + && errno == ERANGE) + yyerror("numeric overflow in token"); - yylval.num = val; - return NUMBER; - } + yylval.num = val; + return NUMBER; + } <SPECIAL>\({WS}all{WS}\) { @@ -222,12 +232,17 @@ WS [\t ]* return MAYBE; } +<SPECIAL>\({WS}cases{WS}\) { + BEGIN(INITIAL); + return CASES; + } + <SPECIAL>\({WS}and{WS}\) { BEGIN(INITIAL); return AND; } -<SPECIAL>\({WS}or{WS}\) { +<SPECIAL>\({WS}or{WS}\) { BEGIN(INITIAL); return OR; } @@ -288,54 +303,74 @@ WS [\t ]* return EMPTY; } -<SPECIAL>\{|\( { - nesting++; - if (yytext[0] == '{') - closechar = '}'; - else - closechar = ')'; - return yytext[0]; - } +<SPECIAL>\({WS}define/{ID_END} { + nesting++; + closechar = ')'; + BEGIN(NESTED); + return DEFINE; + } -<SPECIAL>\}|\) { - if (yytext[0] != closechar) { - yyerror("paren mismatch"); - BEGIN(INITIAL); - } else { - if (--nesting == 0) - BEGIN(INITIAL); - return yytext[0]; - } - } +<SPECIAL,NESTED>\{|\( { + nesting++; + if (yytext[0] == '{') + closechar = '}'; + else + closechar = ')'; + BEGIN(NESTED); + return yytext[0]; + } -<SPECIAL>[\t ]+ { - /* Eat whitespace in directive */ - } +<SPECIAL,NESTED>\}|\) { + if (yytext[0] != closechar) { + yyerror("paren mismatch"); + BEGIN(INITIAL); + } else { + switch (--nesting) { + case 1: + BEGIN(SPECIAL); + break; + case 0: + BEGIN(INITIAL); + break; + } + + return yytext[0]; + } + } + +<SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ } + +<SPECIAL,NESTED>\" { + BEGIN(STRLIT); + return '"'; + } + +<SPECIAL,NESTED>\' { + BEGIN(CHRLIT); + return '\''; + } <SPECIAL>@ { if (nesting == 0) { BEGIN(INITIAL); yylval.lexeme = strdup("@"); return TEXT; - } else { - yyerrorf(0, "bad character in directive: %c", yytext[0]); } } -<SPECIAL>\n { - lineno++; - yyerror("newline in directive"); - } +<SPECIAL,NESTED>\n { + lineno++; + } -<SPECIAL>[/] { - BEGIN(REGEX); - return '/'; - } +<SPECIAL,NESTED>[/] { + BEGIN(REGEX); + return '/'; + } -<SPECIAL>\. { - yylval.chr = '.'; - return '.'; - } +<SPECIAL,NESTED>\. { + yylval.chr = '.'; + return '.'; + } <SPECIAL>[\\][abtnvfre] { char lexeme[2]; @@ -346,24 +381,25 @@ WS [\t ]* return TEXT; } -<SPECIAL>[\\](x[0-9a-fA-F]+|[0-7]+) { - char lexeme[2]; - lexeme[0] = num_esc(yytext + 1); - lexeme[1] = 0; - yylval.lexeme = strdup(lexeme); - BEGIN(INITIAL); - return TEXT; - } +<SPECIAL>[\\](x{HEX}+|{OCT}+) { + char lexeme[2]; + lexeme[0] = num_esc(yytext + 1); + lexeme[1] = 0; + yylval.lexeme = strdup(lexeme); + BEGIN(INITIAL); + return TEXT; + } -<SPECIAL>. { - yyerrorf(0, "bad character in directive: '%c'", yytext[0]); - } +<SPECIAL,NESTED>. { + yyerrorf(0, "bad character in directive: '%c'", + yytext[0]); + } <REGEX>[/] { if (nesting == 0) BEGIN(INITIAL); else - BEGIN(SPECIAL); + BEGIN(NESTED); yylval.chr = '/'; return '/'; } @@ -374,10 +410,10 @@ WS [\t ]* return REGCHAR; } -<REGEX>[\\](x[0-9a-fA-F]+|[0-9]+) { - yylval.chr = num_esc(yytext + 1); - return REGCHAR; - } +<REGEX>[\\](x{HEX}+|{OCT}+) { + yylval.chr = num_esc(yytext + 1); + return REGCHAR; + } <REGEX>\n { lineno++; @@ -438,6 +474,48 @@ WS [\t ]* /* comment to end of line */ } +<STRLIT>\" { + if (nesting == 0) + BEGIN(INITIAL); + else + BEGIN(NESTED); + return '"'; + } + +<CHRLIT>\' { + if (nesting == 0) + BEGIN(INITIAL); + else + BEGIN(NESTED); + return '\''; + } + +<STRLIT,CHRLIT>[\\][abtnvfre] { + yylval.chr = char_esc(yytext[1]); + return LITCHAR; + } + +<STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) { + yylval.chr = num_esc(yytext + 1); + return LITCHAR; + } +<STRLIT>\n { + yyerror("newline in string literal"); + lineno++; + yylval.chr = yytext[0]; + return LITCHAR; + } +<CHRLIT>\n { + yyerror("newline in character literal"); + lineno++; + yylval.chr = yytext[0]; + return LITCHAR; + } +<STRLIT,CHRLIT>. { + yylval.chr = yytext[0]; + return LITCHAR; + } + %% void help(void) @@ -45,6 +45,7 @@ int yylex(void); void yyerror(const char *); obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts); +obj_t *define_transform(obj_t *define_form); static obj_t *parsed_spec; static int output_produced; @@ -58,23 +59,23 @@ static int output_produced; long num; } -%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL -%token <lexeme> OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY +%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT +%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token <num> NUMBER -%token <chr> REGCHAR +%token <chr> REGCHAR LITCHAR %type <obj> spec clauses clause all_clause some_clause none_clause maybe_clause -%type <obj> collect_clause clause_parts additional_parts output_clause -%type <obj> line elems_opt elems elem var var_op list exprs expr -%type <obj> out_clauses out_clauses_opt out_clause +%type <obj> cases_clause collect_clause clause_parts additional_parts +%type <obj> output_clause define_clause line elems_opt elems elem var var_op +%type <obj> list exprs expr out_clauses out_clauses_opt out_clause %type <obj> repeat_clause repeat_parts_opt o_line %type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt %type <obj> regex regexpr regbranch %type <obj> regterm regclass regclassterm regrange +%type <obj> strlit chrlit litchars %type <chr> regchar - -%nonassoc ALL SOME NONE MAYBE AND OR END COLLECT UNTIL COLL -%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY +%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL +%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE %nonassoc '{' '}' '[' ']' '(' ')' %right IDENT TEXT NUMBER %left '|' '/' @@ -97,7 +98,10 @@ clause : all_clause { $$ = list(num(lineno - 1), $1, nao); } | some_clause { $$ = list(num(lineno - 1), $1, nao); } | none_clause { $$ = list(num(lineno - 1), $1, nao); } | maybe_clause { $$ = list(num(lineno - 1), $1, nao); } + | cases_clause { $$ = list(num(lineno - 1), $1, nao); } | collect_clause { $$ = list(num(lineno - 1), $1, nao); } + | define_clause { $$ = list(num(lineno - 1), + define_transform($1), nao); } | output_clause { $$ = list(num(lineno - 1), $1, nao); } | line { $$ = $1; } | repeat_clause { $$ = nil; @@ -108,7 +112,7 @@ all_clause : ALL newl clause_parts { $$ = cons(all, $3); } | ALL newl error { $$ = nil; yybadtoken(yychar, "all clause"); } - | ALL newl END { $$ = nil; + | ALL newl END newl { $$ = nil; yyerror("empty all clause"); } ; @@ -117,7 +121,7 @@ some_clause : SOME newl clause_parts { $$ = cons(some, $3); } | SOME newl error { $$ = nil; yybadtoken(yychar, "some clause"); } - | SOME newl END { $$ = nil; + | SOME newl END newl { $$ = nil; yyerror("empty some clause"); } ; @@ -125,7 +129,7 @@ none_clause : NONE newl clause_parts { $$ = cons(none, $3); } | NONE newl error { $$ = nil; yybadtoken(yychar, "none clause"); } - | NONE newl END { $$ = nil; + | NONE newl END newl { $$ = nil; yyerror("empty none clause"); } ; @@ -133,10 +137,18 @@ maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); } | MAYBE newl error { $$ = nil; yybadtoken(yychar, "maybe clause"); } - | MAYBE newl END { $$ = nil; + | MAYBE newl END newl { $$ = nil; yyerror("empty maybe clause"); } ; +cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); } + | CASES newl error { $$ = nil; + yybadtoken(yychar, + "cases clause"); } + | CASES newl END newl { $$ = nil; + yyerror("empty cases clause"); } + ; + collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); } | COLLECT newl clauses UNTIL newl clauses END newl { $$ = list(collect, $3, @@ -181,6 +193,23 @@ elem : TEXT { $$ = string($1); } yybadtoken(yychar, "coll clause"); } ; +define_clause : DEFINE exprs ')' newl + clauses + END newl { $$ = list(define, $2, $5, nao); } + | DEFINE ')' newl + clauses + END newl { $$ = list(define, nil, $4, nao); } + | DEFINE exprs ')' newl + END newl { $$ = list(define, $2, nao); } + | DEFINE ')' newl + END newl { $$ = list(define, nao); } + | DEFINE error { yybadtoken(yychar, "list expression"); } + | DEFINE exprs ')' newl + error { yybadtoken(yychar, "define"); } + | DEFINE ')' newl + error { yybadtoken(yychar, "define"); } + ; + output_clause : OUTPUT o_elems '\n' out_clauses END newl { $$ = list(output, $4, $2, nao); } @@ -209,8 +238,12 @@ out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); } yyerror("match clause in output"); } | maybe_clause { $$ = nil; yyerror("match clause in output"); } + | cases_clause { $$ = nil; + yyerror("match clause in output"); } | collect_clause { $$ = nil; yyerror("match clause in output"); } + | define_clause { $$ = nil; + yyerror("match clause in output"); } | output_clause { $$ = nil; yyerror("match clause in output"); } ; @@ -324,6 +357,8 @@ expr : IDENT { $$ = intern(string($1)); } | NUMBER { $$ = num($1); } | list { $$ = $1; } | regex { $$ = cons(regex_compile($1), $1); } + | chrlit { $$ = $1; } + | strlit { $$ = $1; } ; regex : '/' regexpr '/' { $$ = $2; } @@ -384,6 +419,36 @@ newl : '\n' yyerrok; } ; +strlit : '"' '"' { $$ = null_string; } + | '"' litchars '"' { + if ($2) { + obj_t *len = length($2), *iter, *ix; + $$ = mkustring(len); + for (iter = $2, ix = zero; + iter; + iter = cdr(iter), ix = plus(ix, one)) + { + chr_str_set($$, ix, car(iter)); + } + } else { + $$ = nil; + } + } + | '"' error { yybadtoken(yychar, "string literal"); } + ; + +chrlit : '\'' '\'' { yyerror("empty character literal"); + $$ = nil; } + | '\'' litchars '\'' { $$ = car($2); + if (cdr($2)) + yyerror("multiple characters in " + "character literal"); } + | '\'' error { yybadtoken(yychar, "character literal"); } + ; + +litchars : LITCHAR { $$ = cons(chr($1), nil); } + | LITCHAR litchars { $$ = cons(chr($1), $2); } + ; %% obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts) @@ -415,6 +480,45 @@ obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts) last_parts, empty_parts, nao); } +obj_t *define_transform(obj_t *define_form) +{ + obj_t *sym = first(define_form); + obj_t *args = second(define_form); + + if (define_form == nil) + return nil; + + assert (sym == define); + + if (args == nil) { + yyerror("define requires arguments"); + return define_form; + } + + if (!consp(args) || !listp(cdr(args))) { + yyerror("bad define argument syntax"); + return define_form; + } else { + obj_t *name = first(args); + obj_t *params = second(args); + + if (!symbolp(name)) { + yyerror("function name must be a symbol"); + return define_form; + } + + if (!proper_listp(params)) { + yyerror("invalid function parameter list"); + return define_form; + } + + if (!all_satisfy(params, func_n1(symbolp), nil)) + yyerror("function parameters must be symbols"); + } + + return define_form; +} + obj_t *get_spec(void) { return parsed_spec; @@ -443,12 +547,19 @@ void dump_var(const char *name, char *pfx1, size_t len1, if (len1 >= 112 || len2 >= 112) abort(); - if (stringp(value)) { + if (stringp(value) || chrp(value)) { fputs(name, stdout); fputs(pfx1, stdout); fputs(pfx2, stdout); putchar('='); - dump_shell_string(c_str(value)); + if (stringp(value)) { + dump_shell_string(c_str(value)); + } else { + char mini[2]; + mini[0] = c_chr(value); + mini[1] = 0; + dump_shell_string(mini); + } putchar('\n'); } else { obj_t *iter; @@ -572,6 +683,13 @@ obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value) return bindings; } +obj_t *eval_form(obj_t *form, obj_t *bindings) +{ + if (symbolp(form)) + return assoc(bindings, form); + return cons(t, form); +} + obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, obj_t *pos, obj_t *spec_lineno, obj_t *data_lineno, obj_t *file) @@ -1192,7 +1310,7 @@ repeat_spec_same_data: long reps = 0; if (rest(specline)) - yyerrorlf(1, spec_lineno, "material after skip directive ignored"); + yyerrorlf(1, spec_lineno, "unexpected material after skip directive"); if ((spec = rest(spec)) == nil) break; @@ -1229,7 +1347,7 @@ repeat_spec_same_data: return nil; } else if (sym == trailer) { if (rest(specline)) - yyerrorlf(1, spec_lineno, "material after trailer directive ignored"); + yyerrorlf(1, spec_lineno, "unexpected material after trailer directive"); if ((spec = rest(spec)) == nil) break; @@ -1245,7 +1363,7 @@ repeat_spec_same_data: } else if (sym == block) { obj_t *name = first(rest(first_spec)); if (rest(specline)) - yyerrorlf(1, spec_lineno, "material after block directive ignored"); + yyerrorlf(1, spec_lineno, "unexpected material after block directive"); if ((spec = rest(spec)) == nil) break; uw_block_begin(name, result); @@ -1256,7 +1374,7 @@ repeat_spec_same_data: obj_t *target = first(rest(first_spec)); if (rest(specline)) - yyerrorlf(1, spec_lineno, "material after %s ignored", + yyerrorlf(1, spec_lineno, "unexpected material after %s", c_str(symbol_name(sym))); uw_block_return(target, @@ -1302,7 +1420,9 @@ repeat_spec_same_data: if3(data, cons(data, num(data_lineno)), t)); return nil; } - } else if (sym == some || sym == all || sym == none || sym == maybe) { + } else if (sym == some || sym == all || sym == none || sym == maybe || + sym == cases) + { obj_t *specs; obj_t *all_match = t; obj_t *some_match = nil; @@ -1331,6 +1451,8 @@ repeat_spec_same_data: max_data = new_data; } } + if (sym == cases) + break; } else { all_match = nil; } @@ -1341,8 +1463,8 @@ repeat_spec_same_data: return nil; } - if (sym == some && !some_match) { - yyerrorlf(2, spec_lineno, "some: no clauses matched"); + if ((sym == some || sym == cases) && !some_match) { + yyerrorlf(2, spec_lineno, "some/cases: no clauses matched"); return nil; } @@ -1514,15 +1636,13 @@ repeat_spec_same_data: } else if (sym == bind) { obj_t *args = rest(first_spec); obj_t *pattern = first(args); - obj_t *var = second(args); - obj_t *lookup = assoc(bindings, var); + obj_t *form = second(args); + obj_t *val = eval_form(form, bindings); - if (!var || !symbolp(var)) - yyerrorlf(1, spec_lineno, "bind: bad variable spec"); - else if (!lookup) - yyerrorlf(1, spec_lineno, "bind: unbound source variable"); + if (!val) + yyerrorlf(1, spec_lineno, "bind: unbound variable on right side"); - bindings = dest_bind(bindings, pattern, cdr(lookup)); + bindings = dest_bind(bindings, pattern, cdr(val)); if (bindings == t) return nil; @@ -1581,6 +1701,107 @@ repeat_spec_same_data: break; goto repeat_spec_same_data; + } else if (sym == define) { + obj_t *args = second(first_spec); + obj_t *body = third(first_spec); + obj_t *name = first(args); + obj_t *params = second(args); + + if (rest(specline)) + yyerrorlf(1, spec_lineno, "unexpected material after define"); + + uw_set_func(name, cons(params, body)); + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } else { + obj_t *func = uw_get_func(sym); + + if (func) { + obj_t *args = rest(first_spec); + obj_t *params = car(func); + obj_t *body = cdr(func); + obj_t *piter, *aiter; + obj_t *bindings_cp = copy_alist(bindings); + + if (!equal(length(args), length(params))) { + yyerrorlf(1, spec_lineno, "function %s takes %ld argument(s)", + c_str(sym), c_num(length(params))); + return nil; + } + + for (piter = params, aiter = args; piter; + piter = cdr(piter), aiter = cdr(aiter)) + { + obj_t *param = car(piter); + obj_t *arg = car(aiter); + + if (symbolp(arg)) { + obj_t *existing = assoc(bindings, arg); + if (existing) { + bindings_cp = acons_new(bindings_cp, + param, + cdr(existing)); + } else { + bindings_cp = alist_remove(bindings_cp, cons(param, nil)); + } + } else { + bindings_cp = acons_new(bindings_cp, param, arg); + } + } + + { + uw_block_begin(nil, result); + uw_env_begin; + result = match_files(body, files, bindings_cp, + data, num(data_lineno)); + uw_env_end; + uw_block_end; + + if (!result) { + yyerrorlf(2, spec_lineno, "function failed"); + return nil; + } + + { + cons_bind (new_bindings, success, result); + + for (piter = params, aiter = args; piter; + piter = cdr(piter), aiter = cdr(aiter)) + { + obj_t *param = car(piter); + obj_t *arg = car(aiter); + + if (symbolp(arg)) { + obj_t *newbind = assoc(new_bindings, param); + if (newbind) { + bindings = dest_bind(bindings, arg, cdr(newbind)); + if (bindings == t) + return nil; + } + } + } + + if (consp(success)) { + yyerrorlf(2, spec_lineno, + "function matched; advancing from line %ld to %ld", + data_lineno, c_num(cdr(success))); + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + yyerrorlf(2, spec_lineno, "function consumed entire file"); + data = nil; + } + } + } + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } } } @@ -32,8 +32,10 @@ #include <limits.h> #include <stdarg.h> #include <dirent.h> +#include <setjmp.h> #include "lib.h" #include "gc.h" +#include "unwind.h" #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -44,8 +46,8 @@ obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; obj_t *zeroplus, *optional, *compound, *or; obj_t *skip, *trailer, *block, *next, *fail, *accept; -obj_t *all, *some, *none, *maybe, *collect, *until, *coll; -obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep; +obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; +obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; obj_t *zero, *one, *two, *negone, *maxint, *minint; @@ -506,6 +508,14 @@ obj_t *listp(obj_t *obj) ? t : nil; } +obj_t *proper_listp(obj_t *obj) +{ + while (consp(obj)) + obj = cdr(obj); + + return (obj == nil) ? t : nil; +} + obj_t *length(obj_t *list) { long len = 0; @@ -616,9 +626,25 @@ obj_t *string(char *str) obj_t *mkstring(obj_t *len, obj_t *ch) { char *str = chk_malloc(c_num(len) + 1); + obj_t *s = string(str); memset(str, c_chr(ch), c_num(len)); str[c_num(len)] = 0; - return string(str); + s->st.len = len; + return s; +} + +obj_t *mkustring(obj_t *len) +{ + char *str = chk_malloc(c_num(len) + 1); + obj_t *s = string(str); + s->st.len = len; + return s; +} + +obj_t *init_str(obj_t *str, const char *data) +{ + memcpy(str->st.str, data, c_num(str->st.len)); + return str; } obj_t *copy_str(obj_t *str) @@ -639,10 +665,18 @@ obj_t *length_str(obj_t *str) return str->st.len; } -const char *c_str(obj_t *str) +const char *c_str(obj_t *obj) { - assert (str && str->t.type == STR); - return str->st.str; + assert (obj); + + switch (obj->t.type) { + case STR: + return obj->st.str; + case SYM: + return c_str(symbol_name(obj)); + default: + abort(); + } } obj_t *search_str(obj_t *haystack, obj_t *needle, obj_t *start_num, @@ -721,11 +755,19 @@ obj_t *cat_str(obj_t *list, obj_t *sep) obj_t *item = car(iter); if (!item) continue; - if (!stringp(item)) - return nil; - total += c_num(length_str(item)); - if (len_sep && cdr(iter)) - total += len_sep; + if (stringp(item)) { + total += c_num(length_str(item)); + if (len_sep && cdr(iter)) + total += len_sep; + continue; + } + if (chrp(item)) { + total += 1; + if (len_sep && cdr(iter)) + total += len_sep; + continue; + } + return nil; } str = chk_malloc(total + 1); @@ -735,9 +777,14 @@ obj_t *cat_str(obj_t *list, obj_t *sep) long len; if (!item) continue; - len = c_num(length_str(item)); - memcpy(ptr, c_str(item), len); - ptr += len; + if (stringp(item)) { + len = c_num(length_str(item)); + memcpy(ptr, c_str(item), len); + ptr += len; + } else { + *ptr++ = c_chr(item); + } + if (len_sep && cdr(iter)) { memcpy(ptr, c_str(sep), len_sep); ptr += len_sep; @@ -784,12 +831,41 @@ obj_t *chr(int ch) return obj; } +obj_t *chrp(obj_t *chr) +{ + return (chr && chr->st.type == CHR) ? t : nil; +} + int c_chr(obj_t *chr) { assert (chr && chr->t.type == CHR); return chr->ch.ch; } +obj_t *chr_str(obj_t *str, obj_t *index) +{ + long l = c_num(length_str(str)); + long i = c_num(index); + const char *s = c_str(str); + + assert (i < l); + + return chr(s[i]); +} + +obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr) +{ + long l = c_num(length_str(str)); + long i = c_num(index); + char *s = str->st.str; + + assert (i < l); + + s[i] = c_chr(chr); + + return chr; +} + obj_t *sym_name(obj_t *sym) { assert (sym && sym->t.type == SYM); @@ -1372,6 +1448,30 @@ obj_t *alist_remove(obj_t *list, obj_t *keys) return list; } +obj_t *alist_remove1(obj_t *list, obj_t *key) +{ + obj_t **plist = &list; + + while (*plist) { + if (eq(car(car(*plist)), key)) + *plist = cdr(*plist); + else + plist = cdr_l(*plist); + } + + return list; +} + +obj_t *copy_cons(obj_t *c) +{ + return cons(car(c), cdr(c)); +} + +obj_t *copy_alist(obj_t *list) +{ + return mapcar(func_n1(copy_cons), list); +} + obj_t *mapcar(obj_t *fun, obj_t *list) { list_collect_decl (out, iter); @@ -1512,9 +1612,11 @@ static void obj_init(void) some = intern(string(strdup("some"))); none = intern(string(strdup("none"))); maybe = intern(string(strdup("maybe"))); + cases = intern(string(strdup("cases"))); collect = intern(string(strdup("collect"))); until = intern(string(strdup("until"))); coll = intern(string(strdup("coll"))); + define = intern(string(strdup("define"))); output = intern(string(strdup("output"))); single = intern(string(strdup("single"))); frst = intern(string(strdup("first"))); @@ -1591,10 +1693,10 @@ void obj_print(obj_t *obj, FILE *out) case '\\': fputs("\\\\", out); break; case 27: fputs("\\e", out); break; default: - if (iscntrl(*ptr)) - fprintf(out, "\\%03o", (int) *ptr); - else + if (isprint(*ptr)) putc(*ptr, out); + else + fprintf(out, "\\%03o", (int) *ptr); } } putc('"', out); @@ -1617,10 +1719,10 @@ void obj_print(obj_t *obj, FILE *out) case '\\': fputs("\\\\", out); break; case 27: fputs("\\e", out); break; default: - if (iscntrl(ch)) - fprintf(out, "\\%03o", ch); - else + if (isprint(ch)) putc(ch, out); + else + fprintf(out, "\\%03o", ch); } putc('\'', out); } @@ -1683,6 +1785,7 @@ void init(const char *pn, void *(*oom)(void *, size_t), ? max(maybe_bottom_0, maybe_bottom_1) : min(maybe_bottom_0, maybe_bottom_1)); + uw_init(); obj_init(); } @@ -1721,7 +1824,7 @@ char *snarf_line(FILE *in) size = newsize; } - if (ch == '\n') { + if (ch == '\n' || ch == EOF) { buf[fill++] = 0; break; } @@ -159,8 +159,8 @@ extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; extern obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; extern obj_t *zeroplus, *optional, *compound, *or; extern obj_t *skip, *trailer, *block, *next, *fail, *accept; -extern obj_t *all, *some, *none, *maybe, *collect, *until, *coll; -extern obj_t *output, *single, *frst, *lst, *empty, *repeat, *rep; +extern obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; +extern obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; extern obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; extern obj_t *zero, *one, *two, *negone, *maxint, *minint; @@ -210,6 +210,7 @@ obj_t *consp(obj_t *obj); obj_t *nullp(obj_t *obj); obj_t *atom(obj_t *obj); obj_t *listp(obj_t *obj); +obj_t *proper_listp(obj_t *obj); obj_t *length(obj_t *list); obj_t *num(long val); long c_num(obj_t *num); @@ -226,6 +227,8 @@ obj_t *max2(obj_t *anum, obj_t *bnum); obj_t *min2(obj_t *anum, obj_t *bnum); obj_t *string(char *str); obj_t *mkstring(obj_t *len, obj_t *ch); +obj_t *mkustring(obj_t *len); /* must initialize immediately with init_str! */ +obj_t *init_str(obj_t *str, const char *); obj_t *copy_str(obj_t *str); obj_t *stringp(obj_t *str); obj_t *length_str(obj_t *str); @@ -239,7 +242,10 @@ obj_t *cat_str(obj_t *list, obj_t *sep); obj_t *trim_str(obj_t *str); obj_t *string_lt(obj_t *astr, obj_t *bstr); obj_t *chr(int ch); +obj_t *chrp(obj_t *str); int c_chr(obj_t *chr); +obj_t *chr_str(obj_t *str, obj_t *index); +obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr); obj_t *sym_name(obj_t *sym); obj_t *make_sym(obj_t *name); obj_t *intern(obj_t *str); @@ -282,6 +288,9 @@ void cobj_print_op(obj_t *, FILE *); /* Print function for struct cobj_ops */ obj_t *assoc(obj_t *list, obj_t *key); obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value); obj_t *alist_remove(obj_t *list, obj_t *keys); +obj_t *alist_remove1(obj_t *list, obj_t *key); +obj_t *copy_cons(obj_t *cons); +obj_t *copy_alist(obj_t *list); obj_t *mapcar(obj_t *fun, obj_t *list); obj_t *mappend(obj_t *fun, obj_t *list); obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun); @@ -21,7 +21,7 @@ .\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED .\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. -.TH txr 1 2009-09-09 "txr v. 013" "Text Extraction Utility" +.TH txr 1 2009-09-09 "txr v. 014" "Text Extraction Utility" .SH NAME txr \- text extractor .SH SYNOPSIS @@ -601,8 +601,9 @@ The general syntax of a directive is: @EXPR where expr is a parenthesized list of subexpressions. A subexpression -is an symbol, number, regular expression, or a parenthesized expression. -So, examples of valid directives are: +is an symbol, number, string literal, character literal, regular expression, or +a parenthesized expression. So, examples of syntactically valid directives +are: @(banana) @@ -610,11 +611,18 @@ So, examples of valid directives are: @( a (b (c d) (e ) )) + @("apple" 'b' 3) + @(a /[a-z]*/ b) A symbol is lexically the same thing as a variable and the same rules apply. Tokens that look like numbers are treated as numbers. +String and character literals are delimited by double and single quotes, +respectively, and may not span multiple lines. Character literals must contain +exactly one character. Character and numeric escapes may be used within +literals to escape the quotes, and to denote control characters. + Some directives are involved in structuring the overall syntax of the query. There are syntactic constraints that depend on the directive. For instance the @@ -630,7 +638,7 @@ Continue matching in another file. .IP @(block) The remaining query is treated as an anonymous or named block. Blocks may be referenced by @(accept) and @(fail) directives. -Blocks are discussed in the section Blocks below. +Blocks are discussed in the section BLOCKS below. .IP @(skip) Treat the remaining query as a subquery unit, and search the lines of @@ -651,7 +659,15 @@ Match some clauses in parallel. Each one must match. Match some clauses in parallel. None must match. .IP @(maybe) -Match some clauses in parallel. None must match. +Match some clauses in parallel, which may or may not match. +No failure occurs if none match. + +.IP @(cases) +Match some clauses sequentially, stopping if one of them +matches successfully. + +.IP @(define NAME ( ARGUMENTS ...)) +Introduces a function. Functions are discussed in the FUNCTIONS section below. .IP @(collect) Search the data for multiple matches of a clause. Collect the @@ -671,17 +687,17 @@ Separator of clauses for @(some), @(all), and @(none). Equivalent to @(and). Choice is stylistic. .IP @(end) -Required terminator for @(some), @(all), @(none), @(maybe), @(collect), -@(output), and @(repeat). +Required terminator for @(some), @(all), @(none), @(maybe), @(cases), +@(collect), @(output), and @(repeat). .IP @(fail) Terminate the processing of a block, as if it were a failed match. -Blocks are discussed in the section Blocks below. +Blocks are discussed in the section BLOCKS below. .IP @(accept) Terminate the processing of a block, as if it were a successful match. What bindings emerge may depend on the kind of block: collect -has special semantics. Blocks are discussed in the section Blocks below. +has special semantics. Blocks are discussed in the section BLOCKS below. .IP @(flatten) Normalizes a set of specified variables to one-dimensional lists. Those @@ -890,7 +906,7 @@ line "a dark". The @(some) clause combines the text line "it", and a @(none) clause which contains just one clause consisting of the line "was". -The semantics of the some, all, none and maybe directives is: +The semantics of the some, all, none, maybe and cases directives is: .IP @(all) Each of the clauses is matched at the current position. If any of the @@ -914,12 +930,20 @@ The directive succeeds even if all of the clauses fail. Whatever bindings are found in any of the clauses are retained. -When a @(some) or @(all) directive matches successfully, or a @(maybe) -directive matches something, the query advances by the greatest number of lines -matched in any of the subclauses. For instance if there are two subclauses, and -one of them matches three lines, but the other one matches five lines, then the -overall clause is considered to have made a five line match at its position. If -more directives follow, they begin matching five lines down from that position. +.IP @(cases) +The clauses are matched, in order, at the current position. +If any clause matches, the matching stops and the bindings +collected from that clause are retained. Any remaining clauses +after that one are not processed. If no clause matches, the +directive fails, and produces no bindings. + +When a @(some), @(all), or @(cases) directive matches successfully, or a +@(maybe) directive matches in at least one of its clauses, the query advances +by the greatest number of lines matched in any of the subclauses. For instance +if there are two subclauses, and one of them matches three lines, but the other +one matches five lines, then the overall clause is considered to have made a +five line match at its position. If more directives follow, they begin matching +five lines down from that position. .SS The Collect Directive @@ -1253,6 +1277,15 @@ to match B, or the bind fails. Matching means that either - A and B are lists and are either identical, or one is found as substructure within the other. +The right hand side does not have to be a variable. It may be some other +object, like a string, or list of strings, et cetera. For instance + + @(bind A "ab\tc") + +will bind the string "ab\tc" (the letter a, b, a tab character, and c) +to the variable A if A is unbound. If A is bound, this will fail unless +A already contains an identical string. + The left hand side of a bind can be a nested list pattern containing variables. The last item of a list at any nesting level can be preceded by a dot, which means that the variable matches the rest of the list from that position. @@ -1280,7 +1313,8 @@ The @(block NAME) directive introduces a named block, except when the name is the word nil. The @(block) directive introduces an unnamed block, equivalent to @(block nil). -The @(skip) and @(collect) directives introduce implicit anonymous blocks. +The @(skip) and @(collect) directives introduce implicit anonymous blocks, +as do function bodies. .SS Block Scope @@ -1384,12 +1418,12 @@ that block until this point emerge from that block. .IP @(accept) Immediately terminate the innermost enclosing anonymous block, as if -that block successfully mached. Any bindings established within +that block successfully matched. Any bindings established within that block until this point emerge from that block. If the implicit block introduced by @(skip) is terminated in this manner, this has the effect of causing the skip itself to succeed, as if -all of the trailing material succesfully matched. +all of the trailing material successfully matched. If the implicit block associated with a @(collect) is terminated this way, then the collection stops. All bindings collected in the current iteration of @@ -1544,6 +1578,253 @@ The second clause grabs four lines, which is the longest match. And so, the next line of input available for matching is 5, which goes to the @second variable. +.SH FUNCTIONS + +.SS Introduction + +.B txr +functions allow a query to be structured to avoid repetition. +On a theoretical note, because +.B txr +functions support recursion, functions enable txr to match some +kinds of patterns which exhibit self-embedding, or nesting, +and thus cannot be matched by a regular language. + +Functions in +.B txr +are not exactly like functions in mathematics or functional languages, and are +not like procedures in imperative programming languages. They are not exactly +like macros either. What it means for a +.B txr +function to take arguments and produce a result is different from +the conventional notion of a function. + +A +.B txr +function may have one or more parameters. When such a function is invoked, an +argument must be specified for each parameter. However, a special behavior is +at play here. Namely, some or all of the argument expressions may be unbound +variables. In that case, the corresponding parameters behave like unbound +variables also. Thus +.B txr +function calls can transmit the "unbound" state from argument to parameter. + +It should be mentioned that functions have access to all bindings that are +visible in the caller; functions may refer to variables which are not +mentioned in their parameter list. + +With regard to returning, +.B txr +functions are also unconventional. If the function fails, then the function +call is considered to have failed. The function call behaves like a kind of +match; if the function fails, then the call is like a failed match. + +When a function call succeeds, then the bindings emanating from that function +are processed specially. Firstly, any bindings for variables which do not +correspond to one of the function's parameters are thrown away. Functions may +internally bind arbitrary variables in order to get their job done, but only +those variables which are named in the function argument list may propagate out +of the function call. Thus, a function with no arguments can only indicate +matching success or failure, but not produce any bindings. Secondly, +variables do not propagate out of the function directly, but undergo +a renaming. For each parameter which went into the function as an unbound +variable (because its corresponding argument was an unbound variable), +if that parameter now has a value, that value is bound onto the corresponding +argument. + +Example: + + @(define collect_words (list)) + @(coll)@{list /[^ \t]/}@(end) + @(end) + +The above function "collect_words" contains a query which collects words from a +line (sequences of characters other than space or tab), into the list variable +called "list". This variable is named in the parameter list of the function, +therefore, its value, if it has one, is permitted to escape from the function +call. + +Suppose the input data is: + + Fine summer day + +and the function is called like this: + + @(collect_words wordlist) + +The result is: + + wordlist[0]=Fine + wordlist[1]=summer + wordlist[1]=day + +How it works is that in the function call @(collect_words wordlist), +"wordlist" is an unbound variable. The parameter corresponding to that +unbound variable is the parameter "list". Therefore, that parameter +is unbound over the body of the function. The function body collects the +words of "Fine summer day" into the variable "list", and then +yields the that binding. Then the function call completes by +noticing that the function parameter "list" now has a binding, and +that the corresponding argument "wordlist" has no binding. The binding +is thus transferred to the "wordlist" variable. After that, the +bindings produced by the function are thrown away. The only enduring +effects are: + +.IP - +the function matched and consumed some input; and + +.IP - +the function succeeded; and + +.IP - +the wordlist variable now has a binding. +.PP + +Another way to understand the parameter behavior is that function +parameters behave like proxies which represent their arguments. If an argument +is an established value, such as a character string or bound variable, the +parameter is a proxy for that value and behaves just like that value. If an +argument is an unbound variable, the function parameter acts as a proxy +representing that unbound variable. The effect of binding the proxy is +that the variable becomes bound, an effect which is settled when the +function goes out of scope. + +Within the function, both the original variable and the proxy are +visible simultaneously, and are independent. What if a function binds both of +them? Suppose a function has a parameter called P, which is called +with an argument A, and then in the function @A and @P are bound. This is +permitted, and they can even be bound to different values. However, when the +function terminates, the local binding of A simply disappears (because, +remember, the symbol A is not a member of the list of parameters). +Only the value bound to P emerges, and is bound to A, which still appears +unbound at that point. + +.SS Definition Syntax + +A function definition begins with a @(define ...) directive which must be the +only element in its line. The define must be followed by a symbol, which is the +name of the function being defined. After the symbol, there is a parenthesized +optional argument list. If there is no such list, or if the list is specified +as () or the symbol "nil" then the function has no parameters. Examples of +valid define syntax are: + + @(define foo) + @(define bar ()) + @(define match (a b c)) + +The define directive may be followed directly by the @(end) directive, +also on a line by itself, in which case the function has an empty body. +Or it may be followed by one or more query lines and then @(end). +What is between a @(define ...) and its matching @(end) constitutes the +function body. + +Functions may be nested within function bodies. Such local functions have +dynamic scope. They are visible in the function body in which they are defined, +and in any functions invoked from that body. + +The body of a function is an anonymous block. (See BLOCKS above). + +The following trivial function b produces no bindings and has a body which +simply matches the line "begin". + + @(define b) + begin + @(end) + +Thus the call: + + @(b) + +matches an input line "begin". + +.SS Call Syntax + +A function is invoked by compound directive whose first symbol is the name of +that function. Additional elements in the directive are the arguments. +Arguments may be symbols, or other objects like string and character +literals. + +Example: + + Query: @(define pair (a b)) + @a @b + @(end) + @(pair first second) + @(pair "ice" cream) + + Data: one two + ice milk + + Output: first="one" + second="two" + cream="milk" + +The first call to the function takes the line "one two". The parameter "a" +takes "one" and parameter b takes "two". These are rebound to the arguments +first and second. The second call to the function binds the a parameter +to the word "ice", and the b is unbound, because the +corresponding argument "cream" is unbound. Thus inside the function, @a +is forced to match "ice". Then a space is matched and @b collects the text +"milk". When the function returns, the unbound "cream" variable gets this value. + +If a symbol occurs multiple times in the argument list, it constrains +both parameters to bind to the same value. That is to say, all parameters +which, in the body of the function, bind a value, and which are all derived +from the same argument symbol must bind to the same value. This is settled when +the function terminates, not while it is matching. Example: + + Query: @(define pair (a b)) + @a @b + @(end) + @(pair same same) + + Data: one two + + Output: [query fails, prints "false"] + +.SS Nested Functions + +Function definitions may appear in a function. Such definitions +are visible in all functions which are invoked from the body +(and not necessarily enclosed in the body). In other words, the +scope is dynamic, not lexical. Inner definitions shadow outer +definitions. This means that a caller can redirect the function +calls that take place in a callee, by defining local functions +which capture the references. + +Example: + + Query: @(define which) + @ (fun) + @(end) + @(define fun) + @ (output) + toplevel fun! + @ (end) + @(end) + @(define callee) + @ (define fun) + @ (output) + local fun! + @ (end) + @ (end) + @ (which) + @(end) + @(callee) + @(which) + + Output: local fun! + toplevel fun! + +Here, the function "which" is defined which calls "fun". +A toplevel definition of "fun" is introduced which +outputs "toplevel fun!". Then, within the func +The function "callee" provides its own local definition +of "fun" which outputs "local fun!" before calling "which". +When callee is invoked, it calls @(which), whose @(fun) call is routed to +callee's local definition. When @(which) is called directly from the top +level, its @(fun) call goes to the toplevel definition. + .SH OUTPUT A @@ -30,10 +30,12 @@ #include <setjmp.h> #include <dirent.h> #include "lib.h" +#include "gc.h" #include "unwind.h" static uw_frame_t *uw_stack; static uw_frame_t *uw_exit_point; +static uw_frame_t toplevel_env; static void uw_unwind_to_exit_point() { @@ -49,9 +51,16 @@ static void uw_unwind_to_exit_point() case UW_BLOCK: longjmp(uw_stack->bl.jb, 1); break; + case UW_ENV: /* env frame cannot be exit point */ + abort(); + default: + abort(); } +} - abort(); +void uw_init(void) +{ + protect(&toplevel_env.ev.func_bindings, 0); } void uw_push_block(uw_frame_t *fr, obj_t *tag) @@ -63,6 +72,46 @@ void uw_push_block(uw_frame_t *fr, obj_t *tag) uw_stack = fr; } +static uw_frame_t *uw_find_env(void) +{ + uw_frame_t *fr; + + for (fr = uw_stack; fr != 0; fr = fr->uw.up) { + if (fr->uw.type == UW_ENV) + break; + } + + return fr ? fr : &toplevel_env; +} + +void uw_push_env(uw_frame_t *fr) +{ + uw_frame_t *prev_env = uw_find_env(); + fr->ev.type = UW_ENV; + + if (prev_env) { + fr->ev.func_bindings = copy_alist(prev_env->ev.func_bindings); + } else { + fr->ev.func_bindings = nil; + } + + fr->ev.up = uw_stack; + uw_stack = fr; +} + +obj_t *uw_get_func(obj_t *sym) +{ + uw_frame_t *env = uw_find_env(); + return cdr(assoc(env->ev.func_bindings, sym)); +} + +obj_t *uw_set_func(obj_t *sym, obj_t *value) +{ + uw_frame_t *env = uw_find_env(); + env->ev.func_bindings = acons_new(env->ev.func_bindings, sym, value); + return value; +} + void uw_pop_frame(uw_frame_t *fr) { assert (fr == uw_stack); @@ -27,7 +27,7 @@ typedef union uw_frame uw_frame_t; typedef enum uw_frtype uw_frtype_t; -enum uw_frtype { UW_BLOCK }; +enum uw_frtype { UW_BLOCK, UW_ENV }; struct uw_common { uw_frame_t *up; @@ -42,25 +42,46 @@ struct uw_block { jmp_buf jb; }; +struct uw_dynamic_env { + uw_frame_t *up; + uw_frtype_t type; + obj_t *func_bindings; +}; + union uw_frame { struct uw_common uw; struct uw_block bl; + struct uw_dynamic_env ev; }; +void uw_init(void); void uw_push_block(uw_frame_t *, obj_t *tag); +void uw_push_env(uw_frame_t *); +obj_t *uw_get_func(obj_t *sym); +obj_t *uw_set_func(obj_t *sym, obj_t *value); obj_t *uw_block_return(obj_t *tag, obj_t *result); void uw_pop_frame(uw_frame_t *); + #define uw_block_begin(TAG, RESULTVAR) \ obj_t *RESULTVAR = nil; \ { \ - uw_frame_t uw_fr; \ - uw_push_block(&uw_fr, TAG); \ - if (setjmp(uw_fr.bl.jb)) { \ - RESULTVAR = uw_fr.bl.result; \ + uw_frame_t uw_blk; \ + uw_push_block(&uw_blk, TAG); \ + if (setjmp(uw_blk.bl.jb)) { \ + RESULTVAR = uw_blk.bl.result; \ } else { #define uw_block_end \ } \ - uw_pop_frame(&uw_fr); \ + uw_pop_frame(&uw_blk); \ + } + +#define uw_env_begin \ + { \ + uw_frame_t uw_env; \ + uw_push_env(&uw_env) + +#define uw_env_end \ + uw_pop_frame(&uw_env); \ } |