diff options
-rw-r--r-- | ChangeLog | 430 | ||||
-rw-r--r-- | Makefile | 28 | ||||
-rw-r--r-- | dep.mk | 9 | ||||
-rw-r--r-- | depend.txr | 11 | ||||
-rw-r--r-- | gc.c | 43 | ||||
-rw-r--r-- | gc.h | 4 | ||||
-rw-r--r-- | lib.c | 571 | ||||
-rw-r--r-- | lib.h | 57 | ||||
-rw-r--r-- | match.c (renamed from extract.y) | 1029 | ||||
-rw-r--r-- | match.h | 27 | ||||
-rw-r--r-- | parser.h (renamed from extract.h) | 11 | ||||
-rw-r--r-- | parser.l (renamed from extract.l) | 495 | ||||
-rw-r--r-- | parser.y | 593 | ||||
-rw-r--r-- | regex.c | 17 | ||||
-rw-r--r-- | stream.c | 641 | ||||
-rw-r--r-- | stream.h | 48 | ||||
-rw-r--r-- | txr.1 | 419 | ||||
-rw-r--r-- | txr.c | 336 | ||||
-rw-r--r-- | txr.h | 33 | ||||
-rw-r--r-- | unwind.c | 235 | ||||
-rw-r--r-- | unwind.h | 92 |
21 files changed, 3712 insertions, 1417 deletions
@@ -1,3 +1,433 @@ +2009-10-14 Kaz Kylheku <kkylheku@gmail.com> + + Version 015 + + Code restructuring. + + Corruption bugfix in gc-debugging code. + + The nil symbol more properly implemented. + + Semantics change: collect treated as a failed match if it + does not collect anything. + + Bugfix in function argument reconciliation: must only + be done for unbound parameters. + + New @(local) directive (synonym of forget) for expressing + local variables in functions. + + Quasi-literals: backquote-delimited literals that contain interpolated + variables. Useful in next, output, bind and function calls. + + Hygiene: some implementation-inserted syntax tree elements + are now in their own namespace so they can't clash with user-defined + constructs. + + Rewritten streams implementation. + + Exception handling: try/catch/finally. + + Exceptions used internally and externally. + + File errors are mapped to exceptions now. + + Hash bang (#!) scripting supported. + + New -f paramater, allowing entire query to be specified + as argument rather than from a file or stdin. + + * txr.c: (version): Bump to 014. + * txr.1: Bump version to 014. More documentation about + exceptions. + +2009-10-14 Kaz Kylheku <kkylheku@gmail.com> + + Support for hash bang execution, and embedding query + in a command line argument. + + * txr.c (remove_hash_bang_line): New function. + (main): Added -f option. Initialize and gc-protect yyin_stream, and + use it in all places where yyin was previously set up. + Diagnose when -a, -D and -f are wrongly clumped with other options. + Remove the first line of the query if it starts with #!. + * parser.h (yyin): Declaration removed. + (yyin_stream): Declared. + * parser.l (YY_INPUT): Macro defined. + (yyin_stream): New global. + * stream.c (string_in_get_line, string_in_get_char): Bugfix: + wrong length function used. + (string_in_ops): Bugfix: wrong get_char function wired in. + (get_char): New function. + * stream.h (get_char): Declared. + * txr.1: -f option documented. + +2009-10-14 Kaz Kylheku <kkylheku@gmail.com> + + * lib.c (obj_print, obj_pprint): Print #<garbage ...> + syntax if an object has a bad type code; do not just return + without printing anything. + +2009-10-14 Kaz Kylheku <kkylheku@gmail.com> + + Code cleanup and documentation. + + * txr.1: Start documenting quasiliterals, exception handling and + nothrow in next and output. + * parser.y (catch_clauses_opt): Add missing empty production, so that + a try block doesn't have to have a finally clause. + * lib.h (or2, or3, or4): New macros. + * match.c (match_files): Allow output and next forms which just + have one argument that is nothrow, as documented. + * stream.c common_vformat, string_out_vcformat, string_out_vcformat, + make_string_output_stream, make_dir_stream, close_stream, get_line, + vformat, vcformat, format, cformat, put_string, put_cstring, + put_char): Switch to new style type assertions. + +2009-10-13 Kaz Kylheku <kkylheku@gmail.com> + + New syntax for next and output directives, taking advantage + of quasi-literals. Non-throwing behavior can be specified in + both using nothrow. The old syntax is supported, and has + the old semantics (non-throwing). Hence, the test cases + pass again without modification. + + File open errors thrown as file_error type. + + * lib.c (nothrow, file_error): New symbol globals. + (obj_init): New symbols interned. + * lib.h (nothrow, file_error): Declared. + * match.c (file_err): New function. + (eval_form): Bugfix: if input is nil, or an atom other than a symbol, + return the value hoisted into a cons. A nil return strictly means, + unbound variable. + (match_files): Support new syntax for next and and output. + Throw open errors as file_err. + * parser.l (grammar): Change how OUTPUT is returned to the + style similar to DEFINE, so interior forms can be parsed. + * parser.y (grammar): Fix up output_clause with new syntax. + * unwind.c (uw_throw): Do not abort on unhandled file_error, + but terminate with a failed status. + (uw_init): Register file_error as a subtype of error exception. + +2009-10-13 Kaz Kylheku <kkylheku@gmail.com> + + First cut at working try/catch/finally implementation. + + * lib.c (try, catch, finally): New symbol globals. + (obj_init): New symbols interned. + * lib.h (try, catch, finally: Declared. + * parser.y (TRY, CATCH, FINALLY): New tokens. + (try_clause, catch_clauses_opt): New nonterminal grammar symbols. + * parser.l (yybadtoken): TRY, CATCH and FINALLY handled. + (grammar): New cases for try, catch and finally. + * unwind.h (struct uw_catch): New member called visible. + (uw_continue): New parameter added. + (uw_exception_subtype_p): Declared. + (uw_catch_begin): Macro rewritten to use switch logic + around setjmp. + (uw_do_unwind, uw_catch, uw_unwind): New macros. + (uw_catch_end): Rewritten to close switch, and automatically + continue the unwinding if the block is entered as an unwind. + * unwind.c (uw_unwind_to_exit_point): Exception catching + frames made invisible via new flag prior to control passing to them. + longjmp code 2 introduced for distinguishing a catch from + an unwind. Visibility flag is checked and invisible frames + are skipped. + (uw_push_catch): cont member of the unwind frame initialized to zero. + (exception_subtype_p): Renamed to uw_exception_subtype_p, changed + to extern. Fixed wrong order of arguments to assoc. + (uw_throw): Honor visibility flag: do not consider invisible + catch frames. + (uw_register_subtype): sup/sub mixup bugfix. + (uw_continue): Takes extra argument: the continuation frame + that (re)establishes the exit point for the unwinding. + This allows nested unwinding action to take place in a finally, + and then to continue to the original exit point. + * match.c (match_files): Handling for try directive added. + +2009-10-13 Kaz Kylheku <kkylheku@gmail.com> + + * parser.l (yybadtoken): Bugfix: added missing LITCHAR case. + * unwind.h (internal_error): Fixed broken macro. + * match.c (match_line, match_files): sem_error bugfix: used %a instead + of ~a. + (match_files): Wrap block handler in compound statement, otherwise the + macroexpansion declares a variable in the middle of a statement, which + is a gcc extension to C90 (or a C99 feature, + but we aren't using C99). + +2009-10-08 Kaz Kylheku <kkylheku@gmail.com> + + Exception handling for query errors. + Verbose logging decoupled from yyerror functions. + Superior object-oriented formatting used for cleaner code. + + * lib.c (query_error): New symbol global. + (obj_init): New symbol interned. + * lib.h (query_error): Declared. + * match.c (output_produced): Variable changed to external linkage. + (debugf, debuglf, debuglcf, sem_error): New static functions. + (dest_bind, match_line, match_files): Regtargetted away from + the yyerrorf and yyerrorlf functions to use debugf, + debuglf, debuglcf for logging and sem_error for throwing + query errors as exceptions. + * parser.h (spec_file_str): New global declared. + * parser.l (yyerror): Calls yyerrorf instead of yyerrorlf; + lets yyerrorf increment error count. + (yyerrorf): Loses level argument. + (yyerrorlf): Function removed. + (yybadtoken): Retargetted from yyerrorlf to yyerrorf. + (grammar): yyerrorf call fixed up. + * txr.c (spec_file_str): New global defined. + (main): Protects new global against gc, and initializes it. + * unwind.c (uw_throw): If an unhandled exception is of + type query_error, it results in an exit rather than abort. + The false string is conditionally printed. + (uw_init): Register query_error as subtype of error. + +2009-10-08 Kaz Kylheku <kkylheku@gmail.com> + + Exception handling framework implemented. + + * lib.c (cobj_t, error, type_error, internal_err, numeric_err, + range_err): New symbol globals. + (prog_string): New string global. + (code2type): New static function. + (typeof): Rewritten using code2type. + (type_check, type_check2): New static functions. + (car, cdr, list, plus, minus, length_str, chr_p, chr_str, + chr_str_set, apply, funcall, funcall1, funcall2, + vec_get_fill, vecref_l, lazy_stream_cons): Checks and + assertions rewritten using new functions and macros. + (obj_init): prog_string protected from gc. + New symbols interned. + (init): uw_init() call moved after obj_init() because + it needs stable symbols. + * lib.h (cobj_t, error, type_error, internal_err, numeric_err, + range_err, prog_string, type_check, type_check2): Declared. + * match.c (dump_var, complex_snarf, complex_close): abort + calls rewritten to use exception handling. + * regex.c (nfa_all_states, nfa_closure, nfa_move): Likewise. + * stream.c (string_out_vcformat): Bugfix: fill index not updated. + (make_string_output_stream): Bugfix: initial buffer not null terminated. + (get_string_from_stream): New function. + * stream.h (get_string_from_stream): Declared. + * txr.c (main): Some error prints turned to throws. + * unwind.c (unwind_to_exit_point): Supports UW_CATCH frames, + whose finalization logic has to be invoked during unwinding, + and as target exit points. + (uw_init): Installs exception symbols into + subtyping hirearchy. + (uw_push_catch, exception_subtype_p, uw_throw, uw_throwf, + uw_errorf, uw_throwcf, uw_errorcf, type_mismatch, + uw_register_subtype, uw_continue): New functions. + (exception_subtypes): New static global. + * unwind.h (noreturn): New macro, conditionally defined on __GNUC__. + (enum uw_frtype): New member, UW_CATCH. + (struct uw_catch): New struct type. + (union uw_frame): New member, ca. + (uw_push_catch, exception_subtype_p, uw_throw, uw_throwf, + uw_errorf, uw_throwcf, uw_errorcf, type_mismatch, + uw_register_subtype, uw_continue): New functions declared. + (uw_catch_begin, uw_catch_end, internal_error, type_assert, + bug_unless, numeric_assert, range_bug_unless): New macros. + +2009-10-07 Kaz Kylheku <kkylheku@gmail.com> + + Rewritten streams implementation. + + * stream.h, stream.c: New files. + * Makefile (OBJS): New object file stream.o. + * dep.mk: Dependencies updated. + * gc.c (finalize): STREAM case removed. Call destroy only if not null. + (mark_obj): STREAM case removed. + * lib.c (push, pop): New functions. + (equal): STREAM case removed. + (sub_str): Allow from parameter to be nil, defaulting to zero. + (stdio_line_read, stdio_line_write, stdio_close, stdio_line_stream, + pipe_close, pipe_line_stream, dirent_read, dirent_close, + dirent_stream, stream_get, stream_pushback, stream_put, + stream_close): Functions removed. + (stream_ops dirent_stream_ops, stdio_line_stream_ops, + struct stream_ops, pipe_line_stream_op): Static structs removed. + (lazy_stream_func, lazy_stream_cons): Retargetted to new streams. + (cobj_print_op): Likewise. + (init): Disables and restores GC, instead of doing it in obj_init. + (obj_print): Retargetted to new streams. + (obj_pprint): New function. + (obj_init): Does not manipulate gc_state any more, moved to init. + Call to stream_init added. + (d, snarf): Retargetted to new streams. + (snarf_line): Removed, now appears in stream.c, retargetted + to new streams. + * lib.h (enum type): STREAM removed. + (struct stream, struct stream_ops): Removed. + (struct cobj_ops): Retargetted to new streams. + (union obj): sm member removed. + (push, pop, obj_pprint): Declared. + (stdio_line_stream, pipe_line_stream, dirent_stream, stream_get, + stream_pushback, stream_put, stream_close, snarf_line): Removed. + (cobj_print_op, dump, snarf): Modified. + * match.c (dump_bindings, complex_snarf): Retargetted to new streams. + * txr.c (main): format used to dump bindings and specs in verbose mode. + +2009-10-07 Kaz Kylheku <kkylheku@gmail.com> + + Implemented quasi-literals: string literals which may + contain variables to be interpolated. + + Also, took care of a hygiene problem with respect to some + parser-generated forms, which must be invisible to the user. + + * Makefile (LEX_DB_FLAGS): New variable; helpful + in generating a lexical analyzer with debug tracing. + * parser.l (nesting, closechar): Static variables removed. + (char_esc): Add \` escape for quasi-literals. + (stack): New %option, to generate a scanner which has + a start condition stack. + (QSILIT): New start condition. + (grammar): Refactored to use start condition stacks. + Quasi-literal lexical analysis added. + * parser.y (lit_char_helper): New function, for factoring out + some common logic between string literals and quasi literals. + (quasilit, quasi_item, quasi_items): New grammar symbols and + production rules. + (strlit): Rule shortened with new helper function. + Bugfix: error case assigns nil to $$. + (chrlist): Bugfix: error case assigns nil to $$. + (LITCHAR): Added to %prec table to fix shift-reduce problem. + (expr): Production now can generate a quasilit. + * lib.c (quasi): New symbol global. + (obj_init): Intern quasi as "$quasi", so the user can + make a function called quasi. Also, var and regex are now interned + with the names "$var" and "$regex" for the same reason. + * lib.h (quasi): Declared. + * match.c (eval_form): Rewritten with recursive processing + to handle deeply embedded variables, as well as quasi-strings. + (subst_vars): Handles quasi-strings. + (match_files): Function calls now use eval_form for function + argument evaluation, except of course in the special case that if an + argument is a symbol, it may be unbound. + +2009-10-06 Kaz Kylheku <kkylheku@gmail.com> + + * match.c (match_files): No error message for merging to + a symbol which is already bound; the existing behavior + is to destructively update the binding, which is useful, + and so the error is pointless. + +2009-10-06 Kaz Kylheku <kkylheku@gmail.com> + + Introduce local as synonym to forget. It does exactly the + same thing; a previous binding is forgotten. This spelling + is nicer for functions. + * lib.h (local): Declared. + * lib.c (local): Defined. + (obj_init): New symbol interned. + +2009-10-06 Kaz Kylheku <kkylheku@gmail.com> + + Bugfix: function parameter reconciliation (after function call + completes) must only consider the unbound parameters. + Otherwise false mismatches result if the function destructively + manipulated some bindings of bound parameters. + E.g. @(define foo (a)) is called as @(foo "bar") and internally + it rebinds bound parameter a to "baz". This situation is + not a mismatch. The rebinding is thrown away. + + * match.c (match_files): When processing a function call, + keep an alist which associates arguments and unbound parameters. + Then, after the function call, process the alist, rather + than the full parameter list. + +2009-10-06 Kaz Kylheku <kkylheku@gmail.com> + + Semantics change: collect fails if it does not collect + anything. Non-failing behavior can be obtained by + wrapping with @(maybe) (but no such workaround for coll yet). + + * match.c (match_line): Return nil if coll collected nothing. + (match_files): Return nil if collect collected nothing. + + +2009-10-06 Kaz Kylheku <kkylheku@gmail.com> + + Bugfix: nil must be on the list of interned symbols. + + * lib.c (sym_name): Function removed. This was like + symbol_name but did not accept nil. + (intern): Use symbol_name instead of sym_name, allowing + nil to be on the list of interned symbols. + (obj_init): Add nil to interned_syms list. + (nil_string): Changed from "NIL" to "nil". + * match.c (dest_bind): Treat nil as a value, not a symbol. + (match_files): Treat nil as a value when it's + a function argument. + +2009-10-06 Kaz Kylheku <kkylheku@gmail.com> + + * gc.c (more): Bugfix: free_tail was incorectly calculated, + thereby destroying the validity of the FIFO recycling algorithm + used when GC debugging is enabled. This showed up as mysterious + assertions and crashes. + (mark_obj): Do not abort if a free object is marked. + (mark_mem_region): Renamed bottom and top variables to low + and high. The naming was confusing inverted relative + to that in the caller. + (sweep): Abort if somehow a block is free and marked reachable. + +2009-10-06 Kaz Kylheku <kkylheku@gmail.com> + + * match.c (match_files): Fixed nonexitent symbol warning for merge + directive (complained about wrong symbol). + +2009-10-05 Kaz Kylheku <kkylheku@gmail.com> + + Refactoring matching code. + + * lib.h (cobj_ops): New function pointer, mark. + * gc.c (mark_obj): For a COBJ type, call the mark function + if the pointer is non-null. + (gc_mark): New public function, wrapper that calls + the private mark_obj. Implementations of mark for COBJ + objects will need to call this. + * gc.h (mark_obj): Declared. + * regex.c (regex_obj_ops): Explicitly initialize mark function pointer + to null. + +2009-10-05 Kaz Kylheku <kkylheku@gmail.com> + + Code restructuring. + + * Makefile (match.o): New object file. + (depend): New rule for generating dep.mk, using txr. + (lib.o, lex.yy.o, regex.o, y.tab.o unwind.o, txr.o, match.o, gc.o): + Dependency rules removed. + * dep.mk: New make include file; captures dependencies. Generated + by new depend rule in Makefile, using txr. + * depend.txr: Txr query to generate dependencies. + * extract.y: File renamed to parser.y + (output_produced): Variable removed, + moved into new file match.c. + (dump_shell_string, dump_shell_string, dump_var, dump_bindings, depth, + weird_merge, map_leaf_lists, dest_bind, eval_form, match_line, + format_field, subs_vars, complex_open, complex_open_failed, + complex_close, complex_snarf, robust_length, bind_car, bind_cdr, + extract_vars, extract_bindings, do_output_line, do_output, + match_files, extract): Functions removed, added to match.c. + (struct fpip): Definition removed, added to match.c + (<stdlib.h>, <string.h>, <ctype.h>, <errno.h>, <setjmp.h>, + "gc.h", "unwind.h"): Unneeded headers removed. + * match.c: New file. + * extract.l: Renamed to parser.l. + * extract.h: Renamed to parser.h. + (opt_loglevel, opt_nobindings, opt_arraydims, version, progname): + Declarations moved to txr.h. + (extract): Dclaration moved to match.h. + * txr.h, match.h: New headers. + * gc.h (opt_gc_debug): Moved to txr.h. + 2009-10-03 Kaz Kylheku <kkylheku@gmail.com> Version 014 @@ -25,40 +25,36 @@ # Test data in the tests/ directory is in the public domain, # unless it contains notices to the contrary. + OPT_FLAGS := -O2 LANG_FLAGS := -ansi -D_GNU_SOURCE DIAG_FLAGS := -Wall DBG_FLAGS := -g +LEX_DBG_FLAGS := TXR_DBG_OPTS := --gc-debug LEXLIB := fl CFLAGS := $(LANG_FLAGS) $(DIAG_FLAGS) $(OPT_FLAGS) $(DBG_FLAGS) -txr: lex.yy.o y.tab.o lib.o regex.o gc.o unwind.o +OBJS := txr.o lex.yy.o y.tab.o match.o lib.o regex.o gc.o unwind.o stream.o +txr: $(OBJS) $(CC) $(CFLAGS) -o $@ $^ -l$(LEXLIB) -lex.yy.o y.tab.o: y.tab.h extract.h lib.h gc.h - -y.tab.o: regex.h - -lib.o: lib.h gc.h - -regex.o: regex.h lib.h gc.h +-include dep.mk -gc.o: gc.h lib.h gc.h +lex.yy.c: parser.l + $(LEX) $(LEX_DBG_FLAGS) $< -unwind.o: unwind.h lib.h - -lex.yy.c: extract.l - $(LEX) $< - -y.tab.c y.tab.h: extract.y +y.tab.c y.tab.h: parser.y if $(YACC) -v -d $< ; then true ; else rm $@ ; false ; fi clean: - rm -f txr lex.yy.o y.tab.o lib.o regex.o gc.o unwind.o \ + rm -f txr $(OBJS) \ y.tab.c lex.yy.c y.tab.h y.output $(TESTS:.ok=.out) +depend: txr + ./txr depend.txr > dep.mk + TESTS := $(patsubst %.txr,%.ok,$(shell find tests -name '*.txr' | sort)) tests: txr $(TESTS) @@ -0,0 +1,9 @@ +lib.o: lib.h gc.h unwind.h stream.h +lex.yy.o: y.tab.h lib.h gc.h parser.h +regex.o: lib.h unwind.h regex.h +y.tab.o: lib.h regex.h parser.h +unwind.o: lib.h gc.h stream.h unwind.h +txr.o: lib.h stream.h gc.h unwind.h parser.h match.h txr.h +match.o: lib.h gc.h unwind.h regex.h stream.h parser.h txr.h match.h +stream.o: lib.h gc.h stream.h +gc.o: lib.h stream.h txr.h gc.h diff --git a/depend.txr b/depend.txr new file mode 100644 index 00000000..7fa2183c --- /dev/null +++ b/depend.txr @@ -0,0 +1,11 @@ +@(next)$. +@(collect) +@file.c +@(next)@file.c +@(collect) +#include "@header" +@(end) +@(output) +@file.o:@(rep) @header@(end) +@(end) +@(end) @@ -31,6 +31,8 @@ #include <setjmp.h> #include <dirent.h> #include "lib.h" +#include "stream.h" +#include "txr.h" #include "gc.h" #define PROT_STACK_SIZE 1024 @@ -102,13 +104,15 @@ static void more() heap_t *heap = (heap_t *) chk_malloc(sizeof *heap); obj_t *block = heap->block, *end = heap->block + HEAP_SIZE; + assert (free_list == 0); + while (block < end) { block->t.next = free_list; block->t.type = FREE; free_list = block++; } - free_tail = &block[-1].t.next; + free_tail = &heap->block[0].t.next; heap->next = heap_list; heap_list = heap; @@ -161,13 +165,11 @@ static void finalize(obj_t *obj) obj->v.vec = 0; } break; - case STREAM: - stream_close(obj); - break; case LCONS: break; case COBJ: - obj->co.ops->destroy(obj); + if (obj->co.ops->destroy) + obj->co.ops->destroy(obj); break; default: assert (0 && "corrupt type field"); @@ -224,9 +226,6 @@ static void mark_obj(obj_t *obj) mark_obj(obj->v.vec[i]); } break; - case STREAM: - mark_obj(obj->sm.label_pushback); - break; case LCONS: mark_obj(obj->lc.car); mark_obj(obj->lc.cdr); @@ -234,6 +233,8 @@ static void mark_obj(obj_t *obj) break; case COBJ: mark_obj(obj->co.cls); + if (obj->co.ops->mark) + obj->co.ops->mark(obj); break; default: assert (0 && "corrupt type field"); @@ -253,22 +254,22 @@ static int in_heap(obj_t *ptr) return 0; } -static void mark_mem_region(obj_t **bottom, obj_t **top) +static void mark_mem_region(obj_t **low, obj_t **high) { - if (bottom > top) { - obj_t **tmp = top; - top = bottom; - bottom = tmp; + if (low > high) { + obj_t **tmp = high; + high = low; + low = tmp; } - while (bottom < top) { - obj_t *maybe_obj = *bottom; + while (low < high) { + obj_t *maybe_obj = *low; if (in_heap(maybe_obj)) { type_t t = maybe_obj->t.type; if ((t & FREE) == 0) mark_obj(maybe_obj); } - bottom++; + low++; } } @@ -300,6 +301,9 @@ static void sweep(void) block < end; block++) { + if ((block->t.type & (REACHABLE | FREE)) == (REACHABLE | FREE)) + abort(); + if (block->t.type & REACHABLE) { block->t.type &= ~REACHABLE; continue; @@ -310,7 +314,7 @@ static void sweep(void) if (0 && dbg) { fprintf(stderr, "%s: finalizing: ", progname); - obj_print(block, stderr); + obj_print(block, std_error); putc('\n', stderr); } finalize(block); @@ -356,6 +360,11 @@ void gc_init(obj_t **stack_bottom) gc_stack_bottom = stack_bottom; } +void gc_mark(obj_t *obj) +{ + mark_obj(obj); +} + /* * Useful functions for gdb'ing. */ @@ -23,9 +23,6 @@ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ - -extern int opt_gc_debug; - void gc_init(obj_t **stack_bottom); obj_t *prot1(obj_t **loc); void rel1(obj_t **loc); @@ -34,3 +31,4 @@ void release(obj_t **, ...); obj_t *make_obj(void); void gc(void); int gc_state(int); +void gc_mark(obj_t *); @@ -36,6 +36,7 @@ #include "lib.h" #include "gc.h" #include "unwind.h" +#include "stream.h" #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -43,12 +44,15 @@ obj_t *interned_syms; obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; -obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; -obj_t *zeroplus, *optional, *compound, *or; +obj_t *stream_t, *lcons_t, *cobj_t, *var, *regex, *set, *cset, *wild, *oneplus; +obj_t *zeroplus, *optional, *compound, *or, *quasi; obj_t *skip, *trailer, *block, *next, *fail, *accept; obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; -obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; +obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir; +obj_t *try, *catch, *finally, *nothrow; +obj_t *error, *type_error, *internal_err, *numeric_err, *range_err; +obj_t *query_error, *file_error; obj_t *zero, *one, *two, *negone, *maxint, *minint; obj_t *null_string; @@ -59,6 +63,8 @@ obj_t *identity_f; obj_t *equal_f; const char *progname; +obj_t *prog_string; + void *(*oom_realloc)(void *, size_t); @@ -75,11 +81,9 @@ static obj_t *identity_tramp(obj_t *env, obj_t *obj) static obj_t *equal_tramp(obj_t *env, obj_t *, obj_t *); -obj_t *typeof(obj_t *obj) +static obj_t *code2type(int code) { - if (obj == nil) - return null; - switch (obj->t.type) { + switch (code) { case CONS: return cons_t; case STR: return str_t; case CHR: return chr_t; @@ -87,11 +91,39 @@ obj_t *typeof(obj_t *obj) case SYM: return sym_t; case FUN: return fun_t; case VEC: return vec_t; - case STREAM: return stream_t; case LCONS: return lcons_t; - case COBJ: return obj->co.cls; + case COBJ: return cobj_t; } - assert (0 && "corrupt type field"); + return nil; +} + +obj_t *typeof(obj_t *obj) +{ + if (obj == nil) { + return null; + } else if (obj->t.type == COBJ) { + return obj->co.cls; + } else { + obj_t *type = code2type(obj->t.type); + if (!type) + internal_error("corrupt type field"); + return type; + } +} + +obj_t *type_check(obj_t *obj, int type) +{ + if (!obj || obj->t.type != type) + type_mismatch("~s is not of type ~s", obj, code2type(type), nao); + return t; +} + +obj_t *type_check2(obj_t *obj, int t1, int t2) +{ + if (!obj || (obj->t.type != t1 && obj->t.type != t2)) + type_mismatch("~s is not of type ~s or ~s", obj, + code2type(t1), code2type(t2), nao); + return t; } obj_t *car(obj_t *cons) @@ -110,7 +142,7 @@ obj_t *car(obj_t *cons) return cons->lc.car; } default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -130,7 +162,7 @@ obj_t *cdr(obj_t *cons) return cons->lc.cdr; } default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -143,7 +175,7 @@ obj_t **car_l(obj_t *cons) funcall1(cons->lc.func, cons); return &cons->lc.car; default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -156,7 +188,7 @@ obj_t **cdr_l(obj_t *cons) funcall1(cons->lc.func, cons); return &cons->lc.cdr; default: - assert (0 && "corrupt type field"); + type_mismatch("~s is not a cons", cons, nao); } } @@ -202,6 +234,18 @@ obj_t **tail(obj_t *cons) return cdr_l(cons); } +obj_t *pop(obj_t **plist) +{ + obj_t *ret = car(*plist); + *plist = cdr(*plist); + return ret; +} + +obj_t *push(obj_t *val, obj_t **plist) +{ + return *plist = cons(val, *plist); +} + obj_t *copy_list(obj_t *list) { list_collect_decl (out, tail); @@ -407,16 +451,13 @@ obj_t *equal(obj_t *left, obj_t *right) return t; } return nil; - case STREAM: - return nil; /* Different stream objects never equal. */ case COBJ: if (right->t.type == COBJ) return left->co.ops->equal(left, right); return nil; } - assert (0 && "notreached"); - return nil; + internal_error("unhandled case in equal function"); } static obj_t *equal_tramp(obj_t *env, obj_t *left, obj_t *right) @@ -473,7 +514,7 @@ obj_t *list(obj_t *first, ...) do { *ptr++ = next; if (ptr == array + 32) - abort(); + internal_error("runaway arguments in list function"); next = va_arg(vl, obj_t *); } while (next != nao); @@ -536,7 +577,7 @@ obj_t *num(long val) long c_num(obj_t *num) { - assert (num && num->t.type == NUM); + type_check(num, NUM); return num->n.val; } @@ -550,8 +591,8 @@ obj_t *plus(obj_t *anum, obj_t *bnum) long a = c_num(anum); long b = c_num(bnum); - assert (a <= 0 || b <= 0 || LONG_MAX - b >= a); - assert (a >= 0 || b >= 0 || LONG_MIN - b >= a); + numeric_assert (a <= 0 || b <= 0 || LONG_MAX - b >= a); + numeric_assert (a >= 0 || b >= 0 || LONG_MIN - b >= a); return num(a + b); } @@ -561,9 +602,9 @@ obj_t *minus(obj_t *anum, obj_t *bnum) long a = c_num(anum); long b = c_num(bnum); - assert (b != LONG_MIN || LONG_MIN == -LONG_MAX); - assert (a <= 0 || -b <= 0 || LONG_MAX + b >= a); - assert (a >= 0 || -b >= 0 || LONG_MIN + b >= a); + numeric_assert (b != LONG_MIN || LONG_MIN == -LONG_MAX); + numeric_assert (a <= 0 || -b <= 0 || LONG_MAX + b >= a); + numeric_assert (a >= 0 || -b >= 0 || LONG_MIN + b >= a); return num(a - b); } @@ -659,7 +700,7 @@ obj_t *stringp(obj_t *str) obj_t *length_str(obj_t *str) { - assert (str && str->t.type == STR); + type_check (str, STR); if (!str->st.len) str->st.len = num(strlen(str->st.str)); return str->st.len; @@ -667,7 +708,7 @@ obj_t *length_str(obj_t *str) const char *c_str(obj_t *obj) { - assert (obj); + type_check2(obj, STR, SYM); switch (obj->t.type) { case STR: @@ -721,7 +762,7 @@ obj_t *sub_str(obj_t *str_in, obj_t *from_num, obj_t *to_num) { const char *str = c_str(str_in); size_t len = c_num(length_str(str_in)); - long from = c_num(from_num); + long from = from_num ? c_num(from_num) : 0; long to = to_num ? c_num(to_num) : len; if (to < 0) @@ -838,7 +879,7 @@ obj_t *chrp(obj_t *chr) int c_chr(obj_t *chr) { - assert (chr && chr->t.type == CHR); + type_check(chr, CHR); return chr->ch.ch; } @@ -848,7 +889,7 @@ obj_t *chr_str(obj_t *str, obj_t *index) long i = c_num(index); const char *s = c_str(str); - assert (i < l); + bug_unless (i < l); return chr(s[i]); } @@ -859,17 +900,18 @@ obj_t *chr_str_set(obj_t *str, obj_t *index, obj_t *chr) long i = c_num(index); char *s = str->st.str; - assert (i < l); + bug_unless (i < l); s[i] = c_chr(chr); return chr; } -obj_t *sym_name(obj_t *sym) +obj_t *symbol_name(obj_t *sym) { - assert (sym && sym->t.type == SYM); - return sym->s.name; + if (sym) + type_check(sym, SYM); + return sym ? sym->s.name : nil_string; } obj_t *make_sym(obj_t *name) @@ -887,7 +929,7 @@ obj_t *intern(obj_t *str) for (iter = interned_syms; iter != nil; iter = cdr(iter)) { obj_t *sym = car(iter); - if (equal(sym_name(sym), str)) + if (equal(symbol_name(sym), str)) return sym; } @@ -900,12 +942,6 @@ obj_t *symbolp(obj_t *sym) return (sym == nil || sym->s.type == SYM) ? t : nil; } -obj_t *symbol_name(obj_t *sym) -{ - assert (sym == nil || sym->t.type == SYM); - return sym ? sym->s.name : nil_string; -} - obj_t *func_f0(obj_t *env, obj_t *(*fun)(obj_t *)) { obj_t *obj = make_obj(); @@ -1010,8 +1046,10 @@ obj_t *apply(obj_t *fun, obj_t *arglist) { obj_t *arg[4], **p = arg; - assert (fun && fun->f.type == FUN); - assert (arglist == nil || consp(arglist)); + type_check (fun, FUN); + + type_assert (listp(arglist), + ("apply arglist ~s is not a list", arglist, nao)); *p++ = car(arglist); arglist = cdr(arglist); *p++ = car(arglist); arglist = cdr(arglist); @@ -1040,15 +1078,15 @@ obj_t *apply(obj_t *fun, obj_t *arglist) case N4: return fun->f.f.n4(arg[0], arg[1], arg[2], arg[3]); case FINTERP: - abort(); + internal_error("unsupported function type"); } - assert (0 && "bad functype"); + internal_error("corrupt function type field"); } obj_t *funcall(obj_t *fun) { - assert (fun && fun->f.type == FUN); + type_check(fun, FUN); switch (fun->f.functype) { case F0: @@ -1062,7 +1100,7 @@ obj_t *funcall(obj_t *fun) obj_t *funcall1(obj_t *fun, obj_t *arg) { - assert (fun && fun->f.type == FUN); + type_check(fun, FUN); switch (fun->f.functype) { case F1: @@ -1076,7 +1114,7 @@ obj_t *funcall1(obj_t *fun, obj_t *arg) obj_t *funcall2(obj_t *fun, obj_t *arg1, obj_t *arg2) { - assert (fun && fun->f.type == FUN); + type_check(fun, FUN); switch (fun->f.functype) { case F2: @@ -1147,13 +1185,13 @@ obj_t *vector(obj_t *alloc) obj_t *vec_get_fill(obj_t *vec) { - assert (vec && vec->v.type == VEC); + type_check(vec, VEC); return vec->v.vec[vec_fill]; } obj_t *vec_set_fill(obj_t *vec, obj_t *fill) { - assert (vec && vec->v.type == VEC); + type_check(vec, VEC); { long new_fill = c_num(fill); @@ -1185,8 +1223,8 @@ obj_t *vec_set_fill(obj_t *vec, obj_t *fill) obj_t **vecref_l(obj_t *vec, obj_t *ind) { - assert (vec && vec->v.type == VEC); - assert (c_num(ind) < c_num(vec->v.vec[vec_fill])); + type_check(vec, VEC); + range_bug_unless (c_num(ind) < c_num(vec->v.vec[vec_fill])); return vec->v.vec + c_num(ind); } @@ -1198,160 +1236,6 @@ obj_t *vec_push(obj_t *vec, obj_t *item) return fill; } - -static obj_t *stdio_line_read(struct stream *sm) -{ - if (sm->handle == 0) { - return nil; - } else { - char *line = snarf_line((FILE *) sm->handle); - - if (!line) - return nil; - - return string(line); - } -} - -static obj_t *stdio_line_write(struct stream *sm, obj_t *obj) -{ - assert (obj->t.type == STR); - if (sm->handle == 0) - return nil; - if (fputs(c_str(obj), (FILE *) sm->handle) == EOF) - return nil; - if (putc('\n', (FILE *) sm->handle) == EOF) - return nil; - return t; -} - -static obj_t *stdio_close(struct stream *sm) -{ - FILE *f = (FILE *) sm->handle; - - if (f != 0 && f != stdin && f != stdout) { - fclose((FILE *) sm->handle); - sm->handle = 0; - return t; - } - return nil; -} - -static struct stream_ops stdio_line_stream_ops = { - stdio_line_read, stdio_line_write, stdio_close -}; - -obj_t *stdio_line_stream(FILE *f, obj_t *label) -{ - obj_t *sm = make_obj(); - sm->sm.type = STREAM; - sm->sm.handle = f; - sm->sm.ops = &stdio_line_stream_ops; - sm->sm.label_pushback = label; - assert (atom(label)); - return sm; -} - -static obj_t *pipe_close(struct stream *sm) -{ - if (sm->handle != 0) { - pclose((FILE *) sm->handle); - sm->handle = 0; - return t; - } - return nil; -} - -static struct stream_ops pipe_line_stream_ops = { - stdio_line_read, stdio_line_write, pipe_close -}; - -obj_t *pipe_line_stream(FILE *f, obj_t *label) -{ - obj_t *sm = make_obj(); - sm->sm.type = STREAM; - sm->sm.handle = f; - sm->sm.ops = &pipe_line_stream_ops; - sm->sm.label_pushback = label; - assert (atom(label)); - return sm; -} - -obj_t *dirent_read(struct stream *sm) -{ - if (sm->handle == 0) { - return nil; - } else { - for (;;) { - struct dirent *e = readdir(sm->handle); - if (!e) - return nil; - if (!strcmp(e->d_name, ".") || !strcmp(e->d_name, "..")) - continue; - return string(chk_strdup(e->d_name)); - } - } -} - -obj_t *dirent_close(struct stream *sm) -{ - if (sm->handle != 0) { - closedir((DIR *) sm->handle); - sm->handle = 0; - return t; - } - - return nil; -} - -static struct stream_ops dirent_stream_ops = { - dirent_read, 0, dirent_close -}; - -obj_t *dirent_stream(DIR *d, obj_t *label) -{ - obj_t *sm = make_obj(); - sm->sm.type = STREAM; - sm->sm.handle = d; - sm->sm.ops = &dirent_stream_ops; - sm->sm.label_pushback = label; - assert (atom(label)); - return sm; -} - -obj_t *stream_get(obj_t *sm) -{ - assert (sm->sm.type == STREAM); - - if (consp(sm->sm.label_pushback)) { - obj_t *ret = car(sm->sm.label_pushback); - sm->sm.label_pushback = cdr(sm->sm.label_pushback); - return ret; - } - - return sm->sm.ops->read(&sm->sm); -} - -obj_t *stream_pushback(obj_t *sm, obj_t *obj) -{ - assert (sm->sm.type == STREAM); - sm->sm.label_pushback = cons(obj, sm->sm.label_pushback); - return obj; -} - -obj_t *stream_put(obj_t *sm, obj_t *obj) -{ - assert (sm->sm.type == STREAM); - return sm->sm.ops->write(&sm->sm, obj); -} - -obj_t *stream_close(obj_t *sm) -{ - assert (sm->sm.type == STREAM); - return sm->sm.ops->close(&sm->sm); -} - - static obj_t *make_lazycons(obj_t *func) { obj_t *obj = make_obj(); @@ -1361,36 +1245,36 @@ static obj_t *make_lazycons(obj_t *func) return obj; } -static obj_t *lazy_stream_func(obj_t *stream, obj_t *lcons) +static obj_t *lazy_stream_func(obj_t *env, obj_t *lcons) { - obj_t *next = stream_get(stream); - obj_t *ahead = stream_get(stream); + obj_t *stream = car(env); + obj_t *next = cdr(env) ? pop(cdr_l(env)) : get_line(stream); + obj_t *ahead = get_line(stream); lcons->lc.car = next; lcons->lc.cdr = if2(ahead, make_lazycons(lcons->lc.func)); lcons->lc.func = nil; if (!next || !ahead) - stream_close(stream); + close_stream(stream); if (ahead) - stream_pushback(stream, ahead); + push(ahead, cdr_l(env)); return next; } obj_t *lazy_stream_cons(obj_t *stream) { - obj_t *first = stream_get(stream); + obj_t *first = get_line(stream); if (!first) { - stream_close(stream); + close_stream(stream); return nil; } - stream_pushback(stream, first); - - return make_lazycons(func_f1(stream, lazy_stream_func)); + return make_lazycons(func_f1(cons(stream, cons(first, nil)), + lazy_stream_func)); } obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops) @@ -1403,11 +1287,11 @@ obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops) return obj; } -void cobj_print_op(obj_t *obj, FILE *out) +void cobj_print_op(obj_t *obj, obj_t *out) { - fprintf(out, "#<"); + put_cstring(out, "#<"); obj_print(obj->co.cls, out); - fprintf(out, ": %p>", obj->co.handle); + cformat(out, ": %p>", obj->co.handle); } obj_t *assoc(obj_t *list, obj_t *key) @@ -1567,8 +1451,6 @@ obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun) static void obj_init(void) { - int gc_save = gc_state(0); - /* * No need to GC-protect the convenience variables which hold the interned * symbols, because the interned_syms list holds a reference to all the @@ -1579,7 +1461,9 @@ static void obj_init(void) &two, &negone, &maxint, &minint, &null_string, &nil_string, &null_list, &equal_f, - &identity_f, 0); + &identity_f, &prog_string, 0); + + nil_string = string(strdup("nil")); null = intern(string(strdup("null"))); t = intern(string(strdup("t"))); @@ -1592,8 +1476,9 @@ static void obj_init(void) vec_t = intern(string(strdup("vec"))); stream_t = intern(string(strdup("stream"))); lcons_t = intern(string(strdup("lcons"))); - var = intern(string(strdup("var"))); - regex = intern(string(strdup("regex"))); + cobj_t = intern(string(strdup("cobj"))); + var = intern(string(strdup("$var"))); + regex = intern(string(strdup("$regex"))); set = intern(string(strdup("set"))); cset = intern(string(strdup("cset"))); wild = intern(string(strdup("wild"))); @@ -1602,6 +1487,7 @@ static void obj_init(void) optional = intern(string(strdup("?"))); compound = intern(string(strdup("compound"))); or = intern(string(strdup("or"))); + quasi = intern(string(strdup("$quasi"))); skip = intern(string(strdup("skip"))); trailer = intern(string(strdup("trailer"))); block = intern(string(strdup("block"))); @@ -1626,10 +1512,24 @@ static void obj_init(void) rep = intern(string(strdup("rep"))); flattn = intern(string(strdup("flatten"))); forget = intern(string(strdup("forget"))); + local = intern(string(strdup("local"))); mrge = intern(string(strdup("merge"))); bind = intern(string(strdup("bind"))); cat = intern(string(strdup("cat"))); dir = intern(string(strdup("dir"))); + try = intern(string(strdup("try"))); + catch = intern(string(strdup("catch"))); + finally = intern(string(strdup("finally"))); + nothrow = intern(string(strdup("nothrow"))); + error = intern(string(strdup("error"))); + type_error = intern(string(strdup("type_error"))); + internal_err = intern(string(strdup("internal_error"))); + numeric_err = intern(string(strdup("numeric_error"))); + range_err = intern(string(strdup("range_error"))); + query_error = intern(string(strdup("query_error"))); + file_error = intern(string(strdup("file_error"))); + + interned_syms = cons(nil, interned_syms); zero = num(0); one = num(1); @@ -1639,20 +1539,18 @@ static void obj_init(void) minint = num(LONG_MIN); null_string = string(strdup("")); - nil_string = string(strdup("NIL")); null_list = cons(nil, nil); equal_f = func_f2(nil, equal_tramp); identity_f = func_f1(nil, identity_tramp); - - gc_state(gc_save); + prog_string = string(strdup(progname)); } -void obj_print(obj_t *obj, FILE *out) +void obj_print(obj_t *obj, obj_t *out) { if (obj == nil) { - fputs("nil", out); + put_cstring(out, "nil"); return; } @@ -1661,108 +1559,161 @@ void obj_print(obj_t *obj, FILE *out) case LCONS: { obj_t *iter; - putc('(', out); + put_cchar(out, '('); for (iter = obj; consp(iter); iter = cdr(iter)) { obj_print(car(iter), out); if (nullp(cdr(iter))) { - putc(')', out); + put_cchar(out, ')'); } else if (consp(cdr(iter))) { - putc(' ', out); + put_cchar(out, ' '); } else { - fputs(" . ", out); + put_cstring(out, " . "); obj_print(cdr(iter), out); - putc(')', out); + put_cchar(out, ')'); } } } - break; + return; case STR: { const char *ptr; - putc('"', out); + put_cchar(out, '"'); for (ptr = obj->st.str; *ptr; ptr++) { switch (*ptr) { - case '\a': fputs("\\a", out); break; - case '\b': fputs("\\b", out); break; - case '\t': fputs("\\t", out); break; - case '\n': fputs("\\n", out); break; - case '\v': fputs("\\v", out); break; - case '\f': fputs("\\f", out); break; - case '\r': fputs("\\r", out); break; - case '"': fputs("\\\"", out); break; - case '\\': fputs("\\\\", out); break; - case 27: fputs("\\e", out); break; + case '\a': put_cstring(out, "\\a"); break; + case '\b': put_cstring(out, "\\b"); break; + case '\t': put_cstring(out, "\\t"); break; + case '\n': put_cstring(out, "\\n"); break; + case '\v': put_cstring(out, "\\v"); break; + case '\f': put_cstring(out, "\\f"); break; + case '\r': put_cstring(out, "\\r"); break; + case '"': put_cstring(out, "\\\""); break; + case '\\': put_cstring(out, "\\\\"); break; + case 27: put_cstring(out, "\\e"); break; default: if (isprint(*ptr)) - putc(*ptr, out); + put_cchar(out, *ptr); else - fprintf(out, "\\%03o", (int) *ptr); + cformat(out, "\\%03o", (int) *ptr); } } - putc('"', out); + put_cchar(out, '"'); } - break; + return; case CHR: { int ch = obj->ch.ch; - putc('\'', out); + put_cchar(out, '\''); switch (ch) { - case '\a': fputs("\\a", out); break; - case '\b': fputs("\\b", out); break; - case '\t': fputs("\\t", out); break; - case '\n': fputs("\\n", out); break; - case '\v': fputs("\\v", out); break; - case '\f': fputs("\\f", out); break; - case '\r': fputs("\\r", out); break; - case '"': fputs("\\\"", out); break; - case '\\': fputs("\\\\", out); break; - case 27: fputs("\\e", out); break; + case '\a': put_cstring(out, "\\a"); break; + case '\b': put_cstring(out, "\\b"); break; + case '\t': put_cstring(out, "\\t"); break; + case '\n': put_cstring(out, "\\n"); break; + case '\v': put_cstring(out, "\\v"); break; + case '\f': put_cstring(out, "\\f"); break; + case '\r': put_cstring(out, "\\r"); break; + case '"': put_cstring(out, "\\\""); break; + case '\\': put_cstring(out, "\\\\"); break; + case 27: put_cstring(out, "\\e"); break; default: if (isprint(ch)) - putc(ch, out); + put_cchar(out, ch); else - fprintf(out, "\\%03o", ch); + cformat(out, "\\%03o", ch); } - putc('\'', out); + put_cchar(out, '\''); } - break; + return; case NUM: - fprintf(out, "%ld", c_num(obj)); - break; + cformat(out, "%ld", c_num(obj)); + return; case SYM: - fputs(c_str(symbol_name(obj)), out); - break; + put_string(out, symbol_name(obj)); + return; case FUN: - fprintf(out, "#<function: f%d>", (int) obj->f.functype); - break; + cformat(out, "#<function: f%d>", (int) obj->f.functype); + return; case VEC: { long i, fill = c_num(obj->v.vec[vec_fill]); - fputs("#(", out); + put_cstring(out, "#("); for (i = 0; i < fill; i++) { obj_print(obj->v.vec[i], out); if (i < fill - 1) - putc(' ', out); + put_cchar(out, ' '); } - putc(')', out); + put_cchar(out, ')'); } - break; - case STREAM: - fprintf(out, "#<stream: "); + return; + case COBJ: + obj->co.ops->print(obj, out); + return; + } + + cformat(out, "#<garbage: %p>", (void *) obj); +} + +void obj_pprint(obj_t *obj, obj_t *out) +{ + if (obj == nil) { + put_cstring(out, "nil"); + return; + } + + switch (obj->t.type) { + case CONS: + case LCONS: { obj_t *iter; - /* skip stream pushback items to find label */ - for (iter = obj->sm.label_pushback; consp(iter); iter = cdr(iter)) - ; - obj_print(iter, out); + put_cchar(out, '('); + for (iter = obj; consp(iter); iter = cdr(iter)) { + obj_pprint(car(iter), out); + if (nullp(cdr(iter))) { + put_cchar(out, ')'); + } else if (consp(cdr(iter))) { + put_cchar(out, ' '); + } else { + put_cstring(out, " . "); + obj_pprint(cdr(iter), out); + put_cchar(out, ')'); + } + } } - fprintf(out, ", %p>", (void *) obj->sm.handle); - break; + return; + case STR: + put_string(out, obj); + return; + case CHR: + put_char(out, obj); + return; + case NUM: + cformat(out, "%ld", c_num(obj)); + return; + case SYM: + put_string(out, symbol_name(obj)); + return; + case FUN: + cformat(out, "#<function: f%d>", (int) obj->f.functype); + return; + case VEC: + { + long i, fill = c_num(obj->v.vec[vec_fill]); + put_cstring(out, "#("); + for (i = 0; i < fill; i++) { + obj_pprint(obj->v.vec[i], out); + if (i < fill - 1) + put_cchar(out, ' '); + } + put_cchar(out, ')'); + } + return; case COBJ: obj->co.ops->print(obj, out); - break; + return; } + + cformat(out, "#<garbage: %p>", (void *) obj); } void init(const char *pn, void *(*oom)(void *, size_t), @@ -1771,6 +1722,7 @@ void init(const char *pn, void *(*oom)(void *, size_t), int growsdown; obj_t *local_bottom = nil; progname = pn; + int gc_save = gc_state(0); /* If the local_bottom variable has a smaller address than either of the two possible top variables from @@ -1785,14 +1737,17 @@ void init(const char *pn, void *(*oom)(void *, size_t), ? max(maybe_bottom_0, maybe_bottom_1) : min(maybe_bottom_0, maybe_bottom_1)); - uw_init(); obj_init(); + uw_init(); + stream_init(); + + gc_state(gc_save); } -void dump(obj_t *obj, FILE *out) +void dump(obj_t *obj, obj_t *out) { obj_print(obj, out); - putc('\n', out); + put_cchar(out, '\n'); } /* @@ -1802,48 +1757,16 @@ void dump(obj_t *obj, FILE *out) */ void d(obj_t *obj) { - dump(obj, stdout); -} - -char *snarf_line(FILE *in) -{ - const size_t min_size = 512; - size_t size = 0; - size_t fill = 0; - char *buf = 0; - - for (;;) { - int ch = getc(in); - - if (ch == EOF && buf == 0) - break; - - if (fill >= size) { - size_t newsize = size ? size * 2 : min_size; - buf = chk_realloc(buf, newsize); - size = newsize; - } - - if (ch == '\n' || ch == EOF) { - buf[fill++] = 0; - break; - } - buf[fill++] = ch; - } - - if (buf) - buf = chk_realloc(buf, fill); - - return buf; + dump(obj, std_output); } -obj_t *snarf(FILE *in) +obj_t *snarf(obj_t *in) { list_collect_decl (list, iter); - char *str; + obj_t *str; - while ((str = snarf_line(in)) != 0) - list_collect (iter, string(str)); + while ((str = get_line(in)) != 0) + list_collect (iter, str); return list; } @@ -25,7 +25,7 @@ */ typedef enum type { - CONS = 1, STR, CHR, NUM, SYM, FUN, VEC, STREAM, LCONS, COBJ + CONS = 1, STR, CHR, NUM, SYM, FUN, VEC, LCONS, COBJ } type_t; typedef enum functype @@ -99,19 +99,6 @@ struct vec { obj_t **vec; }; -struct stream { - type_t type; - void *handle; - struct stream_ops *ops; - obj_t *label_pushback; /* label-terminated pushback stack */ -}; - -struct stream_ops { - obj_t *(*read)(struct stream *); - obj_t *(*write)(struct stream *, obj_t *); - obj_t *(*close)(struct stream *); -}; - /* * Lazy cons. When initially constructed, acts as a promise. The car and cdr * cache pointers are nil, and func points to a function. The job of the @@ -135,8 +122,9 @@ struct cobj { struct cobj_ops { obj_t *(*equal)(obj_t *self, obj_t *other); - void (*print)(obj_t *self, FILE *); + void (*print)(obj_t *self, obj_t *stream); void (*destroy)(obj_t *self); + void (*mark)(obj_t *self); }; union obj { @@ -148,7 +136,6 @@ union obj { struct sym s; struct func f; struct vec v; - struct stream sm; struct lazy_cons lc; struct cobj co; }; @@ -157,24 +144,31 @@ extern obj_t *interned_syms; extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; extern obj_t *stream_t, *lcons_t, *var, *regex, *set, *cset, *wild, *oneplus; -extern obj_t *zeroplus, *optional, *compound, *or; +extern obj_t *zeroplus, *optional, *compound, *or, *quasi; extern obj_t *skip, *trailer, *block, *next, *fail, *accept; extern obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; extern obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; -extern obj_t *flattn, *forget, *mrge, *bind, *cat, *dir; +extern obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir; +extern obj_t *try, *catch, *finally, *nothrow; +extern obj_t *error, *type_error, *internal_err, *numeric_err, *range_err; +extern obj_t *query_error, *file_error; extern obj_t *zero, *one, *two, *negone, *maxint, *minint; extern obj_t *null_string; -extern obj_t *null_list; /* (NIL) */ +extern obj_t *null_list; /* (nil) */ extern obj_t *identity_f; extern obj_t *equal_f; extern const char *progname; +extern obj_t *prog_string; + extern void *(*oom_realloc)(void *, size_t); obj_t *identity(obj_t *obj); obj_t *typeof(obj_t *obj); +obj_t *type_check(obj_t *obj, int); +obj_t *type_check2(obj_t *obj, int, int); obj_t *car(obj_t *cons); obj_t *cdr(obj_t *cons); obj_t **car_l(obj_t *cons); @@ -187,6 +181,8 @@ obj_t *fourth(obj_t *cons); obj_t *fifth(obj_t *cons); obj_t *sixth(obj_t *cons); obj_t **tail(obj_t *cons); +obj_t *pop(obj_t **plist); +obj_t *push(obj_t *val, obj_t **plist); obj_t *copy_list(obj_t *list); obj_t *nreverse(obj_t *in); obj_t *reverse(obj_t *in); @@ -275,16 +271,9 @@ obj_t *vec_get_fill(obj_t *vec); obj_t *vec_set_fill(obj_t *vec, obj_t *fill); obj_t **vecref_l(obj_t *vec, obj_t *ind); obj_t *vec_push(obj_t *vec, obj_t *item); -obj_t *stdio_line_stream(FILE *f, obj_t *label); -obj_t *pipe_line_stream(FILE *f, obj_t *label); -obj_t *dirent_stream(DIR *d, obj_t *label); -obj_t *stream_get(obj_t *sm); -obj_t *stream_pushback(obj_t *sm, obj_t *obj); -obj_t *stream_put(obj_t *sm, obj_t *obj); -obj_t *stream_close(obj_t *sm); obj_t *lazy_stream_cons(obj_t *stream); obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops); -void cobj_print_op(obj_t *, FILE *); /* Print function for struct cobj_ops */ +void cobj_print_op(obj_t *, obj_t *); /* Default function for struct cobj_ops */ obj_t *assoc(obj_t *list, obj_t *key); obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value); obj_t *alist_remove(obj_t *list, obj_t *keys); @@ -295,12 +284,12 @@ obj_t *mapcar(obj_t *fun, obj_t *list); obj_t *mappend(obj_t *fun, obj_t *list); obj_t *sort(obj_t *list, obj_t *lessfun, obj_t *keyfun); -void obj_print(obj_t *obj, FILE *); +void obj_print(obj_t *obj, obj_t *stream); +void obj_pprint(obj_t *obj, obj_t *stream); void init(const char *progname, void *(*oom_realloc)(void *, size_t), obj_t **maybe_bottom_0, obj_t **maybe_bottom_1); -void dump(obj_t *obj, FILE *); -char *snarf_line(FILE *in); -obj_t *snarf(FILE *in); +void dump(obj_t *obj, obj_t *stream); +obj_t *snarf(obj_t *in); obj_t *match(obj_t *spec, obj_t *data); #define nil ((obj_t *) 0) @@ -313,6 +302,12 @@ obj_t *match(obj_t *spec, obj_t *data); #define if3(a, b, c) ((a) ? (b) : (c)) +#define or2(a, b) ((a) ? (a) : (b)) + +#define or3(a, b, c) or2(a, or2(b, c)) + +#define or4(a, b, c, d) or2(a, or3(b, c, d)) + #define list_collect_decl(OUT, PTAIL) \ obj_t *OUT = nil, **PTAIL = &OUT @@ -24,506 +24,94 @@ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -%{ - #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <ctype.h> #include <assert.h> -#include <limits.h> #include <errno.h> #include <dirent.h> #include <setjmp.h> +#include <stdarg.h> #include "lib.h" #include "gc.h" #include "unwind.h" #include "regex.h" -#include "extract.h" - -int yylex(void); -void yyerror(const char *); - -obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts); -obj_t *define_transform(obj_t *define_form); +#include "stream.h" +#include "parser.h" +#include "txr.h" +#include "match.h" -static obj_t *parsed_spec; -static int output_produced; +int output_produced; -%} - -%union { - char *lexeme; - union obj *obj; - char chr; - long num; +static void debugf(const char *fmt, ...) +{ + if (opt_loglevel >= 2) { + va_list vl; + va_start (vl, fmt); + format(std_error, "~a: ", prog_string, nao); + vformat(std_error, fmt, vl); + put_cchar(std_error, '\n'); + va_end (vl); + } } -%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT -%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE -%token <num> NUMBER -%token <chr> REGCHAR LITCHAR - -%type <obj> spec clauses clause all_clause some_clause none_clause maybe_clause -%type <obj> cases_clause collect_clause clause_parts additional_parts -%type <obj> output_clause define_clause line elems_opt elems elem var var_op -%type <obj> list exprs expr out_clauses out_clauses_opt out_clause -%type <obj> repeat_clause repeat_parts_opt o_line -%type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt -%type <obj> regex regexpr regbranch -%type <obj> regterm regclass regclassterm regrange -%type <obj> strlit chrlit litchars -%type <chr> regchar -%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL -%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE -%nonassoc '{' '}' '[' ']' '(' ')' -%right IDENT TEXT NUMBER -%left '|' '/' -%right '*' '?' '+' -%right '^' '.' '\\' REGCHAR - -%% - -spec : clauses { parsed_spec = $1; } - | { parsed_spec = nil; } - | error { parsed_spec = nil; - yybadtoken(yychar, 0); } - ; - -clauses : clause { $$ = cons($1, nil); } - | clause clauses { $$ = cons($1, $2); } - ; - -clause : all_clause { $$ = list(num(lineno - 1), $1, nao); } - | some_clause { $$ = list(num(lineno - 1), $1, nao); } - | none_clause { $$ = list(num(lineno - 1), $1, nao); } - | maybe_clause { $$ = list(num(lineno - 1), $1, nao); } - | cases_clause { $$ = list(num(lineno - 1), $1, nao); } - | collect_clause { $$ = list(num(lineno - 1), $1, nao); } - | define_clause { $$ = list(num(lineno - 1), - define_transform($1), nao); } - | output_clause { $$ = list(num(lineno - 1), $1, nao); } - | line { $$ = $1; } - | repeat_clause { $$ = nil; - yyerror("repeat outside of output"); } - ; - -all_clause : ALL newl clause_parts { $$ = cons(all, $3); } - | ALL newl error { $$ = nil; - yybadtoken(yychar, - "all clause"); } - | ALL newl END newl { $$ = nil; - yyerror("empty all clause"); } - - ; - -some_clause : SOME newl clause_parts { $$ = cons(some, $3); } - | SOME newl error { $$ = nil; - yybadtoken(yychar, - "some clause"); } - | SOME newl END newl { $$ = nil; - yyerror("empty some clause"); } - ; - -none_clause : NONE newl clause_parts { $$ = cons(none, $3); } - | NONE newl error { $$ = nil; - yybadtoken(yychar, - "none clause"); } - | NONE newl END newl { $$ = nil; - yyerror("empty none clause"); } - ; - -maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); } - | MAYBE newl error { $$ = nil; - yybadtoken(yychar, - "maybe clause"); } - | MAYBE newl END newl { $$ = nil; - yyerror("empty maybe clause"); } - ; - -cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); } - | CASES newl error { $$ = nil; - yybadtoken(yychar, - "cases clause"); } - | CASES newl END newl { $$ = nil; - yyerror("empty cases clause"); } - ; - -collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); } - | COLLECT newl clauses - UNTIL newl clauses END newl { $$ = list(collect, $3, - $6, nao); } - | COLLECT newl error { $$ = nil; - if (yychar == UNTIL || yychar == END) - yyerror("empty collect"); - else - yybadtoken(yychar, - "collect clause"); } - ; - -clause_parts : clauses additional_parts { $$ = cons($1, $2); } - ; - -additional_parts : END newl { $$ = nil; } - | AND newl clauses additional_parts { $$ = cons($3, $4); } - | OR newl clauses additional_parts { $$ = cons($3, $4); } - ; - -line : elems_opt '\n' { $$ = $1; } - ; - -elems_opt : elems { $$ = cons(num(lineno - 1), $1); } - | { $$ = nil; } - ; - -elems : elem { $$ = cons($1, nil); } - | elem elems { $$ = cons($1, $2); } - | rep_elem { $$ = nil; - yyerror("rep outside of output"); } - ; - -elem : TEXT { $$ = string($1); } - | var { $$ = $1; } - | list { $$ = $1; } - | regex { $$ = cons(regex_compile($1), $1); } - | COLL elems END { $$ = list(coll, $2, nao); } - | COLL elems - UNTIL elems END { $$ = list(coll, $2, $4, nao); } - | COLL error { $$ = nil; - yybadtoken(yychar, "coll clause"); } - ; - -define_clause : DEFINE exprs ')' newl - clauses - END newl { $$ = list(define, $2, $5, nao); } - | DEFINE ')' newl - clauses - END newl { $$ = list(define, nil, $4, nao); } - | DEFINE exprs ')' newl - END newl { $$ = list(define, $2, nao); } - | DEFINE ')' newl - END newl { $$ = list(define, nao); } - | DEFINE error { yybadtoken(yychar, "list expression"); } - | DEFINE exprs ')' newl - error { yybadtoken(yychar, "define"); } - | DEFINE ')' newl - error { yybadtoken(yychar, "define"); } - ; - -output_clause : OUTPUT o_elems '\n' - out_clauses - END newl { $$ = list(output, $4, $2, nao); } - | OUTPUT newl - out_clauses - END newl { $$ = list(output, $3, nao); } - | OUTPUT o_elems '\n' - error { $$ = nil; - yybadtoken(yychar, "output clause"); } - | OUTPUT newl - error { $$ = nil; - yybadtoken(yychar, "output clause"); } - ; - -out_clauses : out_clause { $$ = cons($1, nil); } - | out_clause out_clauses { $$ = cons($1, $2); } - ; - -out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); } - | o_line { $$ = $1; } - | all_clause { $$ = nil; - yyerror("match clause in output"); } - | some_clause { $$ = nil; - yyerror("match clause in output"); } - | none_clause { $$ = nil; - yyerror("match clause in output"); } - | maybe_clause { $$ = nil; - yyerror("match clause in output"); } - | cases_clause { $$ = nil; - yyerror("match clause in output"); } - | collect_clause { $$ = nil; - yyerror("match clause in output"); } - | define_clause { $$ = nil; - yyerror("match clause in output"); } - | output_clause { $$ = nil; - yyerror("match clause in output"); } - ; - -repeat_clause : REPEAT newl - out_clauses - repeat_parts_opt - END newl { $$ = repeat_rep_helper(repeat, $3, $4); } - | REPEAT newl - error { $$ = nil; - yybadtoken(yychar, "repeat clause"); } - ; - -repeat_parts_opt : SINGLE newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(single, $3), $4); } - | FIRST newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(frst, $3), $4); } - | LAST newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(lst, $3), $4); } - | EMPTY newl - out_clauses_opt - repeat_parts_opt { $$ = cons(cons(empty, $3), $4); } - | /* empty */ { $$ = nil; } - ; - - -out_clauses_opt : out_clauses { $$ = $1; } - | /* empty */ { $$ = null_list; } - -o_line : o_elems_opt '\n' { $$ = $1; } - ; - -o_elems_opt : o_elems { $$ = cons(num(lineno - 1), $1); } - | { $$ = nil; } - ; - -o_elems_opt2 : o_elems { $$ = $1; } - | { $$ = null_list; } - ; - -o_elems : o_elem { $$ = cons($1, nil); } - | o_elem o_elems { $$ = cons($1, $2); } - ; - -o_elem : TEXT { $$ = string($1); } - | var { $$ = $1; } - | rep_elem { $$ = $1; } - ; - -rep_elem : REP o_elems - rep_parts_opt END { $$ = repeat_rep_helper(rep, $2, $3); } - | REP error { $$ = nil; yybadtoken(yychar, "rep clause"); } - ; - -rep_parts_opt : SINGLE o_elems_opt2 - rep_parts_opt { $$ = cons(cons(single, $2), $3); } - | FIRST o_elems_opt2 - rep_parts_opt { $$ = cons(cons(frst, $2), $3); } - | LAST o_elems_opt2 - rep_parts_opt { $$ = cons(cons(lst, $2), $3); } - | EMPTY o_elems_opt2 - rep_parts_opt { $$ = cons(cons(empty, $2), $3); } - | /* empty */ { $$ = nil; } - ; - - -/* This sucks, but factoring '*' into a nonterminal - * that generates an empty phrase causes reduce/reduce conflicts. - */ -var : IDENT { $$ = list(var, intern(string($1)), nao); } - | IDENT elem { $$ = list(var, intern(string($1)), $2, nao); } - | '{' IDENT '}' { $$ = list(var, intern(string($2)), nao); } - | '{' IDENT '}' elem { $$ = list(var, intern(string($2)), $4, nao); } - | '{' IDENT regex '}' { $$ = list(var, intern(string($2)), - nil, cons(regex_compile($3), $3), - nao); } - | '{' IDENT NUMBER '}' { $$ = list(var, intern(string($2)), - nil, num($3), nao); } - | var_op IDENT { $$ = list(var, intern(string($2)), - nil, $1, nao); } - | var_op IDENT elem { $$ = list(var, intern(string($2)), - $3, $1, nao); } - | var_op '{' IDENT '}' { $$ = list(var, intern(string($3)), - nil, $1, nao); } - | var_op '{' IDENT '}' elem { $$ = list(var, intern(string($3)), - $5, $1, nao); } - | IDENT error { $$ = nil; - yybadtoken(yychar, "variable spec"); } - | var_op error { $$ = nil; - yybadtoken(yychar, "variable spec"); } - ; - -var_op : '*' { $$ = t; } - ; - -list : '(' exprs ')' { $$ = $2; } - | '(' ')' { $$ = nil; } - | '(' error { $$ = nil; - yybadtoken(yychar, "list expression"); } - ; - -exprs : expr { $$ = cons($1, nil); } - | expr exprs { $$ = cons($1, $2); } - | expr '.' expr { $$ = cons($1, $3); } - ; - -expr : IDENT { $$ = intern(string($1)); } - | NUMBER { $$ = num($1); } - | list { $$ = $1; } - | regex { $$ = cons(regex_compile($1), $1); } - | chrlit { $$ = $1; } - | strlit { $$ = $1; } - ; - -regex : '/' regexpr '/' { $$ = $2; } - | '/' '/' { $$ = nil; } - | '/' error { $$ = nil; - yybadtoken(yychar, "regex"); } - ; - -regexpr : regbranch { $$ = $1; } - | regbranch '|' regbranch { $$ = list(list(or, $1, - $3, nao), nao); } - ; - -regbranch : regterm { $$ = cons($1, nil); } - | regterm regbranch { $$ = cons($1, $2); } - ; - -regterm : '[' regclass ']' { $$ = cons(set, $2); } - | '[' '^' regclass ']' { $$ = cons(cset, $3); } - | '.' { $$ = wild; } - | '^' { $$ = chr('^'); } - | ']' { $$ = chr(']'); } - | '-' { $$ = chr('-'); } - | regterm '*' { $$ = list(zeroplus, $1, nao); } - | regterm '+' { $$ = list(oneplus, $1, nao); } - | regterm '?' { $$ = list(optional, $1, nao); } - | REGCHAR { $$ = chr($1); } - | '(' regexpr ')' { $$ = cons(compound, $2); } - | '(' error { $$ = nil; - yybadtoken(yychar, "regex subexpression"); } - | '[' error { $$ = nil; - yybadtoken(yychar, "regex character class"); } - ; - -regclass : regclassterm { $$ = cons($1, nil); } - | regclassterm regclass { $$ = cons($1, $2); } - ; - -regclassterm : regrange { $$ = $1; } - | regchar { $$ = chr($1); } - ; - -regrange : regchar '-' regchar { $$ = cons(chr($1), chr($3)); } - -regchar : '?' { $$ = '?'; } - | '.' { $$ = '.'; } - | '*' { $$ = '*'; } - | '+' { $$ = '+'; } - | '(' { $$ = '('; } - | ')' { $$ = ')'; } - | '^' { $$ = '^'; } - | '|' { $$ = '|'; } - | REGCHAR { $$ = $1; } - ; - -newl : '\n' - | error '\n' { yyerror("newline expected after directive"); - yyerrok; } - ; - -strlit : '"' '"' { $$ = null_string; } - | '"' litchars '"' { - if ($2) { - obj_t *len = length($2), *iter, *ix; - $$ = mkustring(len); - for (iter = $2, ix = zero; - iter; - iter = cdr(iter), ix = plus(ix, one)) - { - chr_str_set($$, ix, car(iter)); - } - } else { - $$ = nil; - } - } - | '"' error { yybadtoken(yychar, "string literal"); } - ; - -chrlit : '\'' '\'' { yyerror("empty character literal"); - $$ = nil; } - | '\'' litchars '\'' { $$ = car($2); - if (cdr($2)) - yyerror("multiple characters in " - "character literal"); } - | '\'' error { yybadtoken(yychar, "character literal"); } - ; - -litchars : LITCHAR { $$ = cons(chr($1), nil); } - | LITCHAR litchars { $$ = cons(chr($1), $2); } - ; -%% - -obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts) +static void debuglf(obj_t *line, const char *fmt, ...) { - obj_t *single_parts = nil; - obj_t *first_parts = nil; - obj_t *last_parts = nil; - obj_t *empty_parts = nil; - obj_t *iter; - - for (iter = parts; iter != nil; iter = cdr(iter)) { - obj_t *part = car(iter); - obj_t *sym = car(part); - obj_t *clauses = cdr(part); - - if (sym == single) - single_parts = nappend2(single_parts, clauses); - else if (sym == frst) - first_parts = nappend2(first_parts, clauses); - else if (sym == lst) - last_parts = nappend2(last_parts, clauses); - else if (sym == empty) - empty_parts = nappend2(empty_parts, clauses); - else - abort(); + if (opt_loglevel >= 2) { + va_list vl; + va_start (vl, fmt); + format(std_error, "~a: (~a:~a) ", prog_string, spec_file_str, line, nao); + vformat(std_error, fmt, vl); + put_cchar(std_error, '\n'); + va_end (vl); } - - return list(sym, main, single_parts, first_parts, - last_parts, empty_parts, nao); } -obj_t *define_transform(obj_t *define_form) +static void debuglcf(obj_t *line, const char *fmt, ...) { - obj_t *sym = first(define_form); - obj_t *args = second(define_form); - - if (define_form == nil) - return nil; - - assert (sym == define); - - if (args == nil) { - yyerror("define requires arguments"); - return define_form; + if (opt_loglevel >= 2) { + va_list vl; + va_start (vl, fmt); + format(std_error, "~a: (~a:~a)", prog_string, spec_file_str, line, nao); + vcformat(std_error, fmt, vl); + put_cchar(std_error, '\n'); + va_end (vl); } +} - if (!consp(args) || !listp(cdr(args))) { - yyerror("bad define argument syntax"); - return define_form; - } else { - obj_t *name = first(args); - obj_t *params = second(args); - - if (!symbolp(name)) { - yyerror("function name must be a symbol"); - return define_form; - } - - if (!proper_listp(params)) { - yyerror("invalid function parameter list"); - return define_form; - } +static void sem_error(obj_t *line, const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); - if (!all_satisfy(params, func_n1(symbolp), nil)) - yyerror("function parameters must be symbols"); - } + va_start (vl, fmt); + format(stream, "~a: ", prog_string, nao); + if (line) + format(stream, "(~a:~a) ", spec_file_str, line, nao); + (void) vformat(stream, fmt, vl); + va_end (vl); - return define_form; + uw_throw(query_error, get_string_from_stream(stream)); + abort(); } -obj_t *get_spec(void) +static void file_err(obj_t *line, const char *fmt, ...) { - return parsed_spec; + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + format(stream, "~a: ", prog_string, nao); + if (line) + format(stream, "(~a:~a) ", spec_file_str, line, nao); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(file_error, get_string_from_stream(stream)); + abort(); } + void dump_shell_string(const char *str) { int ch; @@ -545,7 +133,7 @@ void dump_var(const char *name, char *pfx1, size_t len1, char *pfx2, size_t len2, obj_t *value, int level) { if (len1 >= 112 || len2 >= 112) - abort(); + internal_error("too much depth in bindings"); if (stringp(value) || chrp(value)) { fputs(name, stdout); @@ -584,7 +172,7 @@ void dump_bindings(obj_t *bindings) { if (opt_loglevel >= 2) { fputs("raw_bindings:\n", stderr); - dump(bindings, stderr); + dump(bindings, std_error); } while (bindings) { @@ -655,7 +243,7 @@ obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value) return bindings; if (tree_find(cdr(existing), value)) return bindings; - yyerrorf(2, "bind variable mismatch: %s", c_str(symbol_name(pattern))); + debugf("bind variable mismatch: ~a", pattern, nao); return t; } return cons(cons(pattern, value), bindings); @@ -683,34 +271,25 @@ obj_t *dest_bind(obj_t *bindings, obj_t *pattern, obj_t *value) return bindings; } -obj_t *eval_form(obj_t *form, obj_t *bindings) -{ - if (symbolp(form)) - return assoc(bindings, form); - return cons(t, form); -} - obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, obj_t *pos, obj_t *spec_lineno, obj_t *data_lineno, obj_t *file) { #define LOG_MISMATCH(KIND) \ - yyerrorlf(2, c_num(spec_lineno), \ - "%s mismatch, position %ld (%s:%ld)", (KIND), c_num(pos), \ - c_str(file), c_num(data_lineno)); \ - yyerrorlf(2, c_num(spec_lineno), " %s", c_str(dataline)); \ + debuglf(spec_lineno, KIND " mismatch, position ~a (~a:~a)", pos, \ + file, data_lineno, nao); \ + debuglf(spec_lineno, " ~a", dataline, nao); \ if (c_num(pos) < 77) \ - yyerrorlf(2, c_num(spec_lineno), " %*s^", (int) c_num(pos), "") + debuglcf(spec_lineno, " %*s^", (int) c_num(pos), "") #define LOG_MATCH(KIND, EXTENT) \ - yyerrorlf(2, c_num(spec_lineno), \ - "%s matched, position %ld-%ld (%s:%ld)", (KIND), \ - c_num(pos), c_num(EXTENT), c_str(file), \ - c_num(data_lineno)); \ - yyerrorlf(2, c_num(spec_lineno), " %s", c_str(dataline)); \ + debuglf(spec_lineno, KIND " matched, position ~a-~a (~a:~a)", \ + pos, EXTENT, file, data_lineno, nao); \ + debuglf(spec_lineno, " ~a", dataline, nao); \ if (c_num(EXTENT) < 77) \ - yyerrorlf(2, c_num(spec_lineno), " %*s%-*s^", (int) c_num(pos), \ + debuglcf(spec_lineno, " %*s%-*s^", (int) c_num(pos), \ "", (int) (c_num(EXTENT) - c_num(pos)), "^") + for (;;) { obj_t *elem; @@ -813,10 +392,8 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, obj_t *next_pat = third(pat); obj_t *pair = assoc(bindings, second_sym); /* var exists already? */ - if (!pair) { - yyerrorlf(1, c_num(spec_lineno), "consecutive unbound variables"); - return nil; - } + if (!pair) + sem_error(spec_lineno, "consecutive unbound variables", nao); /* Re-generate a new spec with an edited version of the element we just processed, and repeat. */ @@ -839,8 +416,8 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, bindings = acons_new(bindings, sym, sub_str(dataline, pos, find)); pos = plus(find, len); } else { - yyerrorlf(0, c_num(spec_lineno), "variable followed by invalid element"); - return nil; + sem_error(spec_lineno, + "variable followed by invalid element", nao); } } else if (typeof(directive) == regex) { obj_t *past = match_regex(dataline, directive, pos); @@ -901,8 +478,10 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, } - if (!bindings_coll) - yyerrorlf(2, c_num(spec_lineno), "nothing was collected"); + if (!bindings_coll) { + debuglf(spec_lineno, "nothing was collected", nao); + return nil; + } for (iter = bindings_coll; iter; iter = cdr(iter)) { obj_t *pair = car(iter); @@ -922,8 +501,7 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, LOG_MATCH("string tree", newpos); pos = newpos; } else { - yyerrorlf(0, c_num(spec_lineno), "unknown directive: %s", - c_str(symbol_name(directive))); + sem_error(spec_lineno, "unknown directive: ~a", directive, nao); } } break; @@ -941,7 +519,7 @@ obj_t *match_line(obj_t *bindings, obj_t *specline, obj_t *dataline, break; } default: - yyerrorlf(0, c_num(spec_lineno), "unsupported object in spec"); + sem_error(spec_lineno, "unsupported object in spec: ~s", elem, nao); } specline = cdr(specline); @@ -983,19 +561,31 @@ obj_t *subst_vars(obj_t *spec, obj_t *bindings) while (spec) { obj_t *elem = first(spec); - if (consp(elem) && first(elem) == var) { - obj_t *sym = second(elem); - obj_t *pat = third(elem); - obj_t *modifier = fourth(elem); - obj_t *pair = assoc(bindings, sym); - - if (pair) { - if (pat) - spec = cons(cdr(pair), cons(pat, rest(spec))); - else if (nump(modifier)) - spec = cons(format_field(cdr(pair), modifier), rest(spec)); - else - spec = cons(cdr(pair), rest(spec)); + if (consp(elem)) { + if (first(elem) == var) { + obj_t *sym = second(elem); + obj_t *pat = third(elem); + obj_t *modifier = fourth(elem); + obj_t *pair = assoc(bindings, sym); + + if (pair) { + if (pat) + spec = cons(cdr(pair), cons(pat, rest(spec))); + else if (nump(modifier)) + spec = cons(format_field(cdr(pair), modifier), rest(spec)); + else + spec = cons(cdr(pair), rest(spec)); + continue; + } + } else if (first(elem) == quasi) { + obj_t *nested = subst_vars(rest(elem), bindings); + list_collect_append(iter, nested); + spec = cdr(spec); + continue; + } else { + obj_t *nested = subst_vars(elem, bindings); + list_collect_append(iter, nested); + spec = cdr(spec); continue; } } @@ -1007,6 +597,29 @@ obj_t *subst_vars(obj_t *spec, obj_t *bindings) return out; } +obj_t *eval_form(obj_t *form, obj_t *bindings) +{ + if (!form) + return cons(t, form); + else if (symbolp(form)) + return assoc(bindings, form); + else if (consp(form)) { + if (car(form) == quasi) { + return cons(t, cat_str(subst_vars(rest(form), bindings), nil)); + } else { + obj_t *subforms = mapcar(bind2other(func_n2(eval_form), bindings), form); + + if (all_satisfy(subforms, identity_f, nil)) + return cons(t, mapcar(func_n1(cdr), subforms)); + return nil; + } + } if (stringp(form)) { + return cons(t, form); + } + + return cons(t, form); +} + typedef struct fpip { FILE *f; DIR *d; @@ -1065,21 +678,21 @@ void complex_close(fpip_t fp) return; } - abort(); + internal_error("bad input source type code"); } obj_t *complex_snarf(fpip_t fp, obj_t *name) { switch (fp.close) { case fpip_fclose: - return lazy_stream_cons(stdio_line_stream(fp.f, name)); + return lazy_stream_cons(make_stdio_stream(fp.f, t, nil)); case fpip_pclose: - return lazy_stream_cons(pipe_line_stream(fp.f, name)); + return lazy_stream_cons(make_pipe_stream(fp.f, t, nil)); case fpip_closedir: - return lazy_stream_cons(dirent_stream(fp.d, name)); + return lazy_stream_cons(make_dir_stream(fp.d)); } - abort(); + internal_error("bad input source type"); } obj_t *robust_length(obj_t *obj) @@ -1146,11 +759,8 @@ void do_output_line(obj_t *bindings, obj_t *specline, if (directive == var) { obj_t *str = cat_str(subst_vars(cons(elem, nil), bindings), nil); - if (str == nil) { - yyerrorlf(1, c_num(spec_lineno), "bad substitution: %s", - c_str(symbol_name(second(elem)))); - continue; - } + if (str == nil) + sem_error(spec_lineno, "bad substitution: ~a", second(elem), nao); fputs(c_str(str), out); } else if (directive == rep) { obj_t *main_clauses = second(elem); @@ -1190,8 +800,7 @@ void do_output_line(obj_t *bindings, obj_t *specline, } } else { - yyerrorlf(0, c_num(spec_lineno), "unknown directive: %s", - c_str(symbol_name(directive))); + sem_error(spec_lineno, "unknown directive: ~a", directive, nao); } } break; @@ -1201,7 +810,7 @@ void do_output_line(obj_t *bindings, obj_t *specline, case 0: break; default: - yyerrorlf(0, c_num(spec_lineno), "unsupported object in output spec"); + sem_error(spec_lineno, "unsupported object in output spec: ~s", elem); } } } @@ -1275,16 +884,22 @@ obj_t *match_files(obj_t *spec, obj_t *files, data_lineno = c_num(data_linenum); first_file_parsed = nil; } else if (files) { - obj_t *name = first(files); + obj_t *spec = first(files); + obj_t *name = consp(spec) ? cdr(spec) : spec; fpip_t fp = (errno = 0, complex_open(name, nil)); - yyerrorf(2, "opening data source %s", c_str(name)); + debugf("opening data source ~a", name, nao); if (complex_open_failed(fp)) { - if (errno != 0) - yyerrorf(2, "could not open %s: %s", c_str(name), strerror(errno)); + if (consp(spec) && car(spec) == nothrow) { + debugf("could not open ~a: treating as failed match due to nothrow", + name, nao); + return nil; + } else if (errno != 0) + file_err(nil, "could not open ~a (error ~a/~a)", name, + num(errno), string(strdup(strerror(errno))), nao); else - yyerrorf(2, "could not open %s", c_str(name)); + file_err(nil, "could not open ~a", name, nao); return nil; } @@ -1299,7 +914,6 @@ repeat_spec_same_data: obj_t *dataline = first(data); obj_t *spec_linenum = first(first(spec)); obj_t *first_spec = first(specline); - long spec_lineno = spec_linenum ? c_num(spec_linenum) : 0; if (consp(first_spec)) { obj_t *sym = first(first_spec); @@ -1310,7 +924,8 @@ repeat_spec_same_data: long reps = 0; if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after skip directive"); + sem_error(spec_linenum, + "unexpected material after skip directive", nao); if ((spec = rest(spec)) == nil) break; @@ -1324,14 +939,14 @@ repeat_spec_same_data: data, num(data_lineno))); if (success) { - yyerrorlf(2, spec_lineno, "skip matched %s:%ld", - c_str(first(files)), data_lineno); + debuglf(spec_linenum, "skip matched ~a:~a", first(files), + num(data_lineno), nao); result = cons(new_bindings, cons(data, num(data_lineno))); break; } - yyerrorlf(2, spec_lineno, "skip didn't match %s:%ld", - c_str(first(files)), data_lineno); + debuglf(spec_linenum, "skip didn't match ~a:~a", first(files), + num(data_lineno), nao); data = rest(data); data_lineno++; dataline = first(data); @@ -1343,11 +958,13 @@ repeat_spec_same_data: return result; } - yyerrorlf(2, spec_lineno, "skip failed"); + debuglf(spec_linenum, "skip failed", nao); return nil; } else if (sym == trailer) { if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after trailer directive"); + sem_error(spec_linenum, + "unexpected material after trailer directive", nao); + if ((spec = rest(spec)) == nil) break; @@ -1363,49 +980,89 @@ repeat_spec_same_data: } else if (sym == block) { obj_t *name = first(rest(first_spec)); if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after block directive"); + sem_error(spec_linenum, + "unexpected material after block directive", nao); if ((spec = rest(spec)) == nil) break; - uw_block_begin(name, result); - result = match_files(spec, files, bindings, data, num(data_lineno)); - uw_block_end; - return result; + { + uw_block_begin(name, result); + result = match_files(spec, files, bindings, data, num(data_lineno)); + uw_block_end; + return result; + } } else if (sym == fail || sym == accept) { obj_t *target = first(rest(first_spec)); if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after %s", - c_str(symbol_name(sym))); + sem_error(spec_linenum, "unexpected material after ~a", sym, nao); uw_block_return(target, if2(sym == accept, cons(bindings, if3(data, cons(data, num(data_lineno)), t)))); + /* TODO: uw_block_return could just throw this */ if (target) - yyerrorlf(1, spec_lineno, "%s: no block named %s in scope", - c_str(symbol_name(sym)), c_str(symbol_name(target))); + sem_error(spec_linenum, "~a: no block named ~a in scope", + sym, target, nao); else - yyerrorlf(1, spec_lineno, "%s: not anonymous block in scope", - c_str(symbol_name(sym))); - + sem_error(spec_linenum, "%~a: no anonymous block in scope", sym, nao); return nil; } else if (sym == next) { - if (rest(first_spec)) - yyerrorlf(0, spec_lineno, "next takes no args"); + if (rest(first_spec) && rest(specline)) + sem_error(spec_linenum, + "invalid combination of old and new next syntax", nao); if ((spec = rest(spec)) == nil) break; - if (rest(specline)) { + if (rest(first_spec)) { + obj_t *source = rest(first_spec); + + if (eq(first(source), nothrow)) + push(nil, &source); + + { + obj_t *val = eval_form(first(source), bindings); + obj_t *name = cdr(val); + + if (!val) + sem_error(spec_linenum, "next: unbound variable in form ~a", + first(source), nao); + + if (eq(second(source), nothrow)) { + if (name) { + files = cons(cons(nothrow, name), files); + } else { + files = rest(files); + if (!files) { + debuglf(spec_linenum, "next: out of arguments", nao); + return nil; + } + files = cons(cons(nothrow, first(files)), rest(files)); + } + } else { + if (name) { + files = cons(name, files); + } else { + files = rest(files); + if (!files) + sem_error(spec_linenum, "next: out of arguments", nao); + files = cons(cons(nothrow, first(files)), rest(files)); + } + } + } + } else if (rest(specline)) { obj_t *sub = subst_vars(rest(specline), bindings); obj_t *str = cat_str(sub, nil); if (str == nil) { - yyerrorlf(2, spec_lineno, "bad substitution in next file spec"); + sem_error(spec_linenum, "bad substitution in next file spec", nao); continue; } - files = cons(str, files); + files = cons(cons(nothrow, str), files); } else { files = rest(files); + if (!files) + sem_error(spec_linenum, "next: out of arguments", nao); } /* We recursively process the file list, but the new @@ -1459,17 +1116,17 @@ repeat_spec_same_data: } if (sym == all && !all_match) { - yyerrorlf(2, spec_lineno, "all: some clauses didn't match"); + debuglf(spec_linenum, "all: some clauses didn't match", nao); return nil; } if ((sym == some || sym == cases) && !some_match) { - yyerrorlf(2, spec_lineno, "some/cases: no clauses matched"); + debuglf(spec_linenum, "some/cases: no clauses matched", nao); return nil; } if (sym == none && some_match) { - yyerrorlf(2, spec_lineno, "none: some clauses matched"); + debuglf(spec_linenum, "none: some clauses matched", nao); return nil; } @@ -1515,8 +1172,8 @@ repeat_spec_same_data: } if (success) { - yyerrorlf(2, spec_lineno, "collect matched %s:%ld", - c_str(first(files)), data_lineno); + debuglcf(spec_linenum, "collect matched %s:%ld", + first(files), data_lineno); for (iter = new_bindings; iter && iter != bindings; iter = cdr(iter)) @@ -1531,15 +1188,14 @@ repeat_spec_same_data: if (success) { if (consp(success)) { - yyerrorlf(2, spec_lineno, - "collect advancing from line %ld to %ld", - data_lineno, c_num(cdr(success))); + debuglcf(spec_linenum, + "collect advancing from line d to %ld", + data_lineno, c_num(cdr(success))); data = car(success); data_lineno = c_num(cdr(success)); } else { - yyerrorlf(2, spec_lineno, "collect consumed entire file"); + debuglf(spec_linenum, "collect consumed entire file", nao); data = nil; - break; } } else { data = rest(data); @@ -1550,12 +1206,14 @@ repeat_spec_same_data: uw_block_end; if (!result) { - yyerrorlf(2, spec_lineno, "collect explicitly failed"); + debuglf(spec_linenum, "collect explicitly failed", nao); return nil; } - if (!bindings_coll) - yyerrorlf(2, spec_lineno, "nothing was collected"); + if (!bindings_coll) { + debuglf(spec_linenum, "nothing was collected", nao); + return nil; + } for (iter = bindings_coll; iter; iter = cdr(iter)) { obj_t *pair = car(iter); @@ -1574,8 +1232,7 @@ repeat_spec_same_data: obj_t *sym = first(iter); if (!symbolp(sym)) { - yyerrorlf(1, spec_lineno, "non-symbol in flatten directive"); - continue; + sem_error(spec_linenum, "non-symbol in flatten directive", nao); } else { obj_t *existing = assoc(bindings, sym); @@ -1588,7 +1245,7 @@ repeat_spec_same_data: break; goto repeat_spec_same_data; - } else if (sym == forget) { + } else if (sym == forget || sym == local) { bindings = alist_remove(bindings, rest(first_spec)); if ((spec = rest(spec)) == nil) @@ -1598,15 +1255,10 @@ repeat_spec_same_data: } else if (sym == mrge) { obj_t *target = first(rest(first_spec)); obj_t *args = rest(rest(first_spec)); - obj_t *exists = assoc(bindings, target); obj_t *merged = nil; if (!target || !symbolp(target)) - yyerrorlf(1, spec_lineno, "bad merge directive"); - - if (exists) - yyerrorlf(1, spec_lineno, "merge: symbol %s already bound", - c_str(symbol_name(target))); + sem_error(spec_linenum, "bad merge directive", nao); for (; args; args = rest(args)) { obj_t *other_sym = first(args); @@ -1615,10 +1267,10 @@ repeat_spec_same_data: obj_t *other_lookup = assoc(bindings, other_sym); if (!symbolp(other_sym)) - yyerrorlf(1, spec_lineno, "non-symbol in merge directive"); + sem_error(spec_linenum, "non-symbol in merge directive", nao); else if (!other_lookup) - yyerrorlf(1, spec_lineno, "merge: nonexistent symbol %s", - c_str(symbol_name(sym))); + sem_error(spec_linenum, "merge: nonexistent symbol ~a", + other_sym, nao); if (merged) merged = weird_merge(merged, cdr(other_lookup)); @@ -1640,7 +1292,7 @@ repeat_spec_same_data: obj_t *val = eval_form(form, bindings); if (!val) - yyerrorlf(1, spec_lineno, "bind: unbound variable on right side"); + sem_error(spec_linenum, "bind: unbound variable on right side", nao); bindings = dest_bind(bindings, pattern, cdr(val)); @@ -1658,8 +1310,7 @@ repeat_spec_same_data: obj_t *sym = first(iter); if (!symbolp(sym)) { - yyerrorlf(1, spec_lineno, "non-symbol in cat directive"); - continue; + sem_error(spec_linenum, "non-symbol in cat directive", nao); } else { obj_t *existing = assoc(bindings, sym); obj_t *sep = nil; @@ -1680,18 +1331,45 @@ repeat_spec_same_data: goto repeat_spec_same_data; } else if (sym == output) { obj_t *specs = second(first_spec); - obj_t *dest_opt = third(first_spec); - obj_t *dest = dest_opt ? cat_str(subst_vars(dest_opt, bindings), nil) - : string(chk_strdup("-")); + obj_t *old_style_dest = third(first_spec); + obj_t *new_style_dest = fourth(first_spec); + obj_t *nt = nil; + obj_t *dest; + + if (old_style_dest) { + dest = cat_str(subst_vars(old_style_dest, bindings), nil); + } else { + if (eq(first(new_style_dest), nothrow)) + push(nil, &new_style_dest); + + { + obj_t *form = first(new_style_dest); + obj_t *val = eval_form(form, bindings); + + if (!val) + sem_error(spec_linenum, "output: unbound variable in form ~a", + form, nao); + + nt = eq(second(new_style_dest), nothrow); + dest = or2(cdr(val), string(strdup("-"))); + } + } + fpip_t fp = (errno = 0, complex_open(dest, t)); - yyerrorf(2, "opening data sink %s", c_str(dest)); + debugf("opening data sink ~a", dest, nao); if (complex_open_failed(fp)) { - if (errno != 0) - yyerrorf(2, "could not open %s: %s", c_str(dest), strerror(errno)); - else - yyerrorf(2, "could not open %s", c_str(dest)); + if (nt) { + debugf("could not open ~a: treating as failed match due to nothrow", + dest, nao); + return nil; + } else if (errno != 0) { + file_err(nil, "could not open ~a (error ~a/~a)", dest, + num(errno), string(strdup(strerror(errno))), nao); + } else { + file_err(nil, "could not open ~a", dest, nao); + } } else { do_output(bindings, specs, fp.f); complex_close(fp); @@ -1708,7 +1386,7 @@ repeat_spec_same_data: obj_t *params = second(args); if (rest(specline)) - yyerrorlf(1, spec_lineno, "unexpected material after define"); + sem_error(spec_linenum, "unexpected material after define", nao); uw_set_func(name, cons(params, body)); @@ -1716,21 +1394,130 @@ repeat_spec_same_data: break; goto repeat_spec_same_data; + } else if (sym == try) { + obj_t *catch_syms = second(first_spec); + obj_t *try_clause = third(first_spec); + obj_t *catch_fin = fourth(first_spec); + obj_t *finally_clause = nil; + + { + uw_block_begin(nil, result); + uw_catch_begin(catch_syms, exsym, exception); + + { + result = match_files(try_clause, files, bindings, + data, num(data_lineno)); + uw_do_unwind; + } + + uw_catch(exsym, exception) { + { + obj_t *iter; + + for (iter = catch_fin; iter; iter = cdr(iter)) { + obj_t *clause = car(iter); + obj_t *matches = second(clause); + obj_t *body = third(clause); + + if (first(clause) == catch) { + obj_t *match; + for (match = matches; match; match = cdr(match)) + if (uw_exception_subtype_p(exsym, car(match))) + break; + if (match) { + cons_bind (new_bindings, success, + match_files(body, files, bindings, + data, num(data_lineno))); + if (success) { + bindings = new_bindings; + result = t; /* catch succeeded, so try succeeds */ + if (consp(success)) { + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + data = nil; + } + } + break; + } + } else if (car(clause) == finally) { + finally_clause = body; + } + } + } + uw_do_unwind; + } + + uw_unwind { + obj_t *iter; + + /* result may be t, from catch above. */ + if (consp(result)) { + /* We process it before finally, as part of the unwinding, so + finally can accumulate more bindings over top of any bindings + produced by the main clause. */ + cons_bind (new_bindings, success, result); + if (consp(success)) { + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + data = nil; + } + bindings = new_bindings; + } + + if (!finally_clause) { + for (iter = catch_fin; iter; iter = cdr(iter)) { + obj_t *clause = car(iter); + if (first(clause) == finally) { + finally_clause = third(clause); + break; + } + } + } + + if (finally_clause) { + cons_bind (new_bindings, success, + match_files(finally_clause, files, bindings, + data, num(data_lineno))); + if (success) { + bindings = new_bindings; + result = t; /* finally succeeds, so try block succeeds */ + if (consp(success)) { + data = car(success); + data_lineno = c_num(cdr(success)); + } else { + data = nil; + } + } + } + } + + uw_catch_end; + uw_block_end; + + if (!result) + return nil; + + if ((spec = rest(spec)) == nil) + break; + + goto repeat_spec_same_data; + } } else { obj_t *func = uw_get_func(sym); if (func) { obj_t *args = rest(first_spec); obj_t *params = car(func); + obj_t *ub_p_a_pairs = nil; obj_t *body = cdr(func); obj_t *piter, *aiter; obj_t *bindings_cp = copy_alist(bindings); - if (!equal(length(args), length(params))) { - yyerrorlf(1, spec_lineno, "function %s takes %ld argument(s)", - c_str(sym), c_num(length(params))); - return nil; - } + if (!equal(length(args), length(params))) + sem_error(spec_linenum, "function ~a takes ~a argument(s)", + sym, length(params), nao); for (piter = params, aiter = args; piter; piter = cdr(piter), aiter = cdr(aiter)) @@ -1738,17 +1525,22 @@ repeat_spec_same_data: obj_t *param = car(piter); obj_t *arg = car(aiter); - if (symbolp(arg)) { - obj_t *existing = assoc(bindings, arg); - if (existing) { + if (arg && symbolp(arg)) { + obj_t *val = eval_form(arg, bindings); + if (val) { bindings_cp = acons_new(bindings_cp, param, - cdr(existing)); + cdr(val)); } else { bindings_cp = alist_remove(bindings_cp, cons(param, nil)); + ub_p_a_pairs = cons(cons(param, arg), ub_p_a_pairs); } } else { - bindings_cp = acons_new(bindings_cp, param, arg); + obj_t *val = eval_form(arg, bindings); + if (!val) + sem_error(spec_linenum, + "unbound variable in function argument form", nao); + bindings_cp = acons_new(bindings_cp, param, cdr(val)); } } @@ -1761,37 +1553,38 @@ repeat_spec_same_data: uw_block_end; if (!result) { - yyerrorlf(2, spec_lineno, "function failed"); + debuglf(spec_linenum, "function failed", nao); return nil; } { cons_bind (new_bindings, success, result); - for (piter = params, aiter = args; piter; - piter = cdr(piter), aiter = cdr(aiter)) + for (piter = ub_p_a_pairs; piter; piter = cdr(aiter)) { - obj_t *param = car(piter); - obj_t *arg = car(aiter); + cons_bind (param, arg, car(piter)); if (symbolp(arg)) { obj_t *newbind = assoc(new_bindings, param); if (newbind) { bindings = dest_bind(bindings, arg, cdr(newbind)); - if (bindings == t) + if (bindings == t) { + debuglf(spec_linenum, "binding mismatch on ~a " + "when returning from ~a", arg, sym, nao); return nil; + } } } } if (consp(success)) { - yyerrorlf(2, spec_lineno, - "function matched; advancing from line %ld to %ld", - data_lineno, c_num(cdr(success))); + debuglcf(spec_linenum, + "function matched; advancing from line %ld to %ld", + data_lineno, c_num(cdr(success))); data = car(success); data_lineno = c_num(cdr(success)); } else { - yyerrorlf(2, spec_lineno, "function consumed entire file"); + debuglf(spec_linenum, "function consumed entire file", nao); data = nil; } } @@ -1814,8 +1607,8 @@ repeat_spec_same_data: spec_linenum, num(data_lineno), first(files))); if (nump(success) && c_num(success) < c_num(length_str(dataline))) { - yyerrorf(2, "spec only matches line to position %ld: %s", - c_num(success), c_str(dataline)); + debuglf(spec_linenum, "spec only matches line to position ~a: ~a", + success, dataline, nao); return nil; } diff --git a/match.h b/match.h new file mode 100644 index 00000000..9fc2f8ed --- /dev/null +++ b/match.h @@ -0,0 +1,27 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +int extract(obj_t *spec, obj_t *filenames, obj_t *bindings); @@ -26,12 +26,11 @@ #include <stdio.h> long lineno; -extern int opt_loglevel; -extern int opt_nobindings; -extern int opt_arraydims; +extern int errors; +extern obj_t *yyin_stream; +extern const char *spec_file; +extern obj_t *spec_file_str; int yyparse(void); obj_t *get_spec(void); -int extract(obj_t *spec, obj_t *filenames, obj_t *bindings); -void yyerrorf(int level, const char *s, ...); -void yyerrorlf(int level, long spec_lineno, const char *s, ...); +void yyerrorf(const char *s, ...); void yybadtoken(int tok, const char *context); @@ -36,43 +36,41 @@ #include "y.tab.h" #include "lib.h" #include "gc.h" -#include "extract.h" +#include "stream.h" +#include "parser.h" #define YY_NO_UNPUT -const char *version = "014"; -const char *progname = "txr"; -const char *spec_file = "stdin"; +#define YY_INPUT(buf, result, max_size) \ + do { \ + obj_t *c = nil; \ + int n, ch; \ + for (n = 0; n < max_size && \ + (c = get_char(yyin_stream)) && \ + (ch = c_chr(c)) != '\n'; ++n) \ + buf[n] = (char) ch; \ + if (ch == '\n') \ + buf[n++] = (char) ch; \ + result = n; \ + } while (0) + +obj_t *yyin_stream; + long lineno = 1; int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */ int opt_nobindings = 0; int opt_arraydims = 1; -static int nesting; -static int closechar; -static int errors; - -/* - * Can implement an emergency allocator here from a fixed storage - * pool, which sets an OOM flag. Program can check flag - * and gracefully terminate instead of aborting like this. - */ -void *oom_realloc_handler(void *old, size_t size) -{ - fprintf(stderr, "%s: out of memory\n", progname); - puts("false"); - abort(); -} +int errors; void yyerror(const char *s) { - yyerrorlf(1, lineno, "%s", s); - errors++; + yyerrorf("%s", s); } -void yyerrorf(int level, const char *s, ...) +void yyerrorf(const char *s, ...) { - if (opt_loglevel >= level) { + if (opt_loglevel >= 1) { va_list vl; va_start (vl, s); fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, lineno); @@ -80,24 +78,7 @@ void yyerrorf(int level, const char *s, ...) putc('\n', stderr); va_end (vl); } - - if (level < 2) - errors++; -} - -void yyerrorlf(int level, long spec_lineno, const char *s, ...) -{ - if (opt_loglevel >= level) { - va_list vl; - va_start (vl, s); - fprintf(stderr, "%s: (%s:%ld): ", progname, spec_file, spec_lineno); - vfprintf(stderr, s, vl); - putc('\n', stderr); - va_end (vl); - } - - if (level < 2) - errors++; + errors++; } void yybadtoken(int tok, const char *context) @@ -126,20 +107,24 @@ void yybadtoken(int tok, const char *context) case LAST: problem = "\"last\""; break; case EMPTY: problem = "\"empty\""; break; case DEFINE: problem = "\"define\""; break; + case TRY: problem = "\"try\""; break; + case CATCH: problem = "\"catch\""; break; + case FINALLY: problem = "\"finally\""; break; case NUMBER: problem = "\"number\""; break; case REGCHAR: problem = "regular expression character"; break; + case LITCHAR: problem = "string literal character"; break; } if (problem != 0) if (context) - yyerrorlf(1, lineno, "misplaced %s in %s", problem, context); + yyerrorf("misplaced %s in %s", problem, context); else - yyerrorlf(1, lineno, "unexpected %s", problem); + yyerrorf("unexpected %s", problem); else if (context) - yyerrorlf(1, lineno, "unterminated %s", context); + yyerrorf("unterminated %s", context); else - yyerrorlf(1, lineno, "unexpected end of input"); + yyerrorf("unexpected end of input"); } static int char_esc(int letter) @@ -155,6 +140,7 @@ static int char_esc(int letter) case 'e': return 27; case '"': return '"'; case '\'': return '\''; + case '`': return '`'; } abort(); @@ -175,6 +161,8 @@ static int num_esc(char *num) %} +%option stack + TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ ID_END [^a-zA-Z0-9_] NUM_END [^0-9] @@ -182,7 +170,7 @@ WS [\t ]* HEX [0-9A-Fa-f] OCT [0-7] -%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT +%x SPECIAL NESTED REGEX REGCLASS STRLIT CHRLIT QSILIT %% @@ -194,8 +182,9 @@ OCT [0-7] val = strtol(yytext, &errp, 10); - if (nesting == 0) - BEGIN(INITIAL); + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); if (*errp != 0) { /* not a number */ @@ -211,159 +200,162 @@ OCT [0-7] return NUMBER; } - <SPECIAL>\({WS}all{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return ALL; } <SPECIAL>\({WS}some{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return SOME; } <SPECIAL>\({WS}none{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return NONE; } <SPECIAL>\({WS}maybe{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return MAYBE; } <SPECIAL>\({WS}cases{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return CASES; } <SPECIAL>\({WS}and{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return AND; } <SPECIAL>\({WS}or{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return OR; } <SPECIAL>\({WS}end{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return END; } <SPECIAL>\({WS}collect{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return COLLECT; } <SPECIAL>\({WS}coll{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return COLL; } <SPECIAL>\({WS}until{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return UNTIL; } -<SPECIAL>\({WS}output{WS}\) { - BEGIN(INITIAL); +<SPECIAL>\({WS}output/{ID_END} { + yy_push_state(NESTED); return OUTPUT; } <SPECIAL>\({WS}repeat{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return REPEAT; } <SPECIAL>\({WS}rep{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return REP; } <SPECIAL>\({WS}single{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return SINGLE; } <SPECIAL>\({WS}first{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return FIRST; } <SPECIAL>\({WS}last{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return LAST; } <SPECIAL>\({WS}empty{WS}\) { - BEGIN(INITIAL); + yy_pop_state(); return EMPTY; } <SPECIAL>\({WS}define/{ID_END} { - nesting++; - closechar = ')'; - BEGIN(NESTED); + yy_push_state(NESTED); return DEFINE; } +<SPECIAL>\({WS}try{WS}\) { + yy_pop_state(); + return TRY; + } + +<SPECIAL>\({WS}catch/{ID_END} { + yy_push_state(NESTED); + return CATCH; + } + +<SPECIAL>\({WS}finally{WS}\) { + yy_pop_state(); + return FINALLY; + } + <SPECIAL,NESTED>\{|\( { - nesting++; - if (yytext[0] == '{') - closechar = '}'; - else - closechar = ')'; - BEGIN(NESTED); + yy_push_state(NESTED); + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); return yytext[0]; } <SPECIAL,NESTED>\}|\) { - if (yytext[0] != closechar) { - yyerror("paren mismatch"); - BEGIN(INITIAL); - } else { - switch (--nesting) { - case 1: - BEGIN(SPECIAL); - break; - case 0: - BEGIN(INITIAL); - break; - } - - return yytext[0]; - } + yy_pop_state(); + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); + return yytext[0]; } <SPECIAL,NESTED>[\t ]+ { /* Eat whitespace in directive */ } <SPECIAL,NESTED>\" { - BEGIN(STRLIT); + yy_push_state(STRLIT); return '"'; } <SPECIAL,NESTED>\' { - BEGIN(CHRLIT); + yy_push_state(CHRLIT); return '\''; } -<SPECIAL>@ { - if (nesting == 0) { - BEGIN(INITIAL); - yylval.lexeme = strdup("@"); - return TEXT; - } - } +<SPECIAL,NESTED>` { + yy_push_state(QSILIT); + return '`'; + } + +<SPECIAL>@ { + yy_pop_state(); + yylval.lexeme = strdup("@"); + return TEXT; + } <SPECIAL,NESTED>\n { lineno++; } <SPECIAL,NESTED>[/] { - BEGIN(REGEX); + yy_push_state(REGEX); return '/'; } @@ -377,7 +369,7 @@ OCT [0-7] lexeme[0] = char_esc(yytext[1]); lexeme[1] = 0; yylval.lexeme = strdup(lexeme); - BEGIN(INITIAL); + yy_pop_state(); return TEXT; } @@ -386,20 +378,20 @@ OCT [0-7] lexeme[0] = num_esc(yytext + 1); lexeme[1] = 0; yylval.lexeme = strdup(lexeme); - BEGIN(INITIAL); + yy_pop_state(); return TEXT; } <SPECIAL,NESTED>. { - yyerrorf(0, "bad character in directive: '%c'", + yyerrorf("bad character in directive: '%c'", yytext[0]); } <REGEX>[/] { - if (nesting == 0) - BEGIN(INITIAL); - else - BEGIN(NESTED); + yy_pop_state(); + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT) + yy_pop_state(); yylval.chr = '/'; return '/'; } @@ -457,12 +449,12 @@ OCT [0-7] } <INITIAL>@{WS}\* { - BEGIN(SPECIAL); + yy_push_state(SPECIAL); return '*'; } <INITIAL>@ { - BEGIN(SPECIAL); + yy_push_state(SPECIAL); } <INITIAL>^@#.*\n { @@ -475,25 +467,24 @@ OCT [0-7] } <STRLIT>\" { - if (nesting == 0) - BEGIN(INITIAL); - else - BEGIN(NESTED); - return '"'; + yy_pop_state(); + return yytext[0]; } <CHRLIT>\' { - if (nesting == 0) - BEGIN(INITIAL); - else - BEGIN(NESTED); - return '\''; + yy_pop_state(); + return yytext[0]; } -<STRLIT,CHRLIT>[\\][abtnvfre] { - yylval.chr = char_esc(yytext[1]); - return LITCHAR; - } +<QSILIT>` { + yy_pop_state(); + return yytext[0]; + } + +<STRLIT,CHRLIT,QSILIT>[\\][abtnvfre"`'] { + yylval.chr = char_esc(yytext[1]); + return LITCHAR; + } <STRLIT,CHRLIT>[\\](x{HEX}+|{OCT}+) { yylval.chr = num_esc(yytext + 1); @@ -505,256 +496,28 @@ OCT [0-7] yylval.chr = yytext[0]; return LITCHAR; } + <CHRLIT>\n { yyerror("newline in character literal"); lineno++; yylval.chr = yytext[0]; return LITCHAR; } -<STRLIT,CHRLIT>. { + +<QSILIT>\n { + yyerror("newline in string quasiliteral"); + lineno++; yylval.chr = yytext[0]; return LITCHAR; } -%% - -void help(void) -{ - const char *text = -"\n" -"txr version %s\n" -"\n" -"copyright 2009, Kaz Kylheku <kkylheku@gmail.com>\n" -"\n" -"usage:\n" -"\n" -" %s [ options ] query-file { data-file }*\n" -"\n" -"The query-file or data-file arguments may be specified as -, in which case\n" -"standard input is used. If these arguments end with a | character, then\n" -"they are treated as command pipes. Leading arguments which begin with a -\n" -"followed by one or more characters, and which are not arguments to options\n" -"are interpreted as options. The -- option indicates the end of the options.\n" -"\n" -"If no data-file arguments sare supplied, then the query itself must open a\n" -"a data source prior to attempting to make any pattern match, or it will\n" -"simply fail due to a match which has run out of data.\n" -"\n" -"options:\n" -"\n" -"-Dvar=value Pre-define variable var, with the given value.\n" -" A list value can be specified using commas.\n" -"-Dvar Predefine variable var, with empty string value.\n" -"-q Quiet: don't report errors during query matching.\n" -"-v Verbose: extra logging from matcher.\n" -"-b Don't dump list of bindings.\n" -"-a num Generate array variables up to num-dimensions.\n" -" Default is 1. Additional dimensions are fudged\n" -" by generating numeric suffixes\n" -"--help You already know!\n" -"--version Display program version\n" -"\n" -"Options that take no argument can be combined. The -q and -v options\n" -"are mutually exclusive; the right-most one dominates.\n" -"\n" - ; - fprintf(stdout, text, version, progname); -} - -void hint(void) -{ - fprintf(stderr, "%s: incorrect arguments: try --help\n", progname); -} - -int main(int argc, char **argv) -{ - obj_t *stack_bottom_0 = nil; - obj_t *spec = nil; - obj_t *bindings = nil; - int match_loglevel = opt_loglevel; - progname = argv[0] ? argv[0] : progname; - obj_t *stack_bottom_1 = nil; - - init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1); - - if (argc <= 1) { - hint(); - return EXIT_FAILURE; - } - - argc--, argv++; - - while (argc > 0 && (*argv)[0] == '-') { - if (!strcmp(*argv, "--")) { - argv++, argc--; - break; - } - - if (!strcmp(*argv, "-")) - break; - - if (!strncmp(*argv, "-D", 2)) { - char *var = *argv + 2; - char *equals = strchr(var, '='); - char *has_comma = (equals != 0) ? strchr(equals, ',') : 0; - - if (has_comma) { - char *val = equals + 1; - obj_t *list = nil; - - *equals = 0; - - for (;;) { - size_t piece = strcspn(val, ","); - char comma_p = val[piece]; - - val[piece] = 0; - - list = cons(string(strdup(val)), list); - - if (!comma_p) - break; - - val += piece + 1; - } - - list = nreverse(list); - bindings = cons(cons(intern(string(strdup(var))), list), bindings); - } else if (equals) { - char *val = equals + 1; - *equals = 0; - bindings = cons(cons(intern(string(strdup(var))), - string(strdup(val))), bindings); - } else { - bindings = cons(cons(intern(string(strdup(var))), - null_string), bindings); - } - - argc--, argv++; - continue; - } - - if (!strcmp(*argv, "--version")) { - printf("%s: version %s\n", progname, version); - return 0; - } - - if (!strcmp(*argv, "--help")) { - help(); - return 0; - } - - if (!strcmp(*argv, "-a")) { - long val; - char *errp; - char opt = (*argv)[1]; - - if (argc == 1) { - fprintf(stderr, "%s: option %c needs argument\n", progname, opt); - - return EXIT_FAILURE; - } - - argv++, argc--; - - switch (opt) { - case 'a': - val = strtol(*argv, &errp, 10); - if (*errp != 0) { - fprintf(stderr, "%s: option %c needs numeric argument, not %s\n", - progname, opt, *argv); - return EXIT_FAILURE; - } - - opt_arraydims = val; - break; - } - - argv++, argc--; - continue; - } - - if (!strcmp(*argv, "--gc-debug")) { - opt_gc_debug = 1; - argv++, argc--; - continue; - } - - { - char *popt; - for (popt = (*argv)+1; *popt != 0; popt++) { - switch (*popt) { - case 'v': - match_loglevel = 2; - break; - case 'q': - match_loglevel = 0; - break; - case 'b': - opt_nobindings = 1; - break; - case '-': - fprintf(stderr, "%s: unrecognized long option: --%s\n", - progname, popt + 1); - return EXIT_FAILURE; - default: - fprintf(stderr, "%s: unrecognized option: %c\n", progname, *popt); - return EXIT_FAILURE; - } - } - - argc--, argv++; - } - } - - if (argc < 1) { - hint(); - return EXIT_FAILURE; - } - - if (strcmp(*argv, "-") != 0) { - yyin = fopen(*argv, "r"); - if (yyin == 0) { - fprintf(stderr, "%s: unable to open %s\n", progname, *argv); - return EXIT_FAILURE; - } - spec_file = *argv; - } - - argc--, argv++; - - { - int gc; - - gc = gc_state(0); - yyparse(); - gc_state(gc); - - if (errors) - return EXIT_FAILURE; - spec = get_spec(); - - - opt_loglevel = match_loglevel; - - if (opt_loglevel >= 2) { - fputs("spec:\n", stderr); - dump(spec, stderr); - - fputs("bindings:\n", stderr); - dump(bindings, stderr); - } - - { - int retval; - list_collect_decl(filenames, iter); - - while (*argv) - list_collect(iter, string(*argv++)); +<QSILIT>@ { + yy_push_state(SPECIAL); + } - retval = extract(spec, filenames, bindings); +<STRLIT,CHRLIT,QSILIT>. { + yylval.chr = yytext[0]; + return LITCHAR; + } - return errors ? EXIT_FAILURE : retval; - } - } -} +%% diff --git a/parser.y b/parser.y new file mode 100644 index 00000000..9b440919 --- /dev/null +++ b/parser.y @@ -0,0 +1,593 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +%{ + +#include <stdio.h> +#include <assert.h> +#include <limits.h> +#include <dirent.h> +#include "lib.h" +#include "regex.h" +#include "parser.h" + +int yylex(void); +void yyerror(const char *); + +obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts); +obj_t *define_transform(obj_t *define_form); +obj_t *lit_char_helper(obj_t *litchars); + +static obj_t *parsed_spec; + +%} + +%union { + char *lexeme; + union obj *obj; + char chr; + long num; +} + +%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT +%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE +%token <lexeme> TRY CATCH FINALLY +%token <num> NUMBER +%token <chr> REGCHAR LITCHAR + +%type <obj> spec clauses clause all_clause some_clause none_clause maybe_clause +%type <obj> cases_clause collect_clause clause_parts additional_parts +%type <obj> output_clause define_clause try_clause catch_clauses_opt +%type <obj> line elems_opt elems elem var var_op +%type <obj> list exprs expr out_clauses out_clauses_opt out_clause +%type <obj> repeat_clause repeat_parts_opt o_line +%type <obj> o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt +%type <obj> regex regexpr regbranch +%type <obj> regterm regclass regclassterm regrange +%type <obj> strlit chrlit quasilit quasi_items quasi_item litchars +%type <chr> regchar +%nonassoc ALL SOME NONE MAYBE CASES AND OR END COLLECT UNTIL COLL +%nonassoc OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE +%nonassoc '{' '}' '[' ']' '(' ')' +%right IDENT TEXT NUMBER +%left '|' '/' +%right '*' '?' '+' +%right '^' '.' '\\' REGCHAR LITCHAR + +%% + +spec : clauses { parsed_spec = $1; } + | { parsed_spec = nil; } + | error { parsed_spec = nil; + yybadtoken(yychar, 0); } + ; + +clauses : clause { $$ = cons($1, nil); } + | clause clauses { $$ = cons($1, $2); } + ; + +clause : all_clause { $$ = list(num(lineno - 1), $1, nao); } + | some_clause { $$ = list(num(lineno - 1), $1, nao); } + | none_clause { $$ = list(num(lineno - 1), $1, nao); } + | maybe_clause { $$ = list(num(lineno - 1), $1, nao); } + | cases_clause { $$ = list(num(lineno - 1), $1, nao); } + | collect_clause { $$ = list(num(lineno - 1), $1, nao); } + | define_clause { $$ = list(num(lineno - 1), + define_transform($1), nao); } + | try_clause { $$ = list(num(lineno - 1), $1, nao); } + | output_clause { $$ = list(num(lineno - 1), $1, nao); } + | line { $$ = $1; } + | repeat_clause { $$ = nil; + yyerror("repeat outside of output"); } + ; + +all_clause : ALL newl clause_parts { $$ = cons(all, $3); } + | ALL newl error { $$ = nil; + yybadtoken(yychar, + "all clause"); } + | ALL newl END newl { $$ = nil; + yyerror("empty all clause"); } + + ; + +some_clause : SOME newl clause_parts { $$ = cons(some, $3); } + | SOME newl error { $$ = nil; + yybadtoken(yychar, + "some clause"); } + | SOME newl END newl { $$ = nil; + yyerror("empty some clause"); } + ; + +none_clause : NONE newl clause_parts { $$ = cons(none, $3); } + | NONE newl error { $$ = nil; + yybadtoken(yychar, + "none clause"); } + | NONE newl END newl { $$ = nil; + yyerror("empty none clause"); } + ; + +maybe_clause : MAYBE newl clause_parts { $$ = cons(maybe, $3); } + | MAYBE newl error { $$ = nil; + yybadtoken(yychar, + "maybe clause"); } + | MAYBE newl END newl { $$ = nil; + yyerror("empty maybe clause"); } + ; + +cases_clause : CASES newl clause_parts { $$ = cons(cases, $3); } + | CASES newl error { $$ = nil; + yybadtoken(yychar, + "cases clause"); } + | CASES newl END newl { $$ = nil; + yyerror("empty cases clause"); } + ; + +collect_clause : COLLECT newl clauses END newl { $$ = list(collect, $3, nao); } + | COLLECT newl clauses + UNTIL newl clauses END newl { $$ = list(collect, $3, + $6, nao); } + | COLLECT newl error { $$ = nil; + if (yychar == UNTIL || yychar == END) + yyerror("empty collect"); + else + yybadtoken(yychar, + "collect clause"); } + ; + +clause_parts : clauses additional_parts { $$ = cons($1, $2); } + ; + +additional_parts : END newl { $$ = nil; } + | AND newl clauses additional_parts { $$ = cons($3, $4); } + | OR newl clauses additional_parts { $$ = cons($3, $4); } + ; + +line : elems_opt '\n' { $$ = $1; } + ; + +elems_opt : elems { $$ = cons(num(lineno - 1), $1); } + | { $$ = nil; } + ; + +elems : elem { $$ = cons($1, nil); } + | elem elems { $$ = cons($1, $2); } + | rep_elem { $$ = nil; + yyerror("rep outside of output"); } + ; + +elem : TEXT { $$ = string($1); } + | var { $$ = $1; } + | list { $$ = $1; } + | regex { $$ = cons(regex_compile($1), $1); } + | COLL elems END { $$ = list(coll, $2, nao); } + | COLL elems + UNTIL elems END { $$ = list(coll, $2, $4, nao); } + | COLL error { $$ = nil; + yybadtoken(yychar, "coll clause"); } + ; + +define_clause : DEFINE exprs ')' newl + clauses + END newl { $$ = list(define, $2, $5, nao); } + | DEFINE ')' newl + clauses + END newl { $$ = list(define, nil, $4, nao); } + | DEFINE exprs ')' newl + END newl { $$ = list(define, $2, nao); } + | DEFINE ')' newl + END newl { $$ = list(define, nao); } + | DEFINE error { yybadtoken(yychar, "list expression"); } + | DEFINE exprs ')' newl + error { yybadtoken(yychar, "define"); } + | DEFINE ')' newl + error { yybadtoken(yychar, "define"); } + ; + +try_clause : TRY newl + clauses + catch_clauses_opt + END newl { $$ = list(try, + flatten(mapcar(func_n1(second), + $4)), + $3, $4, nao); } + | TRY newl + error { $$ = nil; + yybadtoken(yychar, "try clause"); } + ; + +catch_clauses_opt : CATCH ')' newl + clauses + catch_clauses_opt { $$ = cons(list(catch, nil, $4, nao), + $5); } + | CATCH exprs ')' newl + clauses + catch_clauses_opt { $$ = cons(list(catch, $2, $5, nao), + $6); } + | FINALLY newl + clauses { $$ = cons(list(finally, nil, + $3, nao), + nil); } + | { $$ = nil; } + ; + + +output_clause : OUTPUT ')' o_elems '\n' + out_clauses + END newl { $$ = list(output, $5, $3, nao); } + | OUTPUT ')' newl + out_clauses + END newl { $$ = list(output, $4, nao); } + | OUTPUT exprs ')' newl + out_clauses + END newl { $$ = list(output, $5, nil, $2, nao); } + | OUTPUT exprs ')' o_elems '\n' + out_clauses + END newl { yyerror("invalid combination of old and " + "new syntax in output directive"); } + | OUTPUT error { yybadtoken(yychar, "list expression"); } + | OUTPUT ')' o_elems '\n' + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + | OUTPUT ')' newl + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + | OUTPUT exprs ')' o_elems '\n' + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + | OUTPUT exprs ')' newl + error { $$ = nil; + yybadtoken(yychar, "output clause"); } + ; + +out_clauses : out_clause { $$ = cons($1, nil); } + | out_clause out_clauses { $$ = cons($1, $2); } + ; + +out_clause : repeat_clause { $$ = list(num(lineno - 1), $1, nao); } + | o_line { $$ = $1; } + | all_clause { $$ = nil; + yyerror("match clause in output"); } + | some_clause { $$ = nil; + yyerror("match clause in output"); } + | none_clause { $$ = nil; + yyerror("match clause in output"); } + | maybe_clause { $$ = nil; + yyerror("match clause in output"); } + | cases_clause { $$ = nil; + yyerror("match clause in output"); } + | collect_clause { $$ = nil; + yyerror("match clause in output"); } + | define_clause { $$ = nil; + yyerror("match clause in output"); } + + | try_clause { $$ = nil; + yyerror("match clause in output"); } + | output_clause { $$ = nil; + yyerror("match clause in output"); } + ; + +repeat_clause : REPEAT newl + out_clauses + repeat_parts_opt + END newl { $$ = repeat_rep_helper(repeat, $3, $4); } + | REPEAT newl + error { $$ = nil; + yybadtoken(yychar, "repeat clause"); } + ; + +repeat_parts_opt : SINGLE newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(single, $3), $4); } + | FIRST newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(frst, $3), $4); } + | LAST newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(lst, $3), $4); } + | EMPTY newl + out_clauses_opt + repeat_parts_opt { $$ = cons(cons(empty, $3), $4); } + | /* empty */ { $$ = nil; } + ; + + +out_clauses_opt : out_clauses { $$ = $1; } + | /* empty */ { $$ = null_list; } + +o_line : o_elems_opt '\n' { $$ = $1; } + ; + +o_elems_opt : o_elems { $$ = cons(num(lineno - 1), $1); } + | { $$ = nil; } + ; + +o_elems_opt2 : o_elems { $$ = $1; } + | { $$ = null_list; } + ; + +o_elems : o_elem { $$ = cons($1, nil); } + | o_elem o_elems { $$ = cons($1, $2); } + ; + +o_elem : TEXT { $$ = string($1); } + | var { $$ = $1; } + | rep_elem { $$ = $1; } + ; + +rep_elem : REP o_elems + rep_parts_opt END { $$ = repeat_rep_helper(rep, $2, $3); } + | REP error { $$ = nil; yybadtoken(yychar, "rep clause"); } + ; + +rep_parts_opt : SINGLE o_elems_opt2 + rep_parts_opt { $$ = cons(cons(single, $2), $3); } + | FIRST o_elems_opt2 + rep_parts_opt { $$ = cons(cons(frst, $2), $3); } + | LAST o_elems_opt2 + rep_parts_opt { $$ = cons(cons(lst, $2), $3); } + | EMPTY o_elems_opt2 + rep_parts_opt { $$ = cons(cons(empty, $2), $3); } + | /* empty */ { $$ = nil; } + ; + + +/* This sucks, but factoring '*' into a nonterminal + * that generates an empty phrase causes reduce/reduce conflicts. + */ +var : IDENT { $$ = list(var, intern(string($1)), nao); } + | IDENT elem { $$ = list(var, intern(string($1)), $2, nao); } + | '{' IDENT '}' { $$ = list(var, intern(string($2)), nao); } + | '{' IDENT '}' elem { $$ = list(var, intern(string($2)), $4, nao); } + | '{' IDENT regex '}' { $$ = list(var, intern(string($2)), + nil, cons(regex_compile($3), $3), + nao); } + | '{' IDENT NUMBER '}' { $$ = list(var, intern(string($2)), + nil, num($3), nao); } + | var_op IDENT { $$ = list(var, intern(string($2)), + nil, $1, nao); } + | var_op IDENT elem { $$ = list(var, intern(string($2)), + $3, $1, nao); } + | var_op '{' IDENT '}' { $$ = list(var, intern(string($3)), + nil, $1, nao); } + | var_op '{' IDENT '}' elem { $$ = list(var, intern(string($3)), + $5, $1, nao); } + | IDENT error { $$ = nil; + yybadtoken(yychar, "variable spec"); } + | var_op error { $$ = nil; + yybadtoken(yychar, "variable spec"); } + ; + +var_op : '*' { $$ = t; } + ; + +list : '(' exprs ')' { $$ = $2; } + | '(' ')' { $$ = nil; } + | '(' error { $$ = nil; + yybadtoken(yychar, "list expression"); } + ; + +exprs : expr { $$ = cons($1, nil); } + | expr exprs { $$ = cons($1, $2); } + | expr '.' expr { $$ = cons($1, $3); } + ; + +expr : IDENT { $$ = intern(string($1)); } + | NUMBER { $$ = num($1); } + | list { $$ = $1; } + | regex { $$ = cons(regex_compile($1), $1); } + | chrlit { $$ = $1; } + | strlit { $$ = $1; } + | quasilit { $$ = $1; } + ; + +regex : '/' regexpr '/' { $$ = $2; } + | '/' '/' { $$ = nil; } + | '/' error { $$ = nil; + yybadtoken(yychar, "regex"); } + ; + +regexpr : regbranch { $$ = $1; } + | regbranch '|' regbranch { $$ = list(list(or, $1, + $3, nao), nao); } + ; + +regbranch : regterm { $$ = cons($1, nil); } + | regterm regbranch { $$ = cons($1, $2); } + ; + +regterm : '[' regclass ']' { $$ = cons(set, $2); } + | '[' '^' regclass ']' { $$ = cons(cset, $3); } + | '.' { $$ = wild; } + | '^' { $$ = chr('^'); } + | ']' { $$ = chr(']'); } + | '-' { $$ = chr('-'); } + | regterm '*' { $$ = list(zeroplus, $1, nao); } + | regterm '+' { $$ = list(oneplus, $1, nao); } + | regterm '?' { $$ = list(optional, $1, nao); } + | REGCHAR { $$ = chr($1); } + | '(' regexpr ')' { $$ = cons(compound, $2); } + | '(' error { $$ = nil; + yybadtoken(yychar, "regex subexpression"); } + | '[' error { $$ = nil; + yybadtoken(yychar, "regex character class"); } + ; + +regclass : regclassterm { $$ = cons($1, nil); } + | regclassterm regclass { $$ = cons($1, $2); } + ; + +regclassterm : regrange { $$ = $1; } + | regchar { $$ = chr($1); } + ; + +regrange : regchar '-' regchar { $$ = cons(chr($1), chr($3)); } + +regchar : '?' { $$ = '?'; } + | '.' { $$ = '.'; } + | '*' { $$ = '*'; } + | '+' { $$ = '+'; } + | '(' { $$ = '('; } + | ')' { $$ = ')'; } + | '^' { $$ = '^'; } + | '|' { $$ = '|'; } + | REGCHAR { $$ = $1; } + ; + +newl : '\n' + | error '\n' { yyerror("newline expected after directive"); + yyerrok; } + ; + +strlit : '"' '"' { $$ = null_string; } + | '"' litchars '"' { $$ = lit_char_helper($2); } + | '"' error { yybadtoken(yychar, "string literal"); } + ; + +chrlit : '\'' '\'' { yyerror("empty character literal"); } + { $$ = nil; } + | '\'' litchars '\'' { $$ = car($2); + if (cdr($2)) + yyerror("multiple characters in " + "character literal"); } + | '\'' error { $$ = nil; + yybadtoken(yychar, "character literal"); } + ; + +quasilit : '`' '`' { $$ = null_string; } + | '`' quasi_items '`' { $$ = cons(quasi, $2); } + | '`' error { $$ = nil; + yybadtoken(yychar, "string literal"); } + ; + +quasi_items : quasi_item { $$ = cons($1, nil); } + | quasi_item quasi_items { $$ = cons($1, $2); } + ; + +quasi_item : litchars { $$ = lit_char_helper($1); } + | TEXT { $$ = string($1); } + | var { $$ = $1; } + | list { $$ = $1; } + ; + +litchars : LITCHAR { $$ = cons(chr($1), nil); } + | LITCHAR litchars { $$ = cons(chr($1), $2); } + ; + + +%% + +obj_t *repeat_rep_helper(obj_t *sym, obj_t *main, obj_t *parts) +{ + obj_t *single_parts = nil; + obj_t *first_parts = nil; + obj_t *last_parts = nil; + obj_t *empty_parts = nil; + obj_t *iter; + + for (iter = parts; iter != nil; iter = cdr(iter)) { + obj_t *part = car(iter); + obj_t *sym = car(part); + obj_t *clauses = cdr(part); + + if (sym == single) + single_parts = nappend2(single_parts, clauses); + else if (sym == frst) + first_parts = nappend2(first_parts, clauses); + else if (sym == lst) + last_parts = nappend2(last_parts, clauses); + else if (sym == empty) + empty_parts = nappend2(empty_parts, clauses); + else + abort(); + } + + return list(sym, main, single_parts, first_parts, + last_parts, empty_parts, nao); +} + +obj_t *define_transform(obj_t *define_form) +{ + obj_t *sym = first(define_form); + obj_t *args = second(define_form); + + if (define_form == nil) + return nil; + + assert (sym == define); + + if (args == nil) { + yyerror("define requires arguments"); + return define_form; + } + + if (!consp(args) || !listp(cdr(args))) { + yyerror("bad define argument syntax"); + return define_form; + } else { + obj_t *name = first(args); + obj_t *params = second(args); + + if (!symbolp(name)) { + yyerror("function name must be a symbol"); + return define_form; + } + + if (!proper_listp(params)) { + yyerror("invalid function parameter list"); + return define_form; + } + + if (!all_satisfy(params, func_n1(symbolp), nil)) + yyerror("function parameters must be symbols"); + } + + return define_form; +} + +obj_t *lit_char_helper(obj_t *litchars) +{ + obj_t *ret = nil; + + if (litchars) { + obj_t *len = length(litchars), *iter, *ix; + ret = mkustring(len); + for (iter = litchars, ix = zero; + iter; + iter = cdr(iter), ix = plus(ix, one)) + { + chr_str_set(ret, ix, car(iter)); + } + } else { + ret = nil; + } + return ret; +} + +obj_t *get_spec(void) +{ + return parsed_spec; +} + @@ -28,7 +28,10 @@ #include <stdlib.h> #include <assert.h> #include <dirent.h> +#include <setjmp.h> +#include <dirent.h> #include "lib.h" +#include "unwind.h" #include "regex.h" #define NFA_SET_SIZE 512 @@ -340,7 +343,7 @@ int nfa_all_states(nfa_state_t **inout, int num, int visited) nfa_state_t *s = inout[i]; if (num >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); switch (s->a.kind) { case nfa_accept: @@ -372,7 +375,7 @@ int nfa_all_states(nfa_state_t **inout, int num, int visited) } if (num > NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); return num; } @@ -417,7 +420,7 @@ int nfa_closure(nfa_state_t **stack, nfa_state_t **in, int nin, push them on the stack, and mark them as visited. */ for (i = 0; i < nin; i++) { if (stackp >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); in[i]->a.visited = visited; stack[stackp++] = in[i]; out[nout++] = in[i]; @@ -429,7 +432,7 @@ int nfa_closure(nfa_state_t **stack, nfa_state_t **in, int nin, nfa_state_t *top = stack[--stackp]; if (nout >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); /* Only states of type nfa_empty are interesting. Each such state at most two epsilon transitions. */ @@ -457,7 +460,7 @@ int nfa_closure(nfa_state_t **stack, nfa_state_t **in, int nin, } if (nout > NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); return nout; } @@ -497,7 +500,7 @@ int nfa_move(nfa_state_t **in, int nin, nfa_state_t **out, int ch) among a common set of leading struct members in the union. */ if (nmove >= NFA_SET_SIZE) - abort(); + internal_error("NFA set size exceeded"); out[nmove++] = s->o.trans; } @@ -580,7 +583,7 @@ static void regex_destroy(obj_t *regex) } static struct cobj_ops regex_obj_ops = { - regex_equal, cobj_print_op, regex_destroy + regex_equal, cobj_print_op, regex_destroy, 0, }; obj_t *regex_compile(obj_t *regex_sexp) diff --git a/stream.c b/stream.c new file mode 100644 index 00000000..f91ae753 --- /dev/null +++ b/stream.c @@ -0,0 +1,641 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include <stdio.h> +#include <string.h> +#include <dirent.h> +#include <stdarg.h> +#include <stdlib.h> +#include <assert.h> +#include <setjmp.h> +#include "lib.h" +#include "gc.h" +#include "unwind.h" +#include "stream.h" + +obj_t *std_input, *std_output, *std_error; + +struct strm_ops { + struct cobj_ops cobj_ops; + obj_t *(*put_string)(obj_t *, const char *); + obj_t *(*put_char)(obj_t *, int); + obj_t *(*get_line)(obj_t *); + obj_t *(*get_char)(obj_t *); + obj_t *(*vcformat)(obj_t *, const char *fmt, va_list vl); + obj_t *(*vformat)(obj_t *, const char *fmt, va_list vl); + obj_t *(*close)(obj_t *); +}; + +static obj_t *common_equal(obj_t *self, obj_t *other) +{ + return self == other ? t : nil; +} + +static void common_destroy(obj_t *obj) +{ + (void) close_stream(obj); +} + +obj_t *common_vformat(obj_t *stream, const char *fmt, va_list vl) +{ + int ch; + + for (; (ch = *fmt) != 0; fmt++) { + obj_t *obj; + + if (ch == '~') { + ch = *++fmt; + if (ch == 0) + abort(); + switch (ch) { + case '~': + put_cchar(stream, ch); + continue; + case 'a': + obj = va_arg(vl, obj_t *); + if (obj == nao) + abort(); + obj_pprint(obj, stream); + continue; + case 's': + obj = va_arg(vl, obj_t *); + if (obj == nao) + abort(); + obj_print(obj, stream); + continue; + default: + abort(); + } + continue; + } + + put_cchar(stream, ch); + } + + if (va_arg(vl, obj_t *) != nao) + internal_error("unterminated format argument list"); + return t; +} + +static obj_t *stdio_put_string(obj_t *stream, const char *s) +{ + FILE *f = (FILE *) stream->co.handle; + return (f && fputs(s, f) != EOF) ? t : nil; +} + +static obj_t *stdio_put_char(obj_t *stream, int ch) +{ + FILE *f = (FILE *) stream->co.handle; + return (f && putc(ch, f) != EOF) ? t : nil; +} + +static char *snarf_line(FILE *in) +{ + const size_t min_size = 512; + size_t size = 0; + size_t fill = 0; + char *buf = 0; + + for (;;) { + int ch = getc(in); + + if (ch == EOF && buf == 0) + break; + + if (fill >= size) { + size_t newsize = size ? size * 2 : min_size; + buf = chk_realloc(buf, newsize); + size = newsize; + } + + if (ch == '\n' || ch == EOF) { + buf[fill++] = 0; + break; + } + buf[fill++] = ch; + } + + if (buf) + buf = chk_realloc(buf, fill); + + return buf; +} + +static obj_t *stdio_get_line(obj_t *stream) +{ + if (stream->co.handle == 0) { + return nil; + } else { + char *line = snarf_line((FILE *) stream->co.handle); + if (!line) + return nil; + return string(line); + } +} + +obj_t *stdio_get_char(obj_t *stream) +{ + FILE *f = (FILE *) stream->co.handle; + if (f) { + int ch = getc(f); + return (ch != EOF) ? chr(ch) : nil; + } + return nil; +} + +obj_t *stdio_vcformat(obj_t *stream, const char *fmt, va_list vl) +{ + FILE *f = (FILE *) stream->co.handle; + if (f) { + int n = vfprintf(f, fmt, vl); + return (n >= 0) ? num(n) : nil; + } + return nil; +} + +static obj_t *stdio_close(obj_t *stream) +{ + + FILE *f = (FILE *) stream->co.handle; + + if (f != 0 && f != stdin && f != stdout) { + int result = fclose(f); + stream->co.handle = 0; + return result != EOF ? t : nil; + } + return nil; +} + +static struct strm_ops stdio_ops = { + { common_equal, + cobj_print_op, + common_destroy, + 0 }, + stdio_put_string, + stdio_put_char, + stdio_get_line, + stdio_get_char, + stdio_vcformat, + common_vformat, + stdio_close +}; + +static obj_t *pipe_close(obj_t *stream) +{ + FILE *f = (FILE *) stream->co.handle; + + if (f != 0) { + int result = pclose(f); + stream->co.handle = 0; + return result >= 0 ? t : nil; + } + return nil; +} + +static struct strm_ops pipe_ops = { + { common_equal, + cobj_print_op, + common_destroy, + 0 }, + stdio_put_string, + stdio_put_char, + stdio_get_line, + stdio_get_char, + stdio_vcformat, + common_vformat, + pipe_close +}; + +void string_in_stream_mark(obj_t *stream) +{ + obj_t *stuff = (obj_t *) stream->co.handle; + gc_mark(stuff); +} + +static obj_t *string_in_get_line(obj_t *stream) +{ + obj_t *pair = (obj_t *) stream->co.handle; + obj_t *string = car(pair); + obj_t *pos = cdr(pair); + + /* TODO: broken, should only scan to newline */ + if (lt(pos, length(string))) { + obj_t *result = sub_str(string, pos, nil); + *cdr_l(pair) = length_str(string); + return result; + } + + return nil; +} + +static obj_t *string_in_get_char(obj_t *stream) +{ + obj_t *pair = (obj_t *) stream->co.handle; + obj_t *string = car(pair); + obj_t *pos = cdr(pair); + + if (lt(pos, length_str(string))) { + *cdr_l(pair) = plus(pos, one); + return chr_str(string, pos); + } + + return nil; +} + +static struct strm_ops string_in_ops = { + { common_equal, + cobj_print_op, + 0, + string_in_stream_mark }, + 0, + 0, + string_in_get_line, + string_in_get_char, + 0, + 0, + 0 +}; + +struct string_output { + char *buf; + size_t size; + size_t fill; +}; + +static void string_out_stream_destroy(obj_t *stream) +{ + struct string_output *so = (struct string_output *) stream->co.handle; + + if (so) { + free(so->buf); + so->buf = 0; + free(so); + stream->co.handle = 0; + } +} + +static obj_t *string_out_put_string(obj_t *stream, const char *s) +{ + struct string_output *so = (struct string_output *) stream->co.handle; + + if (so == 0) { + return nil; + } else { + size_t len = strlen(s); + size_t old_size = so->size; + size_t required_size = len + so->fill + 1; + + if (required_size < len) + return nil; + + while (so->size <= required_size) { + so->size *= 2; + if (so->size < old_size) + return nil; + } + + so->buf = chk_realloc(so->buf, so->size); + memcpy(so->buf + so->fill, s, len + 1); + so->fill += len; + return t; + } +} + +static obj_t *string_out_put_char(obj_t *stream, int ch) +{ + char mini[2]; + mini[0] = ch; + mini[1] = 0; + return string_out_put_string(stream, mini); +} + +obj_t *string_out_vcformat(obj_t *stream, const char *fmt, va_list vl) +{ + struct string_output *so = (struct string_output *) stream->co.handle; + + if (so == 0) { + return nil; + } else { + int nchars, nchars2; + char dummy_buf[1]; + size_t old_size = so->size; + size_t required_size; + va_list vl_copy; + +#if defined va_copy + va_copy (vl_copy, vl); +#elif defined __va_copy + __va_copy (vl_copy, vl); +#else + vl_copy = vl; +#endif + + nchars = vsnprintf(dummy_buf, 0, fmt, vl_copy); + +#if defined va_copy || defined __va_copy + va_end (vl_copy); +#endif + + bug_unless (nchars >= 0); + + required_size = so->fill + nchars + 1; + + if (required_size < so->fill) + return nil; + + while (so->size <= required_size) { + so->size *= 2; + if (so->size < old_size) + return nil; + } + + so->buf = chk_realloc(so->buf, so->size); + nchars2 = vsnprintf(so->buf + so->fill, so->size-so->fill, fmt, vl); + bug_unless (nchars == nchars2); + so->fill += nchars; + return t; + } +} + +static struct strm_ops string_out_ops = { + { common_equal, + cobj_print_op, + string_out_stream_destroy, + 0 }, + string_out_put_string, + string_out_put_char, + 0, + 0, + string_out_vcformat, + common_vformat, + 0, +}; + +static obj_t *dir_get_line(obj_t *stream) +{ + DIR *handle = (DIR *) stream->co.handle; + + if (handle == 0) { + return nil; + } else { + for (;;) { + struct dirent *e = readdir(handle); + if (!e) + return nil; + if (!strcmp(e->d_name, ".") || !strcmp(e->d_name, "..")) + continue; + return string(chk_strdup(e->d_name)); + } + } +} + +static obj_t *dir_close(obj_t *stream) +{ + if (stream->co.handle != 0) { + closedir((DIR *) stream->co.handle); + stream->co.handle = 0; + return t; + } + + return nil; +} + +static struct strm_ops dir_ops = { + { common_equal, + cobj_print_op, + common_destroy, + 0 }, + 0, + 0, + dir_get_line, + 0, + 0, + 0, + dir_close +}; + + +obj_t *make_stdio_stream(FILE *handle, obj_t *input, obj_t *output) +{ + return cobj((void *) handle, stream_t, &stdio_ops.cobj_ops); +} + +obj_t *make_pipe_stream(FILE *handle, obj_t *input, obj_t *output) +{ + return cobj((void *) handle, stream_t, &pipe_ops.cobj_ops); +} + +obj_t *make_string_input_stream(obj_t *string) +{ + return cobj((void *) cons(string, zero), stream_t, &string_in_ops.cobj_ops); +} + +obj_t *make_string_output_stream(void) +{ + struct string_output *so = chk_malloc(sizeof *so); + so->size = 128; + so->buf = chk_malloc(so->size); + so->fill = 0; + so->buf[0] = 0; + return cobj((void *) so, stream_t, &string_out_ops.cobj_ops); +} + +obj_t *get_string_from_stream(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + if (stream->co.ops == &string_out_ops.cobj_ops) { + struct string_output *so = (struct string_output *) stream->co.handle; + obj_t *out = nil; + + stream->co.handle = 0; + + if (!so) + return out; + + so->buf = chk_realloc(so->buf, so->fill + 1); + out = string(so->buf); + free(so); + return out; + } else if (stream->co.ops == &string_in_ops.cobj_ops) { + obj_t *pair = (obj_t *) stream->co.handle; + return pair ? car(pair) : nil; + } else { + abort(); /* not a string input or output stream */ + } +} + +obj_t *make_dir_stream(DIR *dir) +{ + return cobj((void *) dir, stream_t, &dir_ops.cobj_ops); +} + +obj_t *close_stream(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->close ? ops->close(stream) : nil; + } +} + +obj_t *get_line(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->get_line ? ops->get_line(stream) : nil; + } +} + +obj_t *get_char(obj_t *stream) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->get_char ? ops->get_char(stream) : nil; + } +} + +obj_t *vformat(obj_t *stream, const char *str, va_list vl) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->vformat ? ops->vformat(stream, str, vl) : nil; + } +} + +obj_t *vcformat(obj_t *stream, const char *string, va_list vl) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->vcformat ? ops->vcformat(stream, string, vl) : nil; + } +} + +obj_t *format(obj_t *stream, const char *str, ...) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + va_list vl; + obj_t *ret; + + va_start (vl, str); + ret = ops->vformat ? ops->vformat(stream, str, vl) : nil; + va_end (vl); + return ret; + } +} + +obj_t *cformat(obj_t *stream, const char *string, ...) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + va_list vl; + obj_t *ret; + + va_start (vl, string); + ret = ops->vformat ? ops->vcformat(stream, string, vl) : nil; + va_end (vl); + return ret; + } +} + +obj_t *put_string(obj_t *stream, obj_t *string) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_string ? ops->put_string(stream, c_str(string)) : nil; + } +} + +obj_t *put_cstring(obj_t *stream, const char *str) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_string ? ops->put_string(stream, str) : nil; + } +} + +obj_t *put_char(obj_t *stream, obj_t *ch) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_char ? ops->put_char(stream, c_chr(ch)) : nil; + } +} + +obj_t *put_cchar(obj_t *stream, int ch) +{ + type_check (stream, COBJ); + type_assert (stream->co.cls == stream_t, ("~a is not a stream", stream)); + + { + struct strm_ops *ops = (struct strm_ops *) stream->co.ops; + return ops->put_char ? ops->put_char(stream, ch) : nil; + } +} + +obj_t *put_line(obj_t *stream, obj_t *string) +{ + return (put_string(stream, string), put_cchar(stream, '\n')); +} + +void stream_init(void) +{ + protect(&std_input, &std_output, &std_error, 0); + std_input = make_stdio_stream(stdin, t, nil); + std_output = make_stdio_stream(stdout, nil, t); + std_error = make_stdio_stream(stderr, nil, t); +} diff --git a/stream.h b/stream.h new file mode 100644 index 00000000..2be353f3 --- /dev/null +++ b/stream.h @@ -0,0 +1,48 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +extern obj_t *std_input, *std_output, *std_error; + +obj_t *make_stdio_stream(FILE *, obj_t *input, obj_t *output); +obj_t *make_pipe_stream(FILE *, obj_t *input, obj_t *output); +obj_t *make_string_input_stream(obj_t *); +obj_t *make_string_output_stream(void); +obj_t *get_string_from_stream(obj_t *); +obj_t *make_dir_stream(DIR *); +obj_t *close_stream(obj_t *); +obj_t *get_line(obj_t *); +obj_t *get_char(obj_t *); +obj_t *vformat(obj_t *stream, const char *string, va_list); /* nao-terminated */ +obj_t *vcformat(obj_t *stream, const char *string, va_list); /* printf-style */ +obj_t *format(obj_t *stream, const char *string, ...); +obj_t *cformat(obj_t *stream, const char *string, ...); +obj_t *put_string(obj_t *stream, obj_t *string); +obj_t *put_line(obj_t *stream, obj_t *string); +obj_t *put_cstring(obj_t *stream, const char *); +obj_t *put_char(obj_t *stream, obj_t *ch); +obj_t *put_cchar(obj_t *stream, int ch); + +void stream_init(void); @@ -1,4 +1,4 @@ -.\"Copyright (C) 2009, Kaz Kylheku <kkylheku@gmail.com>. +5\"Copyright (C) 2009, Kaz Kylheku <kkylheku@gmail.com>. .\"All rights reserved. .\" .\"BSD License: @@ -21,7 +21,7 @@ .\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED .\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. -.TH txr 1 2009-09-09 "txr v. 014" "Text Extraction Utility" +.TH txr 1 2009-10-14 "txr v. 015" "Text Extraction Utility" .SH NAME txr \- text extractor .SH SYNOPSIS @@ -76,8 +76,8 @@ from their subqueries in special ways. .SH ARGUMENTS AND OPTIONS -Options other than -D may be combined together into a single argument. -The -v and -q options are mutually exclusive. The one which occurs +Options other than -D, -a and -f may be combined together into a single +argument. The -v and -q options are mutually exclusive. The one which occurs in the rightmost position in the argument list dominates. .IP -Dvar=value @@ -135,6 +135,38 @@ reported as: The leftmost bracketed index is the most major index. That is to say, the dimension order is: NAME_m_m+1_..._n[1][2]...[m-1]. +.IP -f query +Specifies the query in the form of a command line argument. If this option is +used, the query-file argument is omitted. The first non-option argument, +if there is one, now specifies the first input source rather than a query. +Queries specified as arguments must properly end in a newline, as if they +were read from a text file, thus -f "@a" is not a properly formed query. + +Example: + + # read two lines "1" and "2" from standard input, + # binding them to variables a and b. Standard + # input is specified as - and the data + # comes from shell "here document" redirection. + + txr -f "@a + @b + " - <<! + 1 + 2 + ! + + Output: + a=1 + b=2 + +The @# comment syntax can be used for better formatting: + + txr -f "@# + @a + @b + " + .IP --help Prints usage summary on standard output, and terminates successfully. @@ -231,6 +263,29 @@ comment which follows does. Without this intuitive behavior, line comment would give rise to empty lines that must match empty lines in the data, leading to spurious mismatches. +.SH Hash Bang Support + +If the first line of a query begins with the characters #!, +that entire line is deleted from the query. This allows +for txr queries to be turned into standalone executable programs in the POSIX +environment. + +Shell example: create a simple executable program called "twoline.txr" and +run it. This assumes txr is installed in /usr/bin. + + $ cat > twoline.txr + #!/usr/bin/txr + @a + @b + [Ctrl-D] + $ chmod a+x twoline.txr + $ ./twoline.txr - + 1 + 2 + [Ctrl-D] + a=1 + b=2 + .SS Text Query material which is not escaped by the special character @ is @@ -601,9 +656,9 @@ The general syntax of a directive is: @EXPR where expr is a parenthesized list of subexpressions. A subexpression -is an symbol, number, string literal, character literal, regular expression, or -a parenthesized expression. So, examples of syntactically valid directives -are: +is an symbol, number, string literal, character literal, quasiliteral, regular +expression, or a parenthesized expression. So, examples of syntactically valid +directives are: @(banana) @@ -615,6 +670,8 @@ are: @(a /[a-z]*/ b) + @(_ `@file.txt`) + A symbol is lexically the same thing as a variable and the same rules apply. Tokens that look like numbers are treated as numbers. @@ -623,6 +680,15 @@ respectively, and may not span multiple lines. Character literals must contain exactly one character. Character and numeric escapes may be used within literals to escape the quotes, and to denote control characters. +Quasiliterals are similar to string literals, except that they may +contain variable references denoted by the usual @ syntax. The quasiliteral +represents a string formed by substituting the values of those variables +into the literal template. If a is bound to "apple" and b to "banana", +the quasiliteral `one@a and two @{b}s` represents the string +"one apple and two bananas". A backquote escaped by a backslash represents +itself, and two consecutive @ characters code for a literal @. +There is no \e@ escape. + Some directives are involved in structuring the overall syntax of the query. There are syntactic constraints that depend on the directive. For instance the @@ -699,6 +765,13 @@ Terminate the processing of a block, as if it were a successful match. What bindings emerge may depend on the kind of block: collect has special semantics. Blocks are discussed in the section BLOCKS below. +.IP @(try) +Indicates the start of a try block, which is related to exception +handling, discussed in the EXCEPTIONS section below. + +.IP @(catch), @(finally) +Special clauses within @(try). See EXCEPTIONS below. + .IP @(flatten) Normalizes a set of specified variables to one-dimensional lists. Those variables which have scalar value are reduced to lists of that value. @@ -733,23 +806,51 @@ produces repeated text within one line. .SS The Next Directive -The next directive comes in two forms. It can occur by itself as the -only element in a query line: +The next directive comes in two forms, one of which is obsolescent +syntax. This directive indicates that the remainder of the query. + +In the first form, it can occur by itself as the only element in a query line, +with, or without arguments: @(next) + @(next SOURCE) + @(next SOURCE nothrow) + +The lone @(next) without arguments switches to the next file in the +argument list which was passed to the +.B txr +utility. If SOURCE is given, it must be text-valued expression which denotes an +input source; it may be a string literal, quasiliteral or a variable. +For instance, if variable A contains the text "data", then + + @(next A) + +means switch to the file called "data", and -Or it may be followed by material, which may contain variables. -All of the variables must be bound. For example: + @(next `@A.txt`) + +means to switch to the file "data.txt". + +If the input source cannot be opened for whatever reason, +.B txr +throws an exception (see EXCEPTIONS below). An unhandled exception will +terminate the program. Often, such a drastic measure is inconvenient; +if @(next) is invoked with the nothrow keyword, then if the input +source cannot be opened, the situation is treated as a simple +match failure. + +In the obsolescent second form, @(next) is followed by material on the same +line, which may contain variables. All of the variables must be bound. For +example: @(next)/path/to/@foo.txt -Both forms indicate that the remainder of the query applies -to a new file. The lone @(next) switches to the next file in the -argument list which was passed to the +The trailing material specifies gives the input source. +The nothrow behavior is implicit in this form. The syntax will +disappear in some future version of .B txr -utility. The second form diverts the remainder of the query to a file whose -name is given by the trailing material, after variable substitutions are -performed. +. + Note that "remainder of the query" refers to the subquery in which the next directive appears, not necessarily the entire query. @@ -760,7 +861,7 @@ After the @(end) which terminates the @(some), the "abc" is matched in the current file. @(some) - @(next)foo.txt + @(next "foo.txt") xyz@suffix @(end) abc @@ -1845,6 +1946,14 @@ usual printing of the variable bindings or the word false. The syntax of the @(output) directive is: + @(output [ DESTINATION ] [ nothrow ]) + . + . one or more output directives or lines + . + @(end) + +An obsolescent syntax is also supported: + @(output)...optional destination... . . one or more output directives or lines @@ -1853,7 +1962,16 @@ The syntax of the @(output) directive is: The optional destination is a filename, the special name, - which redirects to standard output, or a shell command preceded by the ! symbol. -Variables are substituted in the directive. +In the first form, the destination may be specified as a variable +which holds text, a string literal or a quasiliteral + +In the second obsolescent form, the material to the right of @(output) +is query text which may contain variables. + +The new syntax throws an exception if the output destination +cannot be opened, unless the nothrow keyword is present, in which +case the situation is treated as a match failure. The old syntax throws an +exception. .SS Output Text @@ -2025,6 +2143,269 @@ spaces each one, except the last which has no space. If the list has exactly one item, then the @(last) applies to it instead of the main clause: it is produced with no trailing space. +.SH EXCEPTIONS + +The exceptions mechanism in +.B txr +is disciplined way for representing and handling abnormal situations that may +occur during query processing, such as using an unbound variable, or attempting +to open a nonexistent file. + +An exception is a situation in the query which stops the query and +demands handling. If handling is not provided for that exception, +the execution of the program is terminated. + +An exception is always identified by a symbol, which is its type. Types are +organized in a subtype-supertype hierarchy. For instance, the file_error +exception type is a subtype of the error type. This means that a file error is +a kind of error. An exception handling block which catches exceptions of type +error will catch exceptions of type file_error, but a block which catches +file_error will not catch all exceptions of type error. A query_error is a kind +of error, but not a kind of file_error. The symbol t is the supertype +of every type: every exception type is considered to be a kind of t. +(Mnemonic: t stands for type, as in any type). + +Exceptions are handled using @(catch) clauses within a @(try) directive. + +In addition to being useful for exception handling, the @(try) directive +also provides unwind protection by means of a @(finally) clause, +which specifies query material to be executed unconditionally when +the try clause terminates, no matter how it terminates. + +.SS The Try Directive + +The general syntax of the try directive is + + @(try) + ... main clause, required ... + ... optional catch clauses ... + ... optional finally clause + @(end) + +A catch clause looks like: + + @(catch TYPE) + . + . + . + +and also the this form, equivalent to @(catch (t)): + + @(catch) + . + . + . + +which catches all exceptions. + +A finally clause looks like: + + @(finally) + ... + . + . + +None of the clauses may be empty. + +A try clause is surrounded by an implicit anonymous block (see BLOCKS section +above). So for instance, the following is a no-op (an operation with no effect, +other than successful execution): + + @(try) + @(accept) + @(end) + +The @(accept) causes a successful termination of the implicit anonymous block. +Execution resumes with query lines or directives which follow, if any. + +Try clauses and blocks interact. For instance, a block accept from within +a try clause invokes a finally. + + Query: @(block foo) + @ (try) + @ (accept foo) + @ (finally) + @ (output) + bye! + @ (end) + @ (end) + + Output: bye! + +How this works: the try block's main clause is @(accept foo). This causes +the enclosing block named foo to terminate, as a successful match. +Since the try is nested within this block, it too must terminate +in order for the block to terminate. But the try has a finally clause, +which executes unconditionally, no matter how the try block +terminates. The finally clause performs some output, which is seen. + +.SH The Finally Clause + +A try directive can terminate in one of three ways. The main clause +may match successfully, and possibly yield some new variable bindings. +The main clause may fail to match. Or the main clause may be terminated +by a non-local control transfer, like an exception being thrown or a block +return (like the block foo example in the previous section). + +No matter how the try clause terminates, the finally clause is processed. + +Now, the finally clause is itself a query which binds variables, which leads to +the question: what happens to such variables? What if the finally block fails +as a query? Another question is: what if a finally clause itself initiates a +control transfer? Answers follow. + +Firstly, a finally clause will contribute variable bindings only if the main +clause terminates normally (either as a successful or failed match). +If the main clause successfully matches, then the finally block continues +matching at the next position in the data, and contributes bindings. +If the main clause fails, then the finally block matches at the +same position. + +The overall try directive succeeds as a match if either the main clause +or the finally clause succeed. If both fail, then the try directive is +a failed match. The subquery in which it is located fails, et cetera. + +Example: + + Query: @(try) + @a + @(finally) + @b + @(end) + @c + + Data: 1 + 2 + 3 + + Output: a=1 + b=2 + c=3 + +In this example, the main clause of the try captures line "1" of the data as +variable a, then the finally clause captures "2" as b, and then the +query continues with the @c variable after try block, and captures "3". + + +Example: + + Query: @(try) + hello @a + @(finally) + @b + @(end) + @c + + Data: 1 + 2 + + Output: b=1 + c=2 + +In this example, the main clause of the try fails to match, because +the input is not prefixed with "hello ". However, the finally clause +matches, binding b to "1". This means that the try block is a successful +match, and so processing continues with @c which captures "2". + +When finally clauses are processed during a non-local return, +they have no externally visible effect if they do not bind variables. +However, their execution makes itself known if they perform side effects, +such as output. + +A finally clause guards only the main clause and the catch clauses. It does not +guard itself. Once the finally clause is executing, the try block is no +longer guarded. This means if a nonlocal transfer, such as a block accept +or exception, is initiated within the finally clause, it will not re-execute +the finally clause. The finally clause is simply abandoned. + +The disestablishment of blocks and try clauses is properly interleaved +with the execution of finally clauses. This means that all surrounding +exit points are visible in a finally clause, even if the finally clause +is being invoked as part of a transfer to a distant exit point. +The finally clause can make a control transfer to an exit point which +is more near than the original one, thereby "hijacking" the control +transfer. Also, the anonymous block established by the try directive +is visible in the finally clause. + +Example: + +@(try) +@ (try) +@ (next "nonexistent-file") +@ (finally) +@ (accept) +@ (end) +@(catch file_error) +@ (output) +file error caught +@ (end) +@(end) + +In this example, the @(next) directive throws an exception of type file_error, +because the given file does not exist. The exit point for this exception is the +@(catch file_error) clause in the outer-most try block. The inner block is +not eligible because it contains no catch clauses at all. However, the inner +try block has a finally clause, and so during the processing of this +exception which is headed for the @(catch file_error), the finally +clause performs an anonymous accept. The exit point for the accept +is the anonymous block surrounding the inner try. So the original +transfer to the catch clause is forgotten. The inner try terminates +sucessfully, and since it constitutes the main clause of the outer try, +that also terminates sucessfully. The "file error caught" message is +never printed. + +.SS Catch Clauses + +Catch clauses establish a try block as a potential exit point for +an exception-induced control transfer (called a ``throw''). + +A catch clause specifies an optional list of symbols which represent +the exception types which it catches. The catch clause will catch +exceptions which are a subtype of any one of those exception types. + +If a try block has more than one catch clause which can match a given +exception, the first one will be invoked. + +The exception protection of a try block does not extend over the +catch clauses. Once a catch clause is being executed, if it throws +an exception, that exception will not re-enter any catch within the +same try block, even if it matches one. + +Catches are processed prior to finally. + +When a catch is invoked, it is of course understood that the main clause did +not terminate normally, and so the main clause could not have produced any +bindings. + +So the success or failure of the try block depends on the behavior of the catch +clause or the finally, if there is one. If either of them succeed, then the try block is considered a successful match. + +Example: + + Query: @(try) + @ (next "nonexistent-file") + @ x + @ (catch file_error) + @a + @(finally) + @b + @(end) + @c + + Data: 1 + 2 + 3 + + Output: a=1 + b=2 + c=3 + +Here, the try block's main clause is terminated abruptly by a file_error +exception from the @(next) directive. This is handled by the +catch clause, which binds variable a to the input line "1". +Then the finally clause executes, binding b to "2". The try block +then terminates successfully, and so @c takes "3". + .SH NOTES ON FALSE The reason for printing the word @@ -0,0 +1,336 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <limits.h> +#include <dirent.h> +#include <setjmp.h> +#include "lib.h" +#include "stream.h" +#include "gc.h" +#include "unwind.h" +#include "parser.h" +#include "match.h" +#include "txr.h" + +const char *version = "015"; +const char *progname = "txr"; +const char *spec_file = "stdin"; +obj_t *spec_file_str; + +/* + * Can implement an emergency allocator here from a fixed storage + * pool, which sets an OOM flag. Program can check flag + * and gracefully terminate instead of aborting like this. + */ +void *oom_realloc_handler(void *old, size_t size) +{ + fprintf(stderr, "%s: out of memory\n", progname); + puts("false"); + abort(); +} + +void help(void) +{ + const char *text = +"\n" +"txr version %s\n" +"\n" +"copyright 2009, Kaz Kylheku <kkylheku@gmail.com>\n" +"\n" +"usage:\n" +"\n" +" %s [ options ] query-file { data-file }*\n" +"\n" +"The query-file or data-file arguments may be specified as -, in which case\n" +"standard input is used. If these arguments end with a | character, then\n" +"they are treated as command pipes. Leading arguments which begin with a -\n" +"followed by one or more characters, and which are not arguments to options\n" +"are interpreted as options. The -- option indicates the end of the options.\n" +"\n" +"If no data-file arguments sare supplied, then the query itself must open a\n" +"a data source prior to attempting to make any pattern match, or it will\n" +"simply fail due to a match which has run out of data.\n" +"\n" +"options:\n" +"\n" +"-Dvar=value Pre-define variable var, with the given value.\n" +" A list value can be specified using commas.\n" +"-Dvar Predefine variable var, with empty string value.\n" +"-q Quiet: don't report errors during query matching.\n" +"-v Verbose: extra logging from matcher.\n" +"-b Don't dump list of bindings.\n" +"-a num Generate array variables up to num-dimensions.\n" +" Default is 1. Additional dimensions are fudged\n" +" by generating numeric suffixes\n" +"-f query Specify the query text as an argument.\n" +" The query-file argument is omitted in this case.\n" +"--help You already know!\n" +"--version Display program version\n" +"\n" +"Options that take no argument can be combined. The -q and -v options\n" +"are mutually exclusive; the right-most one dominates.\n" +"\n" + ; + fprintf(stdout, text, version, progname); +} + +void hint(void) +{ + fprintf(stderr, "%s: incorrect arguments: try --help\n", progname); +} + +obj_t *remove_hash_bang_line(obj_t *spec) +{ + if (!consp(spec)) + return spec; + + { + obj_t *shbang = string(strdup("#!")); + obj_t *firstline = first(spec); + obj_t *items = rest(firstline); + + if (stringp(first(items))) { + obj_t *twochars = sub_str(first(items), zero, two); + if (equal(twochars, shbang)) + return rest(spec); + } + + return spec; + } +} + +int main(int argc, char **argv) +{ + obj_t *stack_bottom_0 = nil; + obj_t *specstring = nil; + obj_t *spec = nil; + obj_t *bindings = nil; + int match_loglevel = opt_loglevel; + progname = argv[0] ? argv[0] : progname; + obj_t *stack_bottom_1 = nil; + + init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1); + + protect(&spec_file_str, 0); + spec_file_str = string(strdup(spec_file)); + + yyin_stream = std_input; + protect(&yyin_stream, 0); + + if (argc <= 1) { + hint(); + return EXIT_FAILURE; + } + + argc--, argv++; + + while (argc > 0 && (*argv)[0] == '-') { + if (!strcmp(*argv, "--")) { + argv++, argc--; + break; + } + + if (!strcmp(*argv, "-")) + break; + + if (!strncmp(*argv, "-D", 2)) { + char *var = *argv + 2; + char *equals = strchr(var, '='); + char *has_comma = (equals != 0) ? strchr(equals, ',') : 0; + + if (has_comma) { + char *val = equals + 1; + obj_t *list = nil; + + *equals = 0; + + for (;;) { + size_t piece = strcspn(val, ","); + char comma_p = val[piece]; + + val[piece] = 0; + + list = cons(string(strdup(val)), list); + + if (!comma_p) + break; + + val += piece + 1; + } + + list = nreverse(list); + bindings = cons(cons(intern(string(strdup(var))), list), bindings); + } else if (equals) { + char *val = equals + 1; + *equals = 0; + bindings = cons(cons(intern(string(strdup(var))), + string(strdup(val))), bindings); + } else { + bindings = cons(cons(intern(string(strdup(var))), + null_string), bindings); + } + + argc--, argv++; + continue; + } + + if (!strcmp(*argv, "--version")) { + printf("%s: version %s\n", progname, version); + return 0; + } + + if (!strcmp(*argv, "--help")) { + help(); + return 0; + } + + if (!strcmp(*argv, "-a") || !strcmp(*argv, "-f")) { + long val; + char *errp; + char opt = (*argv)[1]; + + if (argc == 1) { + fprintf(stderr, "%s: option %c needs argument\n", progname, opt); + + return EXIT_FAILURE; + } + + argv++, argc--; + + switch (opt) { + case 'a': + val = strtol(*argv, &errp, 10); + if (*errp != 0) { + fprintf(stderr, "%s: option %c needs numeric argument, not %s\n", + progname, opt, *argv); + return EXIT_FAILURE; + } + + opt_arraydims = val; + break; + case 'f': + specstring = string(strdup(*argv)); + break; + } + + argv++, argc--; + continue; + } + + if (!strcmp(*argv, "--gc-debug")) { + opt_gc_debug = 1; + argv++, argc--; + continue; + } + + { + char *popt; + for (popt = (*argv)+1; *popt != 0; popt++) { + switch (*popt) { + case 'v': + match_loglevel = 2; + break; + case 'q': + match_loglevel = 0; + break; + case 'b': + opt_nobindings = 1; + break; + case 'a': + case 'f': + case 'D': + fprintf(stderr, "%s: option -%c does not clump\n", progname, *popt); + return EXIT_FAILURE; + case '-': + fprintf(stderr, "%s: unrecognized long option: --%s\n", + progname, popt + 1); + return EXIT_FAILURE; + default: + fprintf(stderr, "%s: unrecognized option: %c\n", progname, *popt); + return EXIT_FAILURE; + } + } + + argc--, argv++; + } + } + + if (specstring) { + spec_file = "cmdline"; + yyin_stream = make_string_input_stream(specstring); + } else { + if (argc < 1) { + hint(); + return EXIT_FAILURE; + } + + if (strcmp(*argv, "-") != 0) { + FILE *in = fopen(*argv, "r"); + if (in == 0) { + uw_errorcf("%s: unable to open %s", progname, *argv); + fprintf(stderr, "%s: unable to open %s\n", progname, *argv); + return EXIT_FAILURE; + } + yyin_stream = make_stdio_stream(in, t, nil); + spec_file = *argv; + spec_file_str = string(strdup(spec_file)); + } + argc--, argv++; + } + + { + int gc = gc_state(0); + yyparse(); + gc_state(gc); + + if (errors) + return EXIT_FAILURE; + spec = remove_hash_bang_line(get_spec()); + + opt_loglevel = match_loglevel; + + if (opt_loglevel >= 2) { + format(std_error, "spec:\n~s\n", spec, nao); + format(std_error, "bindings:\n~s\n", bindings, nao); + } + + { + int retval; + list_collect_decl(filenames, iter); + + while (*argv) + list_collect(iter, string(*argv++)); + + retval = extract(spec, filenames, bindings); + + return errors ? EXIT_FAILURE : retval; + } + } +} @@ -0,0 +1,33 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +extern int opt_loglevel; +extern int opt_nobindings; +extern int opt_arraydims; +extern int opt_gc_debug; +extern const char *version; +extern const char *progname; +extern int output_produced; @@ -29,8 +29,11 @@ #include <assert.h> #include <setjmp.h> #include <dirent.h> +#include <stdarg.h> #include "lib.h" #include "gc.h" +#include "stream.h" +#include "txr.h" #include "unwind.h" static uw_frame_t *uw_stack; @@ -39,8 +42,39 @@ static uw_frame_t toplevel_env; static void uw_unwind_to_exit_point() { - while (uw_stack && uw_stack != uw_exit_point) - uw_stack = uw_stack->uw.up; + assert (uw_exit_point); + + for (; uw_stack && uw_stack != uw_exit_point; uw_stack = uw_stack->uw.up) { + switch (uw_stack->uw.type) { + case UW_CATCH: + /* If a catch block is not visible, do + not run its unwind stuff. This + would cause infinite loops if + unwind blocks trigger a nonlocal exit. */ + if (!uw_stack->ca.visible) + continue; + /* Catches catch everything, so that they + can do "finally" or "unwind protect" logic. + If a catch is invoked with a nil exception + and symbol, it must excecute the + mandatory clean-up code and then + continue the unwinding by calling uw_continue, + passing it the ca.cont value. */ + uw_stack->ca.sym = nil; + uw_stack->ca.exception = nil; + uw_stack->ca.cont = uw_exit_point; + /* This catch frame is no longer + visible. If the unwind section + throws something, it cannot + be caught in the same frame. */ + uw_stack->ca.visible = 0; + /* 1 means unwind only. */ + longjmp(uw_stack->ca.jb, 1); + abort(); + default: + break; + } + } if (!uw_stack) abort(); @@ -50,19 +84,21 @@ static void uw_unwind_to_exit_point() switch (uw_stack->uw.type) { case UW_BLOCK: longjmp(uw_stack->bl.jb, 1); - break; + abort(); case UW_ENV: /* env frame cannot be exit point */ abort(); + case UW_CATCH: + /* Catch frame is no longer visible. + If a catch or unwind throw something, + it cannot go back to the same catch. */ + uw_stack->ca.visible = 0; + /* 2 means actual catch, not just unwind */ + longjmp(uw_stack->ca.jb, 2); default: abort(); } } -void uw_init(void) -{ - protect(&toplevel_env.ev.func_bindings, 0); -} - void uw_push_block(uw_frame_t *fr, obj_t *tag) { fr->bl.type = UW_BLOCK; @@ -135,3 +171,186 @@ obj_t *uw_block_return(obj_t *tag, obj_t *result) uw_unwind_to_exit_point(); abort(); } + +void uw_push_catch(uw_frame_t *fr, obj_t *matches) +{ + fr->ca.type = UW_CATCH; + fr->ca.matches = matches; + fr->ca.exception = nil; + fr->ca.cont = 0; + fr->ca.visible = 1; + fr->ca.up = uw_stack; + uw_stack = fr; +} + +static obj_t *exception_subtypes; + +obj_t *uw_exception_subtype_p(obj_t *sub, obj_t *sup) +{ + if (sub == nil || sup == t || sub == sup) { + return t; + } else { + obj_t *entry = assoc(exception_subtypes, sub); + return memq(sup, cdr(entry)) ? t : nil; + } +} + +obj_t *uw_throw(obj_t *sym, obj_t *exception) +{ + uw_frame_t *ex; + + for (ex = uw_stack; ex != 0; ex = ex->uw.up) { + if (ex->uw.type == UW_CATCH && ex->ca.visible) { + /* The some_satisfy would require us to + cons up a function; we want to + avoid consing in exception handling, if we can. */ + obj_t *matches = ex->ca.matches; + obj_t *match; + for (match = matches; match; match = cdr(match)) + if (uw_exception_subtype_p(sym, car(match))) + break; + if (match) + break; + } + } + + if (ex == 0) { + if (opt_loglevel >= 1) { + format(std_error, "~a: unhandled exception of type ~a:\n", + prog_string, sym, nao); + format(std_error, "~a\n", exception, nao); + } + if (uw_exception_subtype_p(sym, query_error) || + uw_exception_subtype_p(sym, file_error)) { + if (!output_produced) + put_cstring(std_output, "false\n"); + exit(EXIT_FAILURE); + } + abort(); + } + + ex->ca.sym = sym; + ex->ca.exception = exception; + uw_exit_point = ex; + uw_unwind_to_exit_point(); + abort(); +} + +obj_t *uw_throwf(obj_t *sym, const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(sym, get_string_from_stream(stream)); + abort(); +} + +obj_t *uw_errorf(const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(error, get_string_from_stream(stream)); + abort(); +} + +obj_t *uw_throwcf(obj_t *sym, const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vcformat(stream, fmt, vl); + va_end (vl); + + uw_throw(sym, get_string_from_stream(stream)); + abort(); +} + +obj_t *uw_errorcf(const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vcformat(stream, fmt, vl); + va_end (vl); + + uw_throw(error, get_string_from_stream(stream)); + abort(); +} + +obj_t *type_mismatch(const char *fmt, ...) +{ + va_list vl; + obj_t *stream = make_string_output_stream(); + + va_start (vl, fmt); + (void) vformat(stream, fmt, vl); + va_end (vl); + + uw_throw(type_error, get_string_from_stream(stream)); + abort(); +} + +void uw_register_subtype(obj_t *sub, obj_t *sup) +{ + obj_t *t_entry = assoc(exception_subtypes, t); + obj_t *sub_entry = assoc(exception_subtypes, sub); + obj_t *sup_entry = assoc(exception_subtypes, sup); + + assert (t_entry != 0); + + if (sub == nil) + return; + + if (sub == t) { + if (sup == t) + return; + abort(); + } + + /* If sup symbol not registered, then we make it + an immediate subtype of t. */ + if (!sup_entry) { + sup_entry = cons(sup, t_entry); + exception_subtypes = cons(sup_entry, exception_subtypes); + } + + /* If sub already registered, we delete that + registration. */ + if (sub_entry) { + exception_subtypes = alist_remove1(exception_subtypes, sub); + } + + /* Register sub as an immediate subtype of sup. */ + sub_entry = cons(sub, sup_entry); + exception_subtypes = cons(sub_entry, exception_subtypes); +} + +void uw_continue(uw_frame_t *current, uw_frame_t *cont) +{ + uw_pop_frame(current); + uw_exit_point = cont; + uw_unwind_to_exit_point(); +} + +void uw_init(void) +{ + protect(&toplevel_env.ev.func_bindings, &exception_subtypes, 0); + exception_subtypes = cons(cons(t, cons(t, nil)), exception_subtypes); + uw_register_subtype(type_error, error); + uw_register_subtype(internal_err, error); + uw_register_subtype(numeric_err, error); + uw_register_subtype(range_err, error); + uw_register_subtype(query_error, error); + uw_register_subtype(file_error, error); +} @@ -24,10 +24,16 @@ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ +#ifdef __GNUC__ +#define noreturn __attribute__((noreturn)) +#else +#define noreturn +#endif + typedef union uw_frame uw_frame_t; typedef enum uw_frtype uw_frtype_t; -enum uw_frtype { UW_BLOCK, UW_ENV }; +enum uw_frtype { UW_BLOCK, UW_ENV, UW_CATCH }; struct uw_common { uw_frame_t *up; @@ -48,20 +54,42 @@ struct uw_dynamic_env { obj_t *func_bindings; }; +struct uw_catch { + uw_frame_t *up; + uw_frtype_t type; + obj_t *matches; + obj_t *sym; + obj_t *exception; + uw_frame_t *cont; + int visible; + jmp_buf jb; +}; + union uw_frame { struct uw_common uw; struct uw_block bl; struct uw_dynamic_env ev; + struct uw_catch ca; }; -void uw_init(void); void uw_push_block(uw_frame_t *, obj_t *tag); void uw_push_env(uw_frame_t *); obj_t *uw_get_func(obj_t *sym); obj_t *uw_set_func(obj_t *sym, obj_t *value); obj_t *uw_block_return(obj_t *tag, obj_t *result); +void uw_push_catch(uw_frame_t *, obj_t *matches); +noreturn obj_t *uw_throw(obj_t *sym, obj_t *exception); +noreturn obj_t *uw_throwf(obj_t *sym, const char *fmt, ...); +noreturn obj_t *uw_errorf(const char *fmt, ...); +noreturn obj_t *uw_throwcf(obj_t *sym, const char *fmt, ...); +noreturn obj_t *uw_errorcf(const char *fmt, ...); +void uw_register_subtype(obj_t *sub, obj_t *super); +obj_t *uw_exception_subtype_p(obj_t *sub, obj_t *sup); +void uw_continue(uw_frame_t *curr, uw_frame_t *target); void uw_pop_frame(uw_frame_t *); +void uw_init(void); +noreturn obj_t *type_mismatch(const char *, ...); #define uw_block_begin(TAG, RESULTVAR) \ obj_t *RESULTVAR = nil; \ @@ -85,3 +113,63 @@ void uw_pop_frame(uw_frame_t *); #define uw_env_end \ uw_pop_frame(&uw_env); \ } + +#define uw_catch_begin(MATCHES, SYMVAR, \ + EXCVAR) \ + obj_t *SYMVAR = nil; \ + obj_t *EXCVAR = nil; \ + { \ + uw_frame_t uw_catch; \ + uw_push_catch(&uw_catch, MATCHES); \ + switch (setjmp(uw_catch.ca.jb)) { \ + case 0: + +#define uw_do_unwind \ + goto uw_unwind_label + +#define uw_catch(SYMVAR, EXCVAR) \ + break; \ + case 2: \ + EXCVAR = uw_catch.ca.exception; \ + SYMVAR = uw_catch.ca.sym; \ + +#define uw_unwind \ + break; \ + uw_unwind_label: \ + case 1: + +#define uw_catch_end \ + default: \ + break; \ + } \ + if (uw_catch.ca.cont) \ + uw_continue(&uw_catch, \ + uw_catch.ca.cont); \ + uw_pop_frame(&uw_catch); \ + } + +#define internal_error(STR) \ + uw_throwcf(internal_err, \ + "%s:%d %s", __FILE__, \ + __LINE__, STR) + +#define type_assert(EXPR, ARGS) \ + if (!(EXPR)) type_mismatch ARGS + +#define bug_unless(EXPR) \ + if (!(EXPR)) \ + internal_error("assertion " \ + #EXPR \ + " failed") + +#define numeric_assert(EXPR) \ + if (!(EXPR)) \ + uw_throwcf(numeric_err, "%s", \ + "assertion " #EXPR \ + " failed") + +#define range_bug_unless(EXPR) \ + if (!(EXPR)) \ + uw_throwcf(range_err, "%s", \ + "assertion" #EXPR \ + " failed") |