diff options
-rw-r--r-- | ChangeLog | 70 | ||||
-rw-r--r-- | gc.c | 26 | ||||
-rw-r--r-- | lib.c | 4 | ||||
-rw-r--r-- | lib.h | 2 | ||||
-rw-r--r-- | match.c | 48 | ||||
-rw-r--r-- | txr.1 | 158 | ||||
-rw-r--r-- | txr.c | 39 | ||||
-rw-r--r-- | unwind.c | 29 |
8 files changed, 321 insertions, 55 deletions
@@ -1,3 +1,73 @@ +2009-10-17 Kaz Kylheku <kkylheku@gmail.com> + + Version 016 + + Bugfix in exception subtype definition (defex). + + Tail recursion in marking function of garbage collector. + + -f option for specifying query file, allowing more + options to follow, useful in hash-bang scripting and + other situations. + + * txr.c: (version): Bump to 016 + * txr.1: Bump version to 016. + +2009-10-17 Kaz Kylheku <kkylheku@gmail.com> + + * txr.1: Documented defex. + * unwind.c (uw_register_subtype): Bugfix: if the subtype + exists already, we must not delete it and create a new entry, but + destructively point its entry to its assigned supertype. + An exceptions is thrown rather than abort for attempts + to make t a subtype of something other than itself. + An attempt to make something other than nil a subtype of nil + is diagnosed. Attempts to redefine the relationship + between two types if they are already connected by one; + this covers circularity and other cases, while still allowing + a relaxed order of definition. + +2009-10-17 Kaz Kylheku <kkylheku@gmail.com> + + * gc.c (mark_obj_tail): New macro. + (mark_obj): Optimized with manual tail recursion. + The funtion will no longer generate long call stacks + for long lists. Descending to the car field of + a cons is still recursive, but ``car-heavy'' + trees are rare. + +2009-10-16 Kaz Kylheku <kkylheku@gmail.com> + + Resurrect -f option, with different meaning. + We need "-f query-file" so that hash-bang scripts + can be written which can pass options to txr. + + * txr.c (help, main): Inplement and document -f. + Also bugfix: do not throw file open errors as exceptions + of type error, because these cause an abort, potentially leading to a + core dump. They are now thrown as file_error. + * txr.1: Documented -f. + +2009-10-16 Kaz Kylheku <kkylheku@gmail.com> + + Implemented @(next arg) for treating the command line + as an input source. + + * txr.1: Updated, and fixed a few unrelated mistakes. + * lib.c (dir): Removed unused symbol globa. + (args): New symbol global. + * lib.h (dir): Declaration removed. + (args): Declared. + match.c (match_files): Implemented @(next arg). + Had to hack laziness to the file opening logic in match_files. + If the function is entered with a spec whose first + directive is @(next), then it defers opening the first + file in the list of files (since it will be immediately + abandoned in favor of another input source). + This prevents an error in the situation when the + arguments do not name files, and there is a @(next args) + directive to process them as an input source. + 2009-10-16 Kaz Kylheku <kkylheku@gmail.com> Version 016 @@ -180,6 +180,13 @@ static void mark_obj(obj_t *obj) { type_t t; +#if 1 +tail_call: +#define mark_obj_tail(o) do { obj = (o); goto tail_call; } while (0) +#else +#define mark_obj_tail(o) return mark_obj(o) +#endif + if (obj == nil) return; @@ -196,22 +203,19 @@ static void mark_obj(obj_t *obj) switch (t) { case CONS: mark_obj(obj->c.car); - mark_obj(obj->c.cdr); - break; + mark_obj_tail(obj->c.cdr); case STR: - mark_obj(obj->st.len); - break; + mark_obj_tail(obj->st.len); case CHR: case NUM: break; case SYM: mark_obj(obj->s.name); - mark_obj(obj->s.val); - break; + mark_obj_tail(obj->s.val); case FUN: mark_obj(obj->f.env); if (obj->f.functype == FINTERP) - mark_obj(obj->f.f.interp_fun); + mark_obj_tail(obj->f.f.interp_fun); break; case VEC: { @@ -227,15 +231,13 @@ static void mark_obj(obj_t *obj) } break; case LCONS: - mark_obj(obj->lc.car); - mark_obj(obj->lc.cdr); mark_obj(obj->lc.func); - break; + mark_obj(obj->lc.car); + mark_obj_tail(obj->lc.cdr); case COBJ: - mark_obj(obj->co.cls); if (obj->co.ops->mark) obj->co.ops->mark(obj); - break; + mark_obj_tail(obj->co.cls); default: assert (0 && "corrupt type field"); } @@ -49,7 +49,7 @@ obj_t *zeroplus, *optional, *compound, *or, *quasi; obj_t *skip, *trailer, *block, *next, *fail, *accept; obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; -obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir; +obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *args; obj_t *try, *catch, *finally, *nothrow, *throw, *defex; obj_t *error, *type_error, *internal_err, *numeric_err, *range_err; obj_t *query_error, *file_error; @@ -1516,7 +1516,7 @@ static void obj_init(void) mrge = intern(string(strdup("merge"))); bind = intern(string(strdup("bind"))); cat = intern(string(strdup("cat"))); - dir = intern(string(strdup("dir"))); + args = intern(string(strdup("args"))); try = intern(string(strdup("try"))); catch = intern(string(strdup("catch"))); finally = intern(string(strdup("finally"))); @@ -148,7 +148,7 @@ extern obj_t *zeroplus, *optional, *compound, *or, *quasi; extern obj_t *skip, *trailer, *block, *next, *fail, *accept; extern obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll; extern obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep; -extern obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir; +extern obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *args; extern obj_t *try, *catch, *finally, *nothrow, *throw, *defex; extern obj_t *error, *type_error, *internal_err, *numeric_err, *range_err; extern obj_t *query_error, *file_error; @@ -882,27 +882,33 @@ obj_t *match_files(obj_t *spec, obj_t *files, data_lineno = c_num(data_linenum); first_file_parsed = nil; } else if (files) { - obj_t *spec = first(files); - obj_t *name = consp(spec) ? cdr(spec) : spec; + obj_t *source_spec = first(files); + obj_t *name = consp(source_spec) ? cdr(source_spec) : source_spec; fpip_t fp = (errno = 0, complex_open(name, nil)); + obj_t *first_spec_item = second(first(spec)); - debugf("opening data source ~a", name, nao); + if (consp(first_spec_item) && eq(first(first_spec_item), next)) { + debugf("not opening source ~a since query starts with next directive", + name, nao); + } else { + debugf("opening data source ~a", name, nao); - if (complex_open_failed(fp)) { - if (consp(spec) && car(spec) == nothrow) { - debugf("could not open ~a: treating as failed match due to nothrow", - name, nao); + if (complex_open_failed(fp)) { + if (consp(source_spec) && car(source_spec) == nothrow) { + debugf("could not open ~a: treating as failed match due to nothrow", + name, nao); + return nil; + } else if (errno != 0) + file_err(nil, "could not open ~a (error ~a/~a)", name, + num(errno), string(strdup(strerror(errno))), nao); + else + file_err(nil, "could not open ~a", name, nao); return nil; - } else if (errno != 0) - file_err(nil, "could not open ~a (error ~a/~a)", name, - num(errno), string(strdup(strerror(errno))), nao); - else - file_err(nil, "could not open ~a", name, nao); - return nil; - } + } - if ((data = complex_snarf(fp, name)) != nil) - data_lineno = 1; + if ((data = complex_snarf(fp, name)) != nil) + data_lineno = 1; + } } for (; spec; spec = rest(spec), data = rest(data), data_lineno++) @@ -1018,6 +1024,16 @@ repeat_spec_same_data: if (eq(first(source), nothrow)) push(nil, &source); + else if (eq(first(source), args)) { + obj_t *input_name = string(strdup("args")); + cons_bind (new_bindings, success, + match_files(spec, cons(input_name, files), + bindings, files, one)); + if (success) + return cons(new_bindings, + if3(data, cons(data, num(data_lineno)), t)); + return nil; + } { obj_t *val = eval_form(first(source), bindings); @@ -21,7 +21,7 @@ .\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED .\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. -.TH txr 1 2009-10-14 "txr v. 016" "Text Extraction Utility" +.TH txr 1 2009-10-14 "txr v. 017" "Text Extraction Utility" .SH NAME txr \- text extractor .SH SYNOPSIS @@ -76,9 +76,11 @@ from their subqueries in special ways. .SH ARGUMENTS AND OPTIONS -Options other than -D, -a and -c may be combined together into a single -argument. The -v and -q options are mutually exclusive. The one which occurs -in the rightmost position in the argument list dominates. +Options which don't take an argument may be combined together. +The -v and -q options are mutually exclusive. Of these two, the one which +occurs in the rightmost position in the argument list dominates. +The -c and -f options are also mutually exclusive; if both are specified, +it is a fatal error. .IP -Dvar=value Bind the variable @@ -167,6 +169,11 @@ The @# comment syntax can be used for better formatting: @b " +.IP -f query-file +Specifies the file from which the query is to be read, instead of the +query-file argument. This is useful in #! scripts. (See Hash Bang Support +below). + .IP --help Prints usage summary on standard output, and terminates successfully. @@ -286,6 +293,23 @@ run it. This assumes txr is installed in /usr/bin. a=1 b=2 +A script written in this manner will not pass options to txr. For +instance, if the above script is invoked like this + + ./twoline.txr -Da=42 + +the -D option isn't passed down to txr; -Da=42 is an ordinary +argument (which the script will try to open as an input file). +This behavior is useful if the script author wants not to +expose the txr options to the user of the script. + +However, if the hash bang line can use the -f option: + + #!/usr/bin/txr -f + +Now, the name of the script is passed as an argument to the -f option, +and txr will look for more options after that. + .SS Text Query material which is not escaped by the special character @ is @@ -810,7 +834,8 @@ produces repeated text within one line. .SS The Next Directive The next directive comes in two forms, one of which is obsolescent -syntax. This directive indicates that the remainder of the query. +syntax. The directive indicates that the remainder of the query +is to be applied to a new input source. In the first form, it can occur by itself as the only element in a query line, with, or without arguments: @@ -818,6 +843,7 @@ with, or without arguments: @(next) @(next SOURCE) @(next SOURCE nothrow) + @(next args) The lone @(next) without arguments switches to the next file in the argument list which was passed to the @@ -842,6 +868,17 @@ if @(next) is invoked with the nothrow keyword, then if the input source cannot be opened, the situation is treated as a simple match failure. +The variant @(next args) means that the remaining command line arguments are to +be treated as a data source. For this purpose, each argument is considered to +be a line of text. If an argument is currently being processed as an input +source, that argument is included. Note that if the first entry in the argument +list does not name an input source, then the query should begin with +@(next args) or some other form of next directive, to prevent an attempt to +open the input source named by that argument. If the very first directive of a query is any variant of the next directive, then +.B txr +avoids opening the first input source, but it does open the input source for +any other directive, even one which does not consume any data. + In the obsolescent second form, @(next) is followed by material on the same line, which may contain variables. All of the variables must be bound. For example: @@ -2447,10 +2484,10 @@ variable, it has to be identical to the argument, otherwise the catch fails. Query: @(bind a "apple") @(try) @(throw e "banana") - @(catch e a) + @(catch e (a)) @(end) - Output: [unhandled exception diagnostic] + Output: false If any argument is an unbound variable, the corresponding parameter in the catch is left alone: if it is an unbound variable, it remains @@ -2522,6 +2559,113 @@ the try has disappeared already. Being unbound, the catch parameter a can take whatever value the corresponding throw argument provides, so it ends up with "lc". +.SS The Defex Directive + +The defex directive allows the query writer to invent custom exception types, +which are arranged in a type hierarchy (meaning that some exception types are +considered subtypes of other types). + +Subtyping means that if an exception type B is a subtype of A, then every +exception of type B is also considered to be of type A. So a catch for type A +will also catch exceptions of type B. Every type is a supertype of itself: an +A is a kind of A. This of course implies that ever type is a subtype of itself +also. Furthermore, every type is a subtype of the type t, which has no +supertype other than itself. Type nil is is a subtype of every type, including +itself. The subtyping relationship is transitive also. If A is a subtype +of B, and B is a subtype of C, then A is a subtype of C. + +Defex may be invoked with no arguments, in which case it does nothing: + + @(defex) + +It may be invoked with one argument, which must be a symbol. This introduces a +new exception type. Strictly speaking, such an introduction is not necessary; +any symbol may be used as an exception type without being introduced by +@(defex): + + @(defex a) + +Therefore, this also does nothing, other than document the intent to use +a as an exception. + +If two or more argument symbols are given, the symbols are all introduced as +types, engaged in a subtype-supertype relationship from left to right. +That is to say, the first (leftmost) symbol is a subtype of the next one, +which is a subtype of the next one and so on. The last symbol, if it +had not been already defined as a subtype of some type, becomes a +direct subtype of the master supertype t. Example: + + @(defex d e) + @(defex a b c d) + +The fist directive defines d as a subtype of e, and e as a subtype of t. +The second defines a as a subtype of b, b as a subtype of c, and +c as a subtype of d, which is already defined as a subtype of e. +Thus a is now a subtype of e. It should be obvious that the above +could be condensed to: + + @(defex a b c d e) + +Example: + + Query: @(defex gorilla ape primate) + @(defex monkey primate) + @(defex human primate) + @(collect) + @(try) + @(skip) + @(cases) + gorilla @name + @(throw gorilla name) + @(or) + monkey @name + @(throw monkey name) + @(or) + human @name + @(throw human name) + @(end)@#cases + @(catch primate (name)) + @kind @name + @(output) + we have a primate @name of kind @kind + @(end)@#output + @(end)@#try + @(end)@#collect + + + Input: gorilla joe + human bob + monkey alice + + Output: we have a primate joe of kind gorilla + we have a primate bob of kind human + we have a primate alice of kind monkey + +Exception types have a pervasive scope. Once a type relationship is introduced, +it is visible everywhere. Moreover, the defex directive is destructive, +meaning that the supertype of a type can be redefined. This is necessary so +that something like the following works right. + + @(defex gorilla ape) + @(defex ape primate) + +These directives are evaluated in sequence. So after the first one, the ape +type has the type t as its immediate supertype. But in the second directive, +ape appears again, and is assigned the primate supertype, while retaining +gorilla as a subtype. This situation could instead be diagnosed as an +error, forcing the programmer to reorder the statements, but instead +txr obliges. However, there are limitations. It is an error to define a +subtype-supertype relationship between two types if they are already connected +by such a relationship, directly or transitively. So the following +definitions are in error: + + @(defex a b) + @(defex b c) + @(defex a c)@# error: a is already a subtype of c, through b + + @(defex x y) + @(defex y x)@# error: circularity; y is already a supertype of x. + .SH NOTES ON FALSE The reason for printing the word @@ -39,7 +39,7 @@ #include "match.h" #include "txr.h" -const char *version = "016"; +const char *version = "017"; const char *progname = "txr"; const char *spec_file = "stdin"; obj_t *spec_file_str; @@ -94,6 +94,10 @@ void help(void) "-c query-text The query is read from the query-text argument\n" " itself. The query-file argument is omitted in\n" " this case; the first argument is a data file.\n" +"-f query-file Specify the query-file as an option argument.\n" +" option, instead of the query-file argument.\n" +" This allows #! scripts to pass options through\n" +" to the utility.\n" "--help You already know!\n" "--version Display program version\n" "\n" @@ -142,7 +146,6 @@ int main(int argc, char **argv) init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1); protect(&spec_file_str, 0); - spec_file_str = string(strdup(spec_file)); yyin_stream = std_input; protect(&yyin_stream, 0); @@ -214,7 +217,7 @@ int main(int argc, char **argv) return 0; } - if (!strcmp(*argv, "-a") || !strcmp(*argv, "-c")) { + if (!strcmp(*argv, "-a") || !strcmp(*argv, "-c") || !strcmp(*argv, "-f")) { long val; char *errp; char opt = (*argv)[1]; @@ -241,6 +244,9 @@ int main(int argc, char **argv) case 'c': specstring = string(strdup(*argv)); break; + case 'f': + spec_file_str = string(strdup(*argv)); + break; } argv++, argc--; @@ -285,9 +291,24 @@ int main(int argc, char **argv) } } + if (specstring && spec_file_str) { + fprintf(stderr, "%s: cannot specify both -f and -c\n", progname); + return EXIT_FAILURE; + } + if (specstring) { spec_file = "cmdline"; + spec_file_str = string(strdup(spec_file)); yyin_stream = make_string_input_stream(specstring); + } else if (spec_file_str) { + if (strcmp(c_str(spec_file_str), "-") != 0) { + FILE *in = fopen(c_str(spec_file_str), "r"); + if (in == 0) + uw_throwcf(file_error, "unable to open %s", c_str(spec_file_str)); + yyin_stream = make_stdio_stream(in, t, nil); + } else { + spec_file = "stdin"; + } } else { if (argc < 1) { hint(); @@ -296,18 +317,18 @@ int main(int argc, char **argv) if (strcmp(*argv, "-") != 0) { FILE *in = fopen(*argv, "r"); - if (in == 0) { - uw_errorcf("%s: unable to open %s", progname, *argv); - fprintf(stderr, "%s: unable to open %s\n", progname, *argv); - return EXIT_FAILURE; - } + if (in == 0) + uw_throwcf(file_error, "unable to open %s", *argv); yyin_stream = make_stdio_stream(in, t, nil); spec_file = *argv; - spec_file_str = string(strdup(spec_file)); + } else { + spec_file = "stdin"; } argc--, argv++; + spec_file_str = string(strdup(spec_file)); } + { int gc = gc_state(0); yyparse(); @@ -317,9 +317,23 @@ obj_t *uw_register_subtype(obj_t *sub, obj_t *sup) if (sub == t) { if (sup == t) return sup; - abort(); + uw_throwf(type_error, "cannot define ~a as an exception subtype of ~a", + sub, sup, nao); + } + + if (sup == nil) { + uw_throwf(type_error, "cannot define ~a as an exception subtype of ~a", + sub, sup, nao); } + if (uw_exception_subtype_p(sub, sup)) + uw_throwf(type_error, "~a is already an exception subtype of ~a", + sub, sup, nao); + + if (uw_exception_subtype_p(sup, sub)) + uw_throwf(type_error, "~a is already an exception supertype of ~a", + sub, sup, nao); + /* If sup symbol not registered, then we make it an immediate subtype of t. */ if (!sup_entry) { @@ -327,15 +341,14 @@ obj_t *uw_register_subtype(obj_t *sub, obj_t *sup) exception_subtypes = cons(sup_entry, exception_subtypes); } - /* If sub already registered, we delete that - registration. */ + /* Make sub an immediate subtype of sup. + If sub already registered, we just repoint it. */ if (sub_entry) { - exception_subtypes = alist_remove1(exception_subtypes, sub); + *cdr_l(sub_entry) = sup_entry; + } else { + sub_entry = cons(sub, sup_entry); + exception_subtypes = cons(sub_entry, exception_subtypes); } - - /* Register sub as an immediate subtype of sup. */ - sub_entry = cons(sub, sup_entry); - exception_subtypes = cons(sub_entry, exception_subtypes); return sup; } |