summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog70
-rw-r--r--gc.c26
-rw-r--r--lib.c4
-rw-r--r--lib.h2
-rw-r--r--match.c48
-rw-r--r--txr.1158
-rw-r--r--txr.c39
-rw-r--r--unwind.c29
8 files changed, 321 insertions, 55 deletions
diff --git a/ChangeLog b/ChangeLog
index ea4a2f86..6f7259ce 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,73 @@
+2009-10-17 Kaz Kylheku <kkylheku@gmail.com>
+
+ Version 016
+
+ Bugfix in exception subtype definition (defex).
+
+ Tail recursion in marking function of garbage collector.
+
+ -f option for specifying query file, allowing more
+ options to follow, useful in hash-bang scripting and
+ other situations.
+
+ * txr.c: (version): Bump to 016
+ * txr.1: Bump version to 016.
+
+2009-10-17 Kaz Kylheku <kkylheku@gmail.com>
+
+ * txr.1: Documented defex.
+ * unwind.c (uw_register_subtype): Bugfix: if the subtype
+ exists already, we must not delete it and create a new entry, but
+ destructively point its entry to its assigned supertype.
+ An exceptions is thrown rather than abort for attempts
+ to make t a subtype of something other than itself.
+ An attempt to make something other than nil a subtype of nil
+ is diagnosed. Attempts to redefine the relationship
+ between two types if they are already connected by one;
+ this covers circularity and other cases, while still allowing
+ a relaxed order of definition.
+
+2009-10-17 Kaz Kylheku <kkylheku@gmail.com>
+
+ * gc.c (mark_obj_tail): New macro.
+ (mark_obj): Optimized with manual tail recursion.
+ The funtion will no longer generate long call stacks
+ for long lists. Descending to the car field of
+ a cons is still recursive, but ``car-heavy''
+ trees are rare.
+
+2009-10-16 Kaz Kylheku <kkylheku@gmail.com>
+
+ Resurrect -f option, with different meaning.
+ We need "-f query-file" so that hash-bang scripts
+ can be written which can pass options to txr.
+
+ * txr.c (help, main): Inplement and document -f.
+ Also bugfix: do not throw file open errors as exceptions
+ of type error, because these cause an abort, potentially leading to a
+ core dump. They are now thrown as file_error.
+ * txr.1: Documented -f.
+
+2009-10-16 Kaz Kylheku <kkylheku@gmail.com>
+
+ Implemented @(next arg) for treating the command line
+ as an input source.
+
+ * txr.1: Updated, and fixed a few unrelated mistakes.
+ * lib.c (dir): Removed unused symbol globa.
+ (args): New symbol global.
+ * lib.h (dir): Declaration removed.
+ (args): Declared.
+ match.c (match_files): Implemented @(next arg).
+ Had to hack laziness to the file opening logic in match_files.
+ If the function is entered with a spec whose first
+ directive is @(next), then it defers opening the first
+ file in the list of files (since it will be immediately
+ abandoned in favor of another input source).
+ This prevents an error in the situation when the
+ arguments do not name files, and there is a @(next args)
+ directive to process them as an input source.
+
2009-10-16 Kaz Kylheku <kkylheku@gmail.com>
Version 016
diff --git a/gc.c b/gc.c
index a9c74c6f..32411a62 100644
--- a/gc.c
+++ b/gc.c
@@ -180,6 +180,13 @@ static void mark_obj(obj_t *obj)
{
type_t t;
+#if 1
+tail_call:
+#define mark_obj_tail(o) do { obj = (o); goto tail_call; } while (0)
+#else
+#define mark_obj_tail(o) return mark_obj(o)
+#endif
+
if (obj == nil)
return;
@@ -196,22 +203,19 @@ static void mark_obj(obj_t *obj)
switch (t) {
case CONS:
mark_obj(obj->c.car);
- mark_obj(obj->c.cdr);
- break;
+ mark_obj_tail(obj->c.cdr);
case STR:
- mark_obj(obj->st.len);
- break;
+ mark_obj_tail(obj->st.len);
case CHR:
case NUM:
break;
case SYM:
mark_obj(obj->s.name);
- mark_obj(obj->s.val);
- break;
+ mark_obj_tail(obj->s.val);
case FUN:
mark_obj(obj->f.env);
if (obj->f.functype == FINTERP)
- mark_obj(obj->f.f.interp_fun);
+ mark_obj_tail(obj->f.f.interp_fun);
break;
case VEC:
{
@@ -227,15 +231,13 @@ static void mark_obj(obj_t *obj)
}
break;
case LCONS:
- mark_obj(obj->lc.car);
- mark_obj(obj->lc.cdr);
mark_obj(obj->lc.func);
- break;
+ mark_obj(obj->lc.car);
+ mark_obj_tail(obj->lc.cdr);
case COBJ:
- mark_obj(obj->co.cls);
if (obj->co.ops->mark)
obj->co.ops->mark(obj);
- break;
+ mark_obj_tail(obj->co.cls);
default:
assert (0 && "corrupt type field");
}
diff --git a/lib.c b/lib.c
index 0072f929..4953b644 100644
--- a/lib.c
+++ b/lib.c
@@ -49,7 +49,7 @@ obj_t *zeroplus, *optional, *compound, *or, *quasi;
obj_t *skip, *trailer, *block, *next, *fail, *accept;
obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll;
obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep;
-obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir;
+obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *args;
obj_t *try, *catch, *finally, *nothrow, *throw, *defex;
obj_t *error, *type_error, *internal_err, *numeric_err, *range_err;
obj_t *query_error, *file_error;
@@ -1516,7 +1516,7 @@ static void obj_init(void)
mrge = intern(string(strdup("merge")));
bind = intern(string(strdup("bind")));
cat = intern(string(strdup("cat")));
- dir = intern(string(strdup("dir")));
+ args = intern(string(strdup("args")));
try = intern(string(strdup("try")));
catch = intern(string(strdup("catch")));
finally = intern(string(strdup("finally")));
diff --git a/lib.h b/lib.h
index 9473a299..703e618f 100644
--- a/lib.h
+++ b/lib.h
@@ -148,7 +148,7 @@ extern obj_t *zeroplus, *optional, *compound, *or, *quasi;
extern obj_t *skip, *trailer, *block, *next, *fail, *accept;
extern obj_t *all, *some, *none, *maybe, *cases, *collect, *until, *coll;
extern obj_t *define, *output, *single, *frst, *lst, *empty, *repeat, *rep;
-extern obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *dir;
+extern obj_t *flattn, *forget, *local, *mrge, *bind, *cat, *args;
extern obj_t *try, *catch, *finally, *nothrow, *throw, *defex;
extern obj_t *error, *type_error, *internal_err, *numeric_err, *range_err;
extern obj_t *query_error, *file_error;
diff --git a/match.c b/match.c
index babaab18..10ce0c57 100644
--- a/match.c
+++ b/match.c
@@ -882,27 +882,33 @@ obj_t *match_files(obj_t *spec, obj_t *files,
data_lineno = c_num(data_linenum);
first_file_parsed = nil;
} else if (files) {
- obj_t *spec = first(files);
- obj_t *name = consp(spec) ? cdr(spec) : spec;
+ obj_t *source_spec = first(files);
+ obj_t *name = consp(source_spec) ? cdr(source_spec) : source_spec;
fpip_t fp = (errno = 0, complex_open(name, nil));
+ obj_t *first_spec_item = second(first(spec));
- debugf("opening data source ~a", name, nao);
+ if (consp(first_spec_item) && eq(first(first_spec_item), next)) {
+ debugf("not opening source ~a since query starts with next directive",
+ name, nao);
+ } else {
+ debugf("opening data source ~a", name, nao);
- if (complex_open_failed(fp)) {
- if (consp(spec) && car(spec) == nothrow) {
- debugf("could not open ~a: treating as failed match due to nothrow",
- name, nao);
+ if (complex_open_failed(fp)) {
+ if (consp(source_spec) && car(source_spec) == nothrow) {
+ debugf("could not open ~a: treating as failed match due to nothrow",
+ name, nao);
+ return nil;
+ } else if (errno != 0)
+ file_err(nil, "could not open ~a (error ~a/~a)", name,
+ num(errno), string(strdup(strerror(errno))), nao);
+ else
+ file_err(nil, "could not open ~a", name, nao);
return nil;
- } else if (errno != 0)
- file_err(nil, "could not open ~a (error ~a/~a)", name,
- num(errno), string(strdup(strerror(errno))), nao);
- else
- file_err(nil, "could not open ~a", name, nao);
- return nil;
- }
+ }
- if ((data = complex_snarf(fp, name)) != nil)
- data_lineno = 1;
+ if ((data = complex_snarf(fp, name)) != nil)
+ data_lineno = 1;
+ }
}
for (; spec; spec = rest(spec), data = rest(data), data_lineno++)
@@ -1018,6 +1024,16 @@ repeat_spec_same_data:
if (eq(first(source), nothrow))
push(nil, &source);
+ else if (eq(first(source), args)) {
+ obj_t *input_name = string(strdup("args"));
+ cons_bind (new_bindings, success,
+ match_files(spec, cons(input_name, files),
+ bindings, files, one));
+ if (success)
+ return cons(new_bindings,
+ if3(data, cons(data, num(data_lineno)), t));
+ return nil;
+ }
{
obj_t *val = eval_form(first(source), bindings);
diff --git a/txr.1 b/txr.1
index 6cdc4401..84b121e5 100644
--- a/txr.1
+++ b/txr.1
@@ -21,7 +21,7 @@
.\"IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
.\"WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-.TH txr 1 2009-10-14 "txr v. 016" "Text Extraction Utility"
+.TH txr 1 2009-10-14 "txr v. 017" "Text Extraction Utility"
.SH NAME
txr \- text extractor
.SH SYNOPSIS
@@ -76,9 +76,11 @@ from their subqueries in special ways.
.SH ARGUMENTS AND OPTIONS
-Options other than -D, -a and -c may be combined together into a single
-argument. The -v and -q options are mutually exclusive. The one which occurs
-in the rightmost position in the argument list dominates.
+Options which don't take an argument may be combined together.
+The -v and -q options are mutually exclusive. Of these two, the one which
+occurs in the rightmost position in the argument list dominates.
+The -c and -f options are also mutually exclusive; if both are specified,
+it is a fatal error.
.IP -Dvar=value
Bind the variable
@@ -167,6 +169,11 @@ The @# comment syntax can be used for better formatting:
@b
"
+.IP -f query-file
+Specifies the file from which the query is to be read, instead of the
+query-file argument. This is useful in #! scripts. (See Hash Bang Support
+below).
+
.IP --help
Prints usage summary on standard output, and terminates successfully.
@@ -286,6 +293,23 @@ run it. This assumes txr is installed in /usr/bin.
a=1
b=2
+A script written in this manner will not pass options to txr. For
+instance, if the above script is invoked like this
+
+ ./twoline.txr -Da=42
+
+the -D option isn't passed down to txr; -Da=42 is an ordinary
+argument (which the script will try to open as an input file).
+This behavior is useful if the script author wants not to
+expose the txr options to the user of the script.
+
+However, if the hash bang line can use the -f option:
+
+ #!/usr/bin/txr -f
+
+Now, the name of the script is passed as an argument to the -f option,
+and txr will look for more options after that.
+
.SS Text
Query material which is not escaped by the special character @ is
@@ -810,7 +834,8 @@ produces repeated text within one line.
.SS The Next Directive
The next directive comes in two forms, one of which is obsolescent
-syntax. This directive indicates that the remainder of the query.
+syntax. The directive indicates that the remainder of the query
+is to be applied to a new input source.
In the first form, it can occur by itself as the only element in a query line,
with, or without arguments:
@@ -818,6 +843,7 @@ with, or without arguments:
@(next)
@(next SOURCE)
@(next SOURCE nothrow)
+ @(next args)
The lone @(next) without arguments switches to the next file in the
argument list which was passed to the
@@ -842,6 +868,17 @@ if @(next) is invoked with the nothrow keyword, then if the input
source cannot be opened, the situation is treated as a simple
match failure.
+The variant @(next args) means that the remaining command line arguments are to
+be treated as a data source. For this purpose, each argument is considered to
+be a line of text. If an argument is currently being processed as an input
+source, that argument is included. Note that if the first entry in the argument
+list does not name an input source, then the query should begin with
+@(next args) or some other form of next directive, to prevent an attempt to
+open the input source named by that argument. If the very first directive of a query is any variant of the next directive, then
+.B txr
+avoids opening the first input source, but it does open the input source for
+any other directive, even one which does not consume any data.
+
In the obsolescent second form, @(next) is followed by material on the same
line, which may contain variables. All of the variables must be bound. For
example:
@@ -2447,10 +2484,10 @@ variable, it has to be identical to the argument, otherwise the catch fails.
Query: @(bind a "apple")
@(try)
@(throw e "banana")
- @(catch e a)
+ @(catch e (a))
@(end)
- Output: [unhandled exception diagnostic]
+ Output: false
If any argument is an unbound variable, the corresponding parameter
in the catch is left alone: if it is an unbound variable, it remains
@@ -2522,6 +2559,113 @@ the try has disappeared already. Being unbound, the catch parameter a can take
whatever value the corresponding throw argument provides, so it ends up with
"lc".
+.SS The Defex Directive
+
+The defex directive allows the query writer to invent custom exception types,
+which are arranged in a type hierarchy (meaning that some exception types are
+considered subtypes of other types).
+
+Subtyping means that if an exception type B is a subtype of A, then every
+exception of type B is also considered to be of type A. So a catch for type A
+will also catch exceptions of type B. Every type is a supertype of itself: an
+A is a kind of A. This of course implies that ever type is a subtype of itself
+also. Furthermore, every type is a subtype of the type t, which has no
+supertype other than itself. Type nil is is a subtype of every type, including
+itself. The subtyping relationship is transitive also. If A is a subtype
+of B, and B is a subtype of C, then A is a subtype of C.
+
+Defex may be invoked with no arguments, in which case it does nothing:
+
+ @(defex)
+
+It may be invoked with one argument, which must be a symbol. This introduces a
+new exception type. Strictly speaking, such an introduction is not necessary;
+any symbol may be used as an exception type without being introduced by
+@(defex):
+
+ @(defex a)
+
+Therefore, this also does nothing, other than document the intent to use
+a as an exception.
+
+If two or more argument symbols are given, the symbols are all introduced as
+types, engaged in a subtype-supertype relationship from left to right.
+That is to say, the first (leftmost) symbol is a subtype of the next one,
+which is a subtype of the next one and so on. The last symbol, if it
+had not been already defined as a subtype of some type, becomes a
+direct subtype of the master supertype t. Example:
+
+ @(defex d e)
+ @(defex a b c d)
+
+The fist directive defines d as a subtype of e, and e as a subtype of t.
+The second defines a as a subtype of b, b as a subtype of c, and
+c as a subtype of d, which is already defined as a subtype of e.
+Thus a is now a subtype of e. It should be obvious that the above
+could be condensed to:
+
+ @(defex a b c d e)
+
+Example:
+
+ Query: @(defex gorilla ape primate)
+ @(defex monkey primate)
+ @(defex human primate)
+ @(collect)
+ @(try)
+ @(skip)
+ @(cases)
+ gorilla @name
+ @(throw gorilla name)
+ @(or)
+ monkey @name
+ @(throw monkey name)
+ @(or)
+ human @name
+ @(throw human name)
+ @(end)@#cases
+ @(catch primate (name))
+ @kind @name
+ @(output)
+ we have a primate @name of kind @kind
+ @(end)@#output
+ @(end)@#try
+ @(end)@#collect
+
+
+ Input: gorilla joe
+ human bob
+ monkey alice
+
+ Output: we have a primate joe of kind gorilla
+ we have a primate bob of kind human
+ we have a primate alice of kind monkey
+
+Exception types have a pervasive scope. Once a type relationship is introduced,
+it is visible everywhere. Moreover, the defex directive is destructive,
+meaning that the supertype of a type can be redefined. This is necessary so
+that something like the following works right.
+
+ @(defex gorilla ape)
+ @(defex ape primate)
+
+These directives are evaluated in sequence. So after the first one, the ape
+type has the type t as its immediate supertype. But in the second directive,
+ape appears again, and is assigned the primate supertype, while retaining
+gorilla as a subtype. This situation could instead be diagnosed as an
+error, forcing the programmer to reorder the statements, but instead
+txr obliges. However, there are limitations. It is an error to define a
+subtype-supertype relationship between two types if they are already connected
+by such a relationship, directly or transitively. So the following
+definitions are in error:
+
+ @(defex a b)
+ @(defex b c)
+ @(defex a c)@# error: a is already a subtype of c, through b
+
+ @(defex x y)
+ @(defex y x)@# error: circularity; y is already a supertype of x.
+
.SH NOTES ON FALSE
The reason for printing the word
diff --git a/txr.c b/txr.c
index c0651f48..702a182e 100644
--- a/txr.c
+++ b/txr.c
@@ -39,7 +39,7 @@
#include "match.h"
#include "txr.h"
-const char *version = "016";
+const char *version = "017";
const char *progname = "txr";
const char *spec_file = "stdin";
obj_t *spec_file_str;
@@ -94,6 +94,10 @@ void help(void)
"-c query-text The query is read from the query-text argument\n"
" itself. The query-file argument is omitted in\n"
" this case; the first argument is a data file.\n"
+"-f query-file Specify the query-file as an option argument.\n"
+" option, instead of the query-file argument.\n"
+" This allows #! scripts to pass options through\n"
+" to the utility.\n"
"--help You already know!\n"
"--version Display program version\n"
"\n"
@@ -142,7 +146,6 @@ int main(int argc, char **argv)
init(progname, oom_realloc_handler, &stack_bottom_0, &stack_bottom_1);
protect(&spec_file_str, 0);
- spec_file_str = string(strdup(spec_file));
yyin_stream = std_input;
protect(&yyin_stream, 0);
@@ -214,7 +217,7 @@ int main(int argc, char **argv)
return 0;
}
- if (!strcmp(*argv, "-a") || !strcmp(*argv, "-c")) {
+ if (!strcmp(*argv, "-a") || !strcmp(*argv, "-c") || !strcmp(*argv, "-f")) {
long val;
char *errp;
char opt = (*argv)[1];
@@ -241,6 +244,9 @@ int main(int argc, char **argv)
case 'c':
specstring = string(strdup(*argv));
break;
+ case 'f':
+ spec_file_str = string(strdup(*argv));
+ break;
}
argv++, argc--;
@@ -285,9 +291,24 @@ int main(int argc, char **argv)
}
}
+ if (specstring && spec_file_str) {
+ fprintf(stderr, "%s: cannot specify both -f and -c\n", progname);
+ return EXIT_FAILURE;
+ }
+
if (specstring) {
spec_file = "cmdline";
+ spec_file_str = string(strdup(spec_file));
yyin_stream = make_string_input_stream(specstring);
+ } else if (spec_file_str) {
+ if (strcmp(c_str(spec_file_str), "-") != 0) {
+ FILE *in = fopen(c_str(spec_file_str), "r");
+ if (in == 0)
+ uw_throwcf(file_error, "unable to open %s", c_str(spec_file_str));
+ yyin_stream = make_stdio_stream(in, t, nil);
+ } else {
+ spec_file = "stdin";
+ }
} else {
if (argc < 1) {
hint();
@@ -296,18 +317,18 @@ int main(int argc, char **argv)
if (strcmp(*argv, "-") != 0) {
FILE *in = fopen(*argv, "r");
- if (in == 0) {
- uw_errorcf("%s: unable to open %s", progname, *argv);
- fprintf(stderr, "%s: unable to open %s\n", progname, *argv);
- return EXIT_FAILURE;
- }
+ if (in == 0)
+ uw_throwcf(file_error, "unable to open %s", *argv);
yyin_stream = make_stdio_stream(in, t, nil);
spec_file = *argv;
- spec_file_str = string(strdup(spec_file));
+ } else {
+ spec_file = "stdin";
}
argc--, argv++;
+ spec_file_str = string(strdup(spec_file));
}
+
{
int gc = gc_state(0);
yyparse();
diff --git a/unwind.c b/unwind.c
index 0e32a51b..1a8cb987 100644
--- a/unwind.c
+++ b/unwind.c
@@ -317,9 +317,23 @@ obj_t *uw_register_subtype(obj_t *sub, obj_t *sup)
if (sub == t) {
if (sup == t)
return sup;
- abort();
+ uw_throwf(type_error, "cannot define ~a as an exception subtype of ~a",
+ sub, sup, nao);
+ }
+
+ if (sup == nil) {
+ uw_throwf(type_error, "cannot define ~a as an exception subtype of ~a",
+ sub, sup, nao);
}
+ if (uw_exception_subtype_p(sub, sup))
+ uw_throwf(type_error, "~a is already an exception subtype of ~a",
+ sub, sup, nao);
+
+ if (uw_exception_subtype_p(sup, sub))
+ uw_throwf(type_error, "~a is already an exception supertype of ~a",
+ sub, sup, nao);
+
/* If sup symbol not registered, then we make it
an immediate subtype of t. */
if (!sup_entry) {
@@ -327,15 +341,14 @@ obj_t *uw_register_subtype(obj_t *sub, obj_t *sup)
exception_subtypes = cons(sup_entry, exception_subtypes);
}
- /* If sub already registered, we delete that
- registration. */
+ /* Make sub an immediate subtype of sup.
+ If sub already registered, we just repoint it. */
if (sub_entry) {
- exception_subtypes = alist_remove1(exception_subtypes, sub);
+ *cdr_l(sub_entry) = sup_entry;
+ } else {
+ sub_entry = cons(sub, sup_entry);
+ exception_subtypes = cons(sub_entry, exception_subtypes);
}
-
- /* Register sub as an immediate subtype of sup. */
- sub_entry = cons(sub, sup_entry);
- exception_subtypes = cons(sub_entry, exception_subtypes);
return sup;
}