summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2009-11-21 11:12:20 -0800
committerKaz Kylheku <kaz@kylheku.com>2009-11-21 11:12:20 -0800
commit4a1556a848c5bfb527cecb2b823a750ba63e6f80 (patch)
treebe9378666222056692e4770a8f0eb79b45ef8993
parent00f823aee439ed8c2cdd71dfbb89385dc68eae7b (diff)
downloadtxr-4a1556a848c5bfb527cecb2b823a750ba63e6f80.tar.gz
txr-4a1556a848c5bfb527cecb2b823a750ba63e6f80.tar.bz2
txr-4a1556a848c5bfb527cecb2b823a750ba63e6f80.zip
Introducing symbol packages. Internal symbols are now in
a system package instead of being hacked with the $ prefix. Keyword symbols are provided. In the matcher, evaluation is tightened up. Keywords, nil and t are not bindeable, and errors are thrown if attempts are made to bind them. Destructuring in dest_bind is strict in the number of items. String streams are exploited to print bindings to objects that are not strings or characters. Numerous bugfixes.
-rw-r--r--ChangeLog80
-rw-r--r--gc.c7
-rw-r--r--hash.c5
-rw-r--r--hash.h2
-rw-r--r--lib.c243
-rw-r--r--lib.h22
-rw-r--r--match.c90
-rw-r--r--parser.l15
-rw-r--r--parser.y26
-rw-r--r--regex.c3
-rw-r--r--stream.c9
-rw-r--r--txr.126
-rw-r--r--txr.c8
13 files changed, 382 insertions, 154 deletions
diff --git a/ChangeLog b/ChangeLog
index 745608a1..e9319f9d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,83 @@
+2009-11-21 Kaz Kylheku <kkylheku@gmail.com>
+
+ Introducing symbol packages. Internal symbols are now in
+ a system package instead of being hacked with the $ prefix.
+ Keyword symbols are provided. In the matcher, evaluation
+ is tightened up. Keywords, nil and t are not bindeable, and
+ errors are thrown if attempts are made to bind them.
+ Destructuring in dest_bind is strict in the number of items.
+ String streams are exploited to print bindings to objects
+ that are not strings or characters. Numerous bugfixes.
+
+ * lib.h (enum type, type_t): new member: PKG.
+ (struct stym): New member: package.
+ (struct package): New type.
+ (union obj, obj_t): New member pk.
+ (interned_syms): Declaration removed.
+ (keyword_package, pkg_t): Declared.
+ (intern, acons_new_l): Declarations updated.
+ (find_package, symbol_package, keywordp): Declared.
+
+ * lib.c (interned_syms): Definition removed.
+ (packages, pkg_t, system_package, keyword_package, user_package): New
+ global variables.
+ (code2type, equal, obj_pprint): Handle PKG case.
+ (symbol_package, make_package, find_package, keywordp): New functions.
+ (make_sym): Initialize package field of symbol.
+ (intern): Takes package argument. Rewritten using packages,
+ which use hash tables to store symbols.
+ (acons_new_l): Takes extra pointer argument to return an extra
+ value.
+ (obj_init): Updated to handle packages. The orders of some
+ initializations have to change. The way nil is added as a symbol is
+ quite different, and a special hack for the symbol t is used.
+ Most symbols go into the user_package, but symbols that were
+ previously namespaced with $ go to the system package.
+ (obj_print): SYM cases now considers the packge of a symbol.
+ Symbols in the user package are printed as before.
+ Symbols with no package are printed using #: notation;
+ keywords with : notation; and all others with their package prefix.
+ PKG case is handled.
+
+ * gc.c (finalize): Handle PKG case.
+ (mark_obj): For SYM, mark the new package member. Handle PKG case.
+
+ * hash.h (gethash_l): Declaration updated.
+
+ * hash.c (ll_hash): Handle PKG case.
+ (gethash_l): Extra argument added to distinguish new addition
+ from existing find.
+
+ * match.c (dump_var): Dumps any object now by printing to
+ a string with a string stream.
+ (bindable): New function.
+ (dest_bind): Tightened up to distinguish bindable symbols
+ from non-bindable. Symbols that stand for themselves, including nil,
+ can only match themselves. Destructuring matches have to
+ match in the number of elements: dot notation can be used
+ to match superfluous elements.
+ (eval_form): Tightened up to recognize bindable symbols.
+ (match_files): Various directives honor non-bindable symbols (cat,
+ merge, flatten).
+
+ * parser.l (yybadtoken): Handle KEYWORD case.
+ (grammar): TOK can start with : . Returned as KEYWORD terminal,
+ with a lexeme that no longer has the : character.
+
+ * parser.y (KEYWORD): New nonterminal.
+ (grammar): Calls to intern given extra parameter.
+ In the expr rule, KEYWORD turned into symbol in keyword package.
+
+ * regex.c (regexp): Bugfix: dereferencing non pointer.
+
+ * stream.c (vformat): Bugfixes in state machine: handling
+ of prefix digits; printing of numbers in ~s.
+
+ * txr.c (txr_main): Intern calls updated.
+
+ * txr.1: Updated with information about nil, t and keywords.
+ More details about destructuring matching in bind.
+
2009-11-20 Kaz Kylheku <kkylheku@gmail.com>
* unwind.c (uw_throw): If streams are not initialized,
diff --git a/gc.c b/gc.c
index f195687a..0de0c3f2 100644
--- a/gc.c
+++ b/gc.c
@@ -160,6 +160,7 @@ static void finalize(val obj)
case NUM:
case LIT:
case SYM:
+ case PKG:
case FUN:
return;
case VEC:
@@ -216,7 +217,11 @@ tail_call:
return;
case SYM:
mark_obj(obj->s.name);
- mark_obj_tail(obj->s.val);
+ mark_obj(obj->s.val);
+ mark_obj_tail(obj->s.package);
+ case PKG:
+ mark_obj(obj->pk.name);
+ mark_obj_tail(obj->pk.symhash);
case FUN:
mark_obj(obj->f.env);
if (obj->f.functype == FINTERP)
diff --git a/hash.c b/hash.c
index 28bd8455..81d2709b 100644
--- a/hash.c
+++ b/hash.c
@@ -93,6 +93,7 @@ static long ll_hash(val obj)
case NUM:
return c_num(obj) & NUM_MAX;
case SYM:
+ case PKG:
return ((long) obj) & NUM_MAX;
case FUN:
return ((long) obj->f.f.interp_fun + ll_hash(obj->f.env)) & NUM_MAX;
@@ -238,12 +239,12 @@ val make_hash(val weak_keys, val weak_vals)
return hash;
}
-val *gethash_l(val hash, val key)
+val *gethash_l(val hash, val key, val *new_p)
{
struct hash *h = (struct hash *) hash->co.handle;
val *pchain = vecref_l(h->table, num(ll_hash(key) % h->modulus));
val old = *pchain;
- val *place = acons_new_l(pchain, key);
+ val *place = acons_new_l(pchain, key, new_p);
if (old != *pchain && ++h->count > 2 * h->modulus)
hash_grow(h);
return place;
diff --git a/hash.h b/hash.h
index 40ba35c0..68d500fc 100644
--- a/hash.h
+++ b/hash.h
@@ -26,7 +26,7 @@
val hash_obj(val);
val make_hash(val weak_keys, val weak_vals);
-val *gethash_l(val hash, val key);
+val *gethash_l(val hash, val key, val *new_p);
val gethash(val hash, val key);
val remhash(val hash, val key);
void hash_process_weak(void);
diff --git a/lib.c b/lib.c
index b3196509..1be02723 100644
--- a/lib.c
+++ b/lib.c
@@ -44,9 +44,11 @@
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
-val interned_syms;
+val packages;
-val null, t, cons_t, str_t, chr_t, num_t, sym_t, fun_t, vec_t;
+val system_package, keyword_package, user_package;
+
+val null, t, cons_t, str_t, chr_t, num_t, sym_t, pkg_t, fun_t, vec_t;
val stream_t, hash_t, lcons_t, lstr_t, cobj_t;
val var, regex, set, cset, wild, oneplus;
val zeroplus, optional, compound, or, quasi;
@@ -93,6 +95,7 @@ static val code2type(int code)
case CHR: return chr_t;
case NUM: return num_t;
case SYM: return sym_t;
+ case PKG: return pkg_t;
case FUN: return fun_t;
case VEC: return vec_t;
case LCONS: return lcons_t;
@@ -461,6 +464,7 @@ val equal(val left, val right)
}
return nil;
case SYM:
+ case PKG:
return right == left ? t : nil;
case FUN:
if (type(right) == FUN &&
@@ -1058,21 +1062,66 @@ val symbol_name(val sym)
return sym ? sym->s.name : nil_string;
}
+val symbol_package(val sym)
+{
+ if (sym == nil)
+ return user_package;
+ type_check(sym, SYM);
+ return sym->s.package;
+}
+
val make_sym(val name)
{
val obj = make_obj();
obj->s.type = SYM;
obj->s.name = name;
+ obj->s.package = nil;
obj->s.val = nil;
return obj;
}
-val intern(val str)
+val make_package(val name)
+{
+ if (find_package(name))
+ uw_throwf(error, lit("make_package: ~a exists already"), name, nao);
+
+ val obj = make_obj();
+ obj->pk.type = PKG;
+ obj->pk.name = name;
+ obj->pk.symhash = make_hash(nil, nil);
+
+ push(cons(name, obj), &packages);
+ return obj;
+}
+
+val find_package(val name)
+{
+ return cdr(assoc(packages, name));
+}
+
+val intern(val str, val package)
{
- val *place = gethash_l(interned_syms, str);
- if (*place)
+ val new_p;
+
+ if (nullp(package)) {
+ package = user_package;
+ } else if (stringp(package)) {
+ package = find_package(str);
+ if (!package)
+ uw_throwf(error, lit("make_package: ~a exists already"), str, nao);
+ }
+
+ type_check (package, PKG);
+
+ val *place = gethash_l(package->pk.symhash, str, &new_p);
+
+ if (!new_p) {
return *place;
- return *place = make_sym(str);
+ } else {
+ val newsym = make_sym(str);
+ newsym->s.package = package;
+ return *place = newsym;
+ }
}
val symbolp(val sym)
@@ -1080,6 +1129,11 @@ val symbolp(val sym)
return (sym == nil || (is_ptr(sym) && sym->s.type == SYM)) ? t : nil;
}
+val keywordp(val sym)
+{
+ return (symbolp(sym) && symbol_package(sym) == keyword_package) ? t : nil;
+}
+
val func_f0(val env, val (*fun)(val))
{
val obj = make_obj();
@@ -1594,15 +1648,19 @@ val acons_new(val list, val key, val value)
}
}
-val *acons_new_l(val *list, val key)
+val *acons_new_l(val *list, val key, val *new_p)
{
val existing = assoc(*list, key);
if (existing) {
+ if (new_p)
+ *new_p = nil;
return cdr_l(existing);
} else {
val new = cons(key, nil);
*list = cons(new, *list);
+ if (new_p)
+ *new_p = t;
return cdr_l(new);
}
}
@@ -1746,7 +1804,8 @@ static void obj_init(void)
* symbols.
*/
- protect(&interned_syms, &zero, &one,
+ protect(&packages, &system_package, &keyword_package,
+ &user_package, &zero, &one,
&two, &negone, &maxint, &minint,
&null_string, &nil_string,
&null_list, &equal_f,
@@ -1764,79 +1823,88 @@ static void obj_init(void)
maxint = num(NUM_MAX);
minint = num(NUM_MIN);
- interned_syms = make_hash(nil, nil);
-
- *gethash_l(interned_syms, nil_string) = nil;
-
- null = intern(lit("null"));
- t = intern(lit("t"));
- cons_t = intern(lit("cons"));
- str_t = intern(lit("str"));
- chr_t = intern(lit("chr"));
- num_t = intern(lit("num"));
- sym_t = intern(lit("sym"));
- fun_t = intern(lit("fun"));
- vec_t = intern(lit("vec"));
- stream_t = intern(lit("stream"));
- hash_t = intern(lit("hash"));
- lcons_t = intern(lit("lcons"));
- lstr_t = intern(lit("lstr"));
- cobj_t = intern(lit("cobj"));
- var = intern(lit("$var"));
- regex = intern(lit("$regex"));
- set = intern(lit("set"));
- cset = intern(lit("cset"));
- wild = intern(lit("wild"));
- oneplus = intern(lit("1+"));
- zeroplus = intern(lit("0+"));
- optional = intern(lit("?"));
- compound = intern(lit("compound"));
- or = intern(lit("or"));
- quasi = intern(lit("$quasi"));
- skip = intern(lit("skip"));
- trailer = intern(lit("trailer"));
- block = intern(lit("block"));
- next = intern(lit("next"));
- freeform = intern(lit("freeform"));
- fail = intern(lit("fail"));
- accept = intern(lit("accept"));
- all = intern(lit("all"));
- some = intern(lit("some"));
- none = intern(lit("none"));
- maybe = intern(lit("maybe"));
- cases = intern(lit("cases"));
- collect = intern(lit("collect"));
- until = intern(lit("until"));
- coll = intern(lit("coll"));
- define = intern(lit("define"));
- output = intern(lit("output"));
- single = intern(lit("single"));
- frst = intern(lit("first"));
- lst = intern(lit("last"));
- empty = intern(lit("empty"));
- repeat = intern(lit("repeat"));
- rep = intern(lit("rep"));
- flattn = intern(lit("flatten"));
- forget = intern(lit("forget"));
- local = intern(lit("local"));
- mrge = intern(lit("merge"));
- bind = intern(lit("bind"));
- cat = intern(lit("cat"));
- args = intern(lit("args"));
- try = intern(lit("try"));
- catch = intern(lit("catch"));
- finally = intern(lit("finally"));
- nothrow = intern(lit("nothrow"));
- throw = intern(lit("throw"));
- defex = intern(lit("defex"));
- error = intern(lit("error"));
- type_error = intern(lit("type_error"));
- internal_err = intern(lit("internal_error"));
- numeric_err = intern(lit("numeric_error"));
- range_err = intern(lit("range_error"));
- query_error = intern(lit("query_error"));
- file_error = intern(lit("file_error"));
- process_error = intern(lit("process_error"));
+ system_package = make_package(lit("sys"));
+ keyword_package = make_package(lit("keyword"));
+ user_package = make_package(lit("usr"));
+
+ /* nil can't be interned because it's not a SYM object;
+ it works as a symbol because the nil case is handled by
+ symbol-manipulating function. */
+ *gethash_l(user_package->pk.symhash, nil_string, 0) = nil;
+
+ /* t can't be interned, because gethash_l needs t in order to do its job. */
+ t = *gethash_l(user_package->pk.symhash, lit("t"), 0) = make_sym(lit("t"));
+ t->s.package = user_package;
+
+ null = intern(lit("null"), user_package);
+ cons_t = intern(lit("cons"), user_package);
+ str_t = intern(lit("str"), user_package);
+ chr_t = intern(lit("chr"), user_package);
+ num_t = intern(lit("num"), user_package);
+ sym_t = intern(lit("sym"), user_package);
+ pkg_t = intern(lit("pkg"), user_package);
+ fun_t = intern(lit("fun"), user_package);
+ vec_t = intern(lit("vec"), user_package);
+ stream_t = intern(lit("stream"), user_package);
+ hash_t = intern(lit("hash"), user_package);
+ lcons_t = intern(lit("lcons"), user_package);
+ lstr_t = intern(lit("lstr"), user_package);
+ cobj_t = intern(lit("cobj"), user_package);
+ var = intern(lit("var"), system_package);
+ regex = intern(lit("regex"), system_package);
+ set = intern(lit("set"), user_package);
+ cset = intern(lit("cset"), user_package);
+ wild = intern(lit("wild"), user_package);
+ oneplus = intern(lit("1+"), user_package);
+ zeroplus = intern(lit("0+"), user_package);
+ optional = intern(lit("?"), user_package);
+ compound = intern(lit("compound"), user_package);
+ or = intern(lit("or"), user_package);
+ quasi = intern(lit("quasi"), system_package);
+ skip = intern(lit("skip"), user_package);
+ trailer = intern(lit("trailer"), user_package);
+ block = intern(lit("block"), user_package);
+ next = intern(lit("next"), user_package);
+ freeform = intern(lit("freeform"), user_package);
+ fail = intern(lit("fail"), user_package);
+ accept = intern(lit("accept"), user_package);
+ all = intern(lit("all"), user_package);
+ some = intern(lit("some"), user_package);
+ none = intern(lit("none"), user_package);
+ maybe = intern(lit("maybe"), user_package);
+ cases = intern(lit("cases"), user_package);
+ collect = intern(lit("collect"), user_package);
+ until = intern(lit("until"), user_package);
+ coll = intern(lit("coll"), user_package);
+ define = intern(lit("define"), user_package);
+ output = intern(lit("output"), user_package);
+ single = intern(lit("single"), user_package);
+ frst = intern(lit("first"), user_package);
+ lst = intern(lit("last"), user_package);
+ empty = intern(lit("empty"), user_package);
+ repeat = intern(lit("repeat"), user_package);
+ rep = intern(lit("rep"), user_package);
+ flattn = intern(lit("flatten"), user_package);
+ forget = intern(lit("forget"), user_package);
+ local = intern(lit("local"), user_package);
+ mrge = intern(lit("merge"), user_package);
+ bind = intern(lit("bind"), user_package);
+ cat = intern(lit("cat"), user_package);
+ args = intern(lit("args"), user_package);
+ try = intern(lit("try"), user_package);
+ catch = intern(lit("catch"), user_package);
+ finally = intern(lit("finally"), user_package);
+ nothrow = intern(lit("nothrow"), user_package);
+ throw = intern(lit("throw"), user_package);
+ defex = intern(lit("defex"), user_package);
+ error = intern(lit("error"), user_package);
+ type_error = intern(lit("type_error"), user_package);
+ internal_err = intern(lit("internal_error"), user_package);
+ numeric_err = intern(lit("numeric_error"), user_package);
+ range_err = intern(lit("range_error"), user_package);
+ query_error = intern(lit("query_error"), user_package);
+ file_error = intern(lit("file_error"), user_package);
+ process_error = intern(lit("process_error"), user_package);
equal_f = func_f2(nil, equal_tramp);
identity_f = func_f1(nil, identity_tramp);
@@ -1926,8 +1994,18 @@ void obj_print(val obj, val out)
format(out, lit("~s"), obj, nao);
return;
case SYM:
+ if (obj->s.package != user_package) {
+ if (!obj->s.package)
+ put_char(out, chr('#'));
+ else if (obj->s.package != keyword_package)
+ put_string(out, obj->s.package->pk.name);
+ put_char(out, chr(':'));
+ }
put_string(out, symbol_name(obj));
return;
+ case PKG:
+ format(out, lit("#<package: ~s>"), obj->pk.name, nao);
+ return;
case FUN:
format(out, lit("#<function: f~a>"), num(obj->f.functype), nao);
return;
@@ -1995,6 +2073,9 @@ void obj_pprint(val obj, val out)
case SYM:
put_string(out, symbol_name(obj));
return;
+ case PKG:
+ format(out, lit("#<package: ~s>"), obj->pk.name, nao);
+ return;
case FUN:
format(out, lit("#<function: f~a>"), num(obj->f.functype), nao);
return;
diff --git a/lib.h b/lib.h
index 042286c7..71fd4668 100644
--- a/lib.h
+++ b/lib.h
@@ -35,7 +35,7 @@
typedef enum type {
NUM = TAG_NUM, CHR = TAG_CHR, LIT = TAG_LIT, CONS,
- STR, SYM, FUN, VEC, LCONS, LSTR, COBJ
+ STR, SYM, PKG, FUN, VEC, LCONS, LSTR, COBJ
} type_t;
typedef enum functype
@@ -69,9 +69,16 @@ struct string {
struct sym {
type_t type;
val name;
+ val package;
val val;
};
+struct package {
+ type_t type;
+ val name;
+ val symhash;
+};
+
struct func {
type_t type;
functype_t functype;
@@ -146,6 +153,7 @@ union obj {
struct cons c;
struct string st;
struct sym s;
+ struct package pk;
struct func f;
struct vec v;
struct lazy_cons lc;
@@ -182,9 +190,8 @@ inline wchar_t *litptr(val obj)
#define lit_noex(strlit) ((obj_t *) ((long) (L ## strlit) | TAG_LIT))
#define lit(strlit) lit_noex(strlit)
-extern val interned_syms;
-
-extern val t, cons_t, str_t, chr_t, num_t, sym_t, fun_t, vec_t;
+extern val keyword_package;
+extern val t, cons_t, str_t, chr_t, num_t, sym_t, pkg_t, fun_t, vec_t;
extern val stream_t, hash_t, lcons_t, lstr_t, cobj_t;
extern val var, regex, set, cset, wild, oneplus;
extern val zeroplus, optional, compound, or, quasi;
@@ -290,9 +297,12 @@ val chr_str(val str, val index);
val chr_str_set(val str, val index, val chr);
val sym_name(val sym);
val make_sym(val name);
-val intern(val str);
+val find_package(val name);
+val intern(val str, val package);
val symbolp(val sym);
val symbol_name(val sym);
+val symbol_package(val sym);
+val keywordp(val sym);
val func_f0(val, val (*fun)(val));
val func_f1(val, val (*fun)(val, val));
val func_f2(val, val (*fun)(val, val, val));
@@ -329,7 +339,7 @@ val cobj(void *handle, val cls_sym, struct cobj_ops *ops);
void cobj_print_op(val, val); /* Default function for struct cobj_ops */
val assoc(val list, val key);
val acons_new(val list, val key, val value);
-val *acons_new_l(val *list, val key);
+val *acons_new_l(val *list, val key, val *new_p);
val alist_remove(val list, val keys);
val alist_remove1(val list, val key);
val copy_cons(val cons);
diff --git a/match.c b/match.c
index 9c93136d..937d3223 100644
--- a/match.c
+++ b/match.c
@@ -131,21 +131,8 @@ void dump_var(val var, char *pfx1, size_t len1,
if (len1 >= 112 || len2 >= 112)
internal_error("too much depth in bindings");
- if (stringp(value) || chrp(value)) {
- put_string(std_output, var);
- dump_byte_string(pfx1);
- dump_byte_string(pfx2);
- put_char(std_output, chr('='));
- if (stringp(value)) {
- dump_shell_string(c_str(value));
- } else {
- wchar_t mini[2];
- mini[0] = c_chr(value);
- mini[1] = 0;
- dump_shell_string(mini);
- }
- put_char(std_output, chr('\n'));
- } else {
+
+ if (listp(value)) {
val iter;
int i;
size_t add1 = 0, add2 = 0;
@@ -161,6 +148,19 @@ void dump_var(val var, char *pfx1, size_t len1,
dump_var(var, pfx1, len1 + add1, pfx2, len2 + add2, car(iter), level + 1);
}
+ } else {
+ val ss = make_string_output_stream();
+ val str;
+
+ obj_pprint(value, ss);
+ str = get_string_from_stream(ss);
+
+ put_string(std_output, var);
+ dump_byte_string(pfx1);
+ dump_byte_string(pfx2);
+ put_char(std_output, chr('='));
+ dump_shell_string(c_str(str));
+ put_char(std_output, chr('\n'));
}
}
@@ -226,22 +226,28 @@ val map_leaf_lists(val func, val list)
return mapcar(bind2(func_n2(map_leaf_lists), func), list);
}
-val dest_bind(val bindings, val pattern, val value)
+val bindable(val obj)
{
- if (nullp(pattern))
- return bindings;
+ return (obj && symbolp(obj) && obj != t && !keywordp(obj)) ? t : nil;
+}
+val dest_bind(val bindings, val pattern, val value)
+{
if (symbolp(pattern)) {
- val existing = assoc(bindings, pattern);
- if (existing) {
- if (tree_find(value, cdr(existing)))
- return bindings;
- if (tree_find(cdr(existing), value))
- return bindings;
- debugf(lit("bind variable mismatch: ~a"), pattern, nao);
- return t;
+ if (bindable(pattern)) {
+ val existing = assoc(bindings, pattern);
+ if (existing) {
+ if (tree_find(value, cdr(existing)))
+ return bindings;
+ if (tree_find(cdr(existing), value))
+ return bindings;
+ debugf(lit("bind variable mismatch: ~a"), pattern, nao);
+ return t;
+ }
+ return cons(cons(pattern, value), bindings);
+ } else {
+ return equal(pattern, value) ? bindings : t;
}
- return cons(cons(pattern, value), bindings);
} else if (consp(pattern)) {
val piter = pattern, viter = value;
@@ -254,10 +260,12 @@ val dest_bind(val bindings, val pattern, val value)
viter = cdr(viter);
} while (consp(piter) && consp(viter));
- if (symbolp(piter)) {
+ if (bindable(piter)) {
bindings = dest_bind(bindings, piter, viter);
if (bindings == t)
return t;
+ } else {
+ return equal(piter, viter) ? bindings : t;
}
return bindings;
} else if (tree_find(value, pattern)) {
@@ -591,11 +599,11 @@ val subst_vars(val spec, val bindings)
val eval_form(val form, val bindings)
{
- if (!form)
+ if (!form) {
return cons(t, form);
- else if (symbolp(form))
+ } else if (bindable(form)) {
return assoc(bindings, form);
- else if (consp(form)) {
+ } else if (consp(form)) {
if (car(form) == quasi) {
return cons(t, cat_str(subst_vars(rest(form), bindings), nil));
} else if (regexp(car(form))) {
@@ -1307,9 +1315,9 @@ repeat_spec_same_data:
for (iter = rest(first_spec); iter; iter = rest(iter)) {
val sym = first(iter);
- if (!symbolp(sym)) {
- sem_error(spec_linenum, lit("non-symbol in flatten directive"),
- nao);
+ if (!bindable(sym)) {
+ sem_error(spec_linenum,
+ lit("flatten: ~s is not a bindable symbol"), sym, nao);
} else {
val existing = assoc(bindings, sym);
@@ -1334,8 +1342,9 @@ repeat_spec_same_data:
val args = rest(rest(first_spec));
val merged = nil;
- if (!target || !symbolp(target))
- sem_error(spec_linenum, lit("bad merge directive"), nao);
+ if (!bindable(target))
+ sem_error(spec_linenum, lit("~a: ~s is not a bindable symbol"),
+ sym, target, nao);
for (; args; args = rest(args)) {
val arg = first(args);
@@ -1344,8 +1353,8 @@ repeat_spec_same_data:
val arg_eval = eval_form(arg, bindings);
if (!arg_eval)
- sem_error(spec_linenum, lit("merge: unbound variable in form ~a"),
- arg, nao);
+ sem_error(spec_linenum, lit("~a: unbound variable in form ~s"),
+ sym, arg, nao);
if (merged)
merged = weird_merge(merged, cdr(arg_eval));
@@ -1385,8 +1394,9 @@ repeat_spec_same_data:
for (iter = rest(first_spec); iter; iter = rest(iter)) {
val sym = first(iter);
- if (!symbolp(sym)) {
- sem_error(spec_linenum, lit("non-symbol in cat directive"), nao);
+ if (!bindable(sym)) {
+ sem_error(spec_linenum,
+ lit("cat: ~s is not a bindable symbol"), sym, nao);
} else {
val existing = assoc(bindings, sym);
val sep = nil;
diff --git a/parser.l b/parser.l
index 9fd779f5..564f9730 100644
--- a/parser.l
+++ b/parser.l
@@ -91,6 +91,7 @@ void yybadtoken(int tok, const char *context)
switch (tok) {
case TEXT: problem = lit("text"); break;
case IDENT: problem = lit("identifier"); break;
+ case KEYWORD: problem = lit("keyword"); break;
case ALL: problem = lit("\"all\""); break;
case SOME: problem = lit("\"some\""); break;
case NONE: problem = lit("\"none\""); break;
@@ -166,7 +167,7 @@ static wchar_t num_esc(char *num)
%option stack
-TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
+TOK :?[a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+
ID_END [^a-zA-Z0-9_]
NUM_END [^0-9]
WS [\t ]*
@@ -192,14 +193,20 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
long val;
char *errp;
- errno = 0;
-
- val = strtol(yytext, &errp, 10);
if (yy_top_state() == INITIAL
|| yy_top_state() == QSILIT)
yy_pop_state();
+ if (yytext[0] == ':') {
+ yylval.lexeme = utf8_dup_from(yytext + 1);
+ return KEYWORD;
+ }
+
+ errno = 0;
+
+ val = strtol(yytext, &errp, 10);
+
if (*errp != 0) {
/* not a number */
yylval.lexeme = utf8_dup_from(yytext);
diff --git a/parser.y b/parser.y
index 0ababe57..c69c1b33 100644
--- a/parser.y
+++ b/parser.y
@@ -53,7 +53,7 @@ static val parsed_spec;
long num;
}
-%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT
+%token <lexeme> TEXT IDENT KEYWORD ALL SOME NONE MAYBE CASES AND OR END COLLECT
%token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE
%token <lexeme> TRY CATCH FINALLY
%token <num> NUMBER
@@ -380,26 +380,26 @@ rep_parts_opt : SINGLE o_elems_opt2
/* This sucks, but factoring '*' into a nonterminal
* that generates an empty phrase causes reduce/reduce conflicts.
*/
-var : IDENT { $$ = list(var, intern(string_own($1)),
+var : IDENT { $$ = list(var, intern(string_own($1), nil),
nao); }
- | IDENT elem { $$ = list(var, intern(string_own($1)),
+ | IDENT elem { $$ = list(var, intern(string_own($1), nil),
$2, nao); }
- | '{' IDENT '}' { $$ = list(var, intern(string_own($2)),
+ | '{' IDENT '}' { $$ = list(var, intern(string_own($2), nil),
nao); }
- | '{' IDENT '}' elem { $$ = list(var, intern(string_own($2)),
+ | '{' IDENT '}' elem { $$ = list(var, intern(string_own($2), nil),
$4, nao); }
- | '{' IDENT regex '}' { $$ = list(var, intern(string_own($2)),
+ | '{' IDENT regex '}' { $$ = list(var, intern(string_own($2), nil),
nil, cons(regex_compile($3), $3),
nao); }
- | '{' IDENT NUMBER '}' { $$ = list(var, intern(string_own($2)),
+ | '{' IDENT NUMBER '}' { $$ = list(var, intern(string_own($2), nil),
nil, num($3), nao); }
- | var_op IDENT { $$ = list(var, intern(string_own($2)),
+ | var_op IDENT { $$ = list(var, intern(string_own($2), nil),
nil, $1, nao); }
- | var_op IDENT elem { $$ = list(var, intern(string_own($2)),
+ | var_op IDENT elem { $$ = list(var, intern(string_own($2), nil),
$3, $1, nao); }
- | var_op '{' IDENT '}' { $$ = list(var, intern(string_own($3)),
+ | var_op '{' IDENT '}' { $$ = list(var, intern(string_own($3), nil),
nil, $1, nao); }
- | var_op '{' IDENT '}' elem { $$ = list(var, intern(string_own($3)),
+ | var_op '{' IDENT '}' elem { $$ = list(var, intern(string_own($3), nil),
$5, $1, nao); }
| var_op '{' IDENT regex '}' { $$ = nil;
yyerror("longest match "
@@ -428,7 +428,9 @@ exprs : expr { $$ = cons($1, nil); }
| expr '.' expr { $$ = cons($1, $3); }
;
-expr : IDENT { $$ = intern(string_own($1)); }
+expr : IDENT { $$ = intern(string_own($1), nil); }
+ | KEYWORD { $$ = intern(string_own($1),
+ keyword_package); }
| NUMBER { $$ = num($1); }
| list { $$ = $1; }
| regex { $$ = cons(regex_compile($1), $1); }
diff --git a/regex.c b/regex.c
index 767958e8..19bbd785 100644
--- a/regex.c
+++ b/regex.c
@@ -1061,7 +1061,8 @@ val regex_compile(val regex_sexp)
val regexp(val obj)
{
- return (obj->co.type == COBJ && obj->co.cls == regex) ? t : nil;
+ return (is_ptr(obj) && obj->co.type == COBJ && obj->co.cls == regex)
+ ? t : nil;
}
nfa_t *regex_nfa(val reg)
diff --git a/stream.c b/stream.c
index b7945e42..b57a57b1 100644
--- a/stream.c
+++ b/stream.c
@@ -775,7 +775,12 @@ val vformat(val stream, val fmtstr, va_list vl)
} else {
width = digits;
}
- state = (ch == ',') ? vf_precision : vf_spec;
+ if (ch == ',') {
+ state = vf_precision;
+ } else {
+ state = vf_spec;
+ --fmt;
+ }
continue;
case vf_precision:
precision = digits;
@@ -827,7 +832,7 @@ val vformat(val stream, val fmtstr, va_list vl)
if (nump(obj)) {
value = c_num(obj);
sprintf(num_buf, "%ld", value);
- if (vformat_num(stream, num_buf, 0, 0, 0, 0))
+ if (!vformat_num(stream, num_buf, 0, 0, 0, 0))
return nil;
continue;
}
diff --git a/txr.1 b/txr.1
index c37dfff3..7b3459d8 100644
--- a/txr.1
+++ b/txr.1
@@ -508,6 +508,19 @@ If the variable is followed by a regular expression directive,
the extent is determined by finding the closest match for the
regular expression. (See Regular Expressions section below).
+.SS Special Symbols
+
+Just like in the programming language Lisp, the names nil and t cannot be used
+as variables. They always represent themselves, and have many uses, internal to
+the program as well as externally visible. The nil symbol stands for the empty
+list object, an object which marks the end of a list, and boolean false. It is
+synonymous with the syntax () which may be used interchangeably with nil in
+most constructs.
+
+Names whose names begin with the : character are keyword symbols. These also
+may not be used as variables either and stand for themselves. Keywords are
+useful for labeling information and situations.
+
.SS Consecutive Variables
If an unbound variable is followed by another unbound variable, the
@@ -1518,7 +1531,9 @@ instance
will bind the string "ab\tc" (the letter a, b, a tab character, and c)
to the variable A if A is unbound. If A is bound, this will fail unless
-A already contains an identical string.
+A already contains an identical string. However, the right hand side of
+cannot be an unbound variable, nor a complex expression that contains unbound
+variables.
The left hand side of a bind can be a nested list pattern containing variables.
The last item of a list at any nesting level can be preceded by a dot, which
@@ -1536,6 +1551,15 @@ binds H to "how", N to "now", B to "brown" and C to "cow".
The dot notation may be used at any nesting level. it must be preceded and
followed by a symbol: the forms (.) (. X) and (X .) are invalid.
+The number of items in a left pattern match must match the number of items in
+the corresponding right side object. So the pattern () only matches
+an empty list. The notation () and nil means exactly the same thing.
+
+The symbols nil, t and keyword symbols may be used on either side.
+They represent themselves. For example @(bind :foo :bar) fails,
+but @(bind :foo :foo) succeeds since the two sides denote the same
+keyword symbol object.
+
.SH BLOCKS
.SS Introduction
diff --git a/txr.c b/txr.c
index 52edb734..b6b49b9c 100644
--- a/txr.c
+++ b/txr.c
@@ -201,13 +201,15 @@ static int txr_main(int argc, char **argv)
}
list = nreverse(list);
- bindings = cons(cons(intern(string_utf8(var)), list), bindings);
+ bindings = cons(cons(intern(string_utf8(var), nil), list), bindings);
} else if (equals) {
char *pval = equals + 1;
*equals = 0;
- bindings = cons(cons(intern(string_utf8(var)), string_utf8(pval)), bindings);
+ bindings = cons(cons(intern(string_utf8(var), nil),
+ string_utf8(pval)), bindings);
} else {
- bindings = cons(cons(intern(string_utf8(var)), null_string), bindings);
+ bindings = cons(cons(intern(string_utf8(var), nil),
+ null_string), bindings);
}
argc--, argv++;