diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2009-11-21 11:12:20 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2009-11-21 11:12:20 -0800 |
commit | 4a1556a848c5bfb527cecb2b823a750ba63e6f80 (patch) | |
tree | be9378666222056692e4770a8f0eb79b45ef8993 | |
parent | 00f823aee439ed8c2cdd71dfbb89385dc68eae7b (diff) | |
download | txr-4a1556a848c5bfb527cecb2b823a750ba63e6f80.tar.gz txr-4a1556a848c5bfb527cecb2b823a750ba63e6f80.tar.bz2 txr-4a1556a848c5bfb527cecb2b823a750ba63e6f80.zip |
Introducing symbol packages. Internal symbols are now in
a system package instead of being hacked with the $ prefix.
Keyword symbols are provided. In the matcher, evaluation
is tightened up. Keywords, nil and t are not bindeable, and
errors are thrown if attempts are made to bind them.
Destructuring in dest_bind is strict in the number of items.
String streams are exploited to print bindings to objects
that are not strings or characters. Numerous bugfixes.
-rw-r--r-- | ChangeLog | 80 | ||||
-rw-r--r-- | gc.c | 7 | ||||
-rw-r--r-- | hash.c | 5 | ||||
-rw-r--r-- | hash.h | 2 | ||||
-rw-r--r-- | lib.c | 243 | ||||
-rw-r--r-- | lib.h | 22 | ||||
-rw-r--r-- | match.c | 90 | ||||
-rw-r--r-- | parser.l | 15 | ||||
-rw-r--r-- | parser.y | 26 | ||||
-rw-r--r-- | regex.c | 3 | ||||
-rw-r--r-- | stream.c | 9 | ||||
-rw-r--r-- | txr.1 | 26 | ||||
-rw-r--r-- | txr.c | 8 |
13 files changed, 382 insertions, 154 deletions
@@ -1,3 +1,83 @@ +2009-11-21 Kaz Kylheku <kkylheku@gmail.com> + + Introducing symbol packages. Internal symbols are now in + a system package instead of being hacked with the $ prefix. + Keyword symbols are provided. In the matcher, evaluation + is tightened up. Keywords, nil and t are not bindeable, and + errors are thrown if attempts are made to bind them. + Destructuring in dest_bind is strict in the number of items. + String streams are exploited to print bindings to objects + that are not strings or characters. Numerous bugfixes. + + * lib.h (enum type, type_t): new member: PKG. + (struct stym): New member: package. + (struct package): New type. + (union obj, obj_t): New member pk. + (interned_syms): Declaration removed. + (keyword_package, pkg_t): Declared. + (intern, acons_new_l): Declarations updated. + (find_package, symbol_package, keywordp): Declared. + + * lib.c (interned_syms): Definition removed. + (packages, pkg_t, system_package, keyword_package, user_package): New + global variables. + (code2type, equal, obj_pprint): Handle PKG case. + (symbol_package, make_package, find_package, keywordp): New functions. + (make_sym): Initialize package field of symbol. + (intern): Takes package argument. Rewritten using packages, + which use hash tables to store symbols. + (acons_new_l): Takes extra pointer argument to return an extra + value. + (obj_init): Updated to handle packages. The orders of some + initializations have to change. The way nil is added as a symbol is + quite different, and a special hack for the symbol t is used. + Most symbols go into the user_package, but symbols that were + previously namespaced with $ go to the system package. + (obj_print): SYM cases now considers the packge of a symbol. + Symbols in the user package are printed as before. + Symbols with no package are printed using #: notation; + keywords with : notation; and all others with their package prefix. + PKG case is handled. + + * gc.c (finalize): Handle PKG case. + (mark_obj): For SYM, mark the new package member. Handle PKG case. + + * hash.h (gethash_l): Declaration updated. + + * hash.c (ll_hash): Handle PKG case. + (gethash_l): Extra argument added to distinguish new addition + from existing find. + + * match.c (dump_var): Dumps any object now by printing to + a string with a string stream. + (bindable): New function. + (dest_bind): Tightened up to distinguish bindable symbols + from non-bindable. Symbols that stand for themselves, including nil, + can only match themselves. Destructuring matches have to + match in the number of elements: dot notation can be used + to match superfluous elements. + (eval_form): Tightened up to recognize bindable symbols. + (match_files): Various directives honor non-bindable symbols (cat, + merge, flatten). + + * parser.l (yybadtoken): Handle KEYWORD case. + (grammar): TOK can start with : . Returned as KEYWORD terminal, + with a lexeme that no longer has the : character. + + * parser.y (KEYWORD): New nonterminal. + (grammar): Calls to intern given extra parameter. + In the expr rule, KEYWORD turned into symbol in keyword package. + + * regex.c (regexp): Bugfix: dereferencing non pointer. + + * stream.c (vformat): Bugfixes in state machine: handling + of prefix digits; printing of numbers in ~s. + + * txr.c (txr_main): Intern calls updated. + + * txr.1: Updated with information about nil, t and keywords. + More details about destructuring matching in bind. + 2009-11-20 Kaz Kylheku <kkylheku@gmail.com> * unwind.c (uw_throw): If streams are not initialized, @@ -160,6 +160,7 @@ static void finalize(val obj) case NUM: case LIT: case SYM: + case PKG: case FUN: return; case VEC: @@ -216,7 +217,11 @@ tail_call: return; case SYM: mark_obj(obj->s.name); - mark_obj_tail(obj->s.val); + mark_obj(obj->s.val); + mark_obj_tail(obj->s.package); + case PKG: + mark_obj(obj->pk.name); + mark_obj_tail(obj->pk.symhash); case FUN: mark_obj(obj->f.env); if (obj->f.functype == FINTERP) @@ -93,6 +93,7 @@ static long ll_hash(val obj) case NUM: return c_num(obj) & NUM_MAX; case SYM: + case PKG: return ((long) obj) & NUM_MAX; case FUN: return ((long) obj->f.f.interp_fun + ll_hash(obj->f.env)) & NUM_MAX; @@ -238,12 +239,12 @@ val make_hash(val weak_keys, val weak_vals) return hash; } -val *gethash_l(val hash, val key) +val *gethash_l(val hash, val key, val *new_p) { struct hash *h = (struct hash *) hash->co.handle; val *pchain = vecref_l(h->table, num(ll_hash(key) % h->modulus)); val old = *pchain; - val *place = acons_new_l(pchain, key); + val *place = acons_new_l(pchain, key, new_p); if (old != *pchain && ++h->count > 2 * h->modulus) hash_grow(h); return place; @@ -26,7 +26,7 @@ val hash_obj(val); val make_hash(val weak_keys, val weak_vals); -val *gethash_l(val hash, val key); +val *gethash_l(val hash, val key, val *new_p); val gethash(val hash, val key); val remhash(val hash, val key); void hash_process_weak(void); @@ -44,9 +44,11 @@ #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) -val interned_syms; +val packages; -val null, t, cons_t, str_t, chr_t, num_t, sym_t, fun_t, vec_t; +val system_package, keyword_package, user_package; + +val null, t, cons_t, str_t, chr_t, num_t, sym_t, pkg_t, fun_t, vec_t; val stream_t, hash_t, lcons_t, lstr_t, cobj_t; val var, regex, set, cset, wild, oneplus; val zeroplus, optional, compound, or, quasi; @@ -93,6 +95,7 @@ static val code2type(int code) case CHR: return chr_t; case NUM: return num_t; case SYM: return sym_t; + case PKG: return pkg_t; case FUN: return fun_t; case VEC: return vec_t; case LCONS: return lcons_t; @@ -461,6 +464,7 @@ val equal(val left, val right) } return nil; case SYM: + case PKG: return right == left ? t : nil; case FUN: if (type(right) == FUN && @@ -1058,21 +1062,66 @@ val symbol_name(val sym) return sym ? sym->s.name : nil_string; } +val symbol_package(val sym) +{ + if (sym == nil) + return user_package; + type_check(sym, SYM); + return sym->s.package; +} + val make_sym(val name) { val obj = make_obj(); obj->s.type = SYM; obj->s.name = name; + obj->s.package = nil; obj->s.val = nil; return obj; } -val intern(val str) +val make_package(val name) +{ + if (find_package(name)) + uw_throwf(error, lit("make_package: ~a exists already"), name, nao); + + val obj = make_obj(); + obj->pk.type = PKG; + obj->pk.name = name; + obj->pk.symhash = make_hash(nil, nil); + + push(cons(name, obj), &packages); + return obj; +} + +val find_package(val name) +{ + return cdr(assoc(packages, name)); +} + +val intern(val str, val package) { - val *place = gethash_l(interned_syms, str); - if (*place) + val new_p; + + if (nullp(package)) { + package = user_package; + } else if (stringp(package)) { + package = find_package(str); + if (!package) + uw_throwf(error, lit("make_package: ~a exists already"), str, nao); + } + + type_check (package, PKG); + + val *place = gethash_l(package->pk.symhash, str, &new_p); + + if (!new_p) { return *place; - return *place = make_sym(str); + } else { + val newsym = make_sym(str); + newsym->s.package = package; + return *place = newsym; + } } val symbolp(val sym) @@ -1080,6 +1129,11 @@ val symbolp(val sym) return (sym == nil || (is_ptr(sym) && sym->s.type == SYM)) ? t : nil; } +val keywordp(val sym) +{ + return (symbolp(sym) && symbol_package(sym) == keyword_package) ? t : nil; +} + val func_f0(val env, val (*fun)(val)) { val obj = make_obj(); @@ -1594,15 +1648,19 @@ val acons_new(val list, val key, val value) } } -val *acons_new_l(val *list, val key) +val *acons_new_l(val *list, val key, val *new_p) { val existing = assoc(*list, key); if (existing) { + if (new_p) + *new_p = nil; return cdr_l(existing); } else { val new = cons(key, nil); *list = cons(new, *list); + if (new_p) + *new_p = t; return cdr_l(new); } } @@ -1746,7 +1804,8 @@ static void obj_init(void) * symbols. */ - protect(&interned_syms, &zero, &one, + protect(&packages, &system_package, &keyword_package, + &user_package, &zero, &one, &two, &negone, &maxint, &minint, &null_string, &nil_string, &null_list, &equal_f, @@ -1764,79 +1823,88 @@ static void obj_init(void) maxint = num(NUM_MAX); minint = num(NUM_MIN); - interned_syms = make_hash(nil, nil); - - *gethash_l(interned_syms, nil_string) = nil; - - null = intern(lit("null")); - t = intern(lit("t")); - cons_t = intern(lit("cons")); - str_t = intern(lit("str")); - chr_t = intern(lit("chr")); - num_t = intern(lit("num")); - sym_t = intern(lit("sym")); - fun_t = intern(lit("fun")); - vec_t = intern(lit("vec")); - stream_t = intern(lit("stream")); - hash_t = intern(lit("hash")); - lcons_t = intern(lit("lcons")); - lstr_t = intern(lit("lstr")); - cobj_t = intern(lit("cobj")); - var = intern(lit("$var")); - regex = intern(lit("$regex")); - set = intern(lit("set")); - cset = intern(lit("cset")); - wild = intern(lit("wild")); - oneplus = intern(lit("1+")); - zeroplus = intern(lit("0+")); - optional = intern(lit("?")); - compound = intern(lit("compound")); - or = intern(lit("or")); - quasi = intern(lit("$quasi")); - skip = intern(lit("skip")); - trailer = intern(lit("trailer")); - block = intern(lit("block")); - next = intern(lit("next")); - freeform = intern(lit("freeform")); - fail = intern(lit("fail")); - accept = intern(lit("accept")); - all = intern(lit("all")); - some = intern(lit("some")); - none = intern(lit("none")); - maybe = intern(lit("maybe")); - cases = intern(lit("cases")); - collect = intern(lit("collect")); - until = intern(lit("until")); - coll = intern(lit("coll")); - define = intern(lit("define")); - output = intern(lit("output")); - single = intern(lit("single")); - frst = intern(lit("first")); - lst = intern(lit("last")); - empty = intern(lit("empty")); - repeat = intern(lit("repeat")); - rep = intern(lit("rep")); - flattn = intern(lit("flatten")); - forget = intern(lit("forget")); - local = intern(lit("local")); - mrge = intern(lit("merge")); - bind = intern(lit("bind")); - cat = intern(lit("cat")); - args = intern(lit("args")); - try = intern(lit("try")); - catch = intern(lit("catch")); - finally = intern(lit("finally")); - nothrow = intern(lit("nothrow")); - throw = intern(lit("throw")); - defex = intern(lit("defex")); - error = intern(lit("error")); - type_error = intern(lit("type_error")); - internal_err = intern(lit("internal_error")); - numeric_err = intern(lit("numeric_error")); - range_err = intern(lit("range_error")); - query_error = intern(lit("query_error")); - file_error = intern(lit("file_error")); - process_error = intern(lit("process_error")); + system_package = make_package(lit("sys")); + keyword_package = make_package(lit("keyword")); + user_package = make_package(lit("usr")); + + /* nil can't be interned because it's not a SYM object; + it works as a symbol because the nil case is handled by + symbol-manipulating function. */ + *gethash_l(user_package->pk.symhash, nil_string, 0) = nil; + + /* t can't be interned, because gethash_l needs t in order to do its job. */ + t = *gethash_l(user_package->pk.symhash, lit("t"), 0) = make_sym(lit("t")); + t->s.package = user_package; + + null = intern(lit("null"), user_package); + cons_t = intern(lit("cons"), user_package); + str_t = intern(lit("str"), user_package); + chr_t = intern(lit("chr"), user_package); + num_t = intern(lit("num"), user_package); + sym_t = intern(lit("sym"), user_package); + pkg_t = intern(lit("pkg"), user_package); + fun_t = intern(lit("fun"), user_package); + vec_t = intern(lit("vec"), user_package); + stream_t = intern(lit("stream"), user_package); + hash_t = intern(lit("hash"), user_package); + lcons_t = intern(lit("lcons"), user_package); + lstr_t = intern(lit("lstr"), user_package); + cobj_t = intern(lit("cobj"), user_package); + var = intern(lit("var"), system_package); + regex = intern(lit("regex"), system_package); + set = intern(lit("set"), user_package); + cset = intern(lit("cset"), user_package); + wild = intern(lit("wild"), user_package); + oneplus = intern(lit("1+"), user_package); + zeroplus = intern(lit("0+"), user_package); + optional = intern(lit("?"), user_package); + compound = intern(lit("compound"), user_package); + or = intern(lit("or"), user_package); + quasi = intern(lit("quasi"), system_package); + skip = intern(lit("skip"), user_package); + trailer = intern(lit("trailer"), user_package); + block = intern(lit("block"), user_package); + next = intern(lit("next"), user_package); + freeform = intern(lit("freeform"), user_package); + fail = intern(lit("fail"), user_package); + accept = intern(lit("accept"), user_package); + all = intern(lit("all"), user_package); + some = intern(lit("some"), user_package); + none = intern(lit("none"), user_package); + maybe = intern(lit("maybe"), user_package); + cases = intern(lit("cases"), user_package); + collect = intern(lit("collect"), user_package); + until = intern(lit("until"), user_package); + coll = intern(lit("coll"), user_package); + define = intern(lit("define"), user_package); + output = intern(lit("output"), user_package); + single = intern(lit("single"), user_package); + frst = intern(lit("first"), user_package); + lst = intern(lit("last"), user_package); + empty = intern(lit("empty"), user_package); + repeat = intern(lit("repeat"), user_package); + rep = intern(lit("rep"), user_package); + flattn = intern(lit("flatten"), user_package); + forget = intern(lit("forget"), user_package); + local = intern(lit("local"), user_package); + mrge = intern(lit("merge"), user_package); + bind = intern(lit("bind"), user_package); + cat = intern(lit("cat"), user_package); + args = intern(lit("args"), user_package); + try = intern(lit("try"), user_package); + catch = intern(lit("catch"), user_package); + finally = intern(lit("finally"), user_package); + nothrow = intern(lit("nothrow"), user_package); + throw = intern(lit("throw"), user_package); + defex = intern(lit("defex"), user_package); + error = intern(lit("error"), user_package); + type_error = intern(lit("type_error"), user_package); + internal_err = intern(lit("internal_error"), user_package); + numeric_err = intern(lit("numeric_error"), user_package); + range_err = intern(lit("range_error"), user_package); + query_error = intern(lit("query_error"), user_package); + file_error = intern(lit("file_error"), user_package); + process_error = intern(lit("process_error"), user_package); equal_f = func_f2(nil, equal_tramp); identity_f = func_f1(nil, identity_tramp); @@ -1926,8 +1994,18 @@ void obj_print(val obj, val out) format(out, lit("~s"), obj, nao); return; case SYM: + if (obj->s.package != user_package) { + if (!obj->s.package) + put_char(out, chr('#')); + else if (obj->s.package != keyword_package) + put_string(out, obj->s.package->pk.name); + put_char(out, chr(':')); + } put_string(out, symbol_name(obj)); return; + case PKG: + format(out, lit("#<package: ~s>"), obj->pk.name, nao); + return; case FUN: format(out, lit("#<function: f~a>"), num(obj->f.functype), nao); return; @@ -1995,6 +2073,9 @@ void obj_pprint(val obj, val out) case SYM: put_string(out, symbol_name(obj)); return; + case PKG: + format(out, lit("#<package: ~s>"), obj->pk.name, nao); + return; case FUN: format(out, lit("#<function: f~a>"), num(obj->f.functype), nao); return; @@ -35,7 +35,7 @@ typedef enum type { NUM = TAG_NUM, CHR = TAG_CHR, LIT = TAG_LIT, CONS, - STR, SYM, FUN, VEC, LCONS, LSTR, COBJ + STR, SYM, PKG, FUN, VEC, LCONS, LSTR, COBJ } type_t; typedef enum functype @@ -69,9 +69,16 @@ struct string { struct sym { type_t type; val name; + val package; val val; }; +struct package { + type_t type; + val name; + val symhash; +}; + struct func { type_t type; functype_t functype; @@ -146,6 +153,7 @@ union obj { struct cons c; struct string st; struct sym s; + struct package pk; struct func f; struct vec v; struct lazy_cons lc; @@ -182,9 +190,8 @@ inline wchar_t *litptr(val obj) #define lit_noex(strlit) ((obj_t *) ((long) (L ## strlit) | TAG_LIT)) #define lit(strlit) lit_noex(strlit) -extern val interned_syms; - -extern val t, cons_t, str_t, chr_t, num_t, sym_t, fun_t, vec_t; +extern val keyword_package; +extern val t, cons_t, str_t, chr_t, num_t, sym_t, pkg_t, fun_t, vec_t; extern val stream_t, hash_t, lcons_t, lstr_t, cobj_t; extern val var, regex, set, cset, wild, oneplus; extern val zeroplus, optional, compound, or, quasi; @@ -290,9 +297,12 @@ val chr_str(val str, val index); val chr_str_set(val str, val index, val chr); val sym_name(val sym); val make_sym(val name); -val intern(val str); +val find_package(val name); +val intern(val str, val package); val symbolp(val sym); val symbol_name(val sym); +val symbol_package(val sym); +val keywordp(val sym); val func_f0(val, val (*fun)(val)); val func_f1(val, val (*fun)(val, val)); val func_f2(val, val (*fun)(val, val, val)); @@ -329,7 +339,7 @@ val cobj(void *handle, val cls_sym, struct cobj_ops *ops); void cobj_print_op(val, val); /* Default function for struct cobj_ops */ val assoc(val list, val key); val acons_new(val list, val key, val value); -val *acons_new_l(val *list, val key); +val *acons_new_l(val *list, val key, val *new_p); val alist_remove(val list, val keys); val alist_remove1(val list, val key); val copy_cons(val cons); @@ -131,21 +131,8 @@ void dump_var(val var, char *pfx1, size_t len1, if (len1 >= 112 || len2 >= 112) internal_error("too much depth in bindings"); - if (stringp(value) || chrp(value)) { - put_string(std_output, var); - dump_byte_string(pfx1); - dump_byte_string(pfx2); - put_char(std_output, chr('=')); - if (stringp(value)) { - dump_shell_string(c_str(value)); - } else { - wchar_t mini[2]; - mini[0] = c_chr(value); - mini[1] = 0; - dump_shell_string(mini); - } - put_char(std_output, chr('\n')); - } else { + + if (listp(value)) { val iter; int i; size_t add1 = 0, add2 = 0; @@ -161,6 +148,19 @@ void dump_var(val var, char *pfx1, size_t len1, dump_var(var, pfx1, len1 + add1, pfx2, len2 + add2, car(iter), level + 1); } + } else { + val ss = make_string_output_stream(); + val str; + + obj_pprint(value, ss); + str = get_string_from_stream(ss); + + put_string(std_output, var); + dump_byte_string(pfx1); + dump_byte_string(pfx2); + put_char(std_output, chr('=')); + dump_shell_string(c_str(str)); + put_char(std_output, chr('\n')); } } @@ -226,22 +226,28 @@ val map_leaf_lists(val func, val list) return mapcar(bind2(func_n2(map_leaf_lists), func), list); } -val dest_bind(val bindings, val pattern, val value) +val bindable(val obj) { - if (nullp(pattern)) - return bindings; + return (obj && symbolp(obj) && obj != t && !keywordp(obj)) ? t : nil; +} +val dest_bind(val bindings, val pattern, val value) +{ if (symbolp(pattern)) { - val existing = assoc(bindings, pattern); - if (existing) { - if (tree_find(value, cdr(existing))) - return bindings; - if (tree_find(cdr(existing), value)) - return bindings; - debugf(lit("bind variable mismatch: ~a"), pattern, nao); - return t; + if (bindable(pattern)) { + val existing = assoc(bindings, pattern); + if (existing) { + if (tree_find(value, cdr(existing))) + return bindings; + if (tree_find(cdr(existing), value)) + return bindings; + debugf(lit("bind variable mismatch: ~a"), pattern, nao); + return t; + } + return cons(cons(pattern, value), bindings); + } else { + return equal(pattern, value) ? bindings : t; } - return cons(cons(pattern, value), bindings); } else if (consp(pattern)) { val piter = pattern, viter = value; @@ -254,10 +260,12 @@ val dest_bind(val bindings, val pattern, val value) viter = cdr(viter); } while (consp(piter) && consp(viter)); - if (symbolp(piter)) { + if (bindable(piter)) { bindings = dest_bind(bindings, piter, viter); if (bindings == t) return t; + } else { + return equal(piter, viter) ? bindings : t; } return bindings; } else if (tree_find(value, pattern)) { @@ -591,11 +599,11 @@ val subst_vars(val spec, val bindings) val eval_form(val form, val bindings) { - if (!form) + if (!form) { return cons(t, form); - else if (symbolp(form)) + } else if (bindable(form)) { return assoc(bindings, form); - else if (consp(form)) { + } else if (consp(form)) { if (car(form) == quasi) { return cons(t, cat_str(subst_vars(rest(form), bindings), nil)); } else if (regexp(car(form))) { @@ -1307,9 +1315,9 @@ repeat_spec_same_data: for (iter = rest(first_spec); iter; iter = rest(iter)) { val sym = first(iter); - if (!symbolp(sym)) { - sem_error(spec_linenum, lit("non-symbol in flatten directive"), - nao); + if (!bindable(sym)) { + sem_error(spec_linenum, + lit("flatten: ~s is not a bindable symbol"), sym, nao); } else { val existing = assoc(bindings, sym); @@ -1334,8 +1342,9 @@ repeat_spec_same_data: val args = rest(rest(first_spec)); val merged = nil; - if (!target || !symbolp(target)) - sem_error(spec_linenum, lit("bad merge directive"), nao); + if (!bindable(target)) + sem_error(spec_linenum, lit("~a: ~s is not a bindable symbol"), + sym, target, nao); for (; args; args = rest(args)) { val arg = first(args); @@ -1344,8 +1353,8 @@ repeat_spec_same_data: val arg_eval = eval_form(arg, bindings); if (!arg_eval) - sem_error(spec_linenum, lit("merge: unbound variable in form ~a"), - arg, nao); + sem_error(spec_linenum, lit("~a: unbound variable in form ~s"), + sym, arg, nao); if (merged) merged = weird_merge(merged, cdr(arg_eval)); @@ -1385,8 +1394,9 @@ repeat_spec_same_data: for (iter = rest(first_spec); iter; iter = rest(iter)) { val sym = first(iter); - if (!symbolp(sym)) { - sem_error(spec_linenum, lit("non-symbol in cat directive"), nao); + if (!bindable(sym)) { + sem_error(spec_linenum, + lit("cat: ~s is not a bindable symbol"), sym, nao); } else { val existing = assoc(bindings, sym); val sep = nil; @@ -91,6 +91,7 @@ void yybadtoken(int tok, const char *context) switch (tok) { case TEXT: problem = lit("text"); break; case IDENT: problem = lit("identifier"); break; + case KEYWORD: problem = lit("keyword"); break; case ALL: problem = lit("\"all\""); break; case SOME: problem = lit("\"some\""); break; case NONE: problem = lit("\"none\""); break; @@ -166,7 +167,7 @@ static wchar_t num_esc(char *num) %option stack -TOK [a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ +TOK :?[a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ ID_END [^a-zA-Z0-9_] NUM_END [^0-9] WS [\t ]* @@ -192,14 +193,20 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} long val; char *errp; - errno = 0; - - val = strtol(yytext, &errp, 10); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); + if (yytext[0] == ':') { + yylval.lexeme = utf8_dup_from(yytext + 1); + return KEYWORD; + } + + errno = 0; + + val = strtol(yytext, &errp, 10); + if (*errp != 0) { /* not a number */ yylval.lexeme = utf8_dup_from(yytext); @@ -53,7 +53,7 @@ static val parsed_spec; long num; } -%token <lexeme> TEXT IDENT ALL SOME NONE MAYBE CASES AND OR END COLLECT +%token <lexeme> TEXT IDENT KEYWORD ALL SOME NONE MAYBE CASES AND OR END COLLECT %token <lexeme> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token <lexeme> TRY CATCH FINALLY %token <num> NUMBER @@ -380,26 +380,26 @@ rep_parts_opt : SINGLE o_elems_opt2 /* This sucks, but factoring '*' into a nonterminal * that generates an empty phrase causes reduce/reduce conflicts. */ -var : IDENT { $$ = list(var, intern(string_own($1)), +var : IDENT { $$ = list(var, intern(string_own($1), nil), nao); } - | IDENT elem { $$ = list(var, intern(string_own($1)), + | IDENT elem { $$ = list(var, intern(string_own($1), nil), $2, nao); } - | '{' IDENT '}' { $$ = list(var, intern(string_own($2)), + | '{' IDENT '}' { $$ = list(var, intern(string_own($2), nil), nao); } - | '{' IDENT '}' elem { $$ = list(var, intern(string_own($2)), + | '{' IDENT '}' elem { $$ = list(var, intern(string_own($2), nil), $4, nao); } - | '{' IDENT regex '}' { $$ = list(var, intern(string_own($2)), + | '{' IDENT regex '}' { $$ = list(var, intern(string_own($2), nil), nil, cons(regex_compile($3), $3), nao); } - | '{' IDENT NUMBER '}' { $$ = list(var, intern(string_own($2)), + | '{' IDENT NUMBER '}' { $$ = list(var, intern(string_own($2), nil), nil, num($3), nao); } - | var_op IDENT { $$ = list(var, intern(string_own($2)), + | var_op IDENT { $$ = list(var, intern(string_own($2), nil), nil, $1, nao); } - | var_op IDENT elem { $$ = list(var, intern(string_own($2)), + | var_op IDENT elem { $$ = list(var, intern(string_own($2), nil), $3, $1, nao); } - | var_op '{' IDENT '}' { $$ = list(var, intern(string_own($3)), + | var_op '{' IDENT '}' { $$ = list(var, intern(string_own($3), nil), nil, $1, nao); } - | var_op '{' IDENT '}' elem { $$ = list(var, intern(string_own($3)), + | var_op '{' IDENT '}' elem { $$ = list(var, intern(string_own($3), nil), $5, $1, nao); } | var_op '{' IDENT regex '}' { $$ = nil; yyerror("longest match " @@ -428,7 +428,9 @@ exprs : expr { $$ = cons($1, nil); } | expr '.' expr { $$ = cons($1, $3); } ; -expr : IDENT { $$ = intern(string_own($1)); } +expr : IDENT { $$ = intern(string_own($1), nil); } + | KEYWORD { $$ = intern(string_own($1), + keyword_package); } | NUMBER { $$ = num($1); } | list { $$ = $1; } | regex { $$ = cons(regex_compile($1), $1); } @@ -1061,7 +1061,8 @@ val regex_compile(val regex_sexp) val regexp(val obj) { - return (obj->co.type == COBJ && obj->co.cls == regex) ? t : nil; + return (is_ptr(obj) && obj->co.type == COBJ && obj->co.cls == regex) + ? t : nil; } nfa_t *regex_nfa(val reg) @@ -775,7 +775,12 @@ val vformat(val stream, val fmtstr, va_list vl) } else { width = digits; } - state = (ch == ',') ? vf_precision : vf_spec; + if (ch == ',') { + state = vf_precision; + } else { + state = vf_spec; + --fmt; + } continue; case vf_precision: precision = digits; @@ -827,7 +832,7 @@ val vformat(val stream, val fmtstr, va_list vl) if (nump(obj)) { value = c_num(obj); sprintf(num_buf, "%ld", value); - if (vformat_num(stream, num_buf, 0, 0, 0, 0)) + if (!vformat_num(stream, num_buf, 0, 0, 0, 0)) return nil; continue; } @@ -508,6 +508,19 @@ If the variable is followed by a regular expression directive, the extent is determined by finding the closest match for the regular expression. (See Regular Expressions section below). +.SS Special Symbols + +Just like in the programming language Lisp, the names nil and t cannot be used +as variables. They always represent themselves, and have many uses, internal to +the program as well as externally visible. The nil symbol stands for the empty +list object, an object which marks the end of a list, and boolean false. It is +synonymous with the syntax () which may be used interchangeably with nil in +most constructs. + +Names whose names begin with the : character are keyword symbols. These also +may not be used as variables either and stand for themselves. Keywords are +useful for labeling information and situations. + .SS Consecutive Variables If an unbound variable is followed by another unbound variable, the @@ -1518,7 +1531,9 @@ instance will bind the string "ab\tc" (the letter a, b, a tab character, and c) to the variable A if A is unbound. If A is bound, this will fail unless -A already contains an identical string. +A already contains an identical string. However, the right hand side of +cannot be an unbound variable, nor a complex expression that contains unbound +variables. The left hand side of a bind can be a nested list pattern containing variables. The last item of a list at any nesting level can be preceded by a dot, which @@ -1536,6 +1551,15 @@ binds H to "how", N to "now", B to "brown" and C to "cow". The dot notation may be used at any nesting level. it must be preceded and followed by a symbol: the forms (.) (. X) and (X .) are invalid. +The number of items in a left pattern match must match the number of items in +the corresponding right side object. So the pattern () only matches +an empty list. The notation () and nil means exactly the same thing. + +The symbols nil, t and keyword symbols may be used on either side. +They represent themselves. For example @(bind :foo :bar) fails, +but @(bind :foo :foo) succeeds since the two sides denote the same +keyword symbol object. + .SH BLOCKS .SS Introduction @@ -201,13 +201,15 @@ static int txr_main(int argc, char **argv) } list = nreverse(list); - bindings = cons(cons(intern(string_utf8(var)), list), bindings); + bindings = cons(cons(intern(string_utf8(var), nil), list), bindings); } else if (equals) { char *pval = equals + 1; *equals = 0; - bindings = cons(cons(intern(string_utf8(var)), string_utf8(pval)), bindings); + bindings = cons(cons(intern(string_utf8(var), nil), + string_utf8(pval)), bindings); } else { - bindings = cons(cons(intern(string_utf8(var)), null_string), bindings); + bindings = cons(cons(intern(string_utf8(var), nil), + null_string), bindings); } argc--, argv++; |