diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2011-09-26 11:20:08 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2011-09-26 11:20:08 -0700 |
commit | f3fe0151fc4116f0fec40e7454d8866f11908560 (patch) | |
tree | af1a4cee2e429f295b26587ee55463917860b341 | |
parent | d8e2644d5ebb3783660b2c9cb7e1d09dac2230f1 (diff) | |
download | txr-f3fe0151fc4116f0fec40e7454d8866f11908560.tar.gz txr-f3fe0151fc4116f0fec40e7454d8866f11908560.tar.bz2 txr-f3fe0151fc4116f0fec40e7454d8866f11908560.zip |
Trie compression. Hash table iteration.
Bugfix in typeof.
* filter.c (trie_compress): New function.
(trie_value_at, trie_lookup_feed_char, filter_string): Handle cons cell
nodes in trie.
(build_filter): Call trie_compress.
* gc.c (cobj_destroy_op): Function renamed to cobj_destroy_stub_op
since it doesn't do anything.
(cobj_destroy_free_op): New function.
* hash.c (struct hash_iter): New type.
(hash_destroy): Function removed.
(hash_ops): Reference to hash_destroy replaced with
cobj_destroy_free_op.
(hash_count, hash_iter_mark, hash_begin, hash_next): New functions.
(hash_iter_ops): New static structure.
* hash.h (hash_count, hash_begin, hash_next): New functions declared.
* lib.c (hash_iter_s): New symbol variable.
(typeof): Bugfix: TAG_LIT type tag not handled.
(vecref): New function.
(obj_init): Initialize hash_iter_s.
* lib.h (cobj_destroy_op): Declaration renamed.
(cobj_destroy_free_op, vecref): New functions declared.
(hash_iter_s): New variable declared.
* stream.c (string_in_ops, byte_in_ops): cobj_destroy_op
renamed to cobj_destroy_stub_op.
-rw-r--r-- | ChangeLog | 30 | ||||
-rw-r--r-- | filter.c | 55 | ||||
-rw-r--r-- | gc.c | 7 | ||||
-rw-r--r-- | hash.c | 60 | ||||
-rw-r--r-- | hash.h | 3 | ||||
-rw-r--r-- | lib.c | 11 | ||||
-rw-r--r-- | lib.h | 6 | ||||
-rw-r--r-- | stream.c | 4 |
8 files changed, 160 insertions, 16 deletions
@@ -1,3 +1,33 @@ +2011-09-26 Kaz Kylheku <kaz@kylheku.com> + + Trie compression. Hash table iteration. + Bugfix in typeof. + + * filter.c (trie_compress): New function. + (trie_value_at, trie_lookup_feed_char, filter_string): Handle cons cell + nodes in trie. + (build_filter): Call trie_compress. + + * gc.c (cobj_destroy_op): Function renamed to cobj_destroy_stub_op + since it doesn't do anything. + (cobj_destroy_free_op): New function. + * hash.c (struct hash_iter): New type. + (hash_destroy): Function removed. + (hash_ops): Reference to hash_destroy replaced with + cobj_destroy_free_op. + (hash_count, hash_iter_mark, hash_begin, hash_next): New functions. + (hash_iter_ops): New static structure. + * hash.h (hash_count, hash_begin, hash_next): New functions declared. + * lib.c (hash_iter_s): New symbol variable. + (typeof): Bugfix: TAG_LIT type tag not handled. + (vecref): New function. + (obj_init): Initialize hash_iter_s. + * lib.h (cobj_destroy_op): Declaration renamed. + (cobj_destroy_free_op, vecref): New functions declared. + (hash_iter_s): New variable declared. + * stream.c (string_in_ops, byte_in_ops): cobj_destroy_op + renamed to cobj_destroy_stub_op. + 2011-09-25 Kaz Kylheku <kaz@kylheku.com> Filtering from HTML implemented. @@ -54,6 +54,44 @@ static val trie_add(val trie, val key, val value) return node; } +/* + * Reduce the storage requirement for a the trie, by applying + * these rules: + * + * 1. All leaf-level nodes (i.e. empty hash tables) are replaced by + * just the node value (hash user data). + * 2. All hash tables with a single transition (i.e. hash tables + * containing one element) and which have no node value + * are replaced by a cons cell, whose CAR is the transition + * character, and whose CDR is the transition. + */ + +static void trie_compress(val *ptrie) +{ + val trie = *ptrie; + + if (hashp(trie)) { + val count = hash_count(trie); + val value = get_hash_userdata(trie); + + if (zerop(count)) { + *ptrie = value; + } else if (eq(count, one) && nullp(value)) { + val iter = hash_begin(trie); + val cell = hash_next(&iter); + *ptrie = cons(car(cell), cdr(cell)); + trie_compress(cdr_l(*ptrie)); + } else { + val cell, iter = hash_begin(trie); + + for (cell = hash_next(&iter); iter; cell = hash_next(&iter)) + trie_compress(cdr_l(cell)); + } + } else if (consp(trie)) { + trie_compress(cdr_l(trie)); + } +} + val trie_lookup_begin(val trie) { return trie; @@ -61,12 +99,20 @@ val trie_lookup_begin(val trie) val trie_value_at(val node) { - return get_hash_userdata(node); + if (hashp(node)) + return get_hash_userdata(node); + if (consp(node)) + return nil; + return node; } val trie_lookup_feed_char(val node, val ch) { - return gethash(node, ch); + if (hashp(node)) + return gethash(node, ch); + if (consp(node) && eq(ch,car(node))) + return cdr(node); + return nil; } val get_filter_trie(val sym) @@ -86,6 +132,7 @@ static val build_filter(struct filter_pair *pair) for (i = 0; pair[i].key; i++) trie_add(trie, static_str(pair[i].key), static_str(pair[i].value)); + trie_compress(&trie); return trie; } @@ -133,9 +180,9 @@ val filter_string(val filter, val str) { val type = typeof(filter); - if (type == null) + if (eq(type, null)) return str; - if (type == hash_s) + if (eq(type, hash_s) || eq(type, cons_s)) return trie_filter_string(filter, str); else if (type == fun_s) return funcall1(filter, str); @@ -201,10 +201,15 @@ static void finalize(val obj) assert (0 && "corrupt type field"); } -void cobj_destroy_op(val obj) +void cobj_destroy_stub_op(val obj) { } +void cobj_destroy_free_op(val obj) +{ + free(obj->co.handle); +} + static void mark_obj(val obj) { type_t t; @@ -54,6 +54,12 @@ struct hash { val userdata; }; +struct hash_iter { + val hash; + cnum chain; + val cons; +}; + /* * Dynamic list built up during gc. */ @@ -132,11 +138,6 @@ val hash_obj(val obj) return num(ll_hash(obj)); } -static void hash_destroy(val hash) -{ - free(hash->co.handle); -} - static void hash_mark(val hash) { struct hash *h = (struct hash *) hash->co.handle; @@ -190,7 +191,7 @@ static void hash_mark(val hash) static struct cobj_ops hash_ops = { cobj_equal_op, cobj_print_op, - hash_destroy, + cobj_destroy_free_op, hash_mark, cobj_hash_op }; @@ -287,6 +288,12 @@ val remhash(val hash, val key) return nil; } +val hash_count(val hash) +{ + struct hash *h = (struct hash *) hash->co.handle; + return num(h->count); +} + val get_hash_userdata(val hash) { struct hash *h = (struct hash *) hash->co.handle; @@ -306,6 +313,47 @@ val hashp(val obj) return typeof(obj) == hash_s ? t : nil; } +static void hash_iter_mark(val hash_iter) +{ + struct hash_iter *hi = (struct hash_iter *) hash_iter->co.handle; + gc_mark(hi->hash); + gc_mark(hi->cons); +} + +static struct cobj_ops hash_iter_ops = { + cobj_equal_op, + cobj_print_op, + cobj_destroy_free_op, + hash_iter_mark, + cobj_hash_op +}; + +val hash_begin(val hash) +{ + struct hash_iter *hi = (struct hash_iter *) chk_malloc(sizeof *hi); + hi->hash = hash; + hi->chain = -1; + hi->cons = nil; + return cobj((mem_t *) hi, hash_iter_s, &hash_iter_ops); +} + +val hash_next(val *iter) +{ + struct hash_iter *hi = (struct hash_iter *) (*iter)->co.handle; + val hash = hi->hash; + struct hash *h = (struct hash *) hash->co.handle; + if (hi->cons) + hi->cons = cdr(hi->cons); + while (nullp(hi->cons)) { + if (++hi->chain >= h->modulus) { + *iter = nil; + return nil; + } + hi->cons = vecref(h->table, num(hi->chain)); + } + return car(hi->cons); +} + /* * Called from garbage collector. Hash module must process all weak tables * that were visited during the marking phase, maintained in the list @@ -31,9 +31,12 @@ val gethash(val hash, val key); val gethash_f(val hash, val key, val *found); val sethash(val hash, val key, val value); val remhash(val hash, val key); +val hash_count(val hash); val get_hash_userdata(val hash); val set_hash_userdata(val hash, val data); val hashp(val obj); +val hash_begin(val hash); +val hash_next(val *iter); void hash_process_weak(void); void hash_init(void); @@ -51,7 +51,7 @@ val packages; val system_package, keyword_package, user_package; val null, t, cons_s, str_s, chr_s, num_s, sym_s, pkg_s, fun_s, vec_s; -val stream_s, hash_s, lcons_s, lstr_s, cobj_s; +val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s; val var_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; val nongreedy_s, compiled_regex_s; val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; @@ -117,6 +117,8 @@ val typeof(val obj) return num_s; case TAG_CHR: return chr_s; + case TAG_LIT: + return str_s; case TAG_PTR: if (obj == nil) { return null; @@ -1542,6 +1544,12 @@ val vec_set_fill(val vec, val fill) return vec; } +val vecref(val vec, val ind) +{ + type_check(vec, VEC); + range_bug_unless (c_num(ind) < c_num(vec->v.vec[vec_fill])); + return vec->v.vec[c_num(ind)]; +} val *vecref_l(val vec, val ind) { @@ -2023,6 +2031,7 @@ static void obj_init(void) vec_s = intern(lit("vec"), user_package); stream_s = intern(lit("stream"), user_package); hash_s = intern(lit("hash"), user_package); + hash_iter_s = intern(lit("hash-iter"), user_package); lcons_s = intern(lit("lcons"), user_package); lstr_s = intern(lit("lstr"), user_package); cobj_s = intern(lit("cobj"), user_package); @@ -159,7 +159,8 @@ struct cobj_ops { /* Default operations for above structure. */ val cobj_equal_op(val, val); void cobj_print_op(val, val); -void cobj_destroy_op(val); +void cobj_destroy_stub_op(val); +void cobj_destroy_free_op(val); void cobj_mark_op(val); cnum cobj_hash_op(val); @@ -212,7 +213,7 @@ INLINE val num_fast(cnum n) extern val keyword_package, system_package, user_package; extern val null, t, cons_s, str_s, chr_s, num_s, sym_s, pkg_s, fun_s, vec_s; -extern val stream_s, hash_s, lcons_s, lstr_s, cobj_s; +extern val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s; extern val var_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; extern val nongreedy_s, compiled_regex_s; extern val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; @@ -353,6 +354,7 @@ val chain(val fun1_list); val vector(val alloc); val vec_get_fill(val vec); val vec_set_fill(val vec, val fill); +val vecref(val vec, val ind); val *vecref_l(val vec, val ind); val vec_push(val vec, val item); val lazy_stream_cons(val stream); @@ -324,7 +324,7 @@ static val string_in_get_char(val stream) static struct strm_ops string_in_ops = { { cobj_equal_op, cobj_print_op, - cobj_destroy_op, + cobj_destroy_stub_op, string_in_stream_mark, cobj_hash_op }, 0, @@ -353,7 +353,7 @@ static val byte_in_get_byte(val stream) static struct strm_ops byte_in_ops = { { cobj_equal_op, cobj_print_op, - cobj_destroy_op, + cobj_destroy_stub_op, cobj_mark_op, cobj_hash_op }, 0, |