summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2011-09-26 11:20:08 -0700
committerKaz Kylheku <kaz@kylheku.com>2011-09-26 11:20:08 -0700
commitf3fe0151fc4116f0fec40e7454d8866f11908560 (patch)
treeaf1a4cee2e429f295b26587ee55463917860b341
parentd8e2644d5ebb3783660b2c9cb7e1d09dac2230f1 (diff)
downloadtxr-f3fe0151fc4116f0fec40e7454d8866f11908560.tar.gz
txr-f3fe0151fc4116f0fec40e7454d8866f11908560.tar.bz2
txr-f3fe0151fc4116f0fec40e7454d8866f11908560.zip
Trie compression. Hash table iteration.
Bugfix in typeof. * filter.c (trie_compress): New function. (trie_value_at, trie_lookup_feed_char, filter_string): Handle cons cell nodes in trie. (build_filter): Call trie_compress. * gc.c (cobj_destroy_op): Function renamed to cobj_destroy_stub_op since it doesn't do anything. (cobj_destroy_free_op): New function. * hash.c (struct hash_iter): New type. (hash_destroy): Function removed. (hash_ops): Reference to hash_destroy replaced with cobj_destroy_free_op. (hash_count, hash_iter_mark, hash_begin, hash_next): New functions. (hash_iter_ops): New static structure. * hash.h (hash_count, hash_begin, hash_next): New functions declared. * lib.c (hash_iter_s): New symbol variable. (typeof): Bugfix: TAG_LIT type tag not handled. (vecref): New function. (obj_init): Initialize hash_iter_s. * lib.h (cobj_destroy_op): Declaration renamed. (cobj_destroy_free_op, vecref): New functions declared. (hash_iter_s): New variable declared. * stream.c (string_in_ops, byte_in_ops): cobj_destroy_op renamed to cobj_destroy_stub_op.
-rw-r--r--ChangeLog30
-rw-r--r--filter.c55
-rw-r--r--gc.c7
-rw-r--r--hash.c60
-rw-r--r--hash.h3
-rw-r--r--lib.c11
-rw-r--r--lib.h6
-rw-r--r--stream.c4
8 files changed, 160 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index ccff56ff..4d25b968 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,33 @@
+2011-09-26 Kaz Kylheku <kaz@kylheku.com>
+
+ Trie compression. Hash table iteration.
+ Bugfix in typeof.
+
+ * filter.c (trie_compress): New function.
+ (trie_value_at, trie_lookup_feed_char, filter_string): Handle cons cell
+ nodes in trie.
+ (build_filter): Call trie_compress.
+
+ * gc.c (cobj_destroy_op): Function renamed to cobj_destroy_stub_op
+ since it doesn't do anything.
+ (cobj_destroy_free_op): New function.
+ * hash.c (struct hash_iter): New type.
+ (hash_destroy): Function removed.
+ (hash_ops): Reference to hash_destroy replaced with
+ cobj_destroy_free_op.
+ (hash_count, hash_iter_mark, hash_begin, hash_next): New functions.
+ (hash_iter_ops): New static structure.
+ * hash.h (hash_count, hash_begin, hash_next): New functions declared.
+ * lib.c (hash_iter_s): New symbol variable.
+ (typeof): Bugfix: TAG_LIT type tag not handled.
+ (vecref): New function.
+ (obj_init): Initialize hash_iter_s.
+ * lib.h (cobj_destroy_op): Declaration renamed.
+ (cobj_destroy_free_op, vecref): New functions declared.
+ (hash_iter_s): New variable declared.
+ * stream.c (string_in_ops, byte_in_ops): cobj_destroy_op
+ renamed to cobj_destroy_stub_op.
+
2011-09-25 Kaz Kylheku <kaz@kylheku.com>
Filtering from HTML implemented.
diff --git a/filter.c b/filter.c
index dc09a8fe..5053520f 100644
--- a/filter.c
+++ b/filter.c
@@ -54,6 +54,44 @@ static val trie_add(val trie, val key, val value)
return node;
}
+/*
+ * Reduce the storage requirement for a the trie, by applying
+ * these rules:
+ *
+ * 1. All leaf-level nodes (i.e. empty hash tables) are replaced by
+ * just the node value (hash user data).
+ * 2. All hash tables with a single transition (i.e. hash tables
+ * containing one element) and which have no node value
+ * are replaced by a cons cell, whose CAR is the transition
+ * character, and whose CDR is the transition.
+ */
+
+static void trie_compress(val *ptrie)
+{
+ val trie = *ptrie;
+
+ if (hashp(trie)) {
+ val count = hash_count(trie);
+ val value = get_hash_userdata(trie);
+
+ if (zerop(count)) {
+ *ptrie = value;
+ } else if (eq(count, one) && nullp(value)) {
+ val iter = hash_begin(trie);
+ val cell = hash_next(&iter);
+ *ptrie = cons(car(cell), cdr(cell));
+ trie_compress(cdr_l(*ptrie));
+ } else {
+ val cell, iter = hash_begin(trie);
+
+ for (cell = hash_next(&iter); iter; cell = hash_next(&iter))
+ trie_compress(cdr_l(cell));
+ }
+ } else if (consp(trie)) {
+ trie_compress(cdr_l(trie));
+ }
+}
+
val trie_lookup_begin(val trie)
{
return trie;
@@ -61,12 +99,20 @@ val trie_lookup_begin(val trie)
val trie_value_at(val node)
{
- return get_hash_userdata(node);
+ if (hashp(node))
+ return get_hash_userdata(node);
+ if (consp(node))
+ return nil;
+ return node;
}
val trie_lookup_feed_char(val node, val ch)
{
- return gethash(node, ch);
+ if (hashp(node))
+ return gethash(node, ch);
+ if (consp(node) && eq(ch,car(node)))
+ return cdr(node);
+ return nil;
}
val get_filter_trie(val sym)
@@ -86,6 +132,7 @@ static val build_filter(struct filter_pair *pair)
for (i = 0; pair[i].key; i++)
trie_add(trie, static_str(pair[i].key), static_str(pair[i].value));
+ trie_compress(&trie);
return trie;
}
@@ -133,9 +180,9 @@ val filter_string(val filter, val str)
{
val type = typeof(filter);
- if (type == null)
+ if (eq(type, null))
return str;
- if (type == hash_s)
+ if (eq(type, hash_s) || eq(type, cons_s))
return trie_filter_string(filter, str);
else if (type == fun_s)
return funcall1(filter, str);
diff --git a/gc.c b/gc.c
index 00073425..a989094e 100644
--- a/gc.c
+++ b/gc.c
@@ -201,10 +201,15 @@ static void finalize(val obj)
assert (0 && "corrupt type field");
}
-void cobj_destroy_op(val obj)
+void cobj_destroy_stub_op(val obj)
{
}
+void cobj_destroy_free_op(val obj)
+{
+ free(obj->co.handle);
+}
+
static void mark_obj(val obj)
{
type_t t;
diff --git a/hash.c b/hash.c
index dcd28011..e07ebf56 100644
--- a/hash.c
+++ b/hash.c
@@ -54,6 +54,12 @@ struct hash {
val userdata;
};
+struct hash_iter {
+ val hash;
+ cnum chain;
+ val cons;
+};
+
/*
* Dynamic list built up during gc.
*/
@@ -132,11 +138,6 @@ val hash_obj(val obj)
return num(ll_hash(obj));
}
-static void hash_destroy(val hash)
-{
- free(hash->co.handle);
-}
-
static void hash_mark(val hash)
{
struct hash *h = (struct hash *) hash->co.handle;
@@ -190,7 +191,7 @@ static void hash_mark(val hash)
static struct cobj_ops hash_ops = {
cobj_equal_op,
cobj_print_op,
- hash_destroy,
+ cobj_destroy_free_op,
hash_mark,
cobj_hash_op
};
@@ -287,6 +288,12 @@ val remhash(val hash, val key)
return nil;
}
+val hash_count(val hash)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ return num(h->count);
+}
+
val get_hash_userdata(val hash)
{
struct hash *h = (struct hash *) hash->co.handle;
@@ -306,6 +313,47 @@ val hashp(val obj)
return typeof(obj) == hash_s ? t : nil;
}
+static void hash_iter_mark(val hash_iter)
+{
+ struct hash_iter *hi = (struct hash_iter *) hash_iter->co.handle;
+ gc_mark(hi->hash);
+ gc_mark(hi->cons);
+}
+
+static struct cobj_ops hash_iter_ops = {
+ cobj_equal_op,
+ cobj_print_op,
+ cobj_destroy_free_op,
+ hash_iter_mark,
+ cobj_hash_op
+};
+
+val hash_begin(val hash)
+{
+ struct hash_iter *hi = (struct hash_iter *) chk_malloc(sizeof *hi);
+ hi->hash = hash;
+ hi->chain = -1;
+ hi->cons = nil;
+ return cobj((mem_t *) hi, hash_iter_s, &hash_iter_ops);
+}
+
+val hash_next(val *iter)
+{
+ struct hash_iter *hi = (struct hash_iter *) (*iter)->co.handle;
+ val hash = hi->hash;
+ struct hash *h = (struct hash *) hash->co.handle;
+ if (hi->cons)
+ hi->cons = cdr(hi->cons);
+ while (nullp(hi->cons)) {
+ if (++hi->chain >= h->modulus) {
+ *iter = nil;
+ return nil;
+ }
+ hi->cons = vecref(h->table, num(hi->chain));
+ }
+ return car(hi->cons);
+}
+
/*
* Called from garbage collector. Hash module must process all weak tables
* that were visited during the marking phase, maintained in the list
diff --git a/hash.h b/hash.h
index b5bd200a..2a99278c 100644
--- a/hash.h
+++ b/hash.h
@@ -31,9 +31,12 @@ val gethash(val hash, val key);
val gethash_f(val hash, val key, val *found);
val sethash(val hash, val key, val value);
val remhash(val hash, val key);
+val hash_count(val hash);
val get_hash_userdata(val hash);
val set_hash_userdata(val hash, val data);
val hashp(val obj);
+val hash_begin(val hash);
+val hash_next(val *iter);
void hash_process_weak(void);
void hash_init(void);
diff --git a/lib.c b/lib.c
index 9aa95d84..92aaf68a 100644
--- a/lib.c
+++ b/lib.c
@@ -51,7 +51,7 @@ val packages;
val system_package, keyword_package, user_package;
val null, t, cons_s, str_s, chr_s, num_s, sym_s, pkg_s, fun_s, vec_s;
-val stream_s, hash_s, lcons_s, lstr_s, cobj_s;
+val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s;
val var_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s;
val nongreedy_s, compiled_regex_s;
val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s;
@@ -117,6 +117,8 @@ val typeof(val obj)
return num_s;
case TAG_CHR:
return chr_s;
+ case TAG_LIT:
+ return str_s;
case TAG_PTR:
if (obj == nil) {
return null;
@@ -1542,6 +1544,12 @@ val vec_set_fill(val vec, val fill)
return vec;
}
+val vecref(val vec, val ind)
+{
+ type_check(vec, VEC);
+ range_bug_unless (c_num(ind) < c_num(vec->v.vec[vec_fill]));
+ return vec->v.vec[c_num(ind)];
+}
val *vecref_l(val vec, val ind)
{
@@ -2023,6 +2031,7 @@ static void obj_init(void)
vec_s = intern(lit("vec"), user_package);
stream_s = intern(lit("stream"), user_package);
hash_s = intern(lit("hash"), user_package);
+ hash_iter_s = intern(lit("hash-iter"), user_package);
lcons_s = intern(lit("lcons"), user_package);
lstr_s = intern(lit("lstr"), user_package);
cobj_s = intern(lit("cobj"), user_package);
diff --git a/lib.h b/lib.h
index f72111e4..db597da5 100644
--- a/lib.h
+++ b/lib.h
@@ -159,7 +159,8 @@ struct cobj_ops {
/* Default operations for above structure. */
val cobj_equal_op(val, val);
void cobj_print_op(val, val);
-void cobj_destroy_op(val);
+void cobj_destroy_stub_op(val);
+void cobj_destroy_free_op(val);
void cobj_mark_op(val);
cnum cobj_hash_op(val);
@@ -212,7 +213,7 @@ INLINE val num_fast(cnum n)
extern val keyword_package, system_package, user_package;
extern val null, t, cons_s, str_s, chr_s, num_s, sym_s, pkg_s, fun_s, vec_s;
-extern val stream_s, hash_s, lcons_s, lstr_s, cobj_s;
+extern val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s;
extern val var_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s;
extern val nongreedy_s, compiled_regex_s;
extern val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s;
@@ -353,6 +354,7 @@ val chain(val fun1_list);
val vector(val alloc);
val vec_get_fill(val vec);
val vec_set_fill(val vec, val fill);
+val vecref(val vec, val ind);
val *vecref_l(val vec, val ind);
val vec_push(val vec, val item);
val lazy_stream_cons(val stream);
diff --git a/stream.c b/stream.c
index 2f6bf1e8..2b4d09f2 100644
--- a/stream.c
+++ b/stream.c
@@ -324,7 +324,7 @@ static val string_in_get_char(val stream)
static struct strm_ops string_in_ops = {
{ cobj_equal_op,
cobj_print_op,
- cobj_destroy_op,
+ cobj_destroy_stub_op,
string_in_stream_mark,
cobj_hash_op },
0,
@@ -353,7 +353,7 @@ static val byte_in_get_byte(val stream)
static struct strm_ops byte_in_ops = {
{ cobj_equal_op,
cobj_print_op,
- cobj_destroy_op,
+ cobj_destroy_stub_op,
cobj_mark_op,
cobj_hash_op },
0,