summaryrefslogtreecommitdiffstats
path: root/hash.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2009-11-09 13:44:39 -0800
committerKaz Kylheku <kaz@kylheku.com>2009-11-09 13:44:39 -0800
commit357121301094005f6c56471fb18f9ff1b6bc8d13 (patch)
treee9b4d47ace9622de678f5c863e473bfa00fd721f /hash.c
parent10e4d4687df9a41a017fc438bc16407265dfe281 (diff)
downloadtxr-357121301094005f6c56471fb18f9ff1b6bc8d13.tar.gz
txr-357121301094005f6c56471fb18f9ff1b6bc8d13.tar.bz2
txr-357121301094005f6c56471fb18f9ff1b6bc8d13.zip
First cut at hash tables. One known problem is allocation during gc,
due to use of boxed numbers for vector access.
Diffstat (limited to 'hash.c')
-rw-r--r--hash.c301
1 files changed, 301 insertions, 0 deletions
diff --git a/hash.c b/hash.c
new file mode 100644
index 00000000..33510917
--- /dev/null
+++ b/hash.c
@@ -0,0 +1,301 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <setjmp.h>
+#include <limits.h>
+#include "lib.h"
+#include "gc.h"
+#include "unwind.h"
+#include "hash.h"
+
+typedef enum hash_flags {
+ hash_weak_none = 0,
+ hash_weak_keys = 1,
+ hash_weak_vals = 2,
+ hash_weak_both = 3
+} hash_flags_t;
+
+struct hash {
+ hash_flags_t flags;
+ struct hash *next;
+ obj_t *table;
+ long modulus;
+ long count;
+};
+
+/*
+ * Dynamic list built up during gc.
+ */
+static struct hash *reachable_weak_hashes;
+
+/*
+ * This is is an adaptation of hashpjw, from Compilers: Principles, Techniques
+ * and Tools, Aho, Sethi, Ulman, 1988. P. 436. The register is wider by
+ * a few bits, and we bring down five overflow bits instead of four.
+ * We don't reduce the final result modulo a small prime, but leave it
+ * as it is; let the hashing routines do their own reduction.
+ */
+static long hash_c_str(const char *str)
+{
+ unsigned long h = 0;
+ while (*str) {
+ unsigned long g;
+ h = (h << 4) + *str++;
+ g = h & 0x7C000000;
+ h = h ^ (g >> 26) ^ g;
+ }
+ return h;
+}
+
+static long ll_hash(obj_t *obj)
+{
+ if (obj == nil)
+ return LONG_MAX;
+
+ switch (obj->t.type) {
+ case CONS:
+ return (ll_hash(obj->c.car) + ll_hash(obj->c.cdr)) & LONG_MAX;
+ case STR:
+ return hash_c_str(obj->st.str);
+ case CHR:
+ return obj->ch.ch + LONG_MAX / 2;
+ case NUM:
+ return obj->n.val & LONG_MAX;
+ case SYM:
+ return ((long) obj) & LONG_MAX;
+ case FUN:
+ return ((long) obj->f.f.interp_fun + ll_hash(obj->f.env)) & LONG_MAX;
+ case VEC:
+ {
+ obj_t *fill = obj->v.vec[vec_fill];
+ long i, h = ll_hash(obj->v.vec[vec_fill]);
+ long len = c_num(fill);
+
+ for (i = 0; i < len; i++)
+ h = (h + ll_hash(obj->v.vec[i])) & LONG_MAX;
+
+ return h;
+ }
+ case LCONS:
+ return (ll_hash(car(obj)) + ll_hash(cdr(obj))) & LONG_MAX;
+ case LSTR:
+ lazy_str_force(obj);
+ return ll_hash(obj->ls.prefix);
+ case COBJ:
+ if (obj->co.ops->hash)
+ return obj->co.ops->hash(obj);
+ return ((long) obj) & LONG_MAX;
+ }
+
+ internal_error("unhandled case in equal function");
+}
+
+obj_t *hash_obj(obj_t *obj)
+{
+ return num(ll_hash(obj));
+}
+
+obj_t *hash_equal(obj_t *self, obj_t *other)
+{
+ return self == other ? t : nil;
+}
+
+void hash_destroy(obj_t *hash)
+{
+ free(hash->co.handle);
+}
+
+void hash_mark(obj_t *hash)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ long i;
+
+ switch (h->flags) {
+ case hash_weak_none:
+ /* If the hash is not weak, we can simply mark the table
+ vector and we are done. */
+ gc_mark(h->table);
+ break;
+ case hash_weak_keys:
+ /* Keys are weak: mark the values only. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t *iter;
+
+ for (iter = *pchain; iter != nil; iter = cdr(iter)) {
+ obj_t *entry = car(iter);
+ gc_mark(cdr(entry));
+ }
+ }
+ reachable_weak_hashes->next = h;
+ reachable_weak_hashes = h;
+ break;
+ case hash_weak_vals:
+ /* Values are weak: mark the keys only. */
+
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t *iter;
+
+ for (iter = *pchain; iter != nil; iter = cdr(iter)) {
+ obj_t *entry = car(iter);
+ gc_mark(car(entry));
+ }
+ }
+ reachable_weak_hashes->next = h;
+ reachable_weak_hashes = h;
+ break;
+ case hash_weak_both:
+ /* Values and keys are weak: don't mark anything. */
+ break;
+ }
+}
+
+static struct cobj_ops hash_ops = {
+ hash_equal,
+ cobj_print_op,
+ hash_destroy,
+ hash_mark,
+};
+
+obj_t *make_hash(obj_t *weak_keys, obj_t *weak_vals)
+{
+ int flags = ((weak_vals != nil) << 1) | (weak_keys != nil);
+ struct hash *h = (struct hash *) chk_malloc(sizeof *h);
+ obj_t *mod = num(256);
+ h->flags = (hash_flags_t) flags;
+ h->modulus = c_num(mod);
+ h->table = vector(mod);
+ vec_set_fill(h->table, mod);
+ return cobj((void *) h, hash_t, &hash_ops);
+}
+
+obj_t **l_gethash(obj_t *hash, obj_t *key)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ obj_t **pchain = vecref_l(h->table, num(ll_hash(key) % h->modulus));
+ return acons_new_l(pchain, key);
+}
+
+obj_t *gethash(obj_t *hash, obj_t *key)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ obj_t *chain = *vecref_l(h->table, num(ll_hash(key) % h->modulus));
+ obj_t *found = assoc(chain, key);
+ return cdr(found);
+}
+
+/*
+ * Called from garbage collector. Hash module must process all weak tables
+ * that were visited during the marking phase, maintained in the list
+ * reachable_weak_hashes.
+ */
+void hash_process_weak(void)
+{
+ struct hash *h;
+ long i;
+
+ for (h = reachable_weak_hashes; h != 0; h = h->next) {
+ switch (h->flags) {
+ case hash_weak_none:
+ /* what is this doing here */
+ break;
+ case hash_weak_keys:
+ /* Sweep through all entries. Delete any which have keys
+ that are garbage. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t **iter;
+
+ for (iter = pchain; *iter != nil; ) {
+ obj_t *entry = car(*iter);
+ if (!gc_is_reachable(car(entry)))
+ *iter = cdr(*iter);
+ else
+ iter = cdr_l(*iter);
+ }
+ }
+ /* Garbage is gone now. Seal things by marking the vector. */
+ gc_mark(h->table);
+ break;
+ case hash_weak_vals:
+ /* Sweep through all entries. Delete any which have values
+ that are garbage. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t **iter;
+
+ for (iter = pchain; *iter != nil; ) {
+ obj_t *entry = car(*iter);
+ if (!gc_is_reachable(cdr(entry)))
+ *iter = cdr(*iter);
+ else
+ iter = cdr_l(*iter);
+ }
+ }
+ /* Garbage is gone now. Seal things by marking the vector. */
+ gc_mark(h->table);
+ break;
+ case hash_weak_both:
+ /* Sweep through all entries. Delete any which have keys
+ or values that are garbage. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t **iter;
+
+ for (iter = pchain; *iter != nil; ) {
+ obj_t *entry = car(*iter);
+ if (!gc_is_reachable(car(entry)) || !gc_is_reachable(cdr(entry)))
+ *iter = cdr(*iter);
+ else
+ iter = cdr_l(*iter);
+ }
+ }
+ /* Garbage is gone now. Seal things by marking the vector. */
+ gc_mark(h->table);
+ break;
+ }
+ }
+
+ /* Done with weak processing; clear out the list in preparation for
+ the next gc round. */
+ reachable_weak_hashes = 0;
+}
+
+void hash_init(void)
+{
+}