summaryrefslogtreecommitdiffstats
path: root/hash.c
diff options
context:
space:
mode:
Diffstat (limited to 'hash.c')
-rw-r--r--hash.c162
1 files changed, 95 insertions, 67 deletions
diff --git a/hash.c b/hash.c
index 4d8fcf04..68a7e75c 100644
--- a/hash.c
+++ b/hash.c
@@ -43,6 +43,7 @@
#include "stream.h"
#include "eval.h"
#include "cadr.h"
+#include "itypes.h"
#include "hash.h"
typedef enum hash_flags {
@@ -53,7 +54,7 @@ typedef enum hash_flags {
} hash_flags_t;
struct hash_ops {
- cnum (*hash_fun)(val, int *);
+ cnum (*hash_fun)(val, int *, ucnum);
val (*equal_fun)(val, val);
val (*assoc_fun)(val key, cnum hash, val list);
val (*acons_new_c_fun)(val key, cnum hash, loc new_p, loc list);
@@ -63,6 +64,7 @@ struct hash_ops {
{ hash, equal, assoc, acons }
struct hash {
+ ucnum seed;
hash_flags_t flags;
struct hash *next;
val table;
@@ -91,69 +93,83 @@ val weak_keys_k, weak_vals_k, equal_based_k, eql_based_k, userdata_k;
static struct hash *reachable_weak_hashes;
static struct hash_iter *reachable_iters;
-static int hash_str_limit = 128, hash_rec_limit = 32;
+static int hash_str_limit = INT_MAX, hash_rec_limit = 32;
/* C99 inline instantiations. */
#if __STDC_VERSION__ >= 199901L
loc gethash_l(val hash, val key, loc new_p);
#endif
-/*
- * This is is an adaptation of hashpjw, from Compilers: Principles, Techniques
- * and Tools, Aho, Sethi, Ulman, 1988. P. 436. The register is wider by
- * a few bits, and we bring down five overflow bits instead of four.
- * We don't reduce the final result modulo a small prime, but leave it
- * as it is; let the hashing routines do their own reduction.
- */
-static unsigned long hash_c_str(const wchar_t *str)
+static u32_t randbox[] = {
+ 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
+ 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
+ 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU,
+ 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU,
+};
+
+static u32_t hash_c_str(const wchar_t *str, u32_t seed)
{
int count = hash_str_limit;
- unsigned long h = 0;
- while (*str && count--) {
- unsigned long g;
- h = (h << 4) + *str++;
- g = h & 0x7C000000;
- h = h ^ (g >> 26) ^ g;
+ u32_t acc = seed;
+ u32_t ch;
+
+ while (count-- && (ch = *str++) != 0) {
+ acc ^= ch;
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
}
- return h;
+
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
+ acc ^= randbox[acc & 0xf];
+
+ return acc;
}
-static unsigned long hash_buf(const mem_t *ptr, cnum size)
+static u32_t hash_buf(const mem_t *ptr, cnum size, u32_t seed)
{
+ const u32_t *buf = coerce(const u32_t *, ptr);
int count = hash_str_limit;
- unsigned long h = 0;
+ u32_t acc = seed;
- for (; size >= 4 && count--; size -= 4, ptr += 4) {
- unsigned long el = (convert(unsigned long, ptr[0]) << 24 |
- convert(unsigned long, ptr[1]) << 16 |
- convert(unsigned long, ptr[2]) << 8 |
- convert(unsigned long, ptr[3]));
- unsigned long g;
- h = (h << 4) + el;
- g = h & 0x7C000000;
- h = h ^ (g >> 26) ^ g;
- ptr += 4;
+ for (; size >= sizeof *buf && count--; size -= sizeof *buf, buf++) {
+ u32_t in = *buf;
+ acc ^= in;
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
}
- if (count) {
- unsigned long el = 0;
- unsigned long g;
+ if (size != 0) {
+ const mem_t *tail = coerce(const mem_t *, ptr);
+ u32_t in = 0;
+
switch (size) {
- case 0:
- break;
case 3:
- el = *ptr++;
+ in |= convert(u32_t, tail[2]) << 16;
case 2:
- el = el << 8 | *ptr++;
+ in |= convert(u32_t, tail[1]) << 8;
case 1:
- el = el << 8 | *ptr++;
- h = (h << 4) + el;
- g = h & 0x7C000000;
- h = h ^ (g >> 26) ^ g;
- ptr += 4;
+ in |= convert(u32_t, tail[0]);
}
+
+ acc ^= in;
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
}
- return h;
+
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (32 - 1);
+ acc ^= randbox[acc & 0xf];
+
+ return acc;
}
static cnum hash_double(double n)
@@ -174,7 +190,7 @@ static cnum hash_double(double n)
return h & NUM_MAX;
}
-cnum equal_hash(val obj, int *count)
+cnum equal_hash(val obj, int *count, ucnum seed)
{
if ((*count)-- <= 0)
return 0;
@@ -183,12 +199,13 @@ cnum equal_hash(val obj, int *count)
case NIL:
return NUM_MAX;
case LIT:
- return hash_c_str(litptr(obj)) & NUM_MAX;
+ return hash_c_str(litptr(obj), seed) & NUM_MAX;
case CONS:
- return (equal_hash(obj->c.car, count)
- + 32 * (equal_hash(obj->c.cdr, count) & (NUM_MAX / 16))) & NUM_MAX;
+ return (equal_hash(obj->c.car, count, seed)
+ + 32 * (equal_hash(obj->c.cdr, count, seed)
+ & (NUM_MAX / 16))) & NUM_MAX;
case STR:
- return hash_c_str(obj->st.str) & NUM_MAX;
+ return hash_c_str(obj->st.str, seed) & NUM_MAX;
case CHR:
return c_chr(obj) & NUM_MAX;
case NUM:
@@ -205,27 +222,28 @@ cnum equal_hash(val obj, int *count)
break;
case FUN:
return (coerce(cnum, obj->f.f.interp_fun)
- + equal_hash(obj->f.env, count)) & NUM_MAX;
+ + equal_hash(obj->f.env, count, seed)) & NUM_MAX;
case VEC:
{
val length = obj->v.vec[vec_length];
- cnum i, h = equal_hash(obj->v.vec[vec_length], count);
+ cnum i, h = equal_hash(obj->v.vec[vec_length], count, seed);
cnum len = c_num(length);
for (i = 0; i < len; i++) {
h = 32 * (h & (NUM_MAX / 16));
- h += equal_hash(obj->v.vec[i], count);
+ h += equal_hash(obj->v.vec[i], count, seed);
h &= NUM_MAX;
}
return h;
}
case LCONS:
- return (equal_hash(car(obj), count)
- + 32 * (equal_hash(cdr(obj), count) & (NUM_MAX / 16))) & NUM_MAX;
+ return (equal_hash(car(obj), count, seed)
+ + 32 * (equal_hash(cdr(obj), count, seed) &
+ (NUM_MAX / 16))) & NUM_MAX;
case LSTR:
lazy_str_force_upto(obj, num(hash_str_limit - 1));
- return equal_hash(obj->ls.prefix, count);
+ return equal_hash(obj->ls.prefix, count, seed);
case BGNUM:
return mp_hash(mp(obj)) & NUM_MAX;
case FLNUM:
@@ -235,14 +253,15 @@ cnum equal_hash(val obj, int *count)
if (obj->co.ops->equalsub) {
val sub = obj->co.ops->equalsub(obj);
if (sub)
- return equal_hash(sub, count);
+ return equal_hash(sub, count, seed);
}
- return obj->co.ops->hash(obj, count) & NUM_MAX;
+ return obj->co.ops->hash(obj, count, seed) & NUM_MAX;
case RNG:
- return (equal_hash(obj->rn.from, count)
- + 32 * (equal_hash(obj->rn.to, count) & (NUM_MAX / 16))) & NUM_MAX;
+ return (equal_hash(obj->rn.from, count, seed)
+ + 32 * (equal_hash(obj->rn.to, count, seed)
+ & (NUM_MAX / 16))) & NUM_MAX;
case BUF:
- return hash_buf(obj->b.data, c_num(obj->b.len));
+ return hash_buf(obj->b.data, c_num(obj->b.len), seed);
}
internal_error("unhandled case in equal function");
@@ -286,9 +305,16 @@ static cnum eql_hash(val obj, int *count)
abort();
}
-cnum cobj_eq_hash_op(val obj, int *count)
+static cnum eql_hash_op(val obj, int *count, ucnum seed)
+{
+ (void) seed;
+ return eql_hash(obj, count);
+}
+
+cnum cobj_eq_hash_op(val obj, int *count, ucnum seed)
{
(void) count;
+ (void) seed;
switch (sizeof (mem_t *)) {
case 4:
@@ -387,7 +413,7 @@ static val hash_equal_op(val left, val right)
return eq(pending, nil);
}
-static cnum hash_hash_op(val obj, int *count)
+static cnum hash_hash_op(val obj, int *count, ucnum seed)
{
cnum out = 0;
struct hash *h = coerce(struct hash *, obj->co.handle);
@@ -403,13 +429,13 @@ static cnum hash_hash_op(val obj, int *count)
out += coerce(cnum, h->hops) >> 5;
}
- out += equal_hash(h->userdata, count);
+ out += equal_hash(h->userdata, count, seed);
out &= NUM_MAX;
iter = hash_begin(obj);
while ((cell = hash_next(iter)) != nil) {
- out += equal_hash(cell, count);
+ out += equal_hash(cell, count, seed);
out &= NUM_MAX;
}
@@ -647,7 +673,7 @@ static val hash_aconsql_new_c(val key, cnum hash, loc new_p, loc list)
}
}
-static_def(struct hash_ops hash_eql_ops = hash_ops_init(eql_hash, eql,
+static_def(struct hash_ops hash_eql_ops = hash_ops_init(eql_hash_op, eql,
hash_assql,
hash_aconsql_new_c));
@@ -668,6 +694,7 @@ val make_hash(val weak_keys, val weak_vals, val equal_based)
val table = vector(mod, nil);
val hash = cobj(coerce(mem_t *, h), hash_s, &hash_ops);
+ h->seed = 0;
h->flags = convert(hash_flags_t, flags);
h->modulus = c_num(mod);
h->count = 0;
@@ -694,6 +721,7 @@ val make_similar_hash(val existing)
h->table = table;
h->userdata = ex->userdata;
+ h->seed = ex->seed;
h->flags = ex->flags;
h->usecount = 0;
h->hops = ex->hops;
@@ -743,7 +771,7 @@ val gethash_c(val hash, val key, loc new_p)
{
struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
int lim = hash_rec_limit;
- cnum hv = h->hops->hash_fun(key, &lim);
+ cnum hv = h->hops->hash_fun(key, &lim, h->seed);
loc pchain = vecref_l(h->table, num_fast(hv % h->modulus));
val old = deref(pchain);
val cell = h->hops->acons_new_c_fun(key, hv, new_p, pchain);
@@ -756,7 +784,7 @@ val gethash_e(val hash, val key)
{
struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
int lim = hash_rec_limit;
- cnum hv = h->hops->hash_fun(key, &lim);
+ cnum hv = h->hops->hash_fun(key, &lim, h->seed);
val chain = vecref(h->table, num_fast(hv % h->modulus));
return h->hops->assoc_fun(key, hv, chain);
}
@@ -813,7 +841,7 @@ val remhash(val hash, val key)
{
struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
int lim = hash_rec_limit;
- cnum hv = h->hops->hash_fun(key, &lim);
+ cnum hv = h->hops->hash_fun(key, &lim, h->seed);
val *pchain = valptr(vecref_l(h->table, num_fast(hv % h->modulus)));
val existing = h->hops->assoc_fun(key, hv, *pchain);
@@ -940,7 +968,7 @@ val hash_eql(val obj)
val hash_equal(val obj)
{
int lim = hash_rec_limit;
- return num_fast(equal_hash(obj, &lim));
+ return num_fast(equal_hash(obj, &lim, 0));
}
/*