diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2018-07-04 22:07:33 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2018-07-04 22:07:33 -0700 |
commit | 44b66777caafeebf543c32c9373632c04f748353 (patch) | |
tree | 40e7f6f3a3b3435ef71865a48f072c3c732e3206 /hash.c | |
parent | 9127ea3ff84327cd32552b4a90d56b4bf114bb86 (diff) | |
download | txr-44b66777caafeebf543c32c9373632c04f748353.tar.gz txr-44b66777caafeebf543c32c9373632c04f748353.tar.bz2 txr-44b66777caafeebf543c32c9373632c04f748353.zip |
hashing: overhaul part 1.
Hashing of buffers and character strings is being replaced
with a seedable hash, providing a tool against denial of
service attacks against hash tables.
This commit lays most of the groundwork: most of the internal
interface changes, and a new hashing implementation. What is
missing is the mechanisms to do the seeding.
* hash.c (struct hash_ops): Hash operation now takes a seed
argument of type ucnum.
(struct hash): New member, seed.
(hash_str_limit): Default value changed to INT_MAX.
A short value opens the gateway to an obvious collision attack
whereby strings sharing the same 128 character prefix are
entered into the same hash table, which will defeat any
seedings strategy.
(randbox): New static array. Values come from the Kazlib hash
module, but are not used in exactly the same way.
(hash_c_str, hash_buf): Now take a seed argument, and are
rewritten.
(equal_hash): Takes a seed, and passes it to hash_c_str,
hash_buf and to recursive self calls.
(eql_hash_op): New static function. Adapts the eql_hash
operation, which doesn't take a seed, to the new interface
that calls for a seed.
(obj_eq_hash_op): Take a seed; ignore it.
(hash_hash_op): Take a seed, pass it down to equal_hash.
(hash_eql_ops): Wire hash functiono pointer to eql_hash_op
instead of eql_hash.
(make_hash): For now, intialize the hash's seed to zero.
(make_similar_hash): Copy original hash's seed.
(gethash_c, gethash_e, remhash): Pass hash table's seed to
the hashing function.
(hash_equal): Pass a seed of zero to equal_hash for now;
this function will soon acquire an optional parameter for the
seed.
* hash.h (equal_hash): Declaration updated.
* lib.c (cobj_handle_hash_op): Take seed argument, pass down.
* lib.h (cobj_ops): Hash operation now takes seed.
(cobj_eq_hash_op, cobj_handle_hash_op): Declarations updated.
* struct.c (struct_inst_hash): Take seed argument, pass down.
* tests/009/json.expected: Updated, because the hash table
included in this output is now printed in a different order.
Diffstat (limited to 'hash.c')
-rw-r--r-- | hash.c | 162 |
1 files changed, 95 insertions, 67 deletions
@@ -43,6 +43,7 @@ #include "stream.h" #include "eval.h" #include "cadr.h" +#include "itypes.h" #include "hash.h" typedef enum hash_flags { @@ -53,7 +54,7 @@ typedef enum hash_flags { } hash_flags_t; struct hash_ops { - cnum (*hash_fun)(val, int *); + cnum (*hash_fun)(val, int *, ucnum); val (*equal_fun)(val, val); val (*assoc_fun)(val key, cnum hash, val list); val (*acons_new_c_fun)(val key, cnum hash, loc new_p, loc list); @@ -63,6 +64,7 @@ struct hash_ops { { hash, equal, assoc, acons } struct hash { + ucnum seed; hash_flags_t flags; struct hash *next; val table; @@ -91,69 +93,83 @@ val weak_keys_k, weak_vals_k, equal_based_k, eql_based_k, userdata_k; static struct hash *reachable_weak_hashes; static struct hash_iter *reachable_iters; -static int hash_str_limit = 128, hash_rec_limit = 32; +static int hash_str_limit = INT_MAX, hash_rec_limit = 32; /* C99 inline instantiations. */ #if __STDC_VERSION__ >= 199901L loc gethash_l(val hash, val key, loc new_p); #endif -/* - * This is is an adaptation of hashpjw, from Compilers: Principles, Techniques - * and Tools, Aho, Sethi, Ulman, 1988. P. 436. The register is wider by - * a few bits, and we bring down five overflow bits instead of four. - * We don't reduce the final result modulo a small prime, but leave it - * as it is; let the hashing routines do their own reduction. - */ -static unsigned long hash_c_str(const wchar_t *str) +static u32_t randbox[] = { + 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U, + 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU, + 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU, + 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU, +}; + +static u32_t hash_c_str(const wchar_t *str, u32_t seed) { int count = hash_str_limit; - unsigned long h = 0; - while (*str && count--) { - unsigned long g; - h = (h << 4) + *str++; - g = h & 0x7C000000; - h = h ^ (g >> 26) ^ g; + u32_t acc = seed; + u32_t ch; + + while (count-- && (ch = *str++) != 0) { + acc ^= ch; + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); } - return h; + + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); + acc ^= randbox[acc & 0xf]; + + return acc; } -static unsigned long hash_buf(const mem_t *ptr, cnum size) +static u32_t hash_buf(const mem_t *ptr, cnum size, u32_t seed) { + const u32_t *buf = coerce(const u32_t *, ptr); int count = hash_str_limit; - unsigned long h = 0; + u32_t acc = seed; - for (; size >= 4 && count--; size -= 4, ptr += 4) { - unsigned long el = (convert(unsigned long, ptr[0]) << 24 | - convert(unsigned long, ptr[1]) << 16 | - convert(unsigned long, ptr[2]) << 8 | - convert(unsigned long, ptr[3])); - unsigned long g; - h = (h << 4) + el; - g = h & 0x7C000000; - h = h ^ (g >> 26) ^ g; - ptr += 4; + for (; size >= sizeof *buf && count--; size -= sizeof *buf, buf++) { + u32_t in = *buf; + acc ^= in; + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); } - if (count) { - unsigned long el = 0; - unsigned long g; + if (size != 0) { + const mem_t *tail = coerce(const mem_t *, ptr); + u32_t in = 0; + switch (size) { - case 0: - break; case 3: - el = *ptr++; + in |= convert(u32_t, tail[2]) << 16; case 2: - el = el << 8 | *ptr++; + in |= convert(u32_t, tail[1]) << 8; case 1: - el = el << 8 | *ptr++; - h = (h << 4) + el; - g = h & 0x7C000000; - h = h ^ (g >> 26) ^ g; - ptr += 4; + in |= convert(u32_t, tail[0]); } + + acc ^= in; + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); } - return h; + + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); + acc ^= randbox[acc & 0xf]; + acc = acc >> 1 | acc << (32 - 1); + acc ^= randbox[acc & 0xf]; + + return acc; } static cnum hash_double(double n) @@ -174,7 +190,7 @@ static cnum hash_double(double n) return h & NUM_MAX; } -cnum equal_hash(val obj, int *count) +cnum equal_hash(val obj, int *count, ucnum seed) { if ((*count)-- <= 0) return 0; @@ -183,12 +199,13 @@ cnum equal_hash(val obj, int *count) case NIL: return NUM_MAX; case LIT: - return hash_c_str(litptr(obj)) & NUM_MAX; + return hash_c_str(litptr(obj), seed) & NUM_MAX; case CONS: - return (equal_hash(obj->c.car, count) - + 32 * (equal_hash(obj->c.cdr, count) & (NUM_MAX / 16))) & NUM_MAX; + return (equal_hash(obj->c.car, count, seed) + + 32 * (equal_hash(obj->c.cdr, count, seed) + & (NUM_MAX / 16))) & NUM_MAX; case STR: - return hash_c_str(obj->st.str) & NUM_MAX; + return hash_c_str(obj->st.str, seed) & NUM_MAX; case CHR: return c_chr(obj) & NUM_MAX; case NUM: @@ -205,27 +222,28 @@ cnum equal_hash(val obj, int *count) break; case FUN: return (coerce(cnum, obj->f.f.interp_fun) - + equal_hash(obj->f.env, count)) & NUM_MAX; + + equal_hash(obj->f.env, count, seed)) & NUM_MAX; case VEC: { val length = obj->v.vec[vec_length]; - cnum i, h = equal_hash(obj->v.vec[vec_length], count); + cnum i, h = equal_hash(obj->v.vec[vec_length], count, seed); cnum len = c_num(length); for (i = 0; i < len; i++) { h = 32 * (h & (NUM_MAX / 16)); - h += equal_hash(obj->v.vec[i], count); + h += equal_hash(obj->v.vec[i], count, seed); h &= NUM_MAX; } return h; } case LCONS: - return (equal_hash(car(obj), count) - + 32 * (equal_hash(cdr(obj), count) & (NUM_MAX / 16))) & NUM_MAX; + return (equal_hash(car(obj), count, seed) + + 32 * (equal_hash(cdr(obj), count, seed) & + (NUM_MAX / 16))) & NUM_MAX; case LSTR: lazy_str_force_upto(obj, num(hash_str_limit - 1)); - return equal_hash(obj->ls.prefix, count); + return equal_hash(obj->ls.prefix, count, seed); case BGNUM: return mp_hash(mp(obj)) & NUM_MAX; case FLNUM: @@ -235,14 +253,15 @@ cnum equal_hash(val obj, int *count) if (obj->co.ops->equalsub) { val sub = obj->co.ops->equalsub(obj); if (sub) - return equal_hash(sub, count); + return equal_hash(sub, count, seed); } - return obj->co.ops->hash(obj, count) & NUM_MAX; + return obj->co.ops->hash(obj, count, seed) & NUM_MAX; case RNG: - return (equal_hash(obj->rn.from, count) - + 32 * (equal_hash(obj->rn.to, count) & (NUM_MAX / 16))) & NUM_MAX; + return (equal_hash(obj->rn.from, count, seed) + + 32 * (equal_hash(obj->rn.to, count, seed) + & (NUM_MAX / 16))) & NUM_MAX; case BUF: - return hash_buf(obj->b.data, c_num(obj->b.len)); + return hash_buf(obj->b.data, c_num(obj->b.len), seed); } internal_error("unhandled case in equal function"); @@ -286,9 +305,16 @@ static cnum eql_hash(val obj, int *count) abort(); } -cnum cobj_eq_hash_op(val obj, int *count) +static cnum eql_hash_op(val obj, int *count, ucnum seed) +{ + (void) seed; + return eql_hash(obj, count); +} + +cnum cobj_eq_hash_op(val obj, int *count, ucnum seed) { (void) count; + (void) seed; switch (sizeof (mem_t *)) { case 4: @@ -387,7 +413,7 @@ static val hash_equal_op(val left, val right) return eq(pending, nil); } -static cnum hash_hash_op(val obj, int *count) +static cnum hash_hash_op(val obj, int *count, ucnum seed) { cnum out = 0; struct hash *h = coerce(struct hash *, obj->co.handle); @@ -403,13 +429,13 @@ static cnum hash_hash_op(val obj, int *count) out += coerce(cnum, h->hops) >> 5; } - out += equal_hash(h->userdata, count); + out += equal_hash(h->userdata, count, seed); out &= NUM_MAX; iter = hash_begin(obj); while ((cell = hash_next(iter)) != nil) { - out += equal_hash(cell, count); + out += equal_hash(cell, count, seed); out &= NUM_MAX; } @@ -647,7 +673,7 @@ static val hash_aconsql_new_c(val key, cnum hash, loc new_p, loc list) } } -static_def(struct hash_ops hash_eql_ops = hash_ops_init(eql_hash, eql, +static_def(struct hash_ops hash_eql_ops = hash_ops_init(eql_hash_op, eql, hash_assql, hash_aconsql_new_c)); @@ -668,6 +694,7 @@ val make_hash(val weak_keys, val weak_vals, val equal_based) val table = vector(mod, nil); val hash = cobj(coerce(mem_t *, h), hash_s, &hash_ops); + h->seed = 0; h->flags = convert(hash_flags_t, flags); h->modulus = c_num(mod); h->count = 0; @@ -694,6 +721,7 @@ val make_similar_hash(val existing) h->table = table; h->userdata = ex->userdata; + h->seed = ex->seed; h->flags = ex->flags; h->usecount = 0; h->hops = ex->hops; @@ -743,7 +771,7 @@ val gethash_c(val hash, val key, loc new_p) { struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s)); int lim = hash_rec_limit; - cnum hv = h->hops->hash_fun(key, &lim); + cnum hv = h->hops->hash_fun(key, &lim, h->seed); loc pchain = vecref_l(h->table, num_fast(hv % h->modulus)); val old = deref(pchain); val cell = h->hops->acons_new_c_fun(key, hv, new_p, pchain); @@ -756,7 +784,7 @@ val gethash_e(val hash, val key) { struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s)); int lim = hash_rec_limit; - cnum hv = h->hops->hash_fun(key, &lim); + cnum hv = h->hops->hash_fun(key, &lim, h->seed); val chain = vecref(h->table, num_fast(hv % h->modulus)); return h->hops->assoc_fun(key, hv, chain); } @@ -813,7 +841,7 @@ val remhash(val hash, val key) { struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s)); int lim = hash_rec_limit; - cnum hv = h->hops->hash_fun(key, &lim); + cnum hv = h->hops->hash_fun(key, &lim, h->seed); val *pchain = valptr(vecref_l(h->table, num_fast(hv % h->modulus))); val existing = h->hops->assoc_fun(key, hv, *pchain); @@ -940,7 +968,7 @@ val hash_eql(val obj) val hash_equal(val obj) { int lim = hash_rec_limit; - return num_fast(equal_hash(obj, &lim)); + return num_fast(equal_hash(obj, &lim, 0)); } /* |