summaryrefslogtreecommitdiffstats
path: root/hash.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2021-11-17 07:25:41 -0800
committerKaz Kylheku <kaz@kylheku.com>2021-11-17 07:25:41 -0800
commit9108e9f8f4434fb29200b08a4b576df47c407c01 (patch)
treeb30b49b3880238899e61de1352eb8e69c8d398b4 /hash.c
parent1f2c8c8e1e709a91e6c25f02f3883f9624a1f7f7 (diff)
downloadtxr-9108e9f8f4434fb29200b08a4b576df47c407c01.tar.gz
txr-9108e9f8f4434fb29200b08a4b576df47c407c01.tar.bz2
txr-9108e9f8f4434fb29200b08a4b576df47c407c01.zip
hash: 64 bit string and buffer hashing and seeds.
* hash.c (randbox, hash_c_str, hash_buf): Separate implementation for 64 bit pointers, using 64 bit random values, and producing a 64 bit hash, taking in a 64 bit seed. (gen_hash_seed): Use time_sec_nsec to get nanoseconds. On 64 bit, put together the seed differently to generate a wider value. * tests/009/json.txr: Change from hash tables to lists, so the order of the output doesn't change between 64 and 32 bits, due to the different string hashing. * tests/009/json.expected: Updated. * txr.1: Documented that seeds are up to 64 bits, but with possibly only the lower 32 bits being used.
Diffstat (limited to 'hash.c')
-rw-r--r--hash.c115
1 files changed, 112 insertions, 3 deletions
diff --git a/hash.c b/hash.c
index 2a85d2f6..ccc10507 100644
--- a/hash.c
+++ b/hash.c
@@ -98,6 +98,107 @@ static struct hash_iter *reachable_iters;
static int hash_traversal_limit = 32;
+#if SIZEOF_PTR == 8
+
+static u64_t randbox[] = {
+ 0x41f00c9949848f1bU, 0x16f1d887e6255dbaU,
+ 0x6921f21236da5bdcU, 0x1878975147bf94e9U,
+ 0x97b6024b8cbcce22U, 0x9a803523559fc06aU,
+ 0x45d41914d268f536U, 0x40b0bc43e10af79aU,
+ 0x90e011a9c1af4d69U, 0x6ac090fe1d2917b5U,
+ 0x142a6ebeec4c304dU, 0x08fb26fc9ee5016cU,
+ 0x3f04e1d969232f74U, 0x6177e1befead7bb3U,
+ 0x778108b4e9089ab6U, 0x6a6e2ab4f012f6aeU
+};
+
+static u64_t hash_c_str(const wchar_t *str, u64_t seed, int *pcount)
+{
+ int count = *pcount << 2;
+ u64_t acc = seed;
+ u64_t ch;
+
+ while (count-- && (ch = *str++) != 0) {
+ acc ^= ch;
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ }
+
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ acc ^= randbox[acc & 0xf];
+
+ *pcount = count >> 2;
+
+ return acc;
+}
+
+static u64_t hash_buf(const mem_t *ptr, ucnum size, u64_t seed, int *pcount)
+{
+ const u64_t *buf = coerce(const u64_t *, ptr);
+ int count = *pcount << 2;
+ u64_t acc = seed;
+
+ for (; size >= sizeof *buf && count--; size -= sizeof *buf, buf++) {
+ u64_t in = *buf;
+ acc ^= in;
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ }
+
+ if (size != 0) {
+ const mem_t *tail = coerce(const mem_t *, ptr);
+ u64_t in = 0;
+
+ switch (size) {
+ case 7:
+ in |= convert(u64_t, tail[6]) << 48;
+ break;
+ /* fallthrough */
+ case 6:
+ in |= convert(u64_t, tail[5]) << 40;
+ break;
+ /* fallthrough */
+ case 5:
+ in |= convert(u64_t, tail[4]) << 32;
+ break;
+ case 4:
+ in |= convert(u64_t, tail[3]) << 24;
+ break;
+ case 3:
+ in |= convert(u64_t, tail[2]) << 16;
+ /* fallthrough */
+ case 2:
+ in |= convert(u64_t, tail[1]) << 8;
+ /* fallthrough */
+ case 1:
+ in |= convert(u64_t, tail[0]);
+ break;
+ }
+
+ acc ^= in;
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ }
+
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ acc ^= randbox[acc & 0xf];
+ acc = acc >> 1 | acc << (64 - 1);
+ acc ^= randbox[acc & 0xf];
+
+ *pcount = count >> 2;
+
+ return acc;
+}
+
+#elif SIZEOF_PTR == 4
+
static u32_t randbox[] = {
0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U,
0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU,
@@ -177,6 +278,10 @@ static u32_t hash_buf(const mem_t *ptr, ucnum size, u32_t seed, int *pcount)
return acc;
}
+#else
+#error portme
+#endif
+
static ucnum hash_double(double n)
{
union hack {
@@ -1968,15 +2073,19 @@ static val set_hash_traversal_limit(val lim)
static val gen_hash_seed(void)
{
val self = lit("gen-hash-seed");
- val time = time_sec_usec();
+ val time = time_sec_nsec();
ucnum sec = convert(ucnum, c_time(car(time), self));
- ucnum usec = c_unum(cdr(time), self);
+ ucnum nsec = c_unum(cdr(time), self);
#if HAVE_UNISTD_H
ucnum pid = convert(ucnum, getpid());
#else
ucnum pid = 0;
#endif
- return unum(sec ^ (usec << 12) ^ pid);
+#if SIZEOF_PTR == 8
+ return unum((sec << 32) ^ (pid << 16) ^ nsec);
+#else
+ return unum(sec ^ (nsec << 12) ^ pid);
+#endif
}
void hash_early_init(void)