Reduce work done by hashing.

Curtail traversal of objects and strings. * hash.c (struct hash): hash_fun member takes int * parameter now. (HASH_STR_LIMIT, HASH_REC_LIMIT): New macros. (hash_c_str): Hash only HASH_STR_LIMIT characters. (equal_hash): Becomes extern function. Takes pointer-to-int count argument, which is decremented. Function stops recursing and returns zero when this hits zero. (eql_hash): Also takes int * param, for compatibility with function pointer in struct hash. This parameter is not used, though. (cobj_hash_op): Take pointer-to-count parameter, but ignore it. (hash_hash_op): Take pointer-to-count parameter, decrement and check that it has not hit zero, pass down to equal hash. (hash_grow, gethash_c, gethash, gethash_f, gethash_n, remhash): Initialize a counter to HASH_REC_LIMIT and pass down to hashing function. (hash_eql): Pass down a pointer to a dummy counter to eql_hash. (hash_equal): Initialize a counter to HASH_REC_LIMIT and pass down to hash_equal. * hash.h (equal_hash): Declared. * lib.h (cobj_ops): hash member takes int * parameter. (cobj_hash_op): Declaration updated with new param. * struct.c (struct_inst_hash): Takes new int * parameter for count. Calls equal_hash instead of hash_equal, eliminating c_num calls; pointer to count is passed to equal_hash.
author: Kaz Kylheku <kaz@kylheku.com> 2016-05-27 20:47:15 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2016-05-27 20:47:15 -0700
commit: 5a4d281d6701e9f27b67bf9def3842780410ab56 (patch)
tree: 153269999a6bb764e8596befd47b8e5c4b9bf1ac
parent: 3d9e60cb8ba2a9698336ae4697f59e14282a673d (diff)
download: txr-5a4d281d6701e9f27b67bf9def3842780410ab56.tar.gz
txr-5a4d281d6701e9f27b67bf9def3842780410ab56.tar.bz2
txr-5a4d281d6701e9f27b67bf9def3842780410ab56.zip
4 files changed, 59 insertions, 37 deletions
diff --git a/hash.c b/hash.c
index 01d0b90c..f40b3a50 100644
--- a/hash.c
+++ b/hash.c
@@ -56,7 +56,7 @@ struct hash {
   cnum count;
   val userdata;
   int usecount;
-  cnum (*hash_fun)(val);
+  cnum (*hash_fun)(val, int *);
   val (*equal_fun)(val, val);
   val (*assoc_fun)(val key, val list);
   val (*acons_new_c_fun)(val key, loc new_p, loc list);
@@ -82,6 +82,9 @@ static struct hash_iter *reachable_iters;
 loc gethash_l(val hash, val key, loc new_p);
 #endif
 
+#define HASH_STR_LIMIT 128
+#define HASH_REC_LIMIT 32
+
 /*
  * This is is an adaptation of hashpjw, from Compilers: Principles, Techniques
  * and Tools, Aho, Sethi, Ulman, 1988. P. 436.  The register is wider by
@@ -91,8 +94,9 @@ loc gethash_l(val hash, val key, loc new_p);
  */
 static unsigned long hash_c_str(const wchar_t *str)
 {
+  int count = HASH_STR_LIMIT;
   unsigned long h = 0;
-  while (*str) {
+  while (*str && count--) {
     unsigned long g;
     h = (h << 4) + *str++;
     g = h & 0x7C000000;
@@ -119,16 +123,19 @@ static cnum hash_double(double n)
   return h & NUM_MAX;
 }
 
-static cnum equal_hash(val obj)
+cnum equal_hash(val obj, int *count)
 {
+  if ((*count)-- <= 0)
+    return 0;
+
   switch (type(obj)) {
   case NIL:
     return NUM_MAX;
   case LIT:
     return hash_c_str(litptr(obj)) & NUM_MAX;
   case CONS:
-    return (equal_hash(obj->c.car)
-            + 32 * (equal_hash(obj->c.cdr) & (NUM_MAX / 16))) & NUM_MAX;
+    return (equal_hash(obj->c.car, count)
+            + 32 * (equal_hash(obj->c.cdr, count) & (NUM_MAX / 16))) & NUM_MAX;
   case STR:
     return hash_c_str(obj->st.str) & NUM_MAX;
   case CHR:
@@ -146,27 +153,28 @@ static cnum equal_hash(val obj)
     }
     break;
   case FUN:
-    return (coerce(cnum, obj->f.f.interp_fun) + equal_hash(obj->f.env)) & NUM_MAX;
+    return (coerce(cnum, obj->f.f.interp_fun)
+            + equal_hash(obj->f.env, count)) & NUM_MAX;
   case VEC:
     {
       val length = obj->v.vec[vec_length];
-      cnum i, h = equal_hash(obj->v.vec[vec_length]);
+      cnum i, h = equal_hash(obj->v.vec[vec_length], count);
       cnum len = c_num(length);
 
       for (i = 0; i < len; i++) {
         h = 32 * (h & (NUM_MAX / 16));
-        h += equal_hash(obj->v.vec[i]);
+        h += equal_hash(obj->v.vec[i], count);
         h &= NUM_MAX;
       }
 
       return h;
     }
   case LCONS:
-    return (equal_hash(car(obj))
-            + 32 * (equal_hash(cdr(obj)) & (NUM_MAX / 16))) & NUM_MAX;
+    return (equal_hash(car(obj), count)
+            + 32 * (equal_hash(cdr(obj), count) & (NUM_MAX / 16))) & NUM_MAX;
   case LSTR:
-    lazy_str_force(obj);
-    return equal_hash(obj->ls.prefix);
+    lazy_str_force_upto(obj, num(HASH_STR_LIMIT - 1));
+    return equal_hash(obj->ls.prefix, count);
   case BGNUM:
     return mp_hash(mp(obj)) & NUM_MAX;
   case FLNUM:
@@ -175,18 +183,18 @@ static cnum equal_hash(val obj)
     if (obj->co.ops->equalsub) {
       val sub = obj->co.ops->equalsub(obj);
       if (sub)
-        return equal_hash(sub);
+        return equal_hash(sub, count);
     }
-    return obj->co.ops->hash(obj) & NUM_MAX;
+    return obj->co.ops->hash(obj, count) & NUM_MAX;
   case RNG:
-    return (equal_hash(obj->rn.from)
-            + 32 * (equal_hash(obj->rn.to) & (NUM_MAX / 16))) & NUM_MAX;
+    return (equal_hash(obj->rn.from, count)
+            + 32 * (equal_hash(obj->rn.to, count) & (NUM_MAX / 16))) & NUM_MAX;
   }
 
   internal_error("unhandled case in equal function");
 }
 
-static cnum eql_hash(val obj)
+static cnum eql_hash(val obj, int *count)
 {
   switch (tag(obj)) {
   case TAG_PTR:
@@ -198,8 +206,8 @@ static cnum eql_hash(val obj)
     case FLNUM:
       return hash_double(obj->fl.n);
     case RNG:
-      return (eql_hash(obj->rn.from)
-              + 32 * (eql_hash(obj->rn.to) & (NUM_MAX / 16))) & NUM_MAX;
+      return (eql_hash(obj->rn.from, count)
+              + 32 * (eql_hash(obj->rn.to, count) & (NUM_MAX / 16))) & NUM_MAX;
     default:
       switch (sizeof (mem_t *)) {
       case 4:
@@ -224,8 +232,10 @@ static cnum eql_hash(val obj)
   abort();
 }
 
-cnum cobj_hash_op(val obj)
+cnum cobj_hash_op(val obj, int *count)
 {
+  (void) count;
+
   switch (sizeof (mem_t *)) {
   case 4:
     return (coerce(cnum, obj) >> 4) & NUM_MAX;
@@ -333,12 +343,15 @@ static val hash_equal_op(val left, val right)
   return eq(pending, nil);
 }
 
-static cnum hash_hash_op(val obj)
+static cnum hash_hash_op(val obj, int *count)
 {
   cnum out = 0;
   struct hash *h = coerce(struct hash *, obj->co.handle);
   val iter, cell;
 
+  if ((*count)-- <= 0)
+    return 0;
+
   switch (sizeof (mem_t *)) {
   case 4:
     out += coerce(cnum, h->hash_fun) >> 4;
@@ -346,13 +359,13 @@ static cnum hash_hash_op(val obj)
     out += coerce(cnum, h->hash_fun) >> 5;
   }
 
-  out += equal_hash(h->userdata);
+  out += equal_hash(h->userdata, count);
   out &= NUM_MAX;
 
   iter = hash_begin(obj);
 
   while ((cell = hash_next(iter)) != nil) {
-    out += equal_hash(cell);
+    out += equal_hash(cell, count);
     out &= NUM_MAX;
   }
 
@@ -479,8 +492,9 @@ static void hash_grow(struct hash *h, val hash)
       val entry = car(conses);
       val next = cdr(conses);
       val key = car(entry);
+      int lim = HASH_REC_LIMIT;
       loc pchain = vecref_l(new_table,
-                            num_fast(h->hash_fun(key) % new_modulus));
+                            num_fast(h->hash_fun(key, &lim) % new_modulus));
       set(cdr_l(conses), deref(pchain));
       set(pchain, conses);
       conses = next;
@@ -573,7 +587,8 @@ val copy_hash(val existing)
 val gethash_c(val hash, val key, loc new_p)
 {
   struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
-  loc pchain = vecref_l(h->table, num_fast(h->hash_fun(key) % h->modulus));
+  int lim = HASH_REC_LIMIT;
+  loc pchain = vecref_l(h->table, num_fast(h->hash_fun(key, &lim) % h->modulus));
   val old = deref(pchain);
   val cell = h->acons_new_c_fun(key, new_p, pchain);
   if (old != deref(pchain) && ++h->count > 2 * h->modulus && h->usecount == 0)
@@ -584,7 +599,8 @@ val gethash_c(val hash, val key, loc new_p)
 val gethash(val hash, val key)
 {
   struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
-  val chain = vecref(h->table, num_fast(h->hash_fun(key) % h->modulus));
+  int lim = HASH_REC_LIMIT;
+  val chain = vecref(h->table, num_fast(h->hash_fun(key, &lim) % h->modulus));
   val found = h->assoc_fun(key, chain);
   return cdr(found);
 }
@@ -608,7 +624,8 @@ val inhash(val hash, val key, val init)
 val gethash_f(val hash, val key, loc found)
 {
   struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
-  val chain = vecref(h->table, num_fast(h->hash_fun(key) % h->modulus));
+  int lim = HASH_REC_LIMIT;
+  val chain = vecref(h->table, num_fast(h->hash_fun(key, &lim) % h->modulus));
   set(found, h->assoc_fun(key, chain));
   return cdr(deref(found));
 }
@@ -616,7 +633,8 @@ val gethash_f(val hash, val key, loc found)
 val gethash_n(val hash, val key, val notfound_val)
 {
   struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
-  val chain = vecref(h->table, num_fast(h->hash_fun(key) % h->modulus));
+  int lim = HASH_REC_LIMIT;
+  val chain = vecref(h->table, num_fast(h->hash_fun(key, &lim) % h->modulus));
   val existing = h->assoc_fun(key, chain);
   return if3(existing, cdr(existing), default_bool_arg(notfound_val));
 }
@@ -638,7 +656,8 @@ val pushhash(val hash, val key, val value)
 val remhash(val hash, val key)
 {
   struct hash *h = coerce(struct hash *, cobj_handle(hash, hash_s));
-  loc pchain = vecref_l(h->table, num_fast(h->hash_fun(key) % h->modulus));
+  int lim = HASH_REC_LIMIT;
+  loc pchain = vecref_l(h->table, num_fast(h->hash_fun(key, &lim) % h->modulus));
   val existing = h->assoc_fun(key, deref(pchain));
 
   if (existing) {
@@ -741,12 +760,14 @@ val maphash(val fun, val hash)
 
 val hash_eql(val obj)
 {
-  return num_fast(eql_hash(obj));
+  int lim = 0;
+  return num_fast(eql_hash(obj, &lim));
 }
 
 val hash_equal(val obj)
 {
-  return num_fast(equal_hash(obj));
+  int lim = HASH_REC_LIMIT;
+  return num_fast(equal_hash(obj, &lim));
 }
 
 /*
diff --git a/hash.h b/hash.h
index a1ec19fc..29ff19d7 100644
--- a/hash.h
+++ b/hash.h
@@ -26,6 +26,7 @@
 
 extern val weak_keys_k, weak_vals_k, equal_based_k;
 
+cnum equal_hash(val obj, int *count);
 val make_hash(val weak_keys, val weak_vals, val equal_based);
 val make_similar_hash(val existing);
 val copy_hash(val existing);
diff --git a/lib.h b/lib.h
index 18f7f6d9..a004926b 100644
--- a/lib.h
+++ b/lib.h
@@ -226,7 +226,7 @@ struct cobj_ops {
   void (*print)(val self, val stream, val pretty);
   void (*destroy)(val self);
   void (*mark)(val self);
-  cnum (*hash)(val self);
+  cnum (*hash)(val self, int *count);
   val (*equalsub)(val self);
 };
 
@@ -245,7 +245,7 @@ void cobj_print_op(val, val, val);
 void cobj_destroy_stub_op(val);
 void cobj_destroy_free_op(val);
 void cobj_mark_op(val);
-cnum cobj_hash_op(val);
+cnum cobj_hash_op(val, int *);
 
 struct env {
   obj_common;
diff --git a/struct.c b/struct.c
index 7cfdee24..2318ee8b 100644
--- a/struct.c
+++ b/struct.c
@@ -1044,17 +1044,17 @@ static val struct_inst_equal(val left, val right)
   return t;
 }
 
-static cnum struct_inst_hash(val obj)
+static cnum struct_inst_hash(val obj, int *count)
 {
   struct struct_inst *si = coerce(struct struct_inst *, obj->co.handle);
   struct struct_type *st = si->type;
-  cnum nslots = st->nslots, sl, out = c_num(hash_equal(st->self));
+  cnum nslots = st->nslots, sl, out = equal_hash(st->self, count);
 
   check_init_lazy_struct(obj, si);
 
   for (sl = 0; sl < nslots; sl++) {
-    val hash = hash_equal(si->slot[sl]);
-    out += c_num(hash);
+    cnum hash = equal_hash(si->slot[sl], count);
+    out += hash;
     out &= NUM_MAX;
   }
author	Kaz Kylheku <kaz@kylheku.com>	2016-05-27 20:47:15 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2016-05-27 20:47:15 -0700
commit	5a4d281d6701e9f27b67bf9def3842780410ab56 (patch)
tree	153269999a6bb764e8596befd47b8e5c4b9bf1ac
parent	3d9e60cb8ba2a9698336ae4697f59e14282a673d (diff)
download	txr-5a4d281d6701e9f27b67bf9def3842780410ab56.tar.gz txr-5a4d281d6701e9f27b67bf9def3842780410ab56.tar.bz2 txr-5a4d281d6701e9f27b67bf9def3842780410ab56.zip