summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2018-07-05 19:58:18 -0700
committerKaz Kylheku <kaz@kylheku.com>2018-07-05 19:58:18 -0700
commit6e7c0080cdb160d18c0a83f5f85e92a9b988bb5f (patch)
tree10c2a569393b82bb403da79e9ad16c8ce7f7feb7
parent44b66777caafeebf543c32c9373632c04f748353 (diff)
downloadtxr-6e7c0080cdb160d18c0a83f5f85e92a9b988bb5f.tar.gz
txr-6e7c0080cdb160d18c0a83f5f85e92a9b988bb5f.tar.bz2
txr-6e7c0080cdb160d18c0a83f5f85e92a9b988bb5f.zip
hashing: overhaul part 2.
In this commit we feature-complete the seeded hashing implementation. The *hash-seed* special variable is provided from which newly created hashes take their seed. The make-hash and hash-equal functions get an optional seed argument. A function called gen-hash-seed is provided for obtaining a randomized seed. * hash.c (hash_seed): New macro. (hash_seed_s): New symbol variable. (make_seeded_hash): New function, made from make_hash. (make_hash): Reduced to wrapper around make_seeded_hash. (hash-equal): Take seed argument. (gen_hash_seed): New function. (hash_init): Initialize hash_seed_s and register *hash-seed* special variable. Re-register make-hash with new optional parameter, with regard to make_seeded_hash. Re-register hash-equal with new optional parameter. Register gen-hash-seed. * hash.h (make_seeded_hash): Declared. (hash_equal): Declaration updated. * txr.1: Documented optional seed arguments of make-hash and hash-equal. Documented new *hash-seed* variable and gen-hash-seed function.
-rw-r--r--hash.c43
-rw-r--r--hash.h3
-rw-r--r--txr.188
3 files changed, 125 insertions, 9 deletions
diff --git a/hash.c b/hash.c
index 68a7e75c..e8b9a593 100644
--- a/hash.c
+++ b/hash.c
@@ -34,6 +34,9 @@
#include <signal.h>
#include "config.h"
#include ALLOCA_H
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
#include "lib.h"
#include "gc.h"
#include "args.h"
@@ -44,6 +47,7 @@
#include "eval.h"
#include "cadr.h"
#include "itypes.h"
+#include "arith.h"
#include "hash.h"
typedef enum hash_flags {
@@ -82,10 +86,13 @@ struct hash_iter {
val cons;
};
+#define hash_seed (deref(lookup_var_l(nil, hash_seed_s)))
+
static_forward(struct hash_ops hash_eql_ops);
static_forward(struct hash_ops hash_equal_ops);
val weak_keys_k, weak_vals_k, equal_based_k, eql_based_k, userdata_k;
+val hash_seed_s;
/*
* Dynamic lists built up during gc.
@@ -681,7 +688,7 @@ static_def(struct hash_ops hash_equal_ops = hash_ops_init(equal_hash, equal,
hash_assoc,
hash_acons_new_c));
-val make_hash(val weak_keys, val weak_vals, val equal_based)
+val make_seeded_hash(val weak_keys, val weak_vals, val equal_based, val seed)
{
if (weak_keys && equal_based) {
uw_throwf(error_s,
@@ -694,7 +701,9 @@ val make_hash(val weak_keys, val weak_vals, val equal_based)
val table = vector(mod, nil);
val hash = cobj(coerce(mem_t *, h), hash_s, &hash_ops);
- h->seed = 0;
+ h->seed = convert(u32_t, c_unum(default_arg(seed,
+ if3(hash_seed_s,
+ hash_seed, zero))));
h->flags = convert(hash_flags_t, flags);
h->modulus = c_num(mod);
h->count = 0;
@@ -708,6 +717,11 @@ val make_hash(val weak_keys, val weak_vals, val equal_based)
}
}
+val make_hash(val weak_keys, val weak_vals, val equal_based)
+{
+ return make_seeded_hash(weak_keys, weak_vals, equal_based, nil);
+}
+
val make_similar_hash(val existing)
{
struct hash *ex = coerce(struct hash *, cobj_handle(existing, hash_s));
@@ -965,10 +979,10 @@ val hash_eql(val obj)
return num_fast(eql_hash(obj, &lim));
}
-val hash_equal(val obj)
+val hash_equal(val obj, val seed)
{
int lim = hash_rec_limit;
- return num_fast(equal_hash(obj, &lim, 0));
+ return num_fast(equal_hash(obj, &lim, if3(missingp(seed), 0, c_unum(seed))));
}
/*
@@ -1499,6 +1513,19 @@ static val set_hash_rec_limit(val lim)
return old;
}
+static val gen_hash_seed(void)
+{
+ val time = time_sec_usec();
+ ucnum sec = convert(ucnum, c_num(car(time)));
+ ucnum usec = convert(ucnum, c_num(cdr(time)));
+#if HAVE_UNISTD_H
+ ucnum pid = convert(ucnum, getpid());
+#else
+ ucnum pid = 0;
+#endif
+ return unum(sec ^ (usec << 12) ^ pid);
+}
+
void hash_init(void)
{
weak_keys_k = intern(lit("weak-keys"), keyword_package);
@@ -1506,9 +1533,12 @@ void hash_init(void)
equal_based_k = intern(lit("equal-based"), keyword_package);
eql_based_k = intern(lit("eql-based"), keyword_package);
userdata_k = intern(lit("userdata"), keyword_package);
+ hash_seed_s = intern(lit("*hash-seed*"), user_package);
val ghu = func_n1(get_hash_userdata);
- reg_fun(intern(lit("make-hash"), user_package), func_n3(make_hash));
+ reg_var(hash_seed_s, zero);
+
+ reg_fun(intern(lit("make-hash"), user_package), func_n4o(make_seeded_hash, 3));
reg_fun(intern(lit("make-similar-hash"), user_package), func_n1(make_similar_hash));
reg_fun(intern(lit("copy-hash"), user_package), func_n1(copy_hash));
reg_fun(intern(lit("hash"), user_package), func_n0v(hashv));
@@ -1529,7 +1559,7 @@ void hash_init(void)
reg_fun(intern(lit("hashp"), user_package), func_n1(hashp));
reg_fun(intern(lit("maphash"), user_package), func_n2(maphash));
reg_fun(intern(lit("hash-eql"), user_package), func_n1(hash_eql));
- reg_fun(intern(lit("hash-equal"), user_package), func_n1(hash_equal));
+ reg_fun(intern(lit("hash-equal"), user_package), func_n2o(hash_equal, 1));
reg_fun(intern(lit("hash-keys"), user_package), func_n1(hash_keys));
reg_fun(intern(lit("hash-values"), user_package), func_n1(hash_values));
reg_fun(intern(lit("hash-pairs"), user_package), func_n1(hash_pairs));
@@ -1552,4 +1582,5 @@ void hash_init(void)
func_n1(set_hash_str_limit));
reg_fun(intern(lit("set-hash-rec-limit"), system_package),
func_n1(set_hash_rec_limit));
+ reg_fun(intern(lit("gen-hash-seed"), user_package), func_n0(gen_hash_seed));
}
diff --git a/hash.h b/hash.h
index 61d58424..2b8f70b2 100644
--- a/hash.h
+++ b/hash.h
@@ -28,6 +28,7 @@
extern val weak_keys_k, weak_vals_k, equal_based_k, eql_based_k, userdata_k;
cnum equal_hash(val obj, int *count, ucnum);
+val make_seeded_hash(val weak_keys, val weak_vals, val equal_based, val seed);
val make_hash(val weak_keys, val weak_vals, val equal_based);
val make_similar_hash(val existing);
val copy_hash(val existing);
@@ -49,7 +50,7 @@ val maphash(val func, val hash);
val hash_begin(val hash);
val hash_next(val iter);
val hash_eql(val obj);
-val hash_equal(val obj);
+val hash_equal(val obj, val seed);
val hashv(struct args *args);
val hashl(val args);
val hash_construct(val hashl_args, val pairs);
diff --git a/txr.1 b/txr.1
index 730c4f16..038c171e 100644
--- a/txr.1
+++ b/txr.1
@@ -39866,7 +39866,7 @@ and
.SS* Hashing Library
.coNP Functions @ make-hash and @ hash
.synb
-.mets (make-hash < weak-keys < weak-vals << equal-based )
+.mets (make-hash < weak-keys < weak-vals < equal-based <> [ hash-seed ])
.mets (hash {:weak-keys | :weak-vals |
.mets \ \ \ \ \ \ :eql-based | :equal-based |
.mets \ \ \ \ \ \ :userdata << obj }*)
@@ -39895,6 +39895,18 @@ The hash function defaults
all three of these properties to false, and allows them to be overridden to
true by the presence of keyword arguments.
+The optional
+.meta hash-seed
+parameter must be an integer, if specified. Its value perturbs the hashing
+function of the hash table, which affects
+.code :equal-based
+hash tables, when character strings and buffers are used as keys.
+If
+.meta hash-seed
+is omitted, then the value of the
+.code *hash-seed*
+variable is used as the seed.
+
It is an error to attempt to construct an
.codn equal -based
hash table which has weak keys.
@@ -40641,7 +40653,7 @@ function.
.coNP Functions @ hash-eql and @ hash-equal
.synb
.mets (hash-eql << object )
-.mets (hash-equal << object )
+.mets (hash-equal < object <> [ hash-seed ])
.syne
.desc
These functions each compute an integer hash value from the internal
@@ -40678,6 +40690,27 @@ an equality substitution via an
.code equal
method. See the Equality Substitution section under Structures.
+The optional
+.meta hash-seed
+value perturbs the hashing function used by
+.code hash-equal
+for strings and buffer objects. This seed value must be a non-negative integer
+no wider than 32 bits: that is, in the range 0 to 4294967295.
+If the value isn't specified, it defaults to zero.
+Effectively, each possible value of the seed specifies a different hashing
+function. If two objects
+.code A
+and
+.code B
+are the same under the
+.code equal
+function, then
+.code "(hash-equal A S)"
+and
+.code "(hash-equal B S)"
+each produce the same integer hash value for any valid seed value
+.codn S .
+
.coNP Functions @, hash_keys @, hash_values @ hash_pairs and @ hash_alist
.synb
.mets (hash-keys << hash )
@@ -40998,6 +41031,57 @@ to
.code nil
if there is no next value.
+.coNP Special variable @ *hash-seed*
+.desc
+The
+.code *hash-seed*
+special variable is initialized with a value of zero. Whenever a new
+hash table is explicitly or implicitly created, it takes its seed from
+the value of the
+.code *hash-seed*
+variable in the current dynamic environment.
+
+The only situation in which
+.code *hash-seed*
+is not used when creating a new hash table is when
+.code make-hash
+is called with an argument given for the optional
+.meta hash-seed
+argument.
+
+Only
+.codn equal -based
+hash tables make use of their seed, and only for keys which are strings and
+buffers. The purpose of the seed is to scramble the hashing function, to make
+a hash table resistant to a type of denial-of-service attack, whereby a
+malicious input causes a hash table to be populated with a large number of keys
+which all map to the same hash table chain, causing the performance to severely
+degrade.
+
+The value of
+.code *hash-seed*
+must be a non-negative integer, no wider than 32 bits.
+
+.coNP Function @ gen-hash-seed
+.synb
+.mets (gen-hash-seed)
+.syne
+.desc
+The
+.code gen-hash-seed
+function returns an integer value suitable for the
+.code *hash-seed*
+variable, or as the
+.code hash-seed
+argument of the
+.code make-hash
+and
+.code hash-equal
+functions.
+
+The value is derived from the host environment, from information such
+as the process ID and time of day.
+
.SS* Partial Evaluation and Combinators
.coNP Macros @ op and @ do
.synb