diff options
-rw-r--r-- | hash.c | 43 | ||||
-rw-r--r-- | hash.h | 3 | ||||
-rw-r--r-- | txr.1 | 88 |
3 files changed, 125 insertions, 9 deletions
@@ -34,6 +34,9 @@ #include <signal.h> #include "config.h" #include ALLOCA_H +#if HAVE_UNISTD_H +#include <unistd.h> +#endif #include "lib.h" #include "gc.h" #include "args.h" @@ -44,6 +47,7 @@ #include "eval.h" #include "cadr.h" #include "itypes.h" +#include "arith.h" #include "hash.h" typedef enum hash_flags { @@ -82,10 +86,13 @@ struct hash_iter { val cons; }; +#define hash_seed (deref(lookup_var_l(nil, hash_seed_s))) + static_forward(struct hash_ops hash_eql_ops); static_forward(struct hash_ops hash_equal_ops); val weak_keys_k, weak_vals_k, equal_based_k, eql_based_k, userdata_k; +val hash_seed_s; /* * Dynamic lists built up during gc. @@ -681,7 +688,7 @@ static_def(struct hash_ops hash_equal_ops = hash_ops_init(equal_hash, equal, hash_assoc, hash_acons_new_c)); -val make_hash(val weak_keys, val weak_vals, val equal_based) +val make_seeded_hash(val weak_keys, val weak_vals, val equal_based, val seed) { if (weak_keys && equal_based) { uw_throwf(error_s, @@ -694,7 +701,9 @@ val make_hash(val weak_keys, val weak_vals, val equal_based) val table = vector(mod, nil); val hash = cobj(coerce(mem_t *, h), hash_s, &hash_ops); - h->seed = 0; + h->seed = convert(u32_t, c_unum(default_arg(seed, + if3(hash_seed_s, + hash_seed, zero)))); h->flags = convert(hash_flags_t, flags); h->modulus = c_num(mod); h->count = 0; @@ -708,6 +717,11 @@ val make_hash(val weak_keys, val weak_vals, val equal_based) } } +val make_hash(val weak_keys, val weak_vals, val equal_based) +{ + return make_seeded_hash(weak_keys, weak_vals, equal_based, nil); +} + val make_similar_hash(val existing) { struct hash *ex = coerce(struct hash *, cobj_handle(existing, hash_s)); @@ -965,10 +979,10 @@ val hash_eql(val obj) return num_fast(eql_hash(obj, &lim)); } -val hash_equal(val obj) +val hash_equal(val obj, val seed) { int lim = hash_rec_limit; - return num_fast(equal_hash(obj, &lim, 0)); + return num_fast(equal_hash(obj, &lim, if3(missingp(seed), 0, c_unum(seed)))); } /* @@ -1499,6 +1513,19 @@ static val set_hash_rec_limit(val lim) return old; } +static val gen_hash_seed(void) +{ + val time = time_sec_usec(); + ucnum sec = convert(ucnum, c_num(car(time))); + ucnum usec = convert(ucnum, c_num(cdr(time))); +#if HAVE_UNISTD_H + ucnum pid = convert(ucnum, getpid()); +#else + ucnum pid = 0; +#endif + return unum(sec ^ (usec << 12) ^ pid); +} + void hash_init(void) { weak_keys_k = intern(lit("weak-keys"), keyword_package); @@ -1506,9 +1533,12 @@ void hash_init(void) equal_based_k = intern(lit("equal-based"), keyword_package); eql_based_k = intern(lit("eql-based"), keyword_package); userdata_k = intern(lit("userdata"), keyword_package); + hash_seed_s = intern(lit("*hash-seed*"), user_package); val ghu = func_n1(get_hash_userdata); - reg_fun(intern(lit("make-hash"), user_package), func_n3(make_hash)); + reg_var(hash_seed_s, zero); + + reg_fun(intern(lit("make-hash"), user_package), func_n4o(make_seeded_hash, 3)); reg_fun(intern(lit("make-similar-hash"), user_package), func_n1(make_similar_hash)); reg_fun(intern(lit("copy-hash"), user_package), func_n1(copy_hash)); reg_fun(intern(lit("hash"), user_package), func_n0v(hashv)); @@ -1529,7 +1559,7 @@ void hash_init(void) reg_fun(intern(lit("hashp"), user_package), func_n1(hashp)); reg_fun(intern(lit("maphash"), user_package), func_n2(maphash)); reg_fun(intern(lit("hash-eql"), user_package), func_n1(hash_eql)); - reg_fun(intern(lit("hash-equal"), user_package), func_n1(hash_equal)); + reg_fun(intern(lit("hash-equal"), user_package), func_n2o(hash_equal, 1)); reg_fun(intern(lit("hash-keys"), user_package), func_n1(hash_keys)); reg_fun(intern(lit("hash-values"), user_package), func_n1(hash_values)); reg_fun(intern(lit("hash-pairs"), user_package), func_n1(hash_pairs)); @@ -1552,4 +1582,5 @@ void hash_init(void) func_n1(set_hash_str_limit)); reg_fun(intern(lit("set-hash-rec-limit"), system_package), func_n1(set_hash_rec_limit)); + reg_fun(intern(lit("gen-hash-seed"), user_package), func_n0(gen_hash_seed)); } @@ -28,6 +28,7 @@ extern val weak_keys_k, weak_vals_k, equal_based_k, eql_based_k, userdata_k; cnum equal_hash(val obj, int *count, ucnum); +val make_seeded_hash(val weak_keys, val weak_vals, val equal_based, val seed); val make_hash(val weak_keys, val weak_vals, val equal_based); val make_similar_hash(val existing); val copy_hash(val existing); @@ -49,7 +50,7 @@ val maphash(val func, val hash); val hash_begin(val hash); val hash_next(val iter); val hash_eql(val obj); -val hash_equal(val obj); +val hash_equal(val obj, val seed); val hashv(struct args *args); val hashl(val args); val hash_construct(val hashl_args, val pairs); @@ -39866,7 +39866,7 @@ and .SS* Hashing Library .coNP Functions @ make-hash and @ hash .synb -.mets (make-hash < weak-keys < weak-vals << equal-based ) +.mets (make-hash < weak-keys < weak-vals < equal-based <> [ hash-seed ]) .mets (hash {:weak-keys | :weak-vals | .mets \ \ \ \ \ \ :eql-based | :equal-based | .mets \ \ \ \ \ \ :userdata << obj }*) @@ -39895,6 +39895,18 @@ The hash function defaults all three of these properties to false, and allows them to be overridden to true by the presence of keyword arguments. +The optional +.meta hash-seed +parameter must be an integer, if specified. Its value perturbs the hashing +function of the hash table, which affects +.code :equal-based +hash tables, when character strings and buffers are used as keys. +If +.meta hash-seed +is omitted, then the value of the +.code *hash-seed* +variable is used as the seed. + It is an error to attempt to construct an .codn equal -based hash table which has weak keys. @@ -40641,7 +40653,7 @@ function. .coNP Functions @ hash-eql and @ hash-equal .synb .mets (hash-eql << object ) -.mets (hash-equal << object ) +.mets (hash-equal < object <> [ hash-seed ]) .syne .desc These functions each compute an integer hash value from the internal @@ -40678,6 +40690,27 @@ an equality substitution via an .code equal method. See the Equality Substitution section under Structures. +The optional +.meta hash-seed +value perturbs the hashing function used by +.code hash-equal +for strings and buffer objects. This seed value must be a non-negative integer +no wider than 32 bits: that is, in the range 0 to 4294967295. +If the value isn't specified, it defaults to zero. +Effectively, each possible value of the seed specifies a different hashing +function. If two objects +.code A +and +.code B +are the same under the +.code equal +function, then +.code "(hash-equal A S)" +and +.code "(hash-equal B S)" +each produce the same integer hash value for any valid seed value +.codn S . + .coNP Functions @, hash_keys @, hash_values @ hash_pairs and @ hash_alist .synb .mets (hash-keys << hash ) @@ -40998,6 +41031,57 @@ to .code nil if there is no next value. +.coNP Special variable @ *hash-seed* +.desc +The +.code *hash-seed* +special variable is initialized with a value of zero. Whenever a new +hash table is explicitly or implicitly created, it takes its seed from +the value of the +.code *hash-seed* +variable in the current dynamic environment. + +The only situation in which +.code *hash-seed* +is not used when creating a new hash table is when +.code make-hash +is called with an argument given for the optional +.meta hash-seed +argument. + +Only +.codn equal -based +hash tables make use of their seed, and only for keys which are strings and +buffers. The purpose of the seed is to scramble the hashing function, to make +a hash table resistant to a type of denial-of-service attack, whereby a +malicious input causes a hash table to be populated with a large number of keys +which all map to the same hash table chain, causing the performance to severely +degrade. + +The value of +.code *hash-seed* +must be a non-negative integer, no wider than 32 bits. + +.coNP Function @ gen-hash-seed +.synb +.mets (gen-hash-seed) +.syne +.desc +The +.code gen-hash-seed +function returns an integer value suitable for the +.code *hash-seed* +variable, or as the +.code hash-seed +argument of the +.code make-hash +and +.code hash-equal +functions. + +The value is derived from the host environment, from information such +as the process ID and time of day. + .SS* Partial Evaluation and Combinators .coNP Macros @ op and @ do .synb |