diff options
-rw-r--r-- | rand.c | 65 | ||||
-rw-r--r-- | stdlib/doc-syms.tl | 5 | ||||
-rw-r--r-- | txr.1 | 31 |
3 files changed, 97 insertions, 4 deletions
@@ -31,6 +31,7 @@ #include <wchar.h> #include <limits.h> #include <signal.h> +#include <math.h> #include "config.h" #if HAVE_UNISTD_H #include <unistd.h> @@ -44,6 +45,7 @@ #include "buf.h" #include "txr.h" #include "itypes.h" +#include "gc.h" #include "rand.h" #define random_warmup (deref(lookup_var_l(nil, random_warmup_s))) @@ -284,7 +286,7 @@ val random_fixnum(val state) return num(rand32(r) & NUM_MAX); } -static val random_float(val state) +static double random_float_impl(val state) { val self = lit("random-float"); struct rand_state *r = coerce(struct rand_state *, @@ -309,7 +311,12 @@ static val random_float(val state) * this subtraction, reducing us to 51 bits of precision. * Still; an attractive approach. */ - return flo(h.d - 1.0); + return h.d - 1.0; +} + +static val random_float(val state) +{ + return flo(random_float_impl(state)); } static val random_float_incl(val state) @@ -493,6 +500,59 @@ void rand_compat_fixup(int compat_ver) } } +static double elrd(double denom, val state) +{ + return exp(log(random_float_impl(state))/denom); +} + +static double flrd(double weight, val state) +{ + return floor(log(random_float_impl(state))/log(1.0 - weight)); +} + +static val random_sample(val size, val seq, val state_in) +{ + val self = lit("random-sample"); + val state = default_arg(state_in, random_state); + cnum sz = c_fixnum(size, self), i; + seq_iter_t iter; + val samp, elem; + + seq_iter_init(self, &iter, seq); + + samp = vector(size, nil); + + for (i = 0; i < sz && seq_get(&iter, &elem); i++) + samp->v.vec[i] = elem; + + if (i < sz) { + vec_set_length(samp, unum(i)); + return samp; + } + + if (iter.inf.kind == SEQ_VECLIKE) { + cnum len = c_fixnum(length(seq), self); + double weight = elrd(sz, state); + + while (i < len) { + i += flrd(weight, state) + 1; + if (i < len) { + samp->v.vec[c_n(random(state, size))] = ref(seq, unum(i)); + mut(samp); + weight *= elrd(sz, state); + } + } + } else { + for (; seq_get(&iter, &elem) && i <= INT_PTR_MAX; i++) { + cnum r = c_n(random(state, unum(i))); + if (r < sz) + samp->v.vec[r] = elem; + } + } + + return samp; +} + void rand_init(void) { random_state_var_s = intern(lit("*random-state*"), user_package); @@ -515,4 +575,5 @@ void rand_init(void) reg_fun(intern(lit("random"), user_package), func_n2(random)); reg_fun(intern(lit("rand"), user_package), func_n2o(rnd, 1)); reg_fun(intern(lit("random-buf"), user_package), func_n2o(random_buf, 1)); + reg_fun(intern(lit("random-sample"), user_package), func_n3o(random_sample, 2)); } diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl index 723ea9c2..71302389 100644 --- a/stdlib/doc-syms.tl +++ b/stdlib/doc-syms.tl @@ -175,7 +175,7 @@ ("blksize-t" "N-01153D9E") ("block" "D-006F") ("block*" "N-02F60DCE") - ("bool" "D-002D") + ("bool" "D-002C") ("boundp" "N-01FBF828") ("bracket" "N-02400F97") ("break-str" "N-00A9DB25") @@ -1085,7 +1085,7 @@ ("kill" "N-0386CCD5") ("krs" "N-02D33A4D") ("labels" "N-0209307D") - ("lambda" "D-002C") + ("lambda" "D-002D") ("lambda-match" "N-031E43FF") ("lambda-set" "N-02FEBA97") ("last" "D-0042") @@ -1547,6 +1547,7 @@ ("random-fixnum" "N-03A57C86") ("random-float" "N-0276C623") ("random-float-incl" "N-0276C623") + ("random-sample" "N-0107F246") ("random-state-get-vec" "N-005C0F98") ("random-state-p" "N-00C9A749") ("range" "N-033BE5A1") @@ -64759,6 +64759,37 @@ for a description of .code buf objects. +.coNP Function @ random-sample +.synb +.mets (random-sample < size < seq <> [ random-state ]) +.syne +.desc +The +.code random-sample +function returns a vector of +.meta size +randomly selected elements from the sequence +.metn seq , +using reservoir sampling. + +If the number of elements in +.meta seq +is equal to or smaller than +.metn size , +then the function returns a vector of all the elements of +.meta seq +in their original order. + +In other cases, the selected elements are not required to appear +in their original order. + +No element of sequence +.meta seq +is selected more than once; duplicate values can appear +in the output only if +.meta seq +itself contains duplicates. + .SS* Time .coNP Functions @, time @ time-usec and @ time-nsec .synb |