summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--rand.c65
-rw-r--r--stdlib/doc-syms.tl5
-rw-r--r--txr.131
3 files changed, 97 insertions, 4 deletions
diff --git a/rand.c b/rand.c
index 60e120d6..d9fe796d 100644
--- a/rand.c
+++ b/rand.c
@@ -31,6 +31,7 @@
#include <wchar.h>
#include <limits.h>
#include <signal.h>
+#include <math.h>
#include "config.h"
#if HAVE_UNISTD_H
#include <unistd.h>
@@ -44,6 +45,7 @@
#include "buf.h"
#include "txr.h"
#include "itypes.h"
+#include "gc.h"
#include "rand.h"
#define random_warmup (deref(lookup_var_l(nil, random_warmup_s)))
@@ -284,7 +286,7 @@ val random_fixnum(val state)
return num(rand32(r) & NUM_MAX);
}
-static val random_float(val state)
+static double random_float_impl(val state)
{
val self = lit("random-float");
struct rand_state *r = coerce(struct rand_state *,
@@ -309,7 +311,12 @@ static val random_float(val state)
* this subtraction, reducing us to 51 bits of precision.
* Still; an attractive approach.
*/
- return flo(h.d - 1.0);
+ return h.d - 1.0;
+}
+
+static val random_float(val state)
+{
+ return flo(random_float_impl(state));
}
static val random_float_incl(val state)
@@ -493,6 +500,59 @@ void rand_compat_fixup(int compat_ver)
}
}
+static double elrd(double denom, val state)
+{
+ return exp(log(random_float_impl(state))/denom);
+}
+
+static double flrd(double weight, val state)
+{
+ return floor(log(random_float_impl(state))/log(1.0 - weight));
+}
+
+static val random_sample(val size, val seq, val state_in)
+{
+ val self = lit("random-sample");
+ val state = default_arg(state_in, random_state);
+ cnum sz = c_fixnum(size, self), i;
+ seq_iter_t iter;
+ val samp, elem;
+
+ seq_iter_init(self, &iter, seq);
+
+ samp = vector(size, nil);
+
+ for (i = 0; i < sz && seq_get(&iter, &elem); i++)
+ samp->v.vec[i] = elem;
+
+ if (i < sz) {
+ vec_set_length(samp, unum(i));
+ return samp;
+ }
+
+ if (iter.inf.kind == SEQ_VECLIKE) {
+ cnum len = c_fixnum(length(seq), self);
+ double weight = elrd(sz, state);
+
+ while (i < len) {
+ i += flrd(weight, state) + 1;
+ if (i < len) {
+ samp->v.vec[c_n(random(state, size))] = ref(seq, unum(i));
+ mut(samp);
+ weight *= elrd(sz, state);
+ }
+ }
+ } else {
+ for (; seq_get(&iter, &elem) && i <= INT_PTR_MAX; i++) {
+ cnum r = c_n(random(state, unum(i)));
+ if (r < sz)
+ samp->v.vec[r] = elem;
+ }
+ }
+
+ return samp;
+}
+
void rand_init(void)
{
random_state_var_s = intern(lit("*random-state*"), user_package);
@@ -515,4 +575,5 @@ void rand_init(void)
reg_fun(intern(lit("random"), user_package), func_n2(random));
reg_fun(intern(lit("rand"), user_package), func_n2o(rnd, 1));
reg_fun(intern(lit("random-buf"), user_package), func_n2o(random_buf, 1));
+ reg_fun(intern(lit("random-sample"), user_package), func_n3o(random_sample, 2));
}
diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl
index 723ea9c2..71302389 100644
--- a/stdlib/doc-syms.tl
+++ b/stdlib/doc-syms.tl
@@ -175,7 +175,7 @@
("blksize-t" "N-01153D9E")
("block" "D-006F")
("block*" "N-02F60DCE")
- ("bool" "D-002D")
+ ("bool" "D-002C")
("boundp" "N-01FBF828")
("bracket" "N-02400F97")
("break-str" "N-00A9DB25")
@@ -1085,7 +1085,7 @@
("kill" "N-0386CCD5")
("krs" "N-02D33A4D")
("labels" "N-0209307D")
- ("lambda" "D-002C")
+ ("lambda" "D-002D")
("lambda-match" "N-031E43FF")
("lambda-set" "N-02FEBA97")
("last" "D-0042")
@@ -1547,6 +1547,7 @@
("random-fixnum" "N-03A57C86")
("random-float" "N-0276C623")
("random-float-incl" "N-0276C623")
+ ("random-sample" "N-0107F246")
("random-state-get-vec" "N-005C0F98")
("random-state-p" "N-00C9A749")
("range" "N-033BE5A1")
diff --git a/txr.1 b/txr.1
index 81ee2846..47b89655 100644
--- a/txr.1
+++ b/txr.1
@@ -64759,6 +64759,37 @@ for a description of
.code buf
objects.
+.coNP Function @ random-sample
+.synb
+.mets (random-sample < size < seq <> [ random-state ])
+.syne
+.desc
+The
+.code random-sample
+function returns a vector of
+.meta size
+randomly selected elements from the sequence
+.metn seq ,
+using reservoir sampling.
+
+If the number of elements in
+.meta seq
+is equal to or smaller than
+.metn size ,
+then the function returns a vector of all the elements of
+.meta seq
+in their original order.
+
+In other cases, the selected elements are not required to appear
+in their original order.
+
+No element of sequence
+.meta seq
+is selected more than once; duplicate values can appear
+in the output only if
+.meta seq
+itself contains duplicates.
+
.SS* Time
.coNP Functions @, time @ time-usec and @ time-nsec
.synb