summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2015-11-10 06:46:53 -0800
committerKaz Kylheku <kaz@kylheku.com>2015-11-10 06:46:53 -0800
commite52438a4ff3e470863b3122cfd46a95c3a417516 (patch)
treee75088d5ac403a61c539a6916823cd8767b971db
parent1b033ba4d434efc0c1d55c33305b686338eb5f50 (diff)
downloadtxr-e52438a4ff3e470863b3122cfd46a95c3a417516.tar.gz
txr-e52438a4ff3e470863b3122cfd46a95c3a417516.tar.bz2
txr-e52438a4ff3e470863b3122cfd46a95c3a417516.zip
New function: group-reduce.
* eval.c (eval_init): Register group-reduce intrinsic. * hash.c (group_reduce): New function. * hash.h (group_reduce): Declared. * txr.1: Documented group-reduce.
-rw-r--r--eval.c2
-rw-r--r--hash.c45
-rw-r--r--hash.h2
-rw-r--r--txr.171
4 files changed, 120 insertions, 0 deletions
diff --git a/eval.c b/eval.c
index 215d562f..099c1327 100644
--- a/eval.c
+++ b/eval.c
@@ -4636,6 +4636,8 @@ void eval_init(void)
reg_fun(intern(lit("hash-subset"), user_package), func_n2(hash_subset));
reg_fun(intern(lit("hash-proper-subset"), user_package), func_n2(hash_proper_subset));
reg_fun(intern(lit("group-by"), user_package), func_n2v(group_by));
+ reg_fun(intern(lit("group-reduce"), user_package),
+ func_n6o(group_reduce, 4));
reg_fun(intern(lit("sort-group"), user_package), func_n3o(sort_group, 1));
reg_fun(intern(lit("unique"), user_package), func_n2ov(unique, 1));
reg_fun(intern(lit("uniq"), user_package), func_n1(uniq));
diff --git a/hash.c b/hash.c
index 265da1af..77de4dea 100644
--- a/hash.c
+++ b/hash.c
@@ -960,6 +960,51 @@ val group_by(val func, val seq, struct args *hashv_args)
}
}
+val group_reduce(val hash, val by_fun, val reduce_fun, val seq,
+ val initval, val filter_fun)
+{
+ initval = default_bool_arg(initval);
+ filter_fun = default_arg(filter_fun, identity_f);
+
+ if (vectorp(seq)) {
+ cnum i, len;
+
+ for (i = 0, len = c_num(length(seq)); i < len; i++) {
+ val v = vecref(seq, num_fast(i));
+ val key = funcall1(by_fun, v);
+ val new_p;
+ val cell = gethash_c(hash, key, mkcloc(new_p));
+
+ if (new_p)
+ rplacd(cell, funcall2(reduce_fun, initval, v));
+ else
+ rplacd(cell, funcall2(reduce_fun, cdr(cell), v));
+ }
+ } else {
+ for (; seq; seq = cdr(seq)) {
+ val v = car(seq);
+ val key = funcall1(by_fun, v);
+ val new_p;
+ val cell = gethash_c(hash, key, mkcloc(new_p));
+
+ if (new_p)
+ rplacd(cell, funcall2(reduce_fun, initval, v));
+ else
+ rplacd(cell, funcall2(reduce_fun, cdr(cell), v));
+ }
+ }
+
+ if (filter_fun != identity_f) {
+ val iter = hash_begin(hash);
+ val cell;
+
+ while ((cell = hash_next(iter)) != nil)
+ rplacd(cell, funcall1(filter_fun, cdr(cell)));
+ }
+
+ return hash;
+}
+
static val hash_keys_lazy(val iter, val lcons)
{
val cell = hash_next(iter);
diff --git a/hash.h b/hash.h
index a8d9de4d..0d5d2ffe 100644
--- a/hash.h
+++ b/hash.h
@@ -52,6 +52,8 @@ val hash_construct(val hashl_args, val pairs);
val hash_from_pairs_v(val pairs, struct args *hashv_args);
val hash_list(val keys, struct args *hashv_args);
val group_by(val func, val seq, struct args *hashv_args);
+val group_reduce(val hash, val by_fun, val reduce_fun, val seq,
+ val initval, val filter_fun);
val hash_keys(val hash);
val hash_values(val hash);
val hash_pairs(val hash);
diff --git a/txr.1 b/txr.1
index 05bbe0fc..aa22c111 100644
--- a/txr.1
+++ b/txr.1
@@ -28797,6 +28797,77 @@ according to the modulo 3 congruence:
-> #H(() (0 (0 3 6 9)) (1 (1 4 7 10)) (2 (2 5 8)))
.cble
+.coNP Function @ group-reduce
+.synb
+.mets (group-reduce < hash < classify-fun < binary-fun < seq
+.mets \ \ >> [ init-value <> [ filter-fun ]])
+.syne
+.desc
+The
+.code group-reduce
+updates hash table
+.meta hash
+by grouping and reducing sequence
+.metn seq .
+
+The function regards the hash table as being populated with
+keys denoting accumulator values. Missing accumulators which
+need to be created in the hash table are initialized with
+.meta init-value
+which defaults to
+.codn nil .
+
+The function iterates over
+.meta seq
+and treats each element according to the following steps:
+.RS
+.IP 1.
+Each element is mapped to a hash key through
+.metn classify-fun .
+.IP 2.
+The value associated with the hash key (the accumulator for that
+key) is retrieved. If it doesn't exist,
+.meta init-value
+is used.
+.IP 3.
+The function
+.meta binary-fun
+is invoked with two arguments: the accumulator from step 2, and the
+original element from
+.metn seq .
+.IP 4.
+The resulting value is stored back into the hash table under the
+same key.
+.RE
+
+.IP
+If
+.code group-reduce
+is invoked on an empty hash table, its net result closely resembles a
+.code group-by
+operation followed by separately performing a
+.code reduce-left
+on each value in the hash.
+
+.TP* Examples:
+
+Frequency histogram:
+
+.cblk
+ [group-reduce (hash) identity (do inc @1)
+ "fourscoreandsevenyearsago" 0]
+ --> #H(() (#\ea 3) (#\ec 1) (#\ed 1) (#\ee 4) (#\ef 1)
+ (#\eg 1) (#\en 2) (#\eo 3) (#\er 3) (#\es 3)
+ (#\eu 1) (#\ev 1) (#\ey 1))
+.cble
+
+Separate the integers 1-10 into even and odd, and sum these groups:
+
+.cblk
+ [group-reduce (hash) evenp + (range 1 10) 0]
+ -> #H(() (t 30) (nil 25))
+.cble
+
.coNP Functions @ make-similar-hash and @ copy-hash
.synb
.mets (make-similar-hash << hash )