summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog25
-rw-r--r--Makefile1
-rw-r--r--dep.mk3
-rw-r--r--gc.c16
-rw-r--r--gc.h1
-rw-r--r--hash.c301
-rw-r--r--hash.h33
-rw-r--r--lib.c16
-rw-r--r--lib.h4
9 files changed, 397 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 04646481..4e20ecb1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,30 @@
2009-11-06 Kaz Kylheku <kkylheku@gmail.com>
+ First cut at hash tables. One known problem is allocation during gc,
+ due to use of boxed numbers for vector access.
+
+ * gc.c (gc): Disable gc when doing garbage collection, in case
+ something tries to allocate memory during gc, triggering a recursive
+ gc, which would be very bad. Also, call the new function,
+ hash_process_weak, in between the mark and sweep phases.
+ (gc_is_reachable): New function.
+
+ * gc.h (gc_is_reachable): Declared.
+
+ * lib.c (hash_t): New symbol global.
+ (acons_new_l): New function.
+ (obj_init): New symbol interned.
+
+ * lib.h (hash_t, acons_new_l): Declared.
+
+ * hash.c, hash.h: New files.
+
+ * Makefile: New target, hash.o.
+
+ * dep.mk: Regenerated.
+
+2009-11-06 Kaz Kylheku <kkylheku@gmail.com>
+
Throw exception on stream error during close, or I/O operations. This
is needed for pipes that terminate abnormally or return failed
termination. Pipe and stdio streams have an extra description field
diff --git a/Makefile b/Makefile
index b93bfc5c..766cdd0f 100644
--- a/Makefile
+++ b/Makefile
@@ -31,6 +31,7 @@
CFLAGS := -I$(top_srcdir) $(LANG_FLAGS) $(DIAG_FLAGS) $(OPT_FLAGS) $(DBG_FLAGS)
OBJS := txr.o lex.yy.o y.tab.o match.o lib.o regex.o gc.o unwind.o stream.o
+OBJS += hash.o
PROG := ./txr
diff --git a/dep.mk b/dep.mk
index 8c94e995..914735b6 100644
--- a/dep.mk
+++ b/dep.mk
@@ -7,4 +7,5 @@ unwind.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/stream.h $(top_sr
txr.o: $(top_srcdir)/lib.h $(top_srcdir)/stream.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/parser.h $(top_srcdir)/match.h $(top_srcdir)/txr.h
match.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/regex.h $(top_srcdir)/stream.h $(top_srcdir)/parser.h $(top_srcdir)/txr.h $(top_srcdir)/match.h
stream.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/stream.h
-gc.o: $(top_srcdir)/lib.h $(top_srcdir)/stream.h $(top_srcdir)/txr.h $(top_srcdir)/gc.h
+gc.o: $(top_srcdir)/lib.h $(top_srcdir)/stream.h $(top_srcdir)/hash.h $(top_srcdir)/txr.h $(top_srcdir)/gc.h
+hash.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/hash.h
diff --git a/gc.c b/gc.c
index 4247c813..efc135e0 100644
--- a/gc.c
+++ b/gc.c
@@ -32,6 +32,7 @@
#include <dirent.h>
#include "lib.h"
#include "stream.h"
+#include "hash.h"
#include "txr.h"
#include "gc.h"
@@ -350,8 +351,11 @@ void gc(void)
if (gc_enabled) {
jmp_buf jmp;
setjmp(jmp);
+ gc_enabled = 0;
mark();
+ hash_process_weak();
sweep();
+ gc_enabled = 1;
}
}
@@ -372,6 +376,18 @@ void gc_mark(obj_t *obj)
mark_obj(obj);
}
+int gc_is_reachable(obj_t *obj)
+{
+ type_t t;
+
+ if (obj == nil)
+ return 1;
+
+ t = obj->t.type;
+
+ return (t & REACHABLE) != 0;
+}
+
/*
* Useful functions for gdb'ing.
*/
diff --git a/gc.h b/gc.h
index 664b079c..2db8d9f8 100644
--- a/gc.h
+++ b/gc.h
@@ -32,3 +32,4 @@ obj_t *make_obj(void);
void gc(void);
int gc_state(int);
void gc_mark(obj_t *);
+int gc_is_reachable(obj_t *);
diff --git a/hash.c b/hash.c
new file mode 100644
index 00000000..33510917
--- /dev/null
+++ b/hash.c
@@ -0,0 +1,301 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <setjmp.h>
+#include <limits.h>
+#include "lib.h"
+#include "gc.h"
+#include "unwind.h"
+#include "hash.h"
+
+typedef enum hash_flags {
+ hash_weak_none = 0,
+ hash_weak_keys = 1,
+ hash_weak_vals = 2,
+ hash_weak_both = 3
+} hash_flags_t;
+
+struct hash {
+ hash_flags_t flags;
+ struct hash *next;
+ obj_t *table;
+ long modulus;
+ long count;
+};
+
+/*
+ * Dynamic list built up during gc.
+ */
+static struct hash *reachable_weak_hashes;
+
+/*
+ * This is is an adaptation of hashpjw, from Compilers: Principles, Techniques
+ * and Tools, Aho, Sethi, Ulman, 1988. P. 436. The register is wider by
+ * a few bits, and we bring down five overflow bits instead of four.
+ * We don't reduce the final result modulo a small prime, but leave it
+ * as it is; let the hashing routines do their own reduction.
+ */
+static long hash_c_str(const char *str)
+{
+ unsigned long h = 0;
+ while (*str) {
+ unsigned long g;
+ h = (h << 4) + *str++;
+ g = h & 0x7C000000;
+ h = h ^ (g >> 26) ^ g;
+ }
+ return h;
+}
+
+static long ll_hash(obj_t *obj)
+{
+ if (obj == nil)
+ return LONG_MAX;
+
+ switch (obj->t.type) {
+ case CONS:
+ return (ll_hash(obj->c.car) + ll_hash(obj->c.cdr)) & LONG_MAX;
+ case STR:
+ return hash_c_str(obj->st.str);
+ case CHR:
+ return obj->ch.ch + LONG_MAX / 2;
+ case NUM:
+ return obj->n.val & LONG_MAX;
+ case SYM:
+ return ((long) obj) & LONG_MAX;
+ case FUN:
+ return ((long) obj->f.f.interp_fun + ll_hash(obj->f.env)) & LONG_MAX;
+ case VEC:
+ {
+ obj_t *fill = obj->v.vec[vec_fill];
+ long i, h = ll_hash(obj->v.vec[vec_fill]);
+ long len = c_num(fill);
+
+ for (i = 0; i < len; i++)
+ h = (h + ll_hash(obj->v.vec[i])) & LONG_MAX;
+
+ return h;
+ }
+ case LCONS:
+ return (ll_hash(car(obj)) + ll_hash(cdr(obj))) & LONG_MAX;
+ case LSTR:
+ lazy_str_force(obj);
+ return ll_hash(obj->ls.prefix);
+ case COBJ:
+ if (obj->co.ops->hash)
+ return obj->co.ops->hash(obj);
+ return ((long) obj) & LONG_MAX;
+ }
+
+ internal_error("unhandled case in equal function");
+}
+
+obj_t *hash_obj(obj_t *obj)
+{
+ return num(ll_hash(obj));
+}
+
+obj_t *hash_equal(obj_t *self, obj_t *other)
+{
+ return self == other ? t : nil;
+}
+
+void hash_destroy(obj_t *hash)
+{
+ free(hash->co.handle);
+}
+
+void hash_mark(obj_t *hash)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ long i;
+
+ switch (h->flags) {
+ case hash_weak_none:
+ /* If the hash is not weak, we can simply mark the table
+ vector and we are done. */
+ gc_mark(h->table);
+ break;
+ case hash_weak_keys:
+ /* Keys are weak: mark the values only. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t *iter;
+
+ for (iter = *pchain; iter != nil; iter = cdr(iter)) {
+ obj_t *entry = car(iter);
+ gc_mark(cdr(entry));
+ }
+ }
+ reachable_weak_hashes->next = h;
+ reachable_weak_hashes = h;
+ break;
+ case hash_weak_vals:
+ /* Values are weak: mark the keys only. */
+
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t *iter;
+
+ for (iter = *pchain; iter != nil; iter = cdr(iter)) {
+ obj_t *entry = car(iter);
+ gc_mark(car(entry));
+ }
+ }
+ reachable_weak_hashes->next = h;
+ reachable_weak_hashes = h;
+ break;
+ case hash_weak_both:
+ /* Values and keys are weak: don't mark anything. */
+ break;
+ }
+}
+
+static struct cobj_ops hash_ops = {
+ hash_equal,
+ cobj_print_op,
+ hash_destroy,
+ hash_mark,
+};
+
+obj_t *make_hash(obj_t *weak_keys, obj_t *weak_vals)
+{
+ int flags = ((weak_vals != nil) << 1) | (weak_keys != nil);
+ struct hash *h = (struct hash *) chk_malloc(sizeof *h);
+ obj_t *mod = num(256);
+ h->flags = (hash_flags_t) flags;
+ h->modulus = c_num(mod);
+ h->table = vector(mod);
+ vec_set_fill(h->table, mod);
+ return cobj((void *) h, hash_t, &hash_ops);
+}
+
+obj_t **l_gethash(obj_t *hash, obj_t *key)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ obj_t **pchain = vecref_l(h->table, num(ll_hash(key) % h->modulus));
+ return acons_new_l(pchain, key);
+}
+
+obj_t *gethash(obj_t *hash, obj_t *key)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ obj_t *chain = *vecref_l(h->table, num(ll_hash(key) % h->modulus));
+ obj_t *found = assoc(chain, key);
+ return cdr(found);
+}
+
+/*
+ * Called from garbage collector. Hash module must process all weak tables
+ * that were visited during the marking phase, maintained in the list
+ * reachable_weak_hashes.
+ */
+void hash_process_weak(void)
+{
+ struct hash *h;
+ long i;
+
+ for (h = reachable_weak_hashes; h != 0; h = h->next) {
+ switch (h->flags) {
+ case hash_weak_none:
+ /* what is this doing here */
+ break;
+ case hash_weak_keys:
+ /* Sweep through all entries. Delete any which have keys
+ that are garbage. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t **iter;
+
+ for (iter = pchain; *iter != nil; ) {
+ obj_t *entry = car(*iter);
+ if (!gc_is_reachable(car(entry)))
+ *iter = cdr(*iter);
+ else
+ iter = cdr_l(*iter);
+ }
+ }
+ /* Garbage is gone now. Seal things by marking the vector. */
+ gc_mark(h->table);
+ break;
+ case hash_weak_vals:
+ /* Sweep through all entries. Delete any which have values
+ that are garbage. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t **iter;
+
+ for (iter = pchain; *iter != nil; ) {
+ obj_t *entry = car(*iter);
+ if (!gc_is_reachable(cdr(entry)))
+ *iter = cdr(*iter);
+ else
+ iter = cdr_l(*iter);
+ }
+ }
+ /* Garbage is gone now. Seal things by marking the vector. */
+ gc_mark(h->table);
+ break;
+ case hash_weak_both:
+ /* Sweep through all entries. Delete any which have keys
+ or values that are garbage. */
+ for (i = 0; i < h->modulus; i++) {
+ obj_t *ind = num(i);
+ obj_t **pchain = vecref_l(h->table, ind);
+ obj_t **iter;
+
+ for (iter = pchain; *iter != nil; ) {
+ obj_t *entry = car(*iter);
+ if (!gc_is_reachable(car(entry)) || !gc_is_reachable(cdr(entry)))
+ *iter = cdr(*iter);
+ else
+ iter = cdr_l(*iter);
+ }
+ }
+ /* Garbage is gone now. Seal things by marking the vector. */
+ gc_mark(h->table);
+ break;
+ }
+ }
+
+ /* Done with weak processing; clear out the list in preparation for
+ the next gc round. */
+ reachable_weak_hashes = 0;
+}
+
+void hash_init(void)
+{
+}
diff --git a/hash.h b/hash.h
new file mode 100644
index 00000000..e5a1e510
--- /dev/null
+++ b/hash.h
@@ -0,0 +1,33 @@
+/* Copyright 2009
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+obj_t *hash_obj(obj_t *);
+obj_t *make_hash(obj_t *weak_keys, obj_t *weak_vals);
+obj_t **l_gethash(obj_t *hash, obj_t *key);
+obj_t *gethash(obj_t *hash, obj_t *key);
+void hash_process_weak(void);
+
+void hash_init(void);
diff --git a/lib.c b/lib.c
index 635ab8d4..0d55c14c 100644
--- a/lib.c
+++ b/lib.c
@@ -44,7 +44,7 @@
obj_t *interned_syms;
obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t;
-obj_t *stream_t, *lcons_t, *lstr_t, *cobj_t;
+obj_t *stream_t, *hash_t, *lcons_t, *lstr_t, *cobj_t;
obj_t *var, *regex, *set, *cset, *wild, *oneplus;
obj_t *zeroplus, *optional, *compound, *or, *quasi;
obj_t *skip, *trailer, *block, *next, *freeform, *fail, *accept;
@@ -1538,6 +1538,19 @@ obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value)
}
}
+obj_t **acons_new_l(obj_t **list, obj_t *key)
+{
+ obj_t *existing = assoc(*list, key);
+
+ if (existing) {
+ return cdr_l(existing);
+ } else {
+ obj_t *new = cons(key, nil);
+ *list = cons(new, *list);
+ return cdr_l(new);
+ }
+}
+
obj_t *alist_remove(obj_t *list, obj_t *keys)
{
obj_t **plist = &list;
@@ -1696,6 +1709,7 @@ static void obj_init(void)
fun_t = intern(string("fun"));
vec_t = intern(string("vec"));
stream_t = intern(string("stream"));
+ hash_t = intern(string("hash"));
lcons_t = intern(string("lcons"));
lstr_t = intern(string("lstr"));
cobj_t = intern(string("cobj"));
diff --git a/lib.h b/lib.h
index 03fede10..ce84f69f 100644
--- a/lib.h
+++ b/lib.h
@@ -136,6 +136,7 @@ struct cobj_ops {
void (*print)(obj_t *self, obj_t *stream);
void (*destroy)(obj_t *self);
void (*mark)(obj_t *self);
+ long (*hash)(obj_t *self);
};
union obj {
@@ -155,7 +156,7 @@ union obj {
extern obj_t *interned_syms;
extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t;
-extern obj_t *stream_t, *lcons_t, *lstr_t, *cobj_t;
+extern obj_t *stream_t, *hash_t, *lcons_t, *lstr_t, *cobj_t;
extern obj_t *var, *regex, *set, *cset, *wild, *oneplus;
extern obj_t *zeroplus, *optional, *compound, *or, *quasi;
extern obj_t *skip, *trailer, *block, *next, *freeform, *fail, *accept;
@@ -301,6 +302,7 @@ obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops);
void cobj_print_op(obj_t *, obj_t *); /* Default function for struct cobj_ops */
obj_t *assoc(obj_t *list, obj_t *key);
obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value);
+obj_t **acons_new_l(obj_t **list, obj_t *key);
obj_t *alist_remove(obj_t *list, obj_t *keys);
obj_t *alist_remove1(obj_t *list, obj_t *key);
obj_t *copy_cons(obj_t *cons);