diff options
-rw-r--r-- | ChangeLog | 25 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | dep.mk | 3 | ||||
-rw-r--r-- | gc.c | 16 | ||||
-rw-r--r-- | gc.h | 1 | ||||
-rw-r--r-- | hash.c | 301 | ||||
-rw-r--r-- | hash.h | 33 | ||||
-rw-r--r-- | lib.c | 16 | ||||
-rw-r--r-- | lib.h | 4 |
9 files changed, 397 insertions, 3 deletions
@@ -1,5 +1,30 @@ 2009-11-06 Kaz Kylheku <kkylheku@gmail.com> + First cut at hash tables. One known problem is allocation during gc, + due to use of boxed numbers for vector access. + + * gc.c (gc): Disable gc when doing garbage collection, in case + something tries to allocate memory during gc, triggering a recursive + gc, which would be very bad. Also, call the new function, + hash_process_weak, in between the mark and sweep phases. + (gc_is_reachable): New function. + + * gc.h (gc_is_reachable): Declared. + + * lib.c (hash_t): New symbol global. + (acons_new_l): New function. + (obj_init): New symbol interned. + + * lib.h (hash_t, acons_new_l): Declared. + + * hash.c, hash.h: New files. + + * Makefile: New target, hash.o. + + * dep.mk: Regenerated. + +2009-11-06 Kaz Kylheku <kkylheku@gmail.com> + Throw exception on stream error during close, or I/O operations. This is needed for pipes that terminate abnormally or return failed termination. Pipe and stdio streams have an extra description field @@ -31,6 +31,7 @@ CFLAGS := -I$(top_srcdir) $(LANG_FLAGS) $(DIAG_FLAGS) $(OPT_FLAGS) $(DBG_FLAGS) OBJS := txr.o lex.yy.o y.tab.o match.o lib.o regex.o gc.o unwind.o stream.o +OBJS += hash.o PROG := ./txr @@ -7,4 +7,5 @@ unwind.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/stream.h $(top_sr txr.o: $(top_srcdir)/lib.h $(top_srcdir)/stream.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/parser.h $(top_srcdir)/match.h $(top_srcdir)/txr.h match.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/regex.h $(top_srcdir)/stream.h $(top_srcdir)/parser.h $(top_srcdir)/txr.h $(top_srcdir)/match.h stream.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/stream.h -gc.o: $(top_srcdir)/lib.h $(top_srcdir)/stream.h $(top_srcdir)/txr.h $(top_srcdir)/gc.h +gc.o: $(top_srcdir)/lib.h $(top_srcdir)/stream.h $(top_srcdir)/hash.h $(top_srcdir)/txr.h $(top_srcdir)/gc.h +hash.o: $(top_srcdir)/lib.h $(top_srcdir)/gc.h $(top_srcdir)/unwind.h $(top_srcdir)/hash.h @@ -32,6 +32,7 @@ #include <dirent.h> #include "lib.h" #include "stream.h" +#include "hash.h" #include "txr.h" #include "gc.h" @@ -350,8 +351,11 @@ void gc(void) if (gc_enabled) { jmp_buf jmp; setjmp(jmp); + gc_enabled = 0; mark(); + hash_process_weak(); sweep(); + gc_enabled = 1; } } @@ -372,6 +376,18 @@ void gc_mark(obj_t *obj) mark_obj(obj); } +int gc_is_reachable(obj_t *obj) +{ + type_t t; + + if (obj == nil) + return 1; + + t = obj->t.type; + + return (t & REACHABLE) != 0; +} + /* * Useful functions for gdb'ing. */ @@ -32,3 +32,4 @@ obj_t *make_obj(void); void gc(void); int gc_state(int); void gc_mark(obj_t *); +int gc_is_reachable(obj_t *); @@ -0,0 +1,301 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include <stdio.h> +#include <string.h> +#include <dirent.h> +#include <stdarg.h> +#include <stdlib.h> +#include <assert.h> +#include <setjmp.h> +#include <limits.h> +#include "lib.h" +#include "gc.h" +#include "unwind.h" +#include "hash.h" + +typedef enum hash_flags { + hash_weak_none = 0, + hash_weak_keys = 1, + hash_weak_vals = 2, + hash_weak_both = 3 +} hash_flags_t; + +struct hash { + hash_flags_t flags; + struct hash *next; + obj_t *table; + long modulus; + long count; +}; + +/* + * Dynamic list built up during gc. + */ +static struct hash *reachable_weak_hashes; + +/* + * This is is an adaptation of hashpjw, from Compilers: Principles, Techniques + * and Tools, Aho, Sethi, Ulman, 1988. P. 436. The register is wider by + * a few bits, and we bring down five overflow bits instead of four. + * We don't reduce the final result modulo a small prime, but leave it + * as it is; let the hashing routines do their own reduction. + */ +static long hash_c_str(const char *str) +{ + unsigned long h = 0; + while (*str) { + unsigned long g; + h = (h << 4) + *str++; + g = h & 0x7C000000; + h = h ^ (g >> 26) ^ g; + } + return h; +} + +static long ll_hash(obj_t *obj) +{ + if (obj == nil) + return LONG_MAX; + + switch (obj->t.type) { + case CONS: + return (ll_hash(obj->c.car) + ll_hash(obj->c.cdr)) & LONG_MAX; + case STR: + return hash_c_str(obj->st.str); + case CHR: + return obj->ch.ch + LONG_MAX / 2; + case NUM: + return obj->n.val & LONG_MAX; + case SYM: + return ((long) obj) & LONG_MAX; + case FUN: + return ((long) obj->f.f.interp_fun + ll_hash(obj->f.env)) & LONG_MAX; + case VEC: + { + obj_t *fill = obj->v.vec[vec_fill]; + long i, h = ll_hash(obj->v.vec[vec_fill]); + long len = c_num(fill); + + for (i = 0; i < len; i++) + h = (h + ll_hash(obj->v.vec[i])) & LONG_MAX; + + return h; + } + case LCONS: + return (ll_hash(car(obj)) + ll_hash(cdr(obj))) & LONG_MAX; + case LSTR: + lazy_str_force(obj); + return ll_hash(obj->ls.prefix); + case COBJ: + if (obj->co.ops->hash) + return obj->co.ops->hash(obj); + return ((long) obj) & LONG_MAX; + } + + internal_error("unhandled case in equal function"); +} + +obj_t *hash_obj(obj_t *obj) +{ + return num(ll_hash(obj)); +} + +obj_t *hash_equal(obj_t *self, obj_t *other) +{ + return self == other ? t : nil; +} + +void hash_destroy(obj_t *hash) +{ + free(hash->co.handle); +} + +void hash_mark(obj_t *hash) +{ + struct hash *h = (struct hash *) hash->co.handle; + long i; + + switch (h->flags) { + case hash_weak_none: + /* If the hash is not weak, we can simply mark the table + vector and we are done. */ + gc_mark(h->table); + break; + case hash_weak_keys: + /* Keys are weak: mark the values only. */ + for (i = 0; i < h->modulus; i++) { + obj_t *ind = num(i); + obj_t **pchain = vecref_l(h->table, ind); + obj_t *iter; + + for (iter = *pchain; iter != nil; iter = cdr(iter)) { + obj_t *entry = car(iter); + gc_mark(cdr(entry)); + } + } + reachable_weak_hashes->next = h; + reachable_weak_hashes = h; + break; + case hash_weak_vals: + /* Values are weak: mark the keys only. */ + + for (i = 0; i < h->modulus; i++) { + obj_t *ind = num(i); + obj_t **pchain = vecref_l(h->table, ind); + obj_t *iter; + + for (iter = *pchain; iter != nil; iter = cdr(iter)) { + obj_t *entry = car(iter); + gc_mark(car(entry)); + } + } + reachable_weak_hashes->next = h; + reachable_weak_hashes = h; + break; + case hash_weak_both: + /* Values and keys are weak: don't mark anything. */ + break; + } +} + +static struct cobj_ops hash_ops = { + hash_equal, + cobj_print_op, + hash_destroy, + hash_mark, +}; + +obj_t *make_hash(obj_t *weak_keys, obj_t *weak_vals) +{ + int flags = ((weak_vals != nil) << 1) | (weak_keys != nil); + struct hash *h = (struct hash *) chk_malloc(sizeof *h); + obj_t *mod = num(256); + h->flags = (hash_flags_t) flags; + h->modulus = c_num(mod); + h->table = vector(mod); + vec_set_fill(h->table, mod); + return cobj((void *) h, hash_t, &hash_ops); +} + +obj_t **l_gethash(obj_t *hash, obj_t *key) +{ + struct hash *h = (struct hash *) hash->co.handle; + obj_t **pchain = vecref_l(h->table, num(ll_hash(key) % h->modulus)); + return acons_new_l(pchain, key); +} + +obj_t *gethash(obj_t *hash, obj_t *key) +{ + struct hash *h = (struct hash *) hash->co.handle; + obj_t *chain = *vecref_l(h->table, num(ll_hash(key) % h->modulus)); + obj_t *found = assoc(chain, key); + return cdr(found); +} + +/* + * Called from garbage collector. Hash module must process all weak tables + * that were visited during the marking phase, maintained in the list + * reachable_weak_hashes. + */ +void hash_process_weak(void) +{ + struct hash *h; + long i; + + for (h = reachable_weak_hashes; h != 0; h = h->next) { + switch (h->flags) { + case hash_weak_none: + /* what is this doing here */ + break; + case hash_weak_keys: + /* Sweep through all entries. Delete any which have keys + that are garbage. */ + for (i = 0; i < h->modulus; i++) { + obj_t *ind = num(i); + obj_t **pchain = vecref_l(h->table, ind); + obj_t **iter; + + for (iter = pchain; *iter != nil; ) { + obj_t *entry = car(*iter); + if (!gc_is_reachable(car(entry))) + *iter = cdr(*iter); + else + iter = cdr_l(*iter); + } + } + /* Garbage is gone now. Seal things by marking the vector. */ + gc_mark(h->table); + break; + case hash_weak_vals: + /* Sweep through all entries. Delete any which have values + that are garbage. */ + for (i = 0; i < h->modulus; i++) { + obj_t *ind = num(i); + obj_t **pchain = vecref_l(h->table, ind); + obj_t **iter; + + for (iter = pchain; *iter != nil; ) { + obj_t *entry = car(*iter); + if (!gc_is_reachable(cdr(entry))) + *iter = cdr(*iter); + else + iter = cdr_l(*iter); + } + } + /* Garbage is gone now. Seal things by marking the vector. */ + gc_mark(h->table); + break; + case hash_weak_both: + /* Sweep through all entries. Delete any which have keys + or values that are garbage. */ + for (i = 0; i < h->modulus; i++) { + obj_t *ind = num(i); + obj_t **pchain = vecref_l(h->table, ind); + obj_t **iter; + + for (iter = pchain; *iter != nil; ) { + obj_t *entry = car(*iter); + if (!gc_is_reachable(car(entry)) || !gc_is_reachable(cdr(entry))) + *iter = cdr(*iter); + else + iter = cdr_l(*iter); + } + } + /* Garbage is gone now. Seal things by marking the vector. */ + gc_mark(h->table); + break; + } + } + + /* Done with weak processing; clear out the list in preparation for + the next gc round. */ + reachable_weak_hashes = 0; +} + +void hash_init(void) +{ +} @@ -0,0 +1,33 @@ +/* Copyright 2009 + * Kaz Kylheku <kkylheku@gmail.com> + * Vancouver, Canada + * All rights reserved. + * + * BSD License: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +obj_t *hash_obj(obj_t *); +obj_t *make_hash(obj_t *weak_keys, obj_t *weak_vals); +obj_t **l_gethash(obj_t *hash, obj_t *key); +obj_t *gethash(obj_t *hash, obj_t *key); +void hash_process_weak(void); + +void hash_init(void); @@ -44,7 +44,7 @@ obj_t *interned_syms; obj_t *null, *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; -obj_t *stream_t, *lcons_t, *lstr_t, *cobj_t; +obj_t *stream_t, *hash_t, *lcons_t, *lstr_t, *cobj_t; obj_t *var, *regex, *set, *cset, *wild, *oneplus; obj_t *zeroplus, *optional, *compound, *or, *quasi; obj_t *skip, *trailer, *block, *next, *freeform, *fail, *accept; @@ -1538,6 +1538,19 @@ obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value) } } +obj_t **acons_new_l(obj_t **list, obj_t *key) +{ + obj_t *existing = assoc(*list, key); + + if (existing) { + return cdr_l(existing); + } else { + obj_t *new = cons(key, nil); + *list = cons(new, *list); + return cdr_l(new); + } +} + obj_t *alist_remove(obj_t *list, obj_t *keys) { obj_t **plist = &list; @@ -1696,6 +1709,7 @@ static void obj_init(void) fun_t = intern(string("fun")); vec_t = intern(string("vec")); stream_t = intern(string("stream")); + hash_t = intern(string("hash")); lcons_t = intern(string("lcons")); lstr_t = intern(string("lstr")); cobj_t = intern(string("cobj")); @@ -136,6 +136,7 @@ struct cobj_ops { void (*print)(obj_t *self, obj_t *stream); void (*destroy)(obj_t *self); void (*mark)(obj_t *self); + long (*hash)(obj_t *self); }; union obj { @@ -155,7 +156,7 @@ union obj { extern obj_t *interned_syms; extern obj_t *t, *cons_t, *str_t, *chr_t, *num_t, *sym_t, *fun_t, *vec_t; -extern obj_t *stream_t, *lcons_t, *lstr_t, *cobj_t; +extern obj_t *stream_t, *hash_t, *lcons_t, *lstr_t, *cobj_t; extern obj_t *var, *regex, *set, *cset, *wild, *oneplus; extern obj_t *zeroplus, *optional, *compound, *or, *quasi; extern obj_t *skip, *trailer, *block, *next, *freeform, *fail, *accept; @@ -301,6 +302,7 @@ obj_t *cobj(void *handle, obj_t *cls_sym, struct cobj_ops *ops); void cobj_print_op(obj_t *, obj_t *); /* Default function for struct cobj_ops */ obj_t *assoc(obj_t *list, obj_t *key); obj_t *acons_new(obj_t *list, obj_t *key, obj_t *value); +obj_t **acons_new_l(obj_t **list, obj_t *key); obj_t *alist_remove(obj_t *list, obj_t *keys); obj_t *alist_remove1(obj_t *list, obj_t *key); obj_t *copy_cons(obj_t *cons); |