diff options
-rw-r--r-- | ChangeLog | 25 | ||||
-rw-r--r-- | lib.c | 3 | ||||
-rw-r--r-- | lib.h | 2 | ||||
-rw-r--r-- | regex.c | 71 |
4 files changed, 69 insertions, 32 deletions
@@ -1,3 +1,28 @@ +2014-10-02 Kaz Kylheku <kaz@kylheku.com> + + Using unified COBJ representation for both regex kinds, + rather than the list-based notation for derivative-based + regexes, and an encapsulated COBJ for NFA-based regexes. + + * lib.c (compiled_regex_s): Variable removed. + (obj_init): Initialization of compiled_regex_s removed. + + * lib.h (compiled_regex_s): Declaration removed. + + * regex.c (struct regex, regex_t): New type. + (regex_destroy): Object is now a regex_t, not nfa_t. + (regex_mark): New function. + (regex_obj_ops): Register regex_mark operation. + (reg_nullable, reg_derivative): Remove cases that handles + compiled_regex_s. + (regex_compile): Output of dv_compile_regex becomes + a cobj nwo. Output of nfa_compile_regex must be + embedded in regex_t structure. + (regexp): Drop the check for compiles_regex_s. + (regex_nfa): Function removed. + (regex_run, regex_machine_init): Use cobj_handle to retrieve regex_t * + pointer and dispatch appropriate code based on regex->kind. + 2014-09-30 Kaz Kylheku <kaz@kylheku.com> * genman.txr: Add PayPal donation button. @@ -73,7 +73,7 @@ val null_s, t, cons_s, str_s, chr_s, fixnum_s, sym_s, pkg_s, fun_s, vec_s; val lit_s, stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s, cptr_s; val env_s, bignum_s, float_s; val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; -val nongreedy_s, compiled_regex_s; +val nongreedy_s; val quote_s, qquote_s, unquote_s, splice_s; val sys_qquote_s, sys_unquote_s, sys_splice_s; val zeroplus_s, optional_s, compl_s, compound_s; @@ -6120,7 +6120,6 @@ static void obj_init(void) expr_s = intern(lit("expr"), system_package); regex_s = intern(lit("regex"), system_package); nongreedy_s = intern(lit("ng0+"), user_package); - compiled_regex_s = intern(lit("compiled-regex"), system_package); quote_s = intern(lit("quote"), user_package); qquote_s = intern(lit("qquote"), user_package); unquote_s = intern(lit("unquote"), user_package); @@ -353,7 +353,7 @@ extern val sym_s, pkg_s, fun_s, vec_s; extern val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s, cptr_s; extern val env_s, bignum_s, float_s; extern val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; -extern val nongreedy_s, compiled_regex_s; +extern val nongreedy_s; extern val quote_s, qquote_s, unquote_s, splice_s; extern val sys_qquote_s, sys_unquote_s, sys_splice_s; extern val zeroplus_s, optional_s, compl_s, compound_s; @@ -54,6 +54,14 @@ typedef struct nfa { nfa_state_t *accept; } nfa_t; +typedef struct regex { + enum { REGEX_NFA, REGEX_DV } kind; + union { + struct nfa nfa; + val dv; + } r; +} regex_t; + /* * Result from regex_machine_feed. * These values have two meanings, based on whether @@ -1279,19 +1287,27 @@ static cnum regex_machine_match_span(regex_machine_t *regm) return regm->n.last_accept_pos; } -static void regex_destroy(val regex) +static void regex_destroy(val obj) +{ + regex_t *regex = (regex_t *) obj->co.handle; + if (regex->kind == REGEX_NFA) + nfa_free(regex->r.nfa); + free(regex); + obj->co.handle = 0; +} + +static void regex_mark(val obj) { - nfa_t *pnfa = (nfa_t *) regex->co.handle; - nfa_free(*pnfa); - free(pnfa); - regex->co.handle = 0; + regex_t *regex = (regex_t *) obj->co.handle; + if (regex->kind == REGEX_DV) + gc_mark(regex->r.dv); } static struct cobj_ops regex_obj_ops = { eq, cobj_print_op, regex_destroy, - cobj_mark_op, + regex_mark, cobj_hash_op }; @@ -1406,7 +1422,7 @@ static val reg_nullable(val exp) return nil; } else if (sym == compound_s) { return reg_nullable_list(args); - } else if (sym == oneplus_s || sym == compiled_regex_s) { + } else if (sym == oneplus_s) { return reg_nullable(first(args)); } else if (sym == zeroplus_s || sym == optional_s) { return t; @@ -1537,8 +1553,6 @@ static val reg_derivative(val exp, val ch) if (sym == set_s || sym == cset_s) { internal_error("uncompiled regex passed to reg_derivative"); - } else if (sym == compiled_regex_s) { - return reg_derivative(first(args), ch); } else if (sym == compound_s) { return reg_derivative_list(args, ch); } else if (sym == optional_s) { @@ -1648,33 +1662,30 @@ val regex_compile(val regex_sexp, val error_stream) regex_sexp = regex_parse(regex_sexp, default_bool_arg(error_stream)); return if2(regex_sexp, regex_compile(regex_sexp, error_stream)); } else if (opt_derivative_regex || regex_requires_dv(regex_sexp)) { - return cons(compiled_regex_s, cons(dv_compile_regex(regex_sexp), nil)); + regex_t *regex = (regex_t *) chk_malloc(sizeof *regex); + regex->kind = REGEX_DV; + regex->r.dv = dv_compile_regex(regex_sexp); + return cobj((mem_t *) regex, regex_s, ®ex_obj_ops); } else { - nfa_t *pnfa = (nfa_t *) chk_malloc(sizeof *pnfa); - *pnfa = nfa_compile_regex(regex_sexp); - return cobj((mem_t *) pnfa, regex_s, ®ex_obj_ops); + regex_t *regex = (regex_t *) chk_malloc(sizeof *regex); + regex->kind = REGEX_NFA; + regex->r.nfa = nfa_compile_regex(regex_sexp); + return cobj((mem_t *) regex, regex_s, ®ex_obj_ops); } } val regexp(val obj) { - if (consp(obj)) - return eq(car(obj), compiled_regex_s); - return typeof(obj) == regex_s ? t : nil; } -static nfa_t *regex_nfa(val reg) -{ - assert (typeof(reg) == regex_s); - return (nfa_t *) reg->co.handle; -} - static cnum regex_run(val compiled_regex, const wchar_t *str) { - if (consp(compiled_regex)) - return dv_run(compiled_regex, str); - return nfa_run(*regex_nfa(compiled_regex), str); + regex_t *regex = (regex_t *) cobj_handle(compiled_regex, regex_s); + + return if3(regex->kind == REGEX_DV, + dv_run(regex->r.dv, str), + nfa_run(regex->r.nfa, str)); } /* @@ -1706,14 +1717,16 @@ static void regex_machine_reset(regex_machine_t *regm) regm->n.last_accept_pos = regm->n.count; } -static void regex_machine_init(regex_machine_t *regm, val regex) +static void regex_machine_init(regex_machine_t *regm, val reg) { - if (consp(regex)) { + regex_t *regex = (regex_t *) cobj_handle(reg, regex_s); + + if (regex->kind == REGEX_DV) { regm->n.is_nfa = 0; - regm->d.regex = regex; + regm->d.regex = regex->r.dv; } else { regm->n.is_nfa = 1; - regm->n.nfa = *regex_nfa(regex); + regm->n.nfa = regex->r.nfa; regm->n.move = (nfa_state_t **) chk_malloc(NFA_SET_SIZE * sizeof *regm->n.move); regm->n.clos = (nfa_state_t **) |