diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2021-07-08 19:17:39 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2021-07-08 19:17:39 -0700 |
commit | fc22de3079459193253522efccdd7519879e34b7 (patch) | |
tree | 6c665cc9b800b81827a2a5214ed1125cb0dd49f3 /regex.c | |
parent | c0a9036d15d9f0a910aab82905e6b5e7d6ce71da (diff) | |
download | txr-fc22de3079459193253522efccdd7519879e34b7.tar.gz txr-fc22de3079459193253522efccdd7519879e34b7.tar.bz2 txr-fc22de3079459193253522efccdd7519879e34b7.zip |
type: disallow structs using built-in type names.
This is a big commit motivated by the need to clean up the
situation with built-in type symbols, COBJ objects and
structs.
The struct type system allows struct types to be defined
for symbols like regex or str, which are used by built-in
or cobj types. This is a bad thing.
What is worse, structure instances are COBJ types which
identify their type using the COBJ class symbol mechanism.
There are places in the C implementation which assume
that when a COBJ has a certain class symbol, it is of
a certain expected type, which is totally different from
and incompatible with a struct instance. User code can
define a structure object which will fool that code.
There are multiple things going on in this patch.
The major theme is that the COBJ representation is changing.
Instead of a class symbol, COBJ instances now carry a
"struct cobj_class *" pointer. This pointer is obtained
by registration via the cobj_register function. All modules
must register their class symbols to obtain these class
handles, which are then used in cobj() calls for
instantiation.
The CPTR type was identical to COBJ until now, except for the
type tag. This is changing; CPTR objects will keep the old
representation with the class symbol.
commit 20fdfc6008297001491308849c17498c006fe7b4
Author: Kaz Kylheku <kaz@kylheku.com>
Date: Thu Jul 8 19:17:39 2021 -0700
* ffi.h (carray_cls): Declared.
* hash.h (hash_cls): Declared.
(hash_early_init): Declared.
* lib.h (struct cobj_class): New struct.
(struct cobj): cls member changing to struct cobj_class *.
(struct cptr): New struct, same as previous struct cobj.
(union obj): New member cp of type struct cptr, for CPTR.
(builtin_type): Declared.
(class_check): Declaration moved closer to COBJ-related
functions and updated.
(cobj_register, cobj_register_super, cobj_class_exists): New
functions declared.
(cobjclassp, cobj_handle, cobj_ops): Declarations updated.
* parser.h (parser_cls): Declared.
* rand.h (random_state_cls): Declared.
* regex.h (regex_cls): Declared.
* stream.h (stream_cls, stdio_stream_cls): Declared.
* struct.h (struct_cls): Declared.
* tree.h (tree_cls, tree_iter_cls): Declared.
* vm.h (vm_desc_cls): Declared.
* buf.c (buf_strm, make_buf_stream): Pass stream_cls
class handle instead of stream_s class symbol.
* chksum.c (sha256_ctx_cls, md5_ctx_cls): New static class
handles.
(sha256_begin, sha256_hash, sha256_end, md5_begin, md5_hash,
md5_end): Pass class handles instead of class symbols.
(chksum_init): Initialize class handle variables.
* ffi.c (ffi_type_cls, ffi_call_desc_cls, ffi_closure_cls,
union_cls): New static class handles.
(carray_cls): New global variable.
(ffi_type_struct_checked, ffi_type_print_op,
ffi_closure_struct_checked, ffi_closure_print_op,
make_ffi_type_builtin, make_ffi_type_pointer,
make_ffi_type_struct, make_ffi_type_union,
make_ffi_type_array, make_ffi_type_enum,
ffi_call_desc_checked, ffi_call_desc_print_op,
ffi_make_call_desc, ffi_make_closure, carray_struct_checked,
carray_print_op, make_carray, cptr_getobj, cptr_out,
uni_struct_checked, make_union_common): Pass class handles
instead of class symbols.
(ffi_init): Initialize class handle variables.
* filter.c (regex_from_trie): Use hash_cls class handle
instead of hash_s.
* gc.c (mark_obj): Split COBJ and CPTR cases since the
representation is different.
* hash.c (hash_cls, hash_iter_cls): New class handles.
(make_similar_hash, copy_hash, gethash_c, gethash_e, remhash,
clearhash, hash_count, get_hash_userdata, set_hash_userdata,
hashp, hash_iter_init, hash_begin, hash_next, hash_peek,
hash_reset, hash_uni, hash_diff, hash_symdiff,
hash_isec): Pass class handles instead of class symbols.
(hash_early_init): New function.
(hash_init): Set the class symbols in the class handles that
were created in hash_early_init at a time when these symbols
did not exist.
* lib.c (nelem): New macro.
(cobj_class): New static array.
(cobj_ptr): New static pointer.
(cobj_hash): New static hash.
(seq_iter_cls): New static class handle.
(builtin_type_p): New function.
(typeof): Struct instances now all carry the same symbol,
struct, as their COBJ class symbol. To get their type, we must
call struct_type_name.
(subtypep): Rearrangement of two cases: let's make the
reflexive case first. Adjust code for different location
of COBJ class symbol.
(seq_iter_init_with_info, seq_begin, seq_next, seq_reset,
iter_begin, iter_more, iter_item, iter_step, iter_reset,
make_like, list_collect, do_generic_funcall): Use class
handles instead of class symbols.
(class_check, cobj, cobjclassp, cobj_handle, cobj_ops): Take
class handle argument instead of class symbol.
(cobj_register, cobj_register_super, cobj_class_exists): New
functions.
(cobj_populate_hash): New static function.
(cobj_print_op): Adjust for different location of class symbol.
(cptr_print_op, cptr_typed, cptr_type, cptr_handle,
cptr_get): cptr functions now refer to obj->cp rather than
obj->co.
(copy, length, sub, ref, refset, replace, dwim_set, dwim_del,
obj_print): Use class handles for various COBJ types rather
than class symbols.
(obj_init): gc-protect cobj_hash. Initialize seq_iter_cls
class symbol and cobj_hash. Populate cobj_hash as the last
initialization step.
(init): Call hash_early_init immediately after gc_init.
diff --git a/lib.c b/lib.c
* match.c (do_match_line): Refer to regex_cls class handle
instead of regex_s.
* parser.c (parser_cls): New global class handle.
(parse, parser_get_impl, lisp_parse_impl, txr_parse,
parser_errors): Use class handles instead of class symbols.
(parse_init): Initialize parser_cls.
* rand.c (random_state_cls): New global class handle.
(make_state, random_state_p, make_random_state,
random_state_get_vec, random_fixnum, random_float, random):
Use class handles instead of class symbols.
(rand_init): Initialize random_state_cls.
* regex.c (regex_cls): New global class handle.
(chset_cls): New static class handle.
(reg_compile_csets, reg_derivative, regex_compile, regexp,
regex_source, regex_print, regex_run, regex_machine_init): Use
class handles instead of class symbols.
(regex_init): Initialize regex_cls and chset_cls.
* socket.c (make_dgram_sock_stream): Use stream_cls class
handle instead of stream_s class symbol.
* stream.c (stream_cls, stdio_stream_cls): New class handles.
(make_null_stream, stdio_get_fd, make_stdio_stream_common,
stream_fd, sock_family, sock_type, sock_peer, sock_set_peer,
make_dir_stream, make_string_input_stream,
make_string_byte_input_stream, make_strlist_input_stream,
make_string_output_stream, make_strlist_output_stream,
get_list_from_stream, make_catenated_stream,
make_delegate_stream, stream_set_prop,
stream_get_prop, close_stream, get_error, get_error_str,
clear_error, get_line, get_char, get_byte, get_bytes,
unget_char, unget_byte, put_buf, fill_buf, fill_buf_adjust,
get_line_as_buf, format, put_string, put_char, put_byte,
flush_stream, seek_stream, truncate_stream, get_indent_mode,
test_set_indent_mode, test_neq_set_indent_mode,
set_indent_mode, get_indent, set_indent, inc_indent,
width_check, force_break, set_max_length, set_max_depth): Use
class handle instead of symbol.
(stream_init): Initialize stream_cls and stdio_stream_cls.
* struct.c (struct_type_cls, struct_cls): New class handles.
(struct_init): Initialize struct_type_cls and struct_cls.
(struct_handle): Static function moved to avoid forward
declaration.
(stype_handle): Refer to struct_type_cls class handle instead
of struct_type_s symbol. Handle instance objects in addition
to types.
(make_struct_type): Throw error if a built-in type is being
defined as a struct type. Refer to class handle instead of
class symbol.
(find_struct_type, allocate_struct, make_struct_impl,
make_lazy_struct, copy_struct): Refer to class handle instead of
class symbol.
* strudel.c (make_struct_delegate_stream): Refer to stream_cls
class handle instead of stream_s symbol.
* sysif.c (dir_cls): New class handle.
(poll_wrap): Use typep instead of subtypep, eliminating access
to class symbol.
(opendir_wrap, closedir_wrap, readdir_wrap): Use class handles
instead of class symbols.
(sysif_init): Initialize dir_cls.
* syslog.c (make_syslog_stream): Refer to stream_cls class
handle instead of stream_s symbol.
* tree.c (tree_cls, tree_iter_cls): New class handles.
(tree_insert_node, tree_lookup_node, tree_delete_node,
tree_root, tree_equal_op, tree, copy_search_tree,
make_similar_tree, treep, tree_begin, copy_tree_iter,
replace_tree_iter, tree_reset, tree_next, tree_peek,
tree_clear): Use class handle instead of class symbol.
(tree_init): Initialize tree_cls and tree_iter_cls.
* unwind.c (sys_cont_cls): New static class handle.
(revive_cont, capture_cont): Use class handle instead of class
symbol.
(uw_late_init): Initialize sys_cont_cls.
* vm.c (vm_desc_cls): New global class handle.
(vm_closure_cls): New static class handle.
(vm_desc_struct, vm_make_desc, vm_closure_struct,
vm_make_closure, vm_copy_closure): Use class handle instead of
class symbol.
(vm_init): Initialize vm_desc_cls and vm_closure_cls.
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 36 |
1 files changed, 21 insertions, 15 deletions
@@ -256,6 +256,9 @@ union regex_machine {
 
 int opt_derivative_regex = 0;
 
+struct cobj_class *regex_cls;
+static struct cobj_class *chset_cls;
+
 wchar_t spaces[] = {
   0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x0020, 0x00a0, 0x1680, 0x180e,
   0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
@@ -1609,17 +1612,17 @@ static val reg_nary_to_bin(val regex)
 static val reg_compile_csets(val exp)
 {
   if (exp == space_k) {
-    return cobj(coerce(mem_t *, space_cs), chset_s, &char_set_obj_ops);
+    return cobj(coerce(mem_t *, space_cs), chset_cls, &char_set_obj_ops);
   } else if (exp == digit_k) {
-    return cobj(coerce(mem_t *, digit_cs), chset_s, &char_set_obj_ops);
+    return cobj(coerce(mem_t *, digit_cs), chset_cls, &char_set_obj_ops);
   } else if (exp == word_char_k) {
-    return cobj(coerce(mem_t *, word_cs), chset_s, &char_set_obj_ops);
+    return cobj(coerce(mem_t *, word_cs), chset_cls, &char_set_obj_ops);
   } else if (exp == cspace_k) {
-    return cobj(coerce(mem_t *, cspace_cs), chset_s, &char_set_obj_ops);
+    return cobj(coerce(mem_t *, cspace_cs), chset_cls, &char_set_obj_ops);
   } else if (exp == cdigit_k) {
-    return cobj(coerce(mem_t *, cdigit_cs), chset_s, &char_set_obj_ops);
+    return cobj(coerce(mem_t *, cdigit_cs), chset_cls, &char_set_obj_ops);
   } else if (exp == cword_char_k) {
-    return cobj(coerce(mem_t *, cword_cs), chset_s, &char_set_obj_ops);
+    return cobj(coerce(mem_t *, cword_cs), chset_cls, &char_set_obj_ops);
   } else if (symbolp(exp) || chrp(exp)) {
     return exp;
   } else if (stringp(exp)) {
@@ -1630,7 +1633,7 @@ static val reg_compile_csets(val exp)
     if (sym == set_s || sym == cset_s) {
       char_set_t *set = char_set_compile(args, eq(sym, cset_s));
-      return cobj(coerce(mem_t *, set), chset_s, &char_set_obj_ops);
+      return cobj(coerce(mem_t *, set), chset_cls, &char_set_obj_ops);
     } else if (sym == compound_s || sym == zeroplus_s ||
                sym == oneplus_s || sym == optional_s ||
                sym == compl_s || sym == nongreedy_s ||
                sym == or_s || sym == and_s)
@@ -1841,7 +1844,7 @@ static val reg_derivative(val exp, val ch)
     return t;
   } else if (chrp(exp)) {
     return null(eq(exp, ch));
-  } else if (cobjclassp(exp, chset_s)) {
+  } else if (cobjclassp(exp, chset_cls)) {
     char_set_t *set = coerce(char_set_t *, exp->co.handle);
     return if3(char_set_contains(set, c_chr(ch)), nil, t);
   } else if (exp == wild_s) {
@@ -2234,7 +2237,7 @@ val regex_compile(val regex_sexp, val error_stream)
     regex->kind = REGEX_DV;
     regex->nstates = 0;
     regex->source = nil;
-    ret = cobj(coerce(mem_t *, regex), regex_s, &regex_obj_ops);
+    ret = cobj(coerce(mem_t *, regex), regex_cls, &regex_obj_ops);
     regex->r.dv = dv;
     regex->source = regex_source;
     return ret;
@@ -2243,7 +2246,7 @@ val regex_compile(val regex_sexp, val error_stream)
     val ret;
     regex->kind = REGEX_NFA;
     regex->source = nil;
-    ret = cobj(coerce(mem_t *, regex), regex_s, &regex_obj_ops);
+    ret = cobj(coerce(mem_t *, regex), regex_cls, &regex_obj_ops);
     regex->r.nfa = nfa_optimize(nfa_compile_regex(regex_sexp));
     regex->nstates = nfa_count_states(regex->r.nfa.start);
     regex->source = regex_source;
@@ -2253,14 +2256,14 @@ val regex_compile(val regex_sexp, val error_stream)
 
 val regexp(val obj)
 {
-  return cobjclassp(obj, regex_s);
+  return cobjclassp(obj, regex_cls);
 }
 
 val regex_source(val compiled_regex)
 {
   val self = lit("regex-source");
   regex_t *regex = coerce(regex_t *,
-                          cobj_handle(self, compiled_regex, regex_s));
+                          cobj_handle(self, compiled_regex, regex_cls));
   return regex->source;
 }
 
@@ -2434,7 +2437,7 @@ static void print_rec(val exp, val stream, int *semi_flag)
 static void regex_print(val obj, val stream, val pretty, struct strm_ctx *ctx)
 {
   val self = lit("regex-print");
-  regex_t *regex = coerce(regex_t *, cobj_handle(self, obj, regex_s));
+  regex_t *regex = coerce(regex_t *, cobj_handle(self, obj, regex_cls));
   int semi_flag = 0;
 
   (void) pretty;
@@ -2448,7 +2451,7 @@ static void regex_print(val obj, val stream, val pretty, struct strm_ctx *ctx)
 static cnum regex_run(val compiled_regex, const wchar_t *str)
 {
   val self = lit("regex-run");
-  regex_t *regex = coerce(regex_t *, cobj_handle(self, compiled_regex, regex_s));
+  regex_t *regex = coerce(regex_t *, cobj_handle(self, compiled_regex, regex_cls));
 
   return if3(regex->kind == REGEX_DV,
              dv_run(regex->r.dv, str),
@@ -2492,7 +2495,7 @@ static void regex_machine_reset(regex_machine_t *regm)
 
 static void regex_machine_init(val self, regex_machine_t *regm, val reg)
 {
-  regex_t *regex = coerce(regex_t *, cobj_handle(self, reg, regex_s));
+  regex_t *regex = coerce(regex_t *, cobj_handle(self, reg, regex_cls));
 
   if (regex->kind == REGEX_DV) {
     regm->n.is_nfa = 0;
@@ -3342,6 +3345,9 @@ void regex_init(void)
   cdigit_k = intern(lit("cdigit"), keyword_package);
   cword_char_k = intern(lit("cword-char"), keyword_package);
 
+  regex_cls = cobj_register(regex_s);
+  chset_cls = cobj_register(chset_s);
+
   reg_fun(intern(lit("regex-compile"), user_package), func_n2o(regex_compile, 1));
   reg_fun(intern(lit("regexp"), user_package), func_n1(regexp));
   reg_fun(intern(lit("regex-source"), user_package), func_n1(regex_source));