diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2009-11-11 08:54:21 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2009-11-11 08:54:21 -0800 |
commit | d59d8950ec58702821ec618b92dfb2490ae0bf31 (patch) | |
tree | e27e2914d563171ad56c2f7ae30c7c49343df06c /regex.c | |
parent | 2f62f352f603b837a5cf032c257531052530c410 (diff) | |
download | txr-d59d8950ec58702821ec618b92dfb2490ae0bf31.tar.gz txr-d59d8950ec58702821ec618b92dfb2490ae0bf31.tar.bz2 txr-d59d8950ec58702821ec618b92dfb2490ae0bf31.zip |
Big conversion to wide characters and UTF-8 support.
This is incomplete. There are too many dependencies on
wide character support from the C stream I/O library,
and implicit use of some encoding which may not be UTF-8.
The regex code does not handle wide characters properly.
Character type is still int in some places, rather than wchar_t.
Test suite passes though.
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 6 |
1 file changed, 3 insertions, 3 deletions
```diff
@@ -528,9 +528,9 @@ int nfa_move(nfa_state_t **in, int nin, nfa_state_t **out, int ch)
  * determines the match length (defaulting to zero
  * if no acceptance states were encountered).
  */
-long nfa_run(nfa_t nfa, const char *str)
+long nfa_run(nfa_t nfa, const wchar_t *str)
 {
-  const char *last_accept_pos = 0, *ptr = str;
+  const wchar_t *last_accept_pos = 0, *ptr = str;
   unsigned visited = nfa.start->a.visited + 1;
   nfa_state_t **move = chk_malloc(NFA_SET_SIZE * sizeof *move);
   nfa_state_t **clos = chk_malloc(NFA_SET_SIZE * sizeof *clos);
@@ -697,7 +697,7 @@ obj_t *search_regex(obj_t *haystack, obj_t *needle_regex, obj_t *start,
   if (from_end) {
     long i;
     long s = c_num(start);
-    const char *h = c_str(haystack);
+    const wchar_t *h = c_str(haystack);
 
     for (i = c_num(length_str(haystack)) - 1; i >= s; i--) {
       long span = nfa_run(*pnfa, h + i);
```