diff options
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | lib.c | 2 | ||||
-rw-r--r-- | lib.h | 2 | ||||
-rw-r--r-- | stream.c | 2 | ||||
-rw-r--r-- | utf8.c | 38 | ||||
-rw-r--r-- | utf8.h | 12 |
6 files changed, 59 insertions, 13 deletions
@@ -1,5 +1,21 @@ 2009-11-14 Kaz Kylheku <kkylheku@gmail.com> + Provide both char * and unsigned char * interfaces in UTF-8 module. + Fix unsigned and plain char * mixing. + + * utf8.c (utf8_from_uc, utf8_to_uc, utf8_dup_from_uc, + utf8_dup_to_uc): New functions. + (utf8_from): Fix type of backtrack pointer to unsigned char *. + + * utf8.h (utf8_from_uc, utf8_to_uc, utf8_dup_from_uc, + utf8_dup_to_uc): Declared. + + * lib.c (string_utf8): Changed to take char * argument. + + * lib.h (string_utf8): Declaration updated. + +2009-11-14 Kaz Kylheku <kkylheku@gmail.com> + * Makefile (depend): Marked phony and $(PROG) prerequisite dropped. (clean, distclean, tests, install): Phony targets marked phony. @@ -701,7 +701,7 @@ obj_t *string(const wchar_t *str) return obj; } -obj_t *string_utf8(const unsigned char *str) +obj_t *string_utf8(const char *str) { obj_t *obj = make_obj(); obj->st.type = STR; @@ -240,7 +240,7 @@ obj_t *max2(obj_t *anum, obj_t *bnum); obj_t *min2(obj_t *anum, obj_t *bnum); obj_t *string_own(wchar_t *str); obj_t *string(const wchar_t *str); -obj_t *string_utf8(const unsigned char *str); +obj_t *string_utf8(const char *str); obj_t *mkstring(obj_t *len, obj_t *ch); obj_t *mkustring(obj_t *len); /* must initialize immediately with init_str! 
*/ obj_t *init_str(obj_t *str, const wchar_t *); @@ -629,7 +629,7 @@ obj_t *make_string_byte_input_stream(obj_t *string) { struct byte_input *bi = (struct byte_input *) chk_malloc(sizeof *bi); - unsigned char *utf8 = utf8_dup_to(c_str(string)); + unsigned char *utf8 = utf8_dup_to_uc(c_str(string)); bi->buf = utf8; bi->size = strlen((char *) utf8); bi->index = 0; @@ -31,11 +31,11 @@ #include "lib.h" #include "utf8.h" -size_t utf8_from(wchar_t *wdst, const unsigned char *src) +size_t utf8_from_uc(wchar_t *wdst, const unsigned char *src) { size_t nchar = 1; enum utf8_state state = utf8_init; - const char *backtrack = 0; + const unsigned char *backtrack = 0; wchar_t wch = 0; for (;;) { @@ -101,7 +101,12 @@ size_t utf8_from(wchar_t *wdst, const unsigned char *src) return nchar; } -size_t utf8_to(unsigned char *dst, const wchar_t *wsrc) +size_t utf8_from(wchar_t *wdst, const char *src) +{ + return utf8_from_uc(wdst, (const unsigned char *) src); +} + +size_t utf8_to_uc(unsigned char *dst, const wchar_t *wsrc) { size_t nbyte = 1; wchar_t wch; @@ -140,7 +145,20 @@ size_t utf8_to(unsigned char *dst, const wchar_t *wsrc) return nbyte; } -wchar_t *utf8_dup_from(const unsigned char *str) +size_t utf8_to(char *dst, const wchar_t *wsrc) +{ + return utf8_to_uc((unsigned char *) dst, wsrc); +} + +wchar_t *utf8_dup_from_uc(const unsigned char *str) +{ + size_t nchar = utf8_from_uc(0, str); + wchar_t *wstr = chk_malloc(sizeof *wstr * nchar); + utf8_from_uc(wstr, str); + return wstr; +} + +wchar_t *utf8_dup_from(const char *str) { size_t nchar = utf8_from(0, str); wchar_t *wstr = chk_malloc(sizeof *wstr * nchar); @@ -148,10 +166,18 @@ wchar_t *utf8_dup_from(const unsigned char *str) return wstr; } -unsigned char *utf8_dup_to(const wchar_t *wstr) +unsigned char *utf8_dup_to_uc(const wchar_t *wstr) { - size_t nbyte = utf8_to(0, wstr); + size_t nbyte = utf8_to_uc(0, wstr); unsigned char *str = chk_malloc(nbyte); + utf8_to_uc(str, wstr); + return str; +} + +char *utf8_dup_to(const 
wchar_t *wstr) +{ + size_t nbyte = utf8_to(0, wstr); + char *str = chk_malloc(nbyte); utf8_to(str, wstr); return str; } @@ -24,10 +24,14 @@ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -size_t utf8_from(wchar_t *, const unsigned char *); -size_t utf8_to(unsigned char *, const wchar_t *); -wchar_t *utf8_dup_from(const unsigned char *); -unsigned char *utf8_dup_to(const wchar_t *); +size_t utf8_from_uc(wchar_t *, const unsigned char *); +size_t utf8_from(wchar_t *, const char *); +size_t utf8_to_uc(unsigned char *, const wchar_t *); +size_t utf8_to(char *, const wchar_t *); +wchar_t *utf8_dup_from_uc(const unsigned char *); +wchar_t *utf8_dup_from(const char *); +char *utf8_dup_to(const wchar_t *); +unsigned char *utf8_dup_to_uc(const wchar_t *); enum utf8_state { utf8_init, utf8_more1, utf8_more2, utf8_more3 }; |