diff options
-rw-r--r-- | tests/017/ffi-misc.tl | 7 | ||||
-rw-r--r-- | utf8.c | 8 |
2 files changed, 12 insertions, 3 deletions
diff --git a/tests/017/ffi-misc.tl b/tests/017/ffi-misc.tl index 1578cd2c..db510737 100644 --- a/tests/017/ffi-misc.tl +++ b/tests/017/ffi-misc.tl @@ -9,3 +9,10 @@ (test (ffi-put "\x1234@@@" zar) #b'e188b440404000') (test (ffi-get (ffi-put "\x1234@@@" zar) zar) "\x1234@@@") + +(unless (meq (os-symbol) :cygwin :cygnal) + (test (ffi-get #b'EDB08100' (ffi (zarray char))) + "\xDCED\xDCB0\xDC81") + + (test (ffi-get #b'ED7F7FEDFF00' (ffi (zarray char))) + "\xDCED\x7F\x7F\xDCED\xDCFF")) @@ -54,10 +54,10 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes) { size_t nchar = 1; enum utf8_state state = utf8_init; - const unsigned char *backtrack = 0; + const unsigned char *backtrack = 0, *end = src + nbytes; wchar_t wch = 0, wch_min = 0; - while (nbytes-- > 0) { + while (src < end) { int ch = *src++; switch (state) { @@ -101,7 +101,7 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes) nchar++; break; } - backtrack = src; + backtrack = src - 1; break; case utf8_more1: case utf8_more2: @@ -118,6 +118,7 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes) src = backtrack; if (wdst) *wdst++ = 0xDC00 | *src; + src++; } else { if (wdst) *wdst++ = wch; @@ -128,6 +129,7 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes) src = backtrack; if (wdst) *wdst++ = 0xDC00 | *src; + src++; nchar++; state = utf8_init; } |