summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- tests/017/ffi-misc.tl 7
-rw-r--r-- utf8.c 8
2 files changed, 12 insertions, 3 deletions
diff --git a/tests/017/ffi-misc.tl b/tests/017/ffi-misc.tl
index 1578cd2c..db510737 100644
--- a/tests/017/ffi-misc.tl
+++ b/tests/017/ffi-misc.tl
@@ -9,3 +9,10 @@
(test (ffi-put "\x1234@@@" zar) #b'e188b440404000')
(test (ffi-get (ffi-put "\x1234@@@" zar) zar) "\x1234@@@")
+
+(unless (meq (os-symbol) :cygwin :cygnal)
+ (test (ffi-get #b'EDB08100' (ffi (zarray char)))
+ "\xDCED\xDCB0\xDC81")
+
+ (test (ffi-get #b'ED7F7FEDFF00' (ffi (zarray char)))
+ "\xDCED\x7F\x7F\xDCED\xDCFF"))
diff --git a/utf8.c b/utf8.c
index 8cb81749..c23eefce 100644
--- a/utf8.c
+++ b/utf8.c
@@ -54,10 +54,10 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes)
{
size_t nchar = 1;
enum utf8_state state = utf8_init;
- const unsigned char *backtrack = 0;
+ const unsigned char *backtrack = 0, *end = src + nbytes;
wchar_t wch = 0, wch_min = 0;
- while (nbytes-- > 0) {
+ while (src < end) {
int ch = *src++;
switch (state) {
@@ -101,7 +101,7 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes)
nchar++;
break;
}
- backtrack = src;
+ backtrack = src - 1;
break;
case utf8_more1:
case utf8_more2:
@@ -118,6 +118,7 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes)
src = backtrack;
if (wdst)
*wdst++ = 0xDC00 | *src;
+ src++;
} else {
if (wdst)
*wdst++ = wch;
@@ -128,6 +129,7 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes)
src = backtrack;
if (wdst)
*wdst++ = 0xDC00 | *src;
+ src++;
nchar++;
state = utf8_init;
}