diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2017-05-10 06:41:50 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2017-05-10 06:41:50 -0700 |
commit | 471149c262613e4b69fc14b9599fe541106084e4 (patch) | |
tree | e784850dd3089f325d37482621aa5d3ae7c2c045 | |
parent | fe81a856d6a1db346c519897581925d1797913ad (diff) | |
download | txr-471149c262613e4b69fc14b9599fe541106084e4.tar.gz txr-471149c262613e4b69fc14b9599fe541106084e4.tar.bz2 txr-471149c262613e4b69fc14b9599fe541106084e4.zip |
ffi: adjust semantics of zarray of characters.
We want to be able to extract null-terminated UTF-8 strings
from arrays, without trailing junk, yet retain the ability to
extract the entire array including embedded nulls. The natural
way to achieve both is to use the array/zarray distinction.
* ffi.c (ffi_array_in, ffi_array_get): Don't try to guess
whether the array is null-terminated; just rely on the
null_term flag, and treat the array accordingly.
* txr.1: Doc updated.
-rw-r--r-- | ffi.c | 18 | ||||
-rw-r--r-- | txr.1 | 30 |
2 files changed, 34 insertions, 14 deletions
@@ -1008,7 +1008,7 @@ static val ffi_array_in(struct txr_ffi_type *tft, int copy, mem_t *src, str = null_string; } else { const char *chptr = coerce(const char *, src); - if (chptr[tft->size - 1] == 0) { + if (tft->null_term) { str = string_utf8(chptr); } else { wchar_t *wch = utf8_dup_from_buf(chptr, tft->size); @@ -1022,13 +1022,12 @@ static val ffi_array_in(struct txr_ffi_type *tft, int copy, mem_t *src, if (nelem == 0) { str = null_string; } else { - cnum nchar = tft->size / sizeof (wchar_t); const wchar_t *wchptr = coerce(const wchar_t *, src); - if (wchptr[nchar - 1] == 0) { + if (tft->null_term) { str = string(wchptr); } else { - val ustr = mkustring(num_fast(nchar)); + val ustr = mkustring(num_fast(nelem)); str = init_str(ustr, wchptr); } } @@ -1040,7 +1039,7 @@ static val ffi_array_in(struct txr_ffi_type *tft, int copy, mem_t *src, str = null_string; } else { const unsigned char *chptr = coerce(const unsigned char *, src); - if (chptr[tft->size - 1] == 0) + if (tft->null_term) str = string_8bit(chptr); else str = string_8bit_size(chptr, tft->size); @@ -1129,7 +1128,7 @@ static val ffi_array_get(struct txr_ffi_type *tft, mem_t *src, val self) return null_string; } else { const char *chptr = coerce(const char *, src); - if (chptr[tft->size - 1] == 0) { + if (tft->null_term) { return string_utf8(chptr); } else { wchar_t *wch = utf8_dup_from_buf(chptr, tft->size); @@ -1140,13 +1139,12 @@ static val ffi_array_get(struct txr_ffi_type *tft, mem_t *src, val self) if (nelem == 0) { return null_string; } else { - cnum nchar = tft->size / sizeof (wchar_t); const wchar_t *wchptr = coerce(const wchar_t *, src); - if (wchptr[nchar - 1] == 0) { + if (tft->null_term) { return string(wchptr); } else { - val ustr = mkustring(num_fast(nchar)); + val ustr = mkustring(num_fast(nelem)); return init_str(ustr, wchptr); } } @@ -1155,7 +1153,7 @@ static val ffi_array_get(struct txr_ffi_type *tft, mem_t *src, val self) return null_string; } else { const unsigned char *chptr = 
coerce(const unsigned char *, src); - if (chptr[tft->size - 1] == 0) + if (tft->null_term) return string_8bit(chptr); else return string_8bit_size(chptr, tft->size); @@ -53597,17 +53597,39 @@ When converting from Lisp to C, it ensures that the array is null-terminated. This means that the last element of the array is written out as all zero bytes. The .code zarray -type also allows the Lisp object to be one element short. For instance, +type is useful for handling null terminated character arrays representing +strings, and for null terminated vectors. +Unlike +.codn array , +.code zarray +allows the Lisp object to be one element short. For instance, when a .code "(zarray 5 int)" passed by pointer a foreign function is converted back to Lisp, the Lisp object is required to have only four elements. If the Lisp object has five elements, then the fifth one will be decoded from the C array in earnest; it is not expected to be null. -The +Lastly, the .code zarray -type is useful for handling null terminated character arrays representing -strings, and for null terminated vectors. +further extends the special treatment which the +.code array +type applies to the types +.codn char , +.code wchar +and +.codn bchar . +Namely, +.code zarray +assumes, and depends on the incoming data being null-terminated, and converts it to a Lisp +string accordingly. The regular +.code array +type doesn't assume null termination. In particular, this means that an +.code "(array 42 char)" +will decode 42 bytes of UTF-8, even if some of them are null. The null bytes +convert to U+DC00. In contrast, a +.code zarray +will treat the 42 bytes as a null-terminated string, and decode UTF-8 only +up to the first null. .meIP (zarray << type ) The |