summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2017-05-10 06:41:50 -0700
committer Kaz Kylheku <kaz@kylheku.com> 2017-05-10 06:41:50 -0700
commit 471149c262613e4b69fc14b9599fe541106084e4 (patch)
tree e784850dd3089f325d37482621aa5d3ae7c2c045
parent fe81a856d6a1db346c519897581925d1797913ad (diff)
download txr-471149c262613e4b69fc14b9599fe541106084e4.tar.gz
txr-471149c262613e4b69fc14b9599fe541106084e4.tar.bz2
txr-471149c262613e4b69fc14b9599fe541106084e4.zip
ffi: adjust semantics of zarray of characters.
We want to be able to extract null-terminated UTF-8 strings from arrays, without trailing junk, yet retain the ability to extract the entire array including embedded nulls. The natural way is to use the array/zarray distinction.

* ffi.c (ffi_array_in, ffi_array_get): Don't try to guess whether the array is null terminated; just rely on the null_term flag, and treat accordingly.

* txr.1: Doc updated.
-rw-r--r-- ffi.c 18
-rw-r--r-- txr.1 30
2 files changed, 34 insertions, 14 deletions
diff --git a/ffi.c b/ffi.c
index 5918933d..8c99f3d9 100644
--- a/ffi.c
+++ b/ffi.c
@@ -1008,7 +1008,7 @@ static val ffi_array_in(struct txr_ffi_type *tft, int copy, mem_t *src,
str = null_string;
} else {
const char *chptr = coerce(const char *, src);
- if (chptr[tft->size - 1] == 0) {
+ if (tft->null_term) {
str = string_utf8(chptr);
} else {
wchar_t *wch = utf8_dup_from_buf(chptr, tft->size);
@@ -1022,13 +1022,12 @@ static val ffi_array_in(struct txr_ffi_type *tft, int copy, mem_t *src,
if (nelem == 0) {
str = null_string;
} else {
- cnum nchar = tft->size / sizeof (wchar_t);
const wchar_t *wchptr = coerce(const wchar_t *, src);
- if (wchptr[nchar - 1] == 0) {
+ if (tft->null_term) {
str = string(wchptr);
} else {
- val ustr = mkustring(num_fast(nchar));
+ val ustr = mkustring(num_fast(nelem));
str = init_str(ustr, wchptr);
}
}
@@ -1040,7 +1039,7 @@ static val ffi_array_in(struct txr_ffi_type *tft, int copy, mem_t *src,
str = null_string;
} else {
const unsigned char *chptr = coerce(const unsigned char *, src);
- if (chptr[tft->size - 1] == 0)
+ if (tft->null_term)
str = string_8bit(chptr);
else
str = string_8bit_size(chptr, tft->size);
@@ -1129,7 +1128,7 @@ static val ffi_array_get(struct txr_ffi_type *tft, mem_t *src, val self)
return null_string;
} else {
const char *chptr = coerce(const char *, src);
- if (chptr[tft->size - 1] == 0) {
+ if (tft->null_term) {
return string_utf8(chptr);
} else {
wchar_t *wch = utf8_dup_from_buf(chptr, tft->size);
@@ -1140,13 +1139,12 @@ static val ffi_array_get(struct txr_ffi_type *tft, mem_t *src, val self)
if (nelem == 0) {
return null_string;
} else {
- cnum nchar = tft->size / sizeof (wchar_t);
const wchar_t *wchptr = coerce(const wchar_t *, src);
- if (wchptr[nchar - 1] == 0) {
+ if (tft->null_term) {
return string(wchptr);
} else {
- val ustr = mkustring(num_fast(nchar));
+ val ustr = mkustring(num_fast(nelem));
return init_str(ustr, wchptr);
}
}
@@ -1155,7 +1153,7 @@ static val ffi_array_get(struct txr_ffi_type *tft, mem_t *src, val self)
return null_string;
} else {
const unsigned char *chptr = coerce(const unsigned char *, src);
- if (chptr[tft->size - 1] == 0)
+ if (tft->null_term)
return string_8bit(chptr);
else
return string_8bit_size(chptr, tft->size);
diff --git a/txr.1 b/txr.1
index fa48abd5..a56cac44 100644
--- a/txr.1
+++ b/txr.1
@@ -53597,17 +53597,39 @@ When converting from Lisp to C, it ensures that the array is null-terminated.
This means that the last element of the array is written out as all zero bytes.
The
.code zarray
-type also allows the Lisp object to be one element short. For instance,
+type is useful for handling null terminated character arrays representing
+strings, and for null terminated vectors.
+Unlike
+.codn array ,
+.code zarray
+allows the Lisp object to be one element short. For instance,
when a
.code "(zarray 5 int)"
passed by pointer to a foreign function is converted back to Lisp,
the Lisp object is required to have only four elements. If the Lisp object
has five elements, then the fifth one will be decoded from the C array
in earnest; it is not expected to be null.
-The
+Lastly, the
.code zarray
-type is useful for handling null terminated character arrays representing
-strings, and for null terminated vectors.
+further extends the special treatment which the
+.code array
+type applies to the types
+.codn char ,
+.code wchar
+and
+.codn bchar .
+Namely,
+.code zarray
+assumes, and depends on, the incoming data being null-terminated, and converts it to a Lisp
+string accordingly. The regular
+.code array
+type doesn't assume null termination. In particular, this means that an
+.code "(array 42 char)"
+will decode 42 bytes of UTF-8, even if some of them are null. The null bytes
+convert to U+DC00. In contrast, a
+.code zarray
+will treat the 42 bytes as a null-terminated string, and decode UTF-8 only
+up to the first null.
.meIP (zarray << type )
The