summaryrefslogtreecommitdiffstats
path: root/utf8.h
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2014-02-15 00:19:15 -0800
committerKaz Kylheku <kaz@kylheku.com>2014-02-15 00:19:15 -0800
commit3b64319b10196425401d4d71f7ee1273e3bffe32 (patch)
tree5197904de12a1b7a3d601fa468f4ab3514e0de2e /utf8.h
parent48fbe97484faad462a1fc52049d682fdaaa665a0 (diff)
downloadtxr-3b64319b10196425401d4d71f7ee1273e3bffe32.tar.gz
txr-3b64319b10196425401d4d71f7ee1273e3bffe32.tar.bz2
txr-3b64319b10196425401d4d71f7ee1273e3bffe32.zip
A trivial change in the UTF-8 decoder allows TXR to handle null bytes
in text.

* utf8.h (UTF8_ADMIT_NUL): New preprocessor symbol.
(utf8_decoder): New member, flags.

* utf8.c (utf8_decoder_init): Initialize flags to 0.
(utf8_decode): If a null byte is encountered in the input, then
convert it to 0xDC00, rather than keeping it as zero, unless flags
contains UTF8_ADMIT_NUL.

* txr.1: Document handling of null bytes.
Diffstat (limited to 'utf8.h')
-rw-r--r--utf8.h3
1 files changed, 3 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index c4915488..67dee69a 100644
--- a/utf8.h
+++ b/utf8.h
@@ -35,8 +35,11 @@ unsigned char *utf8_dup_to_uc(const wchar_t *);
enum utf8_state { utf8_init, utf8_more1, utf8_more2, utf8_more3 };
+#define UTF8_ADMIT_NUL 1
+
typedef struct utf8_decoder {
enum utf8_state state;
+ int flags;
wchar_t wch, wch_min;
int head, tail, back;
int buf[8];