summary | refs | log | tree | commit | diff | stats
path: root/utf8.c
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2018-12-17 09:47:01 -0800
committer: Kaz Kylheku <kaz@kylheku.com> 2018-12-17 09:47:01 -0800
commit: 83d7d1a4cd39ac09c795838e26fa03f09f0cd604 (patch)
tree: 4534d351bd0b3ff06d7e2009b9381172cc51244d /utf8.c
parent: 39888e21ad17f5abbeaddc876f734a9683b5b914 (diff)
downloadtxr-83d7d1a4cd39ac09c795838e26fa03f09f0cd604.tar.gz
txr-83d7d1a4cd39ac09c795838e26fa03f09f0cd604.tar.bz2
txr-83d7d1a4cd39ac09c795838e26fa03f09f0cd604.zip
UTF-8: fix incorrect decoding of four-byte sequences.
utf8.c (utf8_decode): The wch_min value is set incorrectly for the four-byte case due to an extra zero; it should be only 0x10000. Code points encoded as four UTF-8 bytes start at this value. The consequence of this error is that UTF-8-encoded characters from 0x10000 up to, but not including, the erroneous minimum of 0x100000 are treated as invalid bytes after being decoded, because they fail the range test.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 2
1 files changed, 1 insertions, 1 deletions
diff --git a/utf8.c b/utf8.c
index 3ddc74a5..eaef3864 100644
--- a/utf8.c
+++ b/utf8.c
@@ -324,7 +324,7 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx)
if (ch < 0xF5) {
ud->state = utf8_more3;
ud->wch = (ch & 0x7);
- ud->wch_min = 0x100000;
+ ud->wch_min = 0x10000;
break;
}
/* fallthrough */