summary | refs | log | tree | commit | diff | stats
path: root/utf8.c
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com>, 2012-02-04 22:22:57 +0100
committer: Kaz Kylheku <kaz@kylheku.com>, 2012-02-04 22:22:57 +0100
commit: 6949749e00019594d17a2dd7788dadd1663aff64 (patch)
tree: 384eb23fc60054fc0fb0c05de69c2c553b963c0b /utf8.c
parent: 442c9efa4b176ff2c4c89a43beac3ea3fad247d4 (diff)
download: txr-6949749e00019594d17a2dd7788dadd1663aff64.tar.gz
txr-6949749e00019594d17a2dd7788dadd1663aff64.tar.bz2
txr-6949749e00019594d17a2dd7788dadd1663aff64.zip
* utf8.c (utf8_from_uc): Bugfix: incorrect condition in the character
range check. A decoded character was rejected only if it was less than the minimum for its encoding length *and* in the U+DCxx range; it must be rejected if *either* condition holds. Also, we must check for out-of-range characters: UTF-8 sequences beginning with F4 can encode values beyond 0x10FFFF. (utf8_decode): Check for characters beyond 0x10FFFF.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 8
1 files changed, 5 insertions, 3 deletions
diff --git a/utf8.c b/utf8.c
index 0c9c109c..d4ca3513 100644
--- a/utf8.c
+++ b/utf8.c
@@ -104,8 +104,9 @@ size_t utf8_from_uc(wchar_t *wdst, const unsigned char *src)
wch |= (ch & 0x3F);
state = (enum utf8_state) (state - 1);
if (state == utf8_init) {
- if (wch < wch_min &&
- (wch <= 0xFFFF && (wch & 0xFF00) == 0xDC00))
+ if (wch < wch_min ||
+ (wch <= 0xFFFF && (wch & 0xFF00) == 0xDC00) ||
+ (wch > 0x10FFFF))
{
src = backtrack;
if (wdst)
@@ -311,7 +312,8 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx)
ud->state = (enum utf8_state) (ud->state - 1);
if (ud->state == utf8_init) {
if (ud->wch < ud->wch_min ||
- (ud->wch <= 0xFFFF && (ud->wch & 0xFF00) == 0xDC00))
+ (ud->wch <= 0xFFFF && (ud->wch & 0xFF00) == 0xDC00) ||
+ (ud->wch > 0x10FFFF))
{
wchar_t wch = 0xDC00 | ud->buf[ud->back];
ud->tail = ud->back = (ud->back + 1) % 8;