diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2021-04-20 07:45:30 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2021-04-20 07:45:30 -0700 |
commit | 2db8b0497c7cc13b44210fb06b74d45fefccefc3 (patch) | |
tree | 224f5617e347fa39b371eb7716567187b34b8480 | |
parent | 901440d00b54e747cc68da70df3a0a1eaef258fc (diff) | |
download | txr-2db8b0497c7cc13b44210fb06b74d45fefccefc3.tar.gz txr-2db8b0497c7cc13b44210fb06b74d45fefccefc3.tar.bz2 txr-2db8b0497c7cc13b44210fb06b74d45fefccefc3.zip |
utf8: decode: reduce strictness of full unicode check.
* utf8.c (utf8_from_buf, utf8_decode): On 16 bit wchar_t, we
don't have to throw on every value in the range 0xF0-0xFF.
Only the values 0xF0 through 0xF4 are potential UTF-8 bytes;
so we only need to error out on those. 0xF5 through 0xFF
are invalid bytes, which we can map into the 0xDCNN range.
-rw-r--r-- | utf8.c | 8 |
1 files changed, 4 insertions, 4 deletions
@@ -84,16 +84,16 @@ size_t utf8_from_buf(wchar_t *wdst, const unsigned char *src, size_t nbytes) wch_min = 0x800; break; case 0xF: -#ifdef FULL_UNICODE if (ch < 0xF5) { +#ifdef FULL_UNICODE state = utf8_more3; wch = (ch & 0x7); wch_min = 0x10000; break; - } #else conversion_error(); #endif + } /* fallthrough */ default: if (wdst) @@ -317,16 +317,16 @@ wint_t utf8_decode(utf8_decoder_t *ud, int (*get)(mem_t *ctx), mem_t *ctx) ud->wch_min = 0x800; break; case 0xF: -#ifdef FULL_UNICODE if (ch < 0xF5) { +#ifdef FULL_UNICODE ud->state = utf8_more3; ud->wch = (ch & 0x7); ud->wch_min = 0x10000; break; - } #else conversion_error(); #endif + } /* fallthrough */ default: ud->back = ud->tail; |