diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2015-08-10 19:57:35 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2015-08-10 19:57:35 -0700 |
commit | d36002e99bd8c844a0f1abdc26e66be7f94409b4 (patch) | |
tree | 9658e92e45356b20003e1d66567e416f99d0f01a | |
parent | 7d962e011cbb56d33d2874ad12205ecea68a7191 (diff) | |
download | txr-d36002e99bd8c844a0f1abdc26e66be7f94409b4.tar.gz txr-d36002e99bd8c844a0f1abdc26e66be7f94409b4.tar.bz2 txr-d36002e99bd8c844a0f1abdc26e66be7f94409b4.zip |
Count East Asian Wide and Full Width chars as two columns.
* regex.c (create_wide_cs): New static function.
(wide_display_char_p): New function.
* regex.h (wide_display_char_p): Declared.
* stream.c (put_string, put_char): Use wide_display_char_p
to determine whether an extra column need be counted. Also bugfix:
iswprint evidently cannot be relied upon to work over the entire Unicode
range, at least not in the C locale. Glibc's version is reporting
valid Japanese characters as unprintable on Ubuntu. As a hack we
instead check for control characters and invert the result: control
chars are unprintable.
* tests/009/json.expected: Updated.
-rw-r--r-- | regex.c | 66 | ||||
-rw-r--r-- | regex.h | 2 | ||||
-rw-r--r-- | stream.c | 9 | ||||
-rw-r--r-- | tests/009/json.expected | 4 |
4 files changed, 74 insertions, 7 deletions
@@ -2153,6 +2153,72 @@ val match_regst_right(val str, val regex, val end) sub_str(str, minus(end, len), end))); } +static char_set_t *create_wide_cs(void) +{ +#ifdef FULL_UNICODE + chset_type_t cst = CHSET_XLARGE; +#else + chset_type_t cst = CHSET_LARGE; +#endif + + char_set_t *cs = char_set_create(cst, 0, 1); + + char_set_add_range(cs, 0x1100, 0x115F); + char_set_add_range(cs, 0x2329, 0x232A); + char_set_add_range(cs, 0x2E80, 0x2E99); + char_set_add_range(cs, 0x2E9B, 0x2EF3); + char_set_add_range(cs, 0x2F00, 0x2FD5); + char_set_add_range(cs, 0x2FF0, 0x2FFB); + char_set_add_range(cs, 0x3000, 0x303E); + char_set_add_range(cs, 0x3000, 0x303E); + char_set_add_range(cs, 0x3041, 0x3096); + char_set_add_range(cs, 0x3099, 0x30FF); + char_set_add_range(cs, 0x3105, 0x312D); + char_set_add_range(cs, 0x3131, 0x318E); + char_set_add_range(cs, 0x3190, 0x31BA); + char_set_add_range(cs, 0x31C0, 0x31E3); + char_set_add_range(cs, 0x31F0, 0x321E); + char_set_add_range(cs, 0x3220, 0x3247); + char_set_add_range(cs, 0x3250, 0x32FE); + char_set_add_range(cs, 0x3300, 0x4DB5); + char_set_add_range(cs, 0x4E00, 0x9FFF); + char_set_add_range(cs, 0xA000, 0xA48C); + char_set_add_range(cs, 0xA490, 0xA4C6); + char_set_add_range(cs, 0xA960, 0xA97C); + char_set_add_range(cs, 0xAC00, 0xD7A3); + char_set_add_range(cs, 0xF900, 0xFAFF); + char_set_add_range(cs, 0xFE10, 0xFE19); + char_set_add_range(cs, 0xFE30, 0xFE52); + char_set_add_range(cs, 0xFE54, 0xFE6B); + char_set_add_range(cs, 0xFF01, 0xFF60); + char_set_add_range(cs, 0xFFE0, 0xFFE6); + +#ifdef FULL_UNICODE + char_set_add_range(cs, 0x1B000, 0x1B001); + char_set_add_range(cs, 0x1F200, 0x1F202); + char_set_add_range(cs, 0x1F210, 0x1F23A); + char_set_add_range(cs, 0x1F240, 0x1F248); + char_set_add_range(cs, 0x1F250, 0x1F251); + char_set_add_range(cs, 0x20000, 0x2FFFD); + char_set_add_range(cs, 0x30000, 0x3FFFD); +#endif + + return cs; +} + +int wide_display_char_p(wchar_t ch) +{ + static char_set_t *wide_cs; + + if (ch < 0x1100) + return 0; + + 
if (!wide_cs) + wide_cs = create_wide_cs(); + + return char_set_contains(wide_cs, ch); +} + val space_k, digit_k, word_char_k; val cspace_k, cdigit_k, cword_char_k; @@ -39,5 +39,5 @@ val search_regst(val haystack, val needle_regex, val start_num, val from_end); val match_regst(val str, val regex, val pos); val match_regst_right(val str, val regex, val end); val regsub(val regex, val repl, val str); - +int wide_display_char_p(wchar_t ch); void regex_init(void); @@ -2584,8 +2584,8 @@ val put_string(val string, val stream_in) col = (col + 1) | 7; break; default: - if (iswprint(*p)) - col++; + if (!iswcntrl(*p)) + col += 1 + wide_display_char_p(*p); break; } } @@ -2621,8 +2621,9 @@ val put_char(val ch, val stream_in) s->column = s->indent_chars; } ops->put_char(stream, ch); - if (iswprint(cch)) - s->column++; + + if (!iswcntrl(cch)) + s->column += 1 + wide_display_char_p(cch); break; } diff --git a/tests/009/json.expected b/tests/009/json.expected index eea65fb8..4e80c58f 100644 --- a/tests/009/json.expected +++ b/tests/009/json.expected @@ -46,8 +46,8 @@ AST: #("JSON Test Pattern pass1" #H((:equal-based) ("object with 1 member" #("ar ("address" "50 St. James Street") ("compact" #(1.0 2.0 3.0 4.0 5.0 6.0 7.0)) ("object" #H((:equal-based))) ("quote" "\"") ("jsontext" "{\"object with 1 member\":[\"array with 1 element\"]}") ("true" :true) ("integer" 1234567890.0) ("ALPHA" "ABCDEFGHIJKLMNOPQRSTUVWYZ") - ("quotes" "" \" %22 0x22 034 "") ("hex" "ģ䕧覫췯ꯍ") ("0123456789" "digit") - ("controls" "\b\f\n\r\t") ("alpha" "abcdefghijklmnopqrstuvwyz") + ("quotes" "" \" %22 0x22 034 "") ("hex" "ģ䕧覫췯ꯍ") + ("0123456789" "digit") ("controls" "\b\f\n\r\t") ("alpha" "abcdefghijklmnopqrstuvwyz") (" s p a c e d " #(1.0 2.0 3.0 4.0 5.0 6.0 7.0))) 0.5 98.6 99.44 1066.0 10.0 1.0 0.1 1.0 2.0 2.0 "rosebud") |