diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 12:18:34 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2009-03-24 12:18:34 +0000 |
commit | 161211d186a16e4f090b8b3c63040f0b9aee25d4 (patch) | |
tree | 4ac0e1154417f3b0119ba79407a8c5687d96bf83 /winsup/cygwin/fhandler_console.cc | |
parent | 6a32d500a9d601b4f25cee0e1ec6b2ac5195a7e9 (diff) | |
download | cygnal-161211d186a16e4f090b8b3c63040f0b9aee25d4.tar.gz cygnal-161211d186a16e4f090b8b3c63040f0b9aee25d4.tar.bz2 cygnal-161211d186a16e4f090b8b3c63040f0b9aee25d4.zip |
* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.
(__ctype_default): New character class array for default ASCII
character set.
(__ctype_iso): New array of character class arrays for ISO charsets.
(__ctype_cp): Ditto for singlebyte Windows codepages.
(tolower): Implement as distinct function to support any singlebyte
charset.
(toupper): Ditto.
(__set_ctype): New function to copy singlebyte character classes
corresponding to current charset to ctype_b array.
Align copyright text to upstream.
* dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX.
* environ.cc (set_file_api_mode): Remove.
(codepage_init): Remove.
(parse_thing): Remove "codepage" setting.
(environ_init): Set locale according to environment settings, or
to current codepage, before converting environment to multibyte.
* fhandler.h (fhandler_console::write_replacement_char): Drop argument.
* fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs
rather than MultiByteToWideChar.
(fhandler_console::write_replacement_char): Always print a funny
half filled square if a character isn't in the current charset.
(fhandler_console::write_normal): Convert to using __mbtowc
rather than next_char.
* fork.cc (frok::child): Drop call to set_file_api_mode.
* globals.cc (enum codepage_type): Remove.
(current_codepage): Remove.
* miscfuncs.cc (cygwin_wcslwr): Unused, dangerous. Remove.
(cygwin_wcsupr): Ditto.
(is_cp_multibyte): Remove.
(next_char): Remove.
* miscfuncs.h (is_cp_multibyte): Drop declaration.
(next_char): Ditto.
* strfuncs.cc (get_cp): Remove.
(__db_wctomb): New function to implement _wctomb_r functionality for
doublebyte charsets using WideCharToMultiByte.
(__sjis_wctomb): New function to replace unusable newlib function.
(__jis_wctomb): Ditto.
(__eucjp_wctomb): Ditto.
(__gbk_wctomb): New function.
(__kr_wctomb): Ditto.
(__big5_wctomb): Ditto.
(__db_mbtowc): New function to implement _mbtowc_r functionality for
doublebyte charsets using MultiByteToWideChar.
(__sjis_mbtowc): New function to replace unusable newlib function.
(__jis_mbtowc): Ditto.
(__eucjp_mbtowc): Ditto.
(__gbk_mbtowc): New function.
(__kr_mbtowc): New function.
(__big5_mbtowc): New function.
(__set_charset_from_codepage): New function.
(sys_wcstombs): Reimplement, basically using same wide char to multibyte
conversion as newlib's application level functions. Plus extras.
Add lengthy comment to explain. Change return type to size_t.
(sys_wcstombs_alloc): Just use sys_wcstombs. Change return type to
size_t.
(sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage
argument. Explain why. Change return type to size_t.
(sys_mbstowcs_alloc): Just use sys_mbstowcs. Change return type to
size_t.
* wchar.h: Declare internal functions implemented in strfuncs.cc.
(wcscasecmp): Remove.
(wcsncasecmp): Remove.
(wcslwr): Remove.
(wcsupr): Remove.
* winsup.h (codepage_init): Remove declaration.
(get_cp): Ditto.
(sys_wcstombs): Align declaration to new implementation.
(sys_wcstombs_alloc): Ditto.
(sys_cp_mbstowcs): Add declaration.
(sys_mbstowcs): Define as inline function.
(sys_mbstowcs_alloc): Align declaration to new implementation.
(set_file_api_mode): Remove declaration.
* include/ctype.h (isblank): Redefine to use _B character class.
(toupper): Remove ASCII-only definition.
(tolower): Ditto.
Diffstat (limited to 'winsup/cygwin/fhandler_console.cc')
-rw-r--r-- | winsup/cygwin/fhandler_console.cc | 88 |
1 files changed, 55 insertions, 33 deletions
diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc index 11cf70639..68fb71ea9 100644 --- a/winsup/cygwin/fhandler_console.cc +++ b/winsup/cygwin/fhandler_console.cc @@ -13,6 +13,7 @@ details. */ #include "miscfuncs.h" #include <stdio.h> #include <stdlib.h> +#include <wchar.h> #include <wingdi.h> #include <winuser.h> #include <winnls.h> @@ -133,13 +134,13 @@ dev_console::con_to_str (char *d, int dlen, WCHAR w) inline UINT dev_console::get_console_cp () { - return alternate_charset_active ? GetConsoleOutputCP () : get_cp (); + return alternate_charset_active ? GetConsoleOutputCP () : 0; } inline DWORD dev_console::str_to_con (PWCHAR d, const char *s, DWORD sz) { - return MultiByteToWideChar (get_console_cp (), 0, s, sz, d, CONVERT_LIMIT); + return sys_cp_mbstowcs (get_console_cp (), d, CONVERT_LIMIT, s, sz); } bool @@ -1400,22 +1401,15 @@ beep () MessageBeep (MB_OK); } -/* This gets called when we found an invalid UTF-8 character. We try with - the default ANSI codepage. If that fails we just print a question mark. - Looks ugly but is a neat and alomst sane fallback for many languages. */ +/* This gets called when we found an invalid input character. We just + print a half filled square (UTF 0x2592). We have no chance to figure + out the "meaning" of the input char anyway. 
*/ void -fhandler_console::write_replacement_char (const unsigned char *char_p) +fhandler_console::write_replacement_char () { - int n; - WCHAR def_cp_chars[2]; + static const wchar_t replacement_char = 0x2592; /* Half filled square */ DWORD done; - - n = MultiByteToWideChar (GetACP (), 0, (const CHAR *) char_p, 1, - def_cp_chars, 2); - if (n) - WriteConsoleW (get_output_handle (), def_cp_chars, n, &done, 0); - else - WriteConsoleW (get_output_handle (), L"?", 1, &done, 0); + WriteConsoleW (get_output_handle (), &replacement_char, 1, &done, 0); } const unsigned char * @@ -1426,22 +1420,46 @@ fhandler_console::write_normal (const unsigned char *src, DWORD done; DWORD buf_len; const unsigned char *found = src; - const unsigned char *nfound; + size_t ret; + mbstate_t ps; UINT cp = dev_state->get_console_cp (); + char charsetbuf[32]; + char *charset = __locale_charset (); + mbtowc_p f_mbtowc = __mbtowc; + + if (cp) + f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf); /* First check if we have cached lead bytes of a former try to write a truncated multibyte sequence. If so, process it. */ if (trunc_buf.len) { + const unsigned char *nfound; int cp_len = min (end - src, 4 - trunc_buf.len); memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len); - nfound = next_char (cp, trunc_buf.buf, - trunc_buf.buf + trunc_buf.len + cp_len); - /* Still truncated multibyte sequence? Keep in trunc_buf. */ - if (nfound == trunc_buf.buf) + memset (&ps, 0, sizeof ps); + switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf, + trunc_buf.len + cp_len, charset, &ps)) { + case -2: + /* Still truncated multibyte sequence? Keep in trunc_buf. */ trunc_buf.len += cp_len; return end; + case -1: + /* Give up, print replacement chars for trunc_buf... */ + for (int i = 0; i < trunc_buf.len; ++i) + write_replacement_char (); + /* ... mark trunc_buf as unused... */ + trunc_buf.len = 0; + /* ... and proceed. 
*/ + nfound = NULL; + break; + case 0: + nfound = trunc_buf.buf + 1; + break; + default: + nfound = trunc_buf.buf + ret; + break; } /* Valid multibyte sequence? Process. */ if (nfound) @@ -1454,28 +1472,32 @@ fhandler_console::write_normal (const unsigned char *src, trunc_buf.len = 0; return found; } - /* Give up, print replacement chars for trunc_buf... */ - for (int i = 0; i < trunc_buf.len; ++i) - write_replacement_char (trunc_buf.buf + i); - /* ... mark trunc_buf as unused... */ - trunc_buf.len = 0; - /* ... and proceed. */ } + memset (&ps, 0, sizeof ps); while (found < end && found - src < CONVERT_LIMIT && base_chars[*found] == NOR) { - nfound = next_char (cp, found, end); - if (!nfound) /* Invalid multibyte sequence. */ - break; - if (nfound == found) /* Truncated multibyte sequence. */ - { /* Stick to it until the next write. */ + switch (ret = f_mbtowc (_REENT, NULL, (const char *) found, + end - found, charset, &ps)) + { + case -2: + /* Truncated multibyte sequence. Stick to it until the next write. */ trunc_buf.len = end - found; memcpy (trunc_buf.buf, found, trunc_buf.len); return end; + case -1: + break; + case 0: + found++; + break; + default: + found += ret; + break; } - found = nfound; + if (ret == (size_t) -1) /* Invalid multibyte sequence. */ + break; } /* Print all the base ones out */ @@ -1558,7 +1580,7 @@ fhandler_console::write_normal (const unsigned char *src, cursor_set (false, 8 * (x / 8 + 1), y); break; case NOR: - write_replacement_char (found); + write_replacement_char (); break; } found++; |