diff options
Diffstat (limited to 'newlib/libc/locale/locale.c')
-rw-r--r-- | newlib/libc/locale/locale.c | 117 |
1 files changed, 86 insertions, 31 deletions
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c index 85069aefa..26283c5f4 100644 --- a/newlib/libc/locale/locale.c +++ b/newlib/libc/locale/locale.c @@ -56,34 +56,36 @@ for a given language, a three character string per ISO 639-3. <<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and <<"modifier">> see below. -Additionally to the POSIX specifier, seven extensions are supported for -backward compatibility with older implementations using newlib: -<<"C-UTF-8">>, <<"C-JIS">>, <<"C-eucJP">>, <<"C-SJIS">>, <<C-KOI8-R>>, -<<C-KOI8-U>>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with -xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932, -1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. - -Instead of <<"C-">>, you can specify also <<"C.">>. Both variations allow +Additionally to the POSIX specifier, the following extension is supported +for backward compatibility with older implementations using newlib: +<<"C-charset">>. +Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow to specify language neutral locales while using other charsets than ASCII, for instance <<"C.UTF-8">>, which keeps all settings as in the C locale, but uses the UTF-8 charset. -Even when using POSIX locale strings, the only charsets allowed are +The following charsets are recognized: <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>, -<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in -[437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932, 1125, 1250, -1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. +<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with +1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855, +857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, +1257, 1258]. + Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">> are equivalent. 
Charset names with dashes can also be written without dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>. +Full support for all of the above charsets requires that newlib has been +built with multibyte support and support for all ISO and Windows Codepages. +Otherwise all singlebyte charsets are simply mapped to ASCII. Right now, +only newlib for Cygwin is built with full charset support by default. +Under Cygwin, this implementation additionally supports the charsets +<<"GBK">>, <<"eucKR">>, and <<"Big5">>. Cygwin does not support <<"JIS">>. + (<<"">> is also accepted; if given, the settings are read from the corresponding LC_* environment variables and $LANG according to POSIX rules. -Under Cygwin, this implementation additionally supports the charsets -<<"GBK">>, <<"eucKR">>, <<"Big5">>, and <<"TIS-620">>. - This implementation also supports a single modifier, <<"cjknarrow">>. Any other modifier is ignored. 
<<"cjknarrow">>, in conjunction with one of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies @@ -720,38 +722,91 @@ loadlocale(struct _reent *p, int category) l_mbtowc = __ascii_mbtowc; #endif break; -#ifdef __CYGWIN__ case 'G': case 'g': - if (strcasecmp (charset, "GBK")) - return NULL; - strcpy (charset, "GBK"); - mbc_max = 2; +#ifdef __CYGWIN__ + if (!strcasecmp (charset, "GBK")) + { + strcpy (charset, "GBK"); + mbc_max = 2; #ifdef _MB_CAPABLE - l_wctomb = __gbk_wctomb; - l_mbtowc = __gbk_mbtowc; + l_wctomb = __gbk_wctomb; + l_mbtowc = __gbk_mbtowc; #endif + } + else +#endif /* __CYGWIN__ */ + /* GEORGIAN-PS and the alias without dash */ + if (!strncasecmp (charset, "GEORGIAN", 8)) + { + c = charset + 8; + if (*c == '-') + ++c; + if (strcasecmp (c, "PS")) + return NULL; + strcpy (charset, "CP101"); + mbc_max = 1; +#ifdef _MB_CAPABLE +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS + l_wctomb = __cp_wctomb; + l_mbtowc = __cp_mbtowc; +#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ + l_wctomb = __ascii_wctomb; + l_mbtowc = __ascii_mbtowc; +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ +#endif + } + else + return NULL; break; - case 'B': - case 'b': - if (strcasecmp (charset, "BIG5")) - return NULL; - strcpy (charset, "BIG5"); - mbc_max = 2; + case 'P': + case 'p': + /* PT154 */ + if (strcasecmp (charset, "PT154")) + return NULL; + strcpy (charset, "CP102"); + mbc_max = 1; #ifdef _MB_CAPABLE - l_wctomb = __big5_wctomb; - l_mbtowc = __big5_mbtowc; +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS + l_wctomb = __cp_wctomb; + l_mbtowc = __cp_mbtowc; +#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ + l_wctomb = __ascii_wctomb; + l_mbtowc = __ascii_mbtowc; +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ #endif break; case 'T': case 't': - if (strcasecmp (charset, "TIS620") && strcasecmp (charset, "TIS-620")) + if (strncasecmp (charset, "TIS", 3)) + return NULL; + c = charset + 3; + if (*c == '-') + ++c; + if (strcasecmp (c, "620")) return NULL; strcpy (charset, "CP874"); mbc_max = 1; #ifdef 
_MB_CAPABLE +#ifdef _MB_EXTENDED_CHARSETS_WINDOWS l_wctomb = __cp_wctomb; l_mbtowc = __cp_mbtowc; +#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ + l_wctomb = __ascii_wctomb; + l_mbtowc = __ascii_mbtowc; +#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ +#endif + break; +#ifdef __CYGWIN__ + case 'B': + case 'b': + if (strcasecmp (charset, "BIG5")) + return NULL; + strcpy (charset, "BIG5"); + mbc_max = 2; +#ifdef _MB_CAPABLE + l_wctomb = __big5_wctomb; + l_mbtowc = __big5_mbtowc; #endif break; #endif /* __CYGWIN__ */ |