summaryrefslogtreecommitdiffstats
path: root/newlib/libc/locale/locale.c
diff options
context:
space:
mode:
Diffstat (limited to 'newlib/libc/locale/locale.c')
-rw-r--r--newlib/libc/locale/locale.c169
1 files changed, 152 insertions, 17 deletions
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index a4cd30eb9..d3644eb8a 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -47,11 +47,18 @@ and <<"C">> values for <[locale]>; strings representing other locales are not
honored unless _MB_CAPABLE is defined in which case POSIX locale strings
are allowed, plus five extensions supported for backward compatibility with
older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>,
-<<"C-SJIS">>, or <<"C-ISO-8859-x">> with 1 <= x <= 15. Even when using
-POSIX locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>,
-<<"EUCJP">>, <<"SJIS">>, or <<"ISO-8859-x">> with 1 <= x <= 15. (<<"">> is
-also accepted; if given, the settings are read from the corresponding
-LC_* environment variables and $LANG according to POSIX rules.
+<<"C-SJIS">>, <<"C-ISO-8859-x">> with 1 <= x <= 16 (except 12), or <<"C-CPxxx">> with
+xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250,
+1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. Even when using POSIX
+locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>,
+<<"EUCJP">>, <<"SJIS">>, <<"ISO-8859-x">> with 1 <= x <= 16 (except 12), or
+<<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866,
+874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
+(<<"">> is also accepted; if given, the settings are read from the
+corresponding LC_* environment variables and $LANG according to POSIX rules.)
+
+Under Cygwin, this implementation additionally supports the charsets <<"GBK">>,
+<<"CP949">>, and <<"BIG5">>.
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
a pointer to the string representing the current locale (always
@@ -85,6 +92,9 @@ PORTABILITY
ANSI C requires <<setlocale>>, but the only locale required across all
implementations is the C locale.
+NOTES
+There is no ISO-8859-12 codepage, so this implementation rejects <<"ISO-8859-12">> as a charset.
+
No supporting OS subroutines are required.
*/
@@ -129,6 +139,11 @@ No supporting OS subroutines are required.
#include <limits.h>
#include <reent.h>
#include <stdlib.h>
+#include <wchar.h>
+#include "../stdlib/local.h"
+#ifdef __CYGWIN__
+#include <windows.h>
+#endif
#define _LC_LAST 7
#define ENCODING_LEN 31
@@ -190,8 +205,8 @@ static const char *__get_locale_env(struct _reent *, int);
#endif
-static char lc_ctype_charset[ENCODING_LEN + 1] = "ISO-8859-1";
-static char lc_message_charset[ENCODING_LEN + 1] = "ISO-8859-1";
+static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
+static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
char *
_DEFUN(_setlocale_r, (p, category, locale),
@@ -205,8 +220,6 @@ _DEFUN(_setlocale_r, (p, category, locale),
if (strcmp (locale, "POSIX") && strcmp (locale, "C")
&& strcmp (locale, ""))
return NULL;
- p->_current_category = category;
- p->_current_locale = locale;
}
return "C";
#else
@@ -361,6 +374,11 @@ currentlocale()
#endif
#ifdef _MB_CAPABLE
+#ifdef __CYGWIN__
+extern void *__set_charset_from_codepage (unsigned int, char *charset);
+extern void __set_ctype (const char *charset);
+#endif /* __CYGWIN__ */
+
static char *
loadlocale(struct _reent *p, int category)
{
@@ -382,7 +400,7 @@ loadlocale(struct _reent *p, int category)
if (!strcmp (locale, "POSIX"))
strcpy (locale, "C");
if (!strcmp (locale, "C")) /* Default "C" locale */
- strcpy (charset, "ISO-8859-1");
+ strcpy (charset, "ASCII");
else if (locale[0] == 'C' && locale[1] == '-') /* Old newlib style */
strcpy (charset, locale + 2);
else /* POSIX style */
@@ -414,7 +432,11 @@ loadlocale(struct _reent *p, int category)
}
else if (c[0] == '\0' || c[0] == '@')
/* End of string or just a modifier */
+#ifdef __CYGWIN__
+ __set_charset_from_codepage (GetACP (), charset);
+#else
strcpy (charset, "ISO-8859-1");
+#endif
else
/* Invalid string */
return NULL;
@@ -426,42 +448,155 @@ loadlocale(struct _reent *p, int category)
if (strcmp (charset, "UTF-8"))
return NULL;
mbc_max = 6;
+#ifdef _MB_CAPABLE
+ __wctomb = __utf8_wctomb;
+ __mbtowc = __utf8_mbtowc;
+#endif
break;
case 'J':
if (strcmp (charset, "JIS"))
return NULL;
mbc_max = 8;
+#ifdef _MB_CAPABLE
+ __wctomb = __jis_wctomb;
+ __mbtowc = __jis_mbtowc;
+#endif
break;
case 'E':
- if (strcmp (charset, "EUCJP"))
+ if (strcmp (charset, "EUCJP") && strcmp (charset, "eucJP"))
return NULL;
+ strcpy (charset, "EUCJP");
mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __eucjp_wctomb;
+ __mbtowc = __eucjp_mbtowc;
+#endif
break;
case 'S':
if (strcmp (charset, "SJIS"))
return NULL;
mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __sjis_wctomb;
+ __mbtowc = __sjis_mbtowc;
+#endif
break;
case 'I':
- default:
- /* Must be exactly one of ISO-8859-1, [...] ISO-8859-15. */
+ /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
+ ISO-8859-12. */
if (strncmp (charset, "ISO-8859-", 9))
return NULL;
- val = strtol (charset + 9, &end, 10);
- if (val < 1 || val > 15 || *end)
+ val = _strtol_r (p, charset + 9, &end, 10);
+ if (val < 1 || val > 16 || val == 12 || *end)
return NULL;
mbc_max = 1;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_ISO
+ __wctomb = __iso_wctomb;
+ __mbtowc = __iso_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_ISO */
+ __wctomb = __ascii_wctomb;
+ __mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_ISO */
+#endif
+ break;
+ case 'C':
+ if (charset[1] != 'P')
+ return NULL;
+ val = _strtol_r (p, charset + 2, &end, 10);
+ if (*end)
+ return NULL;
+ switch (val)
+ {
+ case 437:
+ case 720:
+ case 737:
+ case 775:
+ case 850:
+ case 852:
+ case 855:
+ case 857:
+ case 858:
+ case 862:
+ case 866:
+ case 874:
+ case 1125:
+ case 1250:
+ case 1251:
+ case 1252:
+ case 1253:
+ case 1254:
+ case 1255:
+ case 1256:
+ case 1257:
+ case 1258:
+ mbc_max = 1;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+ __wctomb = __cp_wctomb;
+ __mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ __wctomb = __ascii_wctomb;
+ __mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+ break;
+#ifdef __CYGWIN__
+ case 949:
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __kr_wctomb;
+ __mbtowc = __kr_mbtowc;
+#endif
+ break;
+#endif
+ default:
+ return NULL;
+ }
+ break;
+ case 'A':
+ if (strcmp (charset, "ASCII"))
+ return NULL;
+ mbc_max = 1;
+#ifdef _MB_CAPABLE
+ __wctomb = __ascii_wctomb;
+ __mbtowc = __ascii_mbtowc;
+#endif
break;
+#ifdef __CYGWIN__
+ case 'G':
+ if (strcmp (charset, "GBK"))
+ return NULL;
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __gbk_wctomb;
+ __mbtowc = __gbk_mbtowc;
+#endif
+ break;
+ case 'B':
+ if (strcmp (charset, "BIG5") && strcmp (charset, "Big5"))
+ return NULL;
+ strcpy (charset, "BIG5");
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __big5_wctomb;
+ __mbtowc = __big5_mbtowc;
+#endif
+ break;
+#endif /* __CYGWIN__ */
+ default:
+ return NULL;
}
if (category == LC_CTYPE)
{
strcpy (lc_ctype_charset, charset);
__mb_cur_max = mbc_max;
+#ifdef __CYGWIN__
+ __set_ctype (charset);
+#endif
}
else if (category == LC_MESSAGES)
strcpy (lc_message_charset, charset);
- p->_current_category = category;
- p->_current_locale = locale;
return strcpy(current_categories[category], new_categories[category]);
}