diff options
Diffstat (limited to 'winsup/cygwin/libc/strptime.cc')
-rw-r--r-- | winsup/cygwin/libc/strptime.cc | 789 |
1 files changed, 315 insertions, 474 deletions
diff --git a/winsup/cygwin/libc/strptime.cc b/winsup/cygwin/libc/strptime.cc index e81cdd580..d0a77a850 100644 --- a/winsup/cygwin/libc/strptime.cc +++ b/winsup/cygwin/libc/strptime.cc @@ -1,26 +1,11 @@ -/* - * Powerdog Industries kindly requests feedback from anyone modifying - * this function: +/* $NetBSD: strptime.c,v 1.28 2008/04/28 20:23:01 martin Exp $ */ + +/*- + * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. + * All rights reserved. * - * Date: Thu, 05 Jun 1997 23:17:17 -0400 - * From: Kevin Ruddy <kevin.ruddy@powerdog.com> - * To: James FitzGibbon <james@nexis.net> - * Subject: Re: Use of your strptime(3) code (fwd) - * - * The reason for the "no mod" clause was so that modifications would - * come back and we could integrate them and reissue so that a wider - * audience could use it (thereby spreading the wealth). This has - * made it possible to get strptime to work on many operating systems. - * I'm not sure why that's "plain unacceptable" to the FreeBSD team. - * - * Anyway, you can change it to "with or without modification" as - * you see fit. Enjoy. - * - * Kevin Ruddy - * Powerdog Industries, Inc. - */ -/* - * Copyright (c) 1994 Powerdog Industries. All rights reserved. + * This code was contributed to The NetBSD Foundation by Klaus Klein. + * Heavily optimised by David Laight * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -28,518 +13,374 @@ * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgement: - * This product includes software developed by Powerdog Industries. - * 4. The name of Powerdog Industries may not be used to endorse or - * promote products derived from this software without specific prior - * written permission. + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ #include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: strptime.c,v 1.28 2008/04/28 20:23:01 martin Exp $"); +#endif + #ifdef __CYGWIN__ #include "winsup.h" #else -#ifndef lint -#ifndef NOID -static char copyright[] __unused = -"@(#) Copyright (c) 1994 Powerdog Industries. All rights reserved."; -static char sccsid[] __unused = "@(#)strptime.c 0.1 (Powerdog) 94/03/27"; -#endif /* !defined NOID */ -#endif /* not lint */ -__FBSDID("$FreeBSD: /repoman/r/ncvs/src/lib/libc/stdtime/strptime.c,v 1.35 2003/11/17 04:19:15 nectar Exp $"); - #include "namespace.h" #endif -#include <time.h> +#include <sys/localedef.h> #include <ctype.h> -#include <errno.h> -#include <stdlib.h> +#include <locale.h> #include <string.h> -#ifndef __CYGWIN__ -#include <pthread.h> -#include "un-namespace.h" -#include "libc_private.h" +#include <time.h> +#include <tzfile.h> + +#ifdef __weak_alias +__weak_alias(strptime,_strptime) #endif -#include "timelocal.h" -static char * _strptime(const char *, const char *, struct tm *, int *); +#define _ctloc(x) (_CurrentTimeLocale->x) + +/* + * We do not implement alternate representations. However, we always + * check whether a given modifier is allowed for a certain conversion. + */ +#define ALT_E 0x01 +#define ALT_O 0x02 +#define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; } -#define asizeof(a) ((int)(sizeof (a) / sizeof ((a)[0]))) +static const char gmt[4] = { "GMT" }; -static char * -_strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp) +static const u_char *conv_num(const unsigned char *, int *, uint, uint); +static const u_char *find_string(const u_char *, int *, const char * const *, + const char * const *, int); + + +char * +strptime(const char *buf, const char *fmt, struct tm *tm) { - char c; - const char *ptr; - int i, - len; - int Ealternative, Oalternative; - struct lc_time_T *tptr = __get_current_time_locale(); - - ptr = fmt; - while (*ptr != 0) { - if (*buf == 0) - break; - - c = *ptr++; - - if (c != '%') { - if (isspace((unsigned char)c)) - while (*buf != 0 && isspace((unsigned char)*buf)) - buf++; - else if (c != *buf++) - return 0; + unsigned char c; + const unsigned char *bp; + int alt_format, i, split_year = 0; + const char *new_fmt; + + bp = (const u_char *)buf; + + while (bp != NULL && (c = *fmt++) != '\0') { + /* Clear `alternate' modifier prior to new conversion. */ + alt_format = 0; + i = 0; + + /* Eat up white-space. */ + if (isspace(c)) { + while (isspace(*bp)) + bp++; continue; } - Ealternative = 0; - Oalternative = 0; -label: - c = *ptr++; - switch (c) { - case 0: - case '%': - if (*buf++ != '%') - return 0; - break; - - case '+': - buf = _strptime(buf, tptr->date_fmt, tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'C': - if (!isdigit((unsigned char)*buf)) - return 0; - - /* XXX This will break for 3-digit centuries. */ - len = 2; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; - } - if (i < 19) - return 0; - - tm->tm_year = i * 100 - 1900; - break; - - case 'c': - buf = _strptime(buf, tptr->c_fmt, tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'D': - buf = _strptime(buf, "%m/%d/%y", tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'E': - if (Ealternative || Oalternative) - break; - Ealternative++; - goto label; - - case 'O': - if (Ealternative || Oalternative) - break; - Oalternative++; - goto label; - - case 'F': - buf = _strptime(buf, "%Y-%m-%d", tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'R': - buf = _strptime(buf, "%H:%M", tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'r': - buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'T': - buf = _strptime(buf, "%H:%M:%S", tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'X': - buf = _strptime(buf, tptr->X_fmt, tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'x': - buf = _strptime(buf, tptr->x_fmt, tm, GMTp); - if (buf == 0) - return 0; - break; - - case 'j': - if (!isdigit((unsigned char)*buf)) - return 0; - - len = 3; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; - } - if (i < 1 || i > 366) - return 0; + if (c != '%') + goto literal; - tm->tm_yday = i - 1; - break; - case 'M': - case 'S': - if (*buf == 0 || isspace((unsigned char)*buf)) - break; +again: switch (c = *fmt++) { + case '%': /* "%%" is converted to "%". */ +literal: + if (c != *bp++) + return NULL; + LEGAL_ALT(0); + continue; - if (!isdigit((unsigned char)*buf)) - return 0; + /* + * "Alternative" modifiers. Just set the appropriate flag + * and start over again. + */ + case 'E': /* "%E?" alternative conversion modifier. */ + LEGAL_ALT(0); + alt_format |= ALT_E; + goto again; + + case 'O': /* "%O?" alternative conversion modifier. */ + LEGAL_ALT(0); + alt_format |= ALT_O; + goto again; + + /* + * "Complex" conversion rules, implemented through recursion. + */ + case 'c': /* Date and time, using the locale's format. */ + new_fmt = _ctloc(d_t_fmt); + goto recurse; + + case 'D': /* The date as "%m/%d/%y". */ + new_fmt = "%m/%d/%y"; + LEGAL_ALT(0); + goto recurse; + + case 'F': /* The date as "%Y-%m-%d". */ + new_fmt = "%Y-%m-%d"; + LEGAL_ALT(0); + goto recurse; + + case 'R': /* The time as "%H:%M". */ + new_fmt = "%H:%M"; + LEGAL_ALT(0); + goto recurse; + + case 'r': /* The time in 12-hour clock representation. */ + new_fmt =_ctloc(t_fmt_ampm); + LEGAL_ALT(0); + goto recurse; + + case 'T': /* The time as "%H:%M:%S". */ + new_fmt = "%H:%M:%S"; + LEGAL_ALT(0); + goto recurse; + + case 'X': /* The time, using the locale's format. */ + new_fmt =_ctloc(t_fmt); + goto recurse; + + case 'x': /* The date, using the locale's format. */ + new_fmt =_ctloc(d_fmt); + recurse: + bp = (const u_char *)strptime((const char *)bp, + new_fmt, tm); + LEGAL_ALT(ALT_E); + continue; - len = 2; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; - } + /* + * "Elementary" conversion rules. + */ + case 'A': /* The day of week, using the locale's form. */ + case 'a': + bp = find_string(bp, &tm->tm_wday, _ctloc(day), + _ctloc(abday), 7); + LEGAL_ALT(0); + continue; - if (c == 'M') { - if (i > 59) - return 0; - tm->tm_min = i; - } else { - if (i > 60) - return 0; - tm->tm_sec = i; - } + case 'B': /* The month, using the locale's form. */ + case 'b': + case 'h': + bp = find_string(bp, &tm->tm_mon, _ctloc(mon), + _ctloc(abmon), 12); + LEGAL_ALT(0); + continue; - if (*buf != 0 && isspace((unsigned char)*buf)) - while (*ptr != 0 && !isspace((unsigned char)*ptr)) - ptr++; - break; + case 'C': /* The century number. */ + i = 20; + bp = conv_num(bp, &i, 0, 99); - case 'H': - case 'I': - case 'k': - case 'l': - /* - * Of these, %l is the only specifier explicitly - * documented as not being zero-padded. However, - * there is no harm in allowing zero-padding. - * - * XXX The %l specifier may gobble one too many - * digits if used incorrectly. - */ - if (!isdigit((unsigned char)*buf)) - return 0; - - len = 2; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; - } - if (c == 'H' || c == 'k') { - if (i > 23) - return 0; - } else if (i > 12) - return 0; + i = i * 100 - TM_YEAR_BASE; + if (split_year) + i += tm->tm_year % 100; + split_year = 1; + tm->tm_year = i; + LEGAL_ALT(ALT_E); + continue; - tm->tm_hour = i; + case 'd': /* The day of month. */ + case 'e': + bp = conv_num(bp, &tm->tm_mday, 1, 31); + LEGAL_ALT(ALT_O); + continue; - if (*buf != 0 && isspace((unsigned char)*buf)) - while (*ptr != 0 && !isspace((unsigned char)*ptr)) - ptr++; - break; + case 'k': /* The hour (24-hour clock representation). */ + LEGAL_ALT(0); + /* FALLTHROUGH */ + case 'H': + bp = conv_num(bp, &tm->tm_hour, 0, 23); + LEGAL_ALT(ALT_O); + continue; - case 'p': - /* - * XXX This is bogus if parsed before hour-related - * specifiers. - */ - len = strlen(tptr->am); - if (strncasecmp(buf, tptr->am, len) == 0) { - if (tm->tm_hour > 12) - return 0; - if (tm->tm_hour == 12) - tm->tm_hour = 0; - buf += len; - break; - } + case 'l': /* The hour (12-hour clock representation). */ + LEGAL_ALT(0); + /* FALLTHROUGH */ + case 'I': + bp = conv_num(bp, &tm->tm_hour, 1, 12); + if (tm->tm_hour == 12) + tm->tm_hour = 0; + LEGAL_ALT(ALT_O); + continue; - len = strlen(tptr->pm); - if (strncasecmp(buf, tptr->pm, len) == 0) { - if (tm->tm_hour > 12) - return 0; - if (tm->tm_hour != 12) - tm->tm_hour += 12; - buf += len; - break; - } + case 'j': /* The day of year. */ + i = 1; + bp = conv_num(bp, &i, 1, 366); + tm->tm_yday = i - 1; + LEGAL_ALT(0); + continue; - return 0; + case 'M': /* The minute. */ + bp = conv_num(bp, &tm->tm_min, 0, 59); + LEGAL_ALT(ALT_O); + continue; - case 'A': - case 'a': - len = 0; - for (i = 0; i < asizeof(tptr->weekday); i++) { - len = strlen(tptr->weekday[i]); - if (strncasecmp(buf, tptr->weekday[i], - len) == 0) - break; - len = strlen(tptr->wday[i]); - if (strncasecmp(buf, tptr->wday[i], - len) == 0) - break; - } - if (i == asizeof(tptr->weekday)) - return 0; + case 'm': /* The month. */ + i = 1; + bp = conv_num(bp, &i, 1, 12); + tm->tm_mon = i - 1; + LEGAL_ALT(ALT_O); + continue; - tm->tm_wday = i; - buf += len; - break; + case 'p': /* The locale's equivalent of AM/PM. */ + bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2); + if (tm->tm_hour > 11) + return NULL; + tm->tm_hour += i * 12; + LEGAL_ALT(0); + continue; + + case 'S': /* The seconds. */ + bp = conv_num(bp, &tm->tm_sec, 0, 61); + LEGAL_ALT(ALT_O); + continue; - case 'U': - case 'W': + case 'U': /* The week of year, beginning on sunday. */ + case 'W': /* The week of year, beginning on monday. */ /* * XXX This is bogus, as we can not assume any valid * information present in the tm structure at this * point to calculate a real value, so just check the * range for now. */ - if (!isdigit((unsigned char)*buf)) - return 0; - - len = 2; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; - } - if (i > 53) - return 0; - - if (*buf != 0 && isspace((unsigned char)*buf)) - while (*ptr != 0 && !isspace((unsigned char)*ptr)) - ptr++; - break; - - case 'w': - if (!isdigit((unsigned char)*buf)) - return 0; + bp = conv_num(bp, &i, 0, 53); + LEGAL_ALT(ALT_O); + continue; - i = *buf - '0'; - if (i > 6) - return 0; + case 'w': /* The day of week, beginning on sunday. */ + bp = conv_num(bp, &tm->tm_wday, 0, 6); + LEGAL_ALT(ALT_O); + continue; - tm->tm_wday = i; + case 'Y': /* The year. */ + i = TM_YEAR_BASE; /* just for data sanity... */ + bp = conv_num(bp, &i, 0, 9999); + tm->tm_year = i - TM_YEAR_BASE; + LEGAL_ALT(ALT_E); + continue; - if (*buf != 0 && isspace((unsigned char)*buf)) - while (*ptr != 0 && !isspace((unsigned char)*ptr)) - ptr++; - break; + case 'y': /* The year within 100 years of the epoch. */ + /* LEGAL_ALT(ALT_E | ALT_O); */ + bp = conv_num(bp, &i, 0, 99); + + if (split_year) + /* preserve century */ + i += (tm->tm_year / 100) * 100; + else { + split_year = 1; + if (i <= 68) + i = i + 2000 - TM_YEAR_BASE; + else + i = i + 1900 - TM_YEAR_BASE; + } + tm->tm_year = i; + continue; - case 'd': - case 'e': - /* - * The %e specifier is explicitly documented as not - * being zero-padded but there is no harm in allowing - * such padding. - * - * XXX The %e specifier may gobble one too many - * digits if used incorrectly. - */ - if (!isdigit((unsigned char)*buf)) - return 0; - - len = 2; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; + case 'Z': + tzset(); + if (strncmp((const char *)bp, gmt, 3) == 0) { + tm->tm_isdst = 0; +#ifdef TM_GMTOFF + tm->TM_GMTOFF = 0; +#endif +#ifdef TM_ZONE + tm->TM_ZONE = gmt; +#endif + bp += 3; + } else { + const unsigned char *ep; + + ep = find_string(bp, &i, + (const char * const *)tzname, + NULL, 2); + if (ep != NULL) { + tm->tm_isdst = i; +#ifdef TM_GMTOFF + tm->TM_GMTOFF = -(timezone); +#endif +#ifdef TM_ZONE + tm->TM_ZONE = tzname[i]; +#endif + } + bp = ep; } - if (i > 31) - return 0; + continue; - tm->tm_mday = i; + /* + * Miscellaneous conversions. + */ + case 'n': /* Any kind of white-space. */ + case 't': + while (isspace(*bp)) + bp++; + LEGAL_ALT(0); + continue; - if (*buf != 0 && isspace((unsigned char)*buf)) - while (*ptr != 0 && !isspace((unsigned char)*ptr)) - ptr++; - break; - case 'B': - case 'b': - case 'h': - len = 0; - for (i = 0; i < asizeof(tptr->month); i++) { - if (Oalternative) { - if (c == 'B') { - len = strlen(tptr->alt_month[i]); - if (strncasecmp(buf, - tptr->alt_month[i], - len) == 0) - break; - } - } else { - len = strlen(tptr->month[i]); - if (strncasecmp(buf, tptr->month[i], - len) == 0) - break; - len = strlen(tptr->mon[i]); - if (strncasecmp(buf, tptr->mon[i], - len) == 0) - break; - } - } - if (i == asizeof(tptr->month)) - return 0; - - tm->tm_mon = i; - buf += len; - break; - - case 'm': - if (!isdigit((unsigned char)*buf)) - return 0; - - len = 2; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; - } - if (i < 1 || i > 12) - return 0; + default: /* Unknown/unsupported conversion. */ + return NULL; + } + } - tm->tm_mon = i - 1; + return (char *) bp; +} - if (*buf != 0 && isspace((unsigned char)*buf)) - while (*ptr != 0 && !isspace((unsigned char)*ptr)) - ptr++; - break; - - case 's': - { - char *cp; - int sverrno; - long n; - time_t t; - - sverrno = errno; - errno = 0; - n = strtol(buf, &cp, 10); - if (errno == ERANGE || (long)(t = n) != n) { - errno = sverrno; - return 0; - } - errno = sverrno; - buf = cp; - gmtime_r(&t, tm); - *GMTp = 1; - } - break; - case 'Y': - case 'y': - if (*buf == 0 || isspace((unsigned char)*buf)) - break; +static const u_char * +conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) +{ + uint result = 0; + unsigned char ch; - if (!isdigit((unsigned char)*buf)) - return 0; + /* The limit also determines the number of valid digits. */ + uint rulim = ulim; - len = (c == 'Y') ? 4 : 2; - for (i = 0; len && *buf != 0 && isdigit((unsigned char)*buf); buf++) { - i *= 10; - i += *buf - '0'; - len--; - } - if (c == 'Y') - i -= 1900; - if (c == 'y' && i < 69) - i += 100; - if (i < 0) - return 0; + ch = *buf; + if (ch < '0' || ch > '9') + return NULL; - tm->tm_year = i; + do { + result *= 10; + result += ch - '0'; + rulim /= 10; + ch = *++buf; + } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); - if (*buf != 0 && isspace((unsigned char)*buf)) - while (*ptr != 0 && !isspace((unsigned char)*ptr)) - ptr++; - break; + if (result < llim || result > ulim) + return NULL; - case 'Z': - { - const char *cp; - char *zonestr; - - for (cp = buf; *cp && isupper((unsigned char)*cp); ++cp) {/*empty*/} - if (cp - buf) { - zonestr = (char *) alloca(cp - buf + 1); - strncpy(zonestr, buf, cp - buf); - zonestr[cp - buf] = '\0'; - tzset(); - if (0 == strcmp(zonestr, "GMT")) { - *GMTp = 1; - } else if (0 == strcmp(zonestr, tzname[0])) { - tm->tm_isdst = 0; - } else if (0 == strcmp(zonestr, tzname[1])) { - tm->tm_isdst = 1; - } else { - return 0; - } - buf += cp - buf; - } - } - break; - } - } - return (char *)buf; + *dest = result; + return buf; } -extern "C" char * -strptime(const char * __restrict buf, const char * __restrict fmt, - struct tm * __restrict tm) +static const u_char * +find_string(const u_char *bp, int *tgt, const char * const *n1, + const char * const *n2, int c) { - char *ret; - int gmt; - - gmt = 0; - ret = _strptime(buf, fmt, tm, &gmt); - if (ret && gmt) { - time_t t = timegm(tm); - localtime_r(&t, tm); + int i; + unsigned int len; + + /* check full name - then abbreviated ones */ + for (; n1 != NULL; n1 = n2, n2 = NULL) { + for (i = 0; i < c; i++, n1++) { + len = strlen(*n1); + if (strncasecmp(*n1, (const char *)bp, len) == 0) { + *tgt = i; + return bp + len; + } + } } - return (ret); + /* Nothing matched */ + return NULL; } |