Incorporate upstream's strptime %Z/%z support.
I made toybox use strptime %Z recently (so that it can parse the default
POSIX date(1) output), forgetting that bionic's strptime(3) doesn't
support %Z. Neither does glibc, for that matter; the toybox change works
on glibc effectively by accident --- glibc just ignores the next word
when parsing %Z and assumes that the current time zone ($TZ) is
appropriate. Which it is for the only obvious use case of "round trip
date(1) output".
The related %z is potentially quite a bit more useful in general (parsing
any valid RFC822 time zone), though sadly not useful for the toybox case
that prompted this change.
Every time I touch this file I promise that I'll actually get us back in
sync with upstream, and every time I fail to get round to it. Maybe
2020 or 2021 will finally be the year...
Also add corresponding tests.
Bug: https://b/167455975
Test: treehugger
Change-Id: I13a7fb7e3ad01ae855750b9314d2eec661fe034f
diff --git a/libc/tzcode/strptime.c b/libc/tzcode/strptime.c
index 41eaa9b..7e8e234 100644
--- a/libc/tzcode/strptime.c
+++ b/libc/tzcode/strptime.c
@@ -95,9 +95,22 @@
int century;
int relyear;
};
+
+static char gmt[] = { "GMT" };
+static char utc[] = { "UTC" };
+/* RFC-822/RFC-2822 */
+static const char * const nast[5] = {
+ "EST", "CST", "MST", "PST", "\0\0\0"
+};
+static const char * const nadt[5] = {
+ "EDT", "CDT", "MDT", "PDT", "\0\0\0"
+};
+
static int _conv_num(const unsigned char **, int *, int, int);
static unsigned char *_strptime(const unsigned char *, const char *, struct tm *,
struct century_relyear *);
+static const u_char *_find_string(const u_char *, int *, const char * const *,
+ const char * const *, int);
char *
@@ -113,9 +126,10 @@
_strptime(const unsigned char *buf, const char *fmt, struct tm *tm, struct century_relyear *cr)
{
unsigned char c;
- const unsigned char *bp;
+ const unsigned char *bp, *ep;
size_t len = 0;
- int alt_format, i;
+ int alt_format, i, offs;
+ int neg = 0;
bp = (unsigned char *)buf;
while ((c = *fmt) != '\0') {
@@ -432,6 +446,108 @@
return (NULL);
break;
+ case 'Z':
+ tzset();
+ if (strncmp((const char *)bp, gmt, 3) == 0) {
+ tm->tm_isdst = 0;
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = gmt;
+ bp += 3;
+ } else if (strncmp((const char *)bp, utc, 3) == 0) {
+ tm->tm_isdst = 0;
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = utc;
+ bp += 3;
+ } else {
+ ep = _find_string(bp, &i,
+ (const char * const *)tzname,
+ NULL, 2);
+ if (ep == NULL)
+ return (NULL);
+
+ tm->tm_isdst = i;
+ tm->tm_gmtoff = -(timezone);
+ tm->tm_zone = tzname[i];
+ bp = ep;
+ }
+ continue;
+
+ case 'z':
+ /*
+ * We recognize all ISO 8601 formats:
+ * Z = Zulu time/UTC
+ * [+-]hhmm
+ * [+-]hh:mm
+ * [+-]hh
+ * We recognize all RFC-822/RFC-2822 formats:
+ * UT|GMT
+ * North American : UTC offsets
+ * E[DS]T = Eastern : -4 | -5
+ * C[DS]T = Central : -5 | -6
+ * M[DS]T = Mountain: -6 | -7
+ * P[DS]T = Pacific : -7 | -8
+ */
+ while (isspace(*bp))
+ bp++;
+
+ switch (*bp++) {
+ case 'G':
+ if (*bp++ != 'M')
+ return NULL;
+ /*FALLTHROUGH*/
+ case 'U':
+ if (*bp++ != 'T')
+ return NULL;
+ /*FALLTHROUGH*/
+ case 'Z':
+ tm->tm_isdst = 0;
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = utc;
+ continue;
+ case '+':
+ neg = 0;
+ break;
+ case '-':
+ neg = 1;
+ break;
+ default:
+ --bp;
+ ep = _find_string(bp, &i, nast, NULL, 4);
+ if (ep != NULL) {
+ tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
+ tm->tm_zone = (char *)nast[i];
+ bp = ep;
+ continue;
+ }
+ ep = _find_string(bp, &i, nadt, NULL, 4);
+ if (ep != NULL) {
+ tm->tm_isdst = 1;
+ tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
+ tm->tm_zone = (char *)nadt[i];
+ bp = ep;
+ continue;
+ }
+ return NULL;
+ }
+ if (!isdigit(bp[0]) || !isdigit(bp[1]))
+ return NULL;
+ offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
+ bp += 2;
+ if (*bp == ':')
+ bp++;
+ if (isdigit(*bp)) {
+ offs += (*bp++ - '0') * 10 * SECSPERMIN;
+ if (!isdigit(*bp))
+ return NULL;
+ offs += (*bp++ - '0') * SECSPERMIN;
+ }
+ if (neg)
+ offs = -offs;
+ tm->tm_isdst = 0; /* XXX */
+ tm->tm_gmtoff = offs;
+ tm->tm_zone = NULL; /* XXX */
+ continue;
+
/*
* Miscellaneous conversions.
*/
@@ -468,28 +584,49 @@
return (unsigned char*)bp;
}
-
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
- int result = 0;
- int rulim = ulim;
+ int result = 0;
+ int rulim = ulim;
- if (**buf < '0' || **buf > '9')
- return (0);
+ if (**buf < '0' || **buf > '9')
+ return (0);
- /* we use rulim to break out of the loop when we run out of digits */
- do {
- result *= 10;
- result += *(*buf)++ - '0';
- rulim /= 10;
- } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9');
+ /* we use rulim to break out of the loop when we run out of digits */
+ do {
+ result *= 10;
+ result += *(*buf)++ - '0';
+ rulim /= 10;
+ } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9');
- if (result < llim || result > ulim)
- return (0);
+ if (result < llim || result > ulim)
+ return (0);
- *dest = result;
- return (1);
+ *dest = result;
+ return (1);
+}
+
+static const u_char *
+_find_string(const u_char *bp, int *tgt, const char * const *n1,
+ const char * const *n2, int c)
+{
+ int i;
+ unsigned int len;
+
+ /* Search the n1 table first, then the n2 table if it is non-NULL. */
+ for (; n1 != NULL; n1 = n2, n2 = NULL) {
+ for (i = 0; i < c; i++, n1++) {
+ len = strlen(*n1);
+ if (strncasecmp(*n1, (const char *)bp, len) == 0) {
+ *tgt = i;
+ return bp + len;
+ }
+ }
+ }
+
+ /* Nothing matched */
+ return NULL;
}
char* strptime_l(const char* buf, const char* fmt, struct tm* tm, locale_t l) {
diff --git a/tests/time_test.cpp b/tests/time_test.cpp
index 3d745ea..b1de0a4 100644
--- a/tests/time_test.cpp
+++ b/tests/time_test.cpp
@@ -363,6 +363,105 @@
EXPECT_TRUE(memcmp(&tm, &zero, sizeof(tm)) == 0);
}
+TEST(time, strptime_Z) {
+#if defined(__BIONIC__)
+ // glibc doesn't handle %Z at all.
+ // The BSDs only handle hard-coded "GMT" and "UTC", plus whatever two strings
+ // are in the global `tzname` (which correspond to the current $TZ).
+ struct tm tm;
+ setenv("TZ", "Europe/Berlin", 1);
+
+ // "GMT" always works.
+ tm = {};
+ ASSERT_EQ('\0', *strptime("GMT", "%Z", &tm));
+ EXPECT_STREQ("GMT", tm.tm_zone);
+ EXPECT_EQ(0, tm.tm_isdst);
+ EXPECT_EQ(0, tm.tm_gmtoff);
+
+ // As does "UTC".
+ tm = {};
+ ASSERT_EQ('\0', *strptime("UTC", "%Z", &tm));
+ EXPECT_STREQ("UTC", tm.tm_zone);
+ EXPECT_EQ(0, tm.tm_isdst);
+ EXPECT_EQ(0, tm.tm_gmtoff);
+
+ // Europe/Berlin is known as "CET" when there's no DST.
+ tm = {};
+ ASSERT_EQ('\0', *strptime("CET", "%Z", &tm));
+ EXPECT_STREQ("CET", tm.tm_zone);
+ EXPECT_EQ(0, tm.tm_isdst);
+ EXPECT_EQ(3600, tm.tm_gmtoff);
+
+ // Europe/Berlin is known as "CEST" when DST is in effect.
+ tm = {};
+ ASSERT_EQ('\0', *strptime("CEST", "%Z", &tm));
+ EXPECT_STREQ("CEST", tm.tm_zone);
+ EXPECT_EQ(1, tm.tm_isdst);
+ EXPECT_EQ(3600, tm.tm_gmtoff);
+
+ // And as long as we're in Europe/Berlin, those are the only time zone
+ // abbreviations that are recognized.
+ tm = {};
+ ASSERT_TRUE(strptime("PDT", "%Z", &tm) == nullptr);
+#endif
+}
+
+TEST(time, strptime_z) {
+ struct tm tm;
+ setenv("TZ", "Europe/Berlin", 1);
+
+ // "UT" is what RFC822 called UTC.
+ tm = {};
+ ASSERT_EQ('\0', *strptime("UT", "%z", &tm));
+ EXPECT_STREQ("UTC", tm.tm_zone);
+ EXPECT_EQ(0, tm.tm_isdst);
+ EXPECT_EQ(0, tm.tm_gmtoff);
+ // "GMT" is RFC822's other name for UTC.
+ tm = {};
+ ASSERT_EQ('\0', *strptime("GMT", "%z", &tm));
+ EXPECT_STREQ("UTC", tm.tm_zone);
+ EXPECT_EQ(0, tm.tm_isdst);
+ EXPECT_EQ(0, tm.tm_gmtoff);
+
+ // "Z" ("Zulu") is a synonym for UTC.
+ tm = {};
+ ASSERT_EQ('\0', *strptime("Z", "%z", &tm));
+ EXPECT_STREQ("UTC", tm.tm_zone);
+ EXPECT_EQ(0, tm.tm_isdst);
+ EXPECT_EQ(0, tm.tm_gmtoff);
+
+ // "PST"/"PDT" and the other common US zone abbreviations are all supported.
+ tm = {};
+ ASSERT_EQ('\0', *strptime("PST", "%z", &tm));
+ EXPECT_STREQ("PST", tm.tm_zone);
+ EXPECT_EQ(0, tm.tm_isdst);
+ EXPECT_EQ(-28800, tm.tm_gmtoff);
+ tm = {};
+ ASSERT_EQ('\0', *strptime("PDT", "%z", &tm));
+ EXPECT_STREQ("PDT", tm.tm_zone);
+ EXPECT_EQ(1, tm.tm_isdst);
+ EXPECT_EQ(-25200, tm.tm_gmtoff);
+
+ // +-hh
+ tm = {};
+ ASSERT_EQ('\0', *strptime("+01", "%z", &tm));
+ EXPECT_EQ(3600, tm.tm_gmtoff);
+ EXPECT_TRUE(tm.tm_zone == nullptr);
+ EXPECT_EQ(0, tm.tm_isdst);
+ // +-hhmm
+ tm = {};
+ ASSERT_EQ('\0', *strptime("+0130", "%z", &tm));
+ EXPECT_EQ(5400, tm.tm_gmtoff);
+ EXPECT_TRUE(tm.tm_zone == nullptr);
+ EXPECT_EQ(0, tm.tm_isdst);
+ // +-hh:mm
+ tm = {};
+ ASSERT_EQ('\0', *strptime("+01:30", "%z", &tm));
+ EXPECT_EQ(5400, tm.tm_gmtoff);
+ EXPECT_TRUE(tm.tm_zone == nullptr);
+ EXPECT_EQ(0, tm.tm_isdst);
+}
+
void SetTime(timer_t t, time_t value_s, time_t value_ns, time_t interval_s, time_t interval_ns) {
itimerspec ts;
ts.it_value.tv_sec = value_s;