Fix sscanf/wcstod parsing of NaNs.
The parsefloat routines -- which let us pass NaNs and infinities on to
strto(f|d|ld) -- come from NetBSD.
Also fix LP64's strtold to return a NaN, and fix all the architectures
to return quiet NaNs.
Also fix wcstof/wcstod/wcstold to use parsefloat so they support hex
floats.
Lots of new tests.
Bug: http://b/31101647
Change-Id: Id7d46ac2d8acb8770b5e8c445e87cfabfde6f111
diff --git a/libc/stdio/parsefloat.c b/libc/stdio/parsefloat.c
new file mode 100644
index 0000000..e911da4
--- /dev/null
+++ b/libc/stdio/parsefloat.c
@@ -0,0 +1,336 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+
+#include "local.h"
+#include "floatio.h"
+
+#define BUF 513 /* Maximum length of numeric string. */
+
+size_t parsefloat(FILE *fp, char *buf, char *end) {
+ char *commit, *p;
+ int infnanpos = 0;
+ enum {
+ S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
+ S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
+ } state = S_START;
+ unsigned char c;
+ int gotmantdig = 0, ishex = 0;
+
+ /*
+ * We set commit = p whenever the string we have read so far
+ * constitutes a valid representation of a floating point
+ * number by itself. At some point, the parse will complete
+ * or fail, and we will ungetc() back to the last commit point.
+ * To ensure that the file offset gets updated properly, it is
+ * always necessary to read at least one character that doesn't
+ * match; thus, we can't short-circuit "infinity" or "nan(...)".
+ */
+ commit = buf - 1;
+ for (p = buf; p < end; ) {
+ c = *fp->_p;
+reswitch:
+ switch (state) {
+ case S_START:
+ state = S_GOTSIGN;
+ if (c == '-' || c == '+')
+ break;
+ else
+ goto reswitch;
+ case S_GOTSIGN:
+ switch (c) {
+ case '0':
+ state = S_MAYBEHEX;
+ commit = p;
+ break;
+ case 'I':
+ case 'i':
+ state = S_INF;
+ break;
+ case 'N':
+ case 'n':
+ state = S_NAN;
+ break;
+ default:
+ state = S_DIGITS;
+ goto reswitch;
+ }
+ break;
+ case S_INF:
+ if (infnanpos > 6 ||
+ (c != "nfinity"[infnanpos] &&
+ c != "NFINITY"[infnanpos]))
+ goto parsedone;
+ if (infnanpos == 1 || infnanpos == 6)
+ commit = p; /* inf or infinity */
+ infnanpos++;
+ break;
+ case S_NAN:
+ switch (infnanpos) {
+ case -1: /* XXX kludge to deal with nan(...) */
+ goto parsedone;
+ case 0:
+ if (c != 'A' && c != 'a')
+ goto parsedone;
+ break;
+ case 1:
+ if (c != 'N' && c != 'n')
+ goto parsedone;
+ else
+ commit = p;
+ break;
+ case 2:
+ if (c != '(')
+ goto parsedone;
+ break;
+ default:
+ if (c == ')') {
+ commit = p;
+ infnanpos = -2;
+ } else if (!isalnum(c) && c != '_')
+ goto parsedone;
+ break;
+ }
+ infnanpos++;
+ break;
+ case S_MAYBEHEX:
+ state = S_DIGITS;
+ if (c == 'X' || c == 'x') {
+ ishex = 1;
+ break;
+ } else { /* we saw a '0', but no 'x' */
+ gotmantdig = 1;
+ goto reswitch;
+ }
+ case S_DIGITS:
+ if ((ishex && isxdigit(c)) || isdigit(c))
+ gotmantdig = 1;
+ else {
+ state = S_FRAC;
+ if (c != '.')
+ goto reswitch;
+ }
+ if (gotmantdig)
+ commit = p;
+ break;
+ case S_FRAC:
+ if (((c == 'E' || c == 'e') && !ishex) ||
+ ((c == 'P' || c == 'p') && ishex)) {
+ if (!gotmantdig)
+ goto parsedone;
+ else
+ state = S_EXP;
+ } else if ((ishex && isxdigit(c)) || isdigit(c)) {
+ commit = p;
+ gotmantdig = 1;
+ } else
+ goto parsedone;
+ break;
+ case S_EXP:
+ state = S_EXPDIGITS;
+ if (c == '-' || c == '+')
+ break;
+ else
+ goto reswitch;
+ case S_EXPDIGITS:
+ if (isdigit(c))
+ commit = p;
+ else
+ goto parsedone;
+ break;
+ default:
+ abort();
+ }
+ *p++ = c;
+ if (--fp->_r > 0)
+ fp->_p++;
+ else if (__srefill(fp))
+ break; /* EOF */
+ }
+
+parsedone:
+ while (commit < --p)
+ (void)ungetc(*(unsigned char *)p, fp);
+ *++commit = '\0';
+ return commit - buf;
+}
+
+size_t wparsefloat(FILE *fp, wchar_t *buf, wchar_t *end) {
+ wchar_t *commit, *p;
+ int infnanpos = 0;
+ enum {
+ S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
+ S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
+ } state = S_START;
+ wint_t c;
+ int gotmantdig = 0, ishex = 0;
+
+ /*
+ * We set commit = p whenever the string we have read so far
+ * constitutes a valid representation of a floating point
+ * number by itself. At some point, the parse will complete
+ * or fail, and we will ungetc() back to the last commit point.
+ * To ensure that the file offset gets updated properly, it is
+ * always necessary to read at least one character that doesn't
+ * match; thus, we can't short-circuit "infinity" or "nan(...)".
+ */
+ commit = buf - 1;
+ c = WEOF;
+ for (p = buf; p < end; ) {
+ if ((c = __fgetwc_unlock(fp)) == WEOF)
+ break;
+reswitch:
+ switch (state) {
+ case S_START:
+ state = S_GOTSIGN;
+ if (c == '-' || c == '+')
+ break;
+ else
+ goto reswitch;
+ case S_GOTSIGN:
+ switch (c) {
+ case '0':
+ state = S_MAYBEHEX;
+ commit = p;
+ break;
+ case 'I':
+ case 'i':
+ state = S_INF;
+ break;
+ case 'N':
+ case 'n':
+ state = S_NAN;
+ break;
+ default:
+ state = S_DIGITS;
+ goto reswitch;
+ }
+ break;
+ case S_INF:
+ if (infnanpos > 6 ||
+ (c != (wint_t)"nfinity"[infnanpos] &&
+ c != (wint_t)"NFINITY"[infnanpos]))
+ goto parsedone;
+ if (infnanpos == 1 || infnanpos == 6)
+ commit = p; /* inf or infinity */
+ infnanpos++;
+ break;
+ case S_NAN:
+ switch (infnanpos) {
+ case -1: /* XXX kludge to deal with nan(...) */
+ goto parsedone;
+ case 0:
+ if (c != 'A' && c != 'a')
+ goto parsedone;
+ break;
+ case 1:
+ if (c != 'N' && c != 'n')
+ goto parsedone;
+ else
+ commit = p;
+ break;
+ case 2:
+ if (c != '(')
+ goto parsedone;
+ break;
+ default:
+ if (c == ')') {
+ commit = p;
+ infnanpos = -2;
+ } else if (!iswalnum(c) && c != '_')
+ goto parsedone;
+ break;
+ }
+ infnanpos++;
+ break;
+ case S_MAYBEHEX:
+ state = S_DIGITS;
+ if (c == 'X' || c == 'x') {
+ ishex = 1;
+ break;
+ } else { /* we saw a '0', but no 'x' */
+ gotmantdig = 1;
+ goto reswitch;
+ }
+ case S_DIGITS:
+ if ((ishex && iswxdigit(c)) || iswdigit(c))
+ gotmantdig = 1;
+ else {
+ state = S_FRAC;
+ if (c != L'.')
+ goto reswitch;
+ }
+ if (gotmantdig)
+ commit = p;
+ break;
+ case S_FRAC:
+ if (((c == 'E' || c == 'e') && !ishex) ||
+ ((c == 'P' || c == 'p') && ishex)) {
+ if (!gotmantdig)
+ goto parsedone;
+ else
+ state = S_EXP;
+ } else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
+ commit = p;
+ gotmantdig = 1;
+ } else
+ goto parsedone;
+ break;
+ case S_EXP:
+ state = S_EXPDIGITS;
+ if (c == '-' || c == '+')
+ break;
+ else
+ goto reswitch;
+ case S_EXPDIGITS:
+ if (iswdigit(c))
+ commit = p;
+ else
+ goto parsedone;
+ break;
+ default:
+ abort();
+ }
+ *p++ = c;
+ c = WEOF;
+ }
+
+parsedone:
+ if (c != WEOF)
+ ungetwc(c, fp);
+ while (commit < --p)
+ ungetwc(*p, fp);
+ *++commit = '\0';
+ return (int)(commit - buf);
+}