Update to FreeBSD libm r336665.
This reverts commit 253a8306316cedfd6fd3e3a169fbffe4cac04035 and moves
us forward to a revision that contains fixes for the problem with the
previous attempt.
This also makes sincos(3)/sincosf(3)/sincosl(3) available to `_BSD_SOURCE`
as well as `_GNU_SOURCE`.
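For example, a program built with just `_BSD_SOURCE` can now call
sincos(3) directly (a minimal usage sketch, not code from this change):

    #define _BSD_SOURCE
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      double s, c;
      sincos(M_PI / 3.0, &s, &c);  /* one call computes both values */
      printf("sin=%f cos=%f\n", s, c);
      return 0;
    }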
The new FreeBSD libm code requires the FreeBSD `__CONCAT` macro, and all
our existing callers are FreeBSD code too, so update our definition to match.
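For reference, the FreeBSD macro is the usual two-level token-pasting
pattern (a sketch of the <sys/cdefs.h> definition; the point is that
arguments are macro-expanded before they're pasted):

    #define __CONCAT1(x,y)  x ## y
    #define __CONCAT(x,y)   __CONCAT1(x,y)

    /* rnintl() in this patch relies on the expansion step: with
     * LDBL_MANT_DIG == 64 (ld80), __CONCAT(0x1.8p, LDBL_MANT_DIG)
     * pastes to the hex-float literal 0x1.8p64. */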
There's also an assumption that <complex.h> drags in <math.h>, which isn't
true for us, so work around that with `-include` in the makefile. This
then causes clang to diagnose a bug -- returning a value from a void
function -- in our fake (LP32) sincosl(3), so fix that too.
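The offending pattern was roughly the following (a hypothetical
reconstruction; on LP32, long double has the same representation as
double, so the wrapper just forwards to sincos(3)):

    void sincosl(long double x, long double* s, long double* c) {
      /* was: return sincos(...); -- illegal, both functions return void */
      double ds, dc;
      sincos(x, &ds, &dc);
      *s = ds;
      *c = dc;
    }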
Bug: http://b/111710419
Change-Id: I84703ad844f8afde6ec6b11604ab3c096ccb62c3
Test: ran tests
diff --git a/libm/upstream-freebsd/lib/msun/src/math_private.h b/libm/upstream-freebsd/lib/msun/src/math_private.h
index 637a09a..bc3d516 100644
--- a/libm/upstream-freebsd/lib/msun/src/math_private.h
+++ b/libm/upstream-freebsd/lib/msun/src/math_private.h
@@ -11,7 +11,7 @@
/*
* from: @(#)fdlibm.h 5.1 93/09/24
- * $FreeBSD: head/lib/msun/src/math_private.h 319047 2017-05-28 06:13:38Z mmel $
+ * $FreeBSD: head/lib/msun/src/math_private.h 336663 2018-07-24 10:10:16Z bde $
*/
#ifndef _MATH_PRIVATE_H_
@@ -48,6 +48,47 @@
#define IEEE_WORD_ORDER BYTE_ORDER
#endif
+/* A union which permits us to convert between a long double and
+ four 32 bit ints. */
+
+#if IEEE_WORD_ORDER == BIG_ENDIAN
+
+typedef union
+{
+ long double value;
+ struct {
+ u_int32_t mswhi;
+ u_int32_t mswlo;
+ u_int32_t lswhi;
+ u_int32_t lswlo;
+ } parts32;
+ struct {
+ u_int64_t msw;
+ u_int64_t lsw;
+ } parts64;
+} ieee_quad_shape_type;
+
+#endif
+
+#if IEEE_WORD_ORDER == LITTLE_ENDIAN
+
+typedef union
+{
+ long double value;
+ struct {
+ u_int32_t lswlo;
+ u_int32_t lswhi;
+ u_int32_t mswlo;
+ u_int32_t mswhi;
+ } parts32;
+ struct {
+ u_int64_t lsw;
+ u_int64_t msw;
+ } parts64;
+} ieee_quad_shape_type;
+
+#endif
+
#if IEEE_WORD_ORDER == BIG_ENDIAN
typedef union
@@ -294,8 +335,9 @@
/* Support switching the mode to FP_PE if necessary. */
#if defined(__i386__) && !defined(NO_FPSETPREC)
-#define ENTERI() \
- long double __retval; \
+#define ENTERI() ENTERIT(long double)
+#define ENTERIT(returntype) \
+ returntype __retval; \
fp_prec_t __oprec; \
\
if ((__oprec = fpgetprec()) != FP_PE) \
@@ -318,6 +360,7 @@
} while (0)
#else
#define ENTERI()
+#define ENTERIT(x)
#define RETURNI(x) RETURNF(x)
#define ENTERV()
#define RETURNV() return
@@ -435,6 +478,31 @@
*/
void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
+/*
+ * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns
+ * signaling NaNs into quiet NaNs by setting a quiet bit. We do this
+ * because we want to never return a signaling NaN, and also because we
+ * don't want the quiet bit to affect the result. Then mix the converted
+ * args using the specified operation.
+ *
+ * When one arg is NaN, the result is typically that arg quieted. When both
+ * args are NaNs, the result is typically the quietening of the arg whose
+ * mantissa is largest after quietening. When neither arg is NaN, the
+ * result may be NaN because it is indeterminate, or finite for subsequent
+ * construction of a NaN as the indeterminate 0.0L/0.0L.
+ *
+ * Technical complications: the result in bits after rounding to the final
+ * precision might depend on the runtime precision and/or on compiler
+ * optimizations, especially when different register sets are used for
+ * different precisions. Try to make the result not depend on at least the
+ * runtime precision by always doing the main mixing step in long double
+ * precision. Try to reduce dependencies on optimizations by adding
+ * the 0's in different precisions (unless everything is in long double
+ * precision).
+ */
+#define nan_mix(x, y) (nan_mix_op((x), (y), +))
+#define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0))
+
#ifdef _COMPLEX_H
/*
@@ -510,48 +578,116 @@
#endif /* _COMPLEX_H */
-#ifdef __GNUCLIKE_ASM
+/*
+ * The rnint() family rounds to the nearest integer for a restricted
+ * range of args (up to about 2**MANT_DIG). We assume that the current
+ * rounding mode is FE_TONEAREST so that this can be done efficiently.
+ * Extra precision causes more problems in practice, and we only centralize
+ * this here to reduce those problems, and have not solved the efficiency
+ * problems. The exp2() family uses a more delicate version of this that
+ * requires extracting bits from the intermediate value, so it is not
+ * centralized here and should copy any solution of the efficiency problems.
+ */
-/* Asm versions of some functions. */
+static inline double
+rnint(__double_t x)
+{
+ /*
+ * This casts to double to kill any extra precision. This depends
+ * on the cast being applied to a double_t to avoid compiler bugs
+ * (this is a cleaner version of STRICT_ASSIGN()). This is
+ * inefficient if there actually is extra precision, but is hard
+ * to improve on. We use double_t in the API to minimise conversions
+ * for just calling here. Note that we cannot easily change the
+ * magic number to the one that works directly with double_t, since
+ * the rounding precision is variable at runtime on x86 so the
+ * magic number would need to be variable. Assuming that the
+ * rounding precision is always the default is too fragile. This
+ * and many other complications will move when the default is
+ * changed to FP_PE.
+ */
+ return ((double)(x + 0x1.8p52) - 0x1.8p52);
+}
-#ifdef __amd64__
+static inline float
+rnintf(__float_t x)
+{
+ /*
+ * As for rnint(), except we could just call that to handle the
+ * extra precision case, usually without losing efficiency.
+ */
+ return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
+}
+
+#ifdef LDBL_MANT_DIG
+/*
+ * The complications for extra precision are smaller for rnintl() since it
+ * can safely assume that the rounding precision has been increased from
+ * its default to FP_PE on x86. We don't exploit that here to get small
+ * optimizations from limiting the range to double. We just need it for
+ * the magic number to work with long doubles. ld128 callers should use
+ * rnint() instead of this if possible. ld80 callers should prefer
+ * rnintl() since for amd64 this avoids swapping the register set, while
+ * for i386 it makes no difference (assuming FP_PE), and for other arches
+ * it makes little difference.
+ */
+static inline long double
+rnintl(long double x)
+{
+ return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
+ __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
+}
+#endif /* LDBL_MANT_DIG */
+
+/*
+ * irint() and i64rint() give the same result as casting to their integer
+ * return type provided their arg is a floating point integer. They can
+ * sometimes be more efficient because no rounding is required.
+ */
+#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
+#define irint(x) \
+ (sizeof(x) == sizeof(float) && \
+ sizeof(__float_t) == sizeof(long double) ? irintf(x) : \
+ sizeof(x) == sizeof(double) && \
+ sizeof(__double_t) == sizeof(long double) ? irintd(x) : \
+ sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
+#else
+#define irint(x) ((int)(x))
+#endif
+
+#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */
+
+#if defined(__i386__) && defined(__GNUCLIKE_ASM)
static __inline int
-irint(double x)
+irintf(float x)
{
int n;
- asm("cvtsd2si %1,%0" : "=r" (n) : "x" (x));
+ __asm("fistl %0" : "=m" (n) : "t" (x));
return (n);
}
-#define HAVE_EFFICIENT_IRINT
-#endif
-#ifdef __i386__
static __inline int
-irint(double x)
+irintd(double x)
{
int n;
- asm("fistl %0" : "=m" (n) : "t" (x));
+ __asm("fistl %0" : "=m" (n) : "t" (x));
return (n);
}
-#define HAVE_EFFICIENT_IRINT
#endif
-#if defined(__amd64__) || defined(__i386__)
+#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
static __inline int
irintl(long double x)
{
int n;
- asm("fistl %0" : "=m" (n) : "t" (x));
+ __asm("fistl %0" : "=m" (n) : "t" (x));
return (n);
}
-#define HAVE_EFFICIENT_IRINTL
#endif
-#endif /* __GNUCLIKE_ASM */
-
#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define breakpoint() asm("int $3")
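As an aside on the rnint() trick above (a standalone sketch, not part of
the patch): the magic constant 0x1.8p52 is 1.5*2^52, chosen so that for
|x| up to about 2**51 the sum lands in [2**52, 2**53), where a double's
ulp is exactly 1. The compiler-generated add therefore rounds to the
nearest integer in the current FE_TONEAREST mode, and subtracting the
constant back recovers the rounded value:

    #include <stdio.h>

    static double rnint_demo(double x) {
      /* the (double) cast kills any extra (x87) precision, as in rnint() */
      return ((double)(x + 0x1.8p52) - 0x1.8p52);
    }

    int main(void) {
      printf("%g %g %g\n", rnint_demo(2.5), rnint_demo(3.5), rnint_demo(-2.5));
      /* prints: 2 4 -2 -- ties round to even, matching rint(3) */
      return 0;
    }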