Blame - libm/upstream-freebsd/lib/msun/src/math_private.h - android_bionic

blob: df526e71e545b80ee7ab33115519db505e3b6e72 [file] [log] [blame]

Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	1	/*
				2	* ====================================================
				3	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
				4	*
				5	* Developed at SunPro, a Sun Microsystems, Inc. business.
				6	* Permission to use, copy, modify, and distribute this
				7	* software is freely granted, provided that this notice
				8	* is preserved.
				9	* ====================================================
				10	*/
				11
				12	/*
				13	* from: @(#)fdlibm.h 5.1 93/09/24
Elliott Hughes	bac0ebb	2021-01-26 14:17:20 -0800	[diff] [blame]	14	* $FreeBSD$
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	15	*/
				16
				17	#ifndef _MATH_PRIVATE_H_
				18	#define _MATH_PRIVATE_H_
				19
				20	#include <sys/types.h>
				21	#include <machine/endian.h>
				22
				23	/*
				24	* The original fdlibm code used statements like:
				25	* n0 = ((*(int*)&one)>>29)^1; * index of high word *
				26	* ix0 = *(n0+(int*)&x); * high word of x *
				27	* ix1 = *((1-n0)+(int*)&x); * low word of x *
				28	* to dig two 32 bit words out of the 64 bit IEEE floating point
				29	* value. That is non-ANSI, and, moreover, the gcc instruction
				30	* scheduler gets it wrong. We instead use the following macros.
				31	* Unlike the original code, we determine the endianness at compile
				32	* time, not at run time; I don't see much benefit to selecting
				33	* endianness at run time.
				34	*/
				35
				36	/*
				37	* A union which permits us to convert between a double and two 32 bit
				38	* ints.
				39	*/
				40
				41	#ifdef __arm__
Calin Juravle	bd3155d	2014-03-13 16:20:36 +0000	[diff] [blame]	42	#if defined(__VFP_FP__) || defined(__ARM_EABI__)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	43	#define IEEE_WORD_ORDER BYTE_ORDER
				44	#else
				45	#define IEEE_WORD_ORDER BIG_ENDIAN
				46	#endif
				47	#else /* __arm__ */
				48	#define IEEE_WORD_ORDER BYTE_ORDER
				49	#endif
				50
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	51	/* A union which permits us to convert between a long double and
				52	four 32 bit ints. */
				53
				54	#if IEEE_WORD_ORDER == BIG_ENDIAN
				55
				56	typedef union
				57	{
				58	long double value;
				59	struct {
				60	u_int32_t mswhi;
				61	u_int32_t mswlo;
				62	u_int32_t lswhi;
				63	u_int32_t lswlo;
				64	} parts32;
				65	struct {
				66	u_int64_t msw;
				67	u_int64_t lsw;
				68	} parts64;
				69	} ieee_quad_shape_type;
				70
				71	#endif
				72
				73	#if IEEE_WORD_ORDER == LITTLE_ENDIAN
				74
				75	typedef union
				76	{
				77	long double value;
				78	struct {
				79	u_int32_t lswlo;
				80	u_int32_t lswhi;
				81	u_int32_t mswlo;
				82	u_int32_t mswhi;
				83	} parts32;
				84	struct {
				85	u_int64_t lsw;
				86	u_int64_t msw;
				87	} parts64;
				88	} ieee_quad_shape_type;
				89
				90	#endif
				91
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	92	#if IEEE_WORD_ORDER == BIG_ENDIAN
				93
				94	typedef union
				95	{
				96	double value;
				97	struct
				98	{
				99	u_int32_t msw;
				100	u_int32_t lsw;
				101	} parts;
				102	struct
				103	{
				104	u_int64_t w;
				105	} xparts;
				106	} ieee_double_shape_type;
				107
				108	#endif
				109
				110	#if IEEE_WORD_ORDER == LITTLE_ENDIAN
				111
				112	typedef union
				113	{
				114	double value;
				115	struct
				116	{
				117	u_int32_t lsw;
				118	u_int32_t msw;
				119	} parts;
				120	struct
				121	{
				122	u_int64_t w;
				123	} xparts;
				124	} ieee_double_shape_type;
				125
				126	#endif
				127
				128	/* Get two 32 bit ints from a double. */
				129
				130	#define EXTRACT_WORDS(ix0,ix1,d) \
				131	do { \
				132	ieee_double_shape_type ew_u; \
				133	ew_u.value = (d); \
				134	(ix0) = ew_u.parts.msw; \
				135	(ix1) = ew_u.parts.lsw; \
				136	} while (0)
				137
				138	/* Get a 64-bit int from a double. */
				139	#define EXTRACT_WORD64(ix,d) \
				140	do { \
				141	ieee_double_shape_type ew_u; \
				142	ew_u.value = (d); \
				143	(ix) = ew_u.xparts.w; \
				144	} while (0)
				145
				146	/* Get the more significant 32 bit int from a double. */
				147
				148	#define GET_HIGH_WORD(i,d) \
				149	do { \
				150	ieee_double_shape_type gh_u; \
				151	gh_u.value = (d); \
				152	(i) = gh_u.parts.msw; \
				153	} while (0)
				154
				155	/* Get the less significant 32 bit int from a double. */
				156
				157	#define GET_LOW_WORD(i,d) \
				158	do { \
				159	ieee_double_shape_type gl_u; \
				160	gl_u.value = (d); \
				161	(i) = gl_u.parts.lsw; \
				162	} while (0)
				163
				164	/* Set a double from two 32 bit ints. */
				165
				166	#define INSERT_WORDS(d,ix0,ix1) \
				167	do { \
				168	ieee_double_shape_type iw_u; \
				169	iw_u.parts.msw = (ix0); \
				170	iw_u.parts.lsw = (ix1); \
				171	(d) = iw_u.value; \
				172	} while (0)
				173
				174	/* Set a double from a 64-bit int. */
				175	#define INSERT_WORD64(d,ix) \
				176	do { \
				177	ieee_double_shape_type iw_u; \
				178	iw_u.xparts.w = (ix); \
				179	(d) = iw_u.value; \
				180	} while (0)
				181
				182	/* Set the more significant 32 bits of a double from an int. */
				183
				184	#define SET_HIGH_WORD(d,v) \
				185	do { \
				186	ieee_double_shape_type sh_u; \
				187	sh_u.value = (d); \
				188	sh_u.parts.msw = (v); \
				189	(d) = sh_u.value; \
				190	} while (0)
				191
				192	/* Set the less significant 32 bits of a double from an int. */
				193
				194	#define SET_LOW_WORD(d,v) \
				195	do { \
				196	ieee_double_shape_type sl_u; \
				197	sl_u.value = (d); \
				198	sl_u.parts.lsw = (v); \
				199	(d) = sl_u.value; \
				200	} while (0)
				201
				202	/*
				203	* A union which permits us to convert between a float and a 32 bit
				204	* int.
				205	*/
				206
				207	typedef union
				208	{
				209	float value;
				210	/* FIXME: Assumes 32 bit int. */
				211	unsigned int word;
				212	} ieee_float_shape_type;
				213
				214	/* Get a 32 bit int from a float. */
				215
				216	#define GET_FLOAT_WORD(i,d) \
				217	do { \
				218	ieee_float_shape_type gf_u; \
				219	gf_u.value = (d); \
				220	(i) = gf_u.word; \
				221	} while (0)
				222
				223	/* Set a float from a 32 bit int. */
				224
				225	#define SET_FLOAT_WORD(d,i) \
				226	do { \
				227	ieee_float_shape_type sf_u; \
				228	sf_u.word = (i); \
				229	(d) = sf_u.value; \
				230	} while (0)
				231
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	232	/*
				233	* Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
				234	* double.
				235	*/
				236
				237	#define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \
				238	do { \
				239	union IEEEl2bits ew_u; \
				240	ew_u.e = (d); \
				241	(ix0) = ew_u.xbits.expsign; \
				242	(ix1) = ew_u.xbits.man; \
				243	} while (0)
				244
				245	/*
				246	* Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
				247	* long double.
				248	*/
				249
				250	#define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \
				251	do { \
				252	union IEEEl2bits ew_u; \
				253	ew_u.e = (d); \
				254	(ix0) = ew_u.xbits.expsign; \
				255	(ix1) = ew_u.xbits.manh; \
				256	(ix2) = ew_u.xbits.manl; \
				257	} while (0)
				258
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	259	/* Get expsign as a 16 bit int from a long double. */
				260
				261	#define GET_LDBL_EXPSIGN(i,d) \
				262	do { \
				263	union IEEEl2bits ge_u; \
				264	ge_u.e = (d); \
				265	(i) = ge_u.xbits.expsign; \
				266	} while (0)
				267
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	268	/*
				269	* Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
				270	* mantissa.
				271	*/
				272
				273	#define INSERT_LDBL80_WORDS(d,ix0,ix1) \
				274	do { \
				275	union IEEEl2bits iw_u; \
				276	iw_u.xbits.expsign = (ix0); \
				277	iw_u.xbits.man = (ix1); \
				278	(d) = iw_u.e; \
				279	} while (0)
				280
				281	/*
				282	* Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
				283	* comprising the mantissa.
				284	*/
				285
				286	#define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \
				287	do { \
				288	union IEEEl2bits iw_u; \
				289	iw_u.xbits.expsign = (ix0); \
				290	iw_u.xbits.manh = (ix1); \
				291	iw_u.xbits.manl = (ix2); \
				292	(d) = iw_u.e; \
				293	} while (0)
				294
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	295	/* Set expsign of a long double from a 16 bit int. */
				296
				297	#define SET_LDBL_EXPSIGN(d,v) \
				298	do { \
				299	union IEEEl2bits se_u; \
				300	se_u.e = (d); \
				301	se_u.xbits.expsign = (v); \
				302	(d) = se_u.e; \
				303	} while (0)
				304
				305	#ifdef __i386__
				306	/* Long double constants are broken on i386. */
				307	#define LD80C(m, ex, v) { \
				308	.xbits.man = __CONCAT(m, ULL), \
				309	.xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0), \
				310	}
				311	#else
				312	/* The above works on non-i386 too, but we use this to check v. */
				313	#define LD80C(m, ex, v) { .e = (v), }
				314	#endif
				315
				316	#ifdef FLT_EVAL_METHOD
				317	/*
				318	* Attempt to get strict C99 semantics for assignment with non-C99 compilers.
				319	*/
				320	#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
				321	#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
				322	#else
				323	#define STRICT_ASSIGN(type, lval, rval) do { \
				324	volatile type __lval; \
				325	\
				326	if (sizeof(type) >= sizeof(long double)) \
				327	(lval) = (rval); \
				328	else { \
				329	__lval = (rval); \
				330	(lval) = __lval; \
				331	} \
				332	} while (0)
				333	#endif
				334	#endif /* FLT_EVAL_METHOD */
				335
				336	/* Support switching the mode to FP_PE if necessary. */
				337	#if defined(__i386__) && !defined(NO_FPSETPREC)
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	338	#define ENTERI() ENTERIT(long double)
				339	#define ENTERIT(returntype) \
				340	returntype __retval; \
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	341	fp_prec_t __oprec; \
				342	\
				343	if ((__oprec = fpgetprec()) != FP_PE) \
				344	fpsetprec(FP_PE)
				345	#define RETURNI(x) do { \
				346	__retval = (x); \
				347	if (__oprec != FP_PE) \
				348	fpsetprec(__oprec); \
				349	RETURNF(__retval); \
				350	} while (0)
Elliott Hughes	8da8ca4	2018-05-08 13:35:33 -0700	[diff] [blame]	351	#define ENTERV() \
				352	fp_prec_t __oprec; \
				353	\
				354	if ((__oprec = fpgetprec()) != FP_PE) \
				355	fpsetprec(FP_PE)
				356	#define RETURNV() do { \
				357	if (__oprec != FP_PE) \
				358	fpsetprec(__oprec); \
				359	return; \
				360	} while (0)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	361	#else
Elliott Hughes	8da8ca4	2018-05-08 13:35:33 -0700	[diff] [blame]	362	#define ENTERI()
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	363	#define ENTERIT(x)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	364	#define RETURNI(x) RETURNF(x)
Elliott Hughes	8da8ca4	2018-05-08 13:35:33 -0700	[diff] [blame]	365	#define ENTERV()
				366	#define RETURNV() return
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	367	#endif
				368
				369	/* Default return statement if hack_*_t() is not used. */
				370	#define RETURNF(v) return (v)
				371
				372	/*
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	373	* 2sum gives the same result as 2sumF without requiring |a| >= |b| or
				374	* a == 0, but is slower.
				375	*/
				376	#define _2sum(a, b) do { \
				377	__typeof(a) __s, __w; \
				378	\
				379	__w = (a) + (b); \
				380	__s = __w - (a); \
				381	(b) = ((a) - (__w - __s)) + ((b) - __s); \
				382	(a) = __w; \
				383	} while (0)
				384
				385	/*
				386	* 2sumF algorithm.
				387	*
				388	* "Normalize" the terms in the infinite-precision expression a + b for
				389	* the sum of 2 floating point values so that b is as small as possible
				390	* relative to 'a'. (The resulting 'a' is the value of the expression in
				391	* the same precision as 'a' and the resulting b is the rounding error.)
				392	* |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
				393	* exponent overflow or underflow must not occur. This uses a Theorem of
				394	* Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum"
				395	* is apparently due to Shewchuk (1997).
				396	*
				397	* For this to always work, assignment of a + b to 'a' must not retain any
				398	* extra precision in a + b. This is required by C standards but broken
				399	* in many compilers. The brokenness cannot be worked around using
				400	* STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
				401	* algorithm would be destroyed by non-null strict assignments. (The
				402	* compilers are correct to be broken -- the efficiency of all floating
				403	* point code calculations would be destroyed similarly if they forced the
				404	* conversions.)
				405	*
				406	* Fortunately, a case that works well can usually be arranged by building
				407	* any extra precision into the type of 'a' -- 'a' should have type float_t,
				408	* double_t or long double. b's type should be no larger than 'a's type.
				409	* Callers should use these types with scopes as large as possible, to
				410	* reduce their own extra-precision and efficiency problems. In
				411	* particular, they shouldn't convert back and forth just to call here.
				412	*/
				413	#ifdef DEBUG
				414	#define _2sumF(a, b) do { \
				415	__typeof(a) __w; \
				416	volatile __typeof(a) __ia, __ib, __r, __vw; \
				417	\
				418	__ia = (a); \
				419	__ib = (b); \
				420	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \
				421	\
				422	__w = (a) + (b); \
				423	(b) = ((a) - __w) + (b); \
				424	(a) = __w; \
				425	\
				426	/* The next 2 assertions are weak if (a) is already long double. */ \
				427	assert((long double)__ia + __ib == (long double)(a) + (b)); \
				428	__vw = __ia + __ib; \
				429	__r = __ia - __vw; \
				430	__r += __ib; \
				431	assert(__vw == (a) && __r == (b)); \
				432	} while (0)
				433	#else /* !DEBUG */
				434	#define _2sumF(a, b) do { \
				435	__typeof(a) __w; \
				436	\
				437	__w = (a) + (b); \
				438	(b) = ((a) - __w) + (b); \
				439	(a) = __w; \
				440	} while (0)
				441	#endif /* DEBUG */
				442
				443	/*
				444	* Set x += c, where x is represented in extra precision as a + b.
				445	* x must be sufficiently normalized and sufficiently larger than c,
				446	* and the result is then sufficiently normalized.
				447	*
				448	* The details of ordering are that |a| must be >= |c| (so that (a, c)
				449	* can be normalized without extra work to swap 'a' with c). The details of
				450	* the normalization are that b must be small relative to the normalized 'a'.
				451	* Normalization of (a, c) makes the normalized c tiny relative to the
				452	* normalized a, so b remains small relative to 'a' in the result. However,
				453	* b need not ever be tiny relative to 'a'. For example, b might be about
				454	* 2**20 times smaller than 'a' to give about 20 extra bits of precision.
				455	* That is usually enough, and adding c (which by normalization is about
				456	* 2**53 times smaller than a) cannot change b significantly. However,
				457	* cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
				458	* significantly relative to b. The caller must ensure that significant
				459	* cancellation doesn't occur, either by having c of the same sign as 'a',
				460	* or by having |c| a few percent smaller than |a|. Pre-normalization of
				461	* (a, b) may help.
				462	*
Elliott Hughes	022e1aa	2022-07-12 17:01:46 -0700	[diff] [blame]	463	* This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	464	* exercise 19). We gain considerable efficiency by requiring the terms to
				465	* be sufficiently normalized and sufficiently increasing.
				466	*/
				467	#define _3sumF(a, b, c) do { \
				468	__typeof(a) __tmp; \
				469	\
				470	__tmp = (c); \
				471	_2sumF(__tmp, (a)); \
				472	(b) += (a); \
				473	(a) = __tmp; \
				474	} while (0)
				475
				476	/*
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	477	* Common routine to process the arguments to nan(), nanf(), and nanl().
				478	*/
				479	void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
				480
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	481	/*
				482	* Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns
				483	* signaling NaNs into quiet NaNs by setting a quiet bit. We do this
				484	* because we want to never return a signaling NaN, and also because we
				485	* don't want the quiet bit to affect the result. Then mix the converted
				486	* args using the specified operation.
				487	*
				488	* When one arg is NaN, the result is typically that arg quieted. When both
				489	* args are NaNs, the result is typically the quietening of the arg whose
				490	* mantissa is largest after quietening. When neither arg is NaN, the
				491	* result may be NaN because it is indeterminate, or finite for subsequent
				492	* construction of a NaN as the indeterminate 0.0L/0.0L.
				493	*
				494	* Technical complications: the result in bits after rounding to the final
				495	* precision might depend on the runtime precision and/or on compiler
				496	* optimizations, especially when different register sets are used for
				497	* different precisions. Try to make the result not depend on at least the
				498	* runtime precision by always doing the main mixing step in long double
				499	* precision. Try to reduce dependencies on optimizations by adding the
				500	* 0's in different precisions (unless everything is in long double
				501	* precision).
				502	*/
				503	#define nan_mix(x, y) (nan_mix_op((x), (y), +))
				504	#define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0))
				505
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	506	#ifdef _COMPLEX_H
				507
				508	/*
				509	* C99 specifies that complex numbers have the same representation as
				510	* an array of two elements, where the first element is the real part
				511	* and the second element is the imaginary part.
				512	*/
				513	typedef union {
				514	float complex f;
				515	float a[2];
				516	} float_complex;
				517	typedef union {
				518	double complex f;
				519	double a[2];
				520	} double_complex;
				521	typedef union {
				522	long double complex f;
				523	long double a[2];
				524	} long_double_complex;
				525	#define REALPART(z) ((z).a[0])
				526	#define IMAGPART(z) ((z).a[1])
				527
				528	/*
				529	* Inline functions that can be used to construct complex values.
				530	*
				531	* The C99 standard intends x+Iy to be used for this, but x+Iy is
				532	* currently unusable in general since gcc introduces many overflow,
				533	* underflow, sign and efficiency bugs by rewriting I*y as
				534	* (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
				535	* In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
				536	* to -0.0+I*0.0.
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	537	*
				538	* The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
				539	* to construct complex values. Compilers that conform to the C99
				540	* standard require the following functions to avoid the above issues.
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	541	*/
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	542
				543	#ifndef CMPLXF
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	544	static __inline float complex
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	545	CMPLXF(float x, float y)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	546	{
				547	float_complex z;
				548
				549	REALPART(z) = x;
				550	IMAGPART(z) = y;
				551	return (z.f);
				552	}
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	553	#endif
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	554
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	555	#ifndef CMPLX
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	556	static __inline double complex
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	557	CMPLX(double x, double y)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	558	{
				559	double_complex z;
				560
				561	REALPART(z) = x;
				562	IMAGPART(z) = y;
				563	return (z.f);
				564	}
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	565	#endif
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	566
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	567	#ifndef CMPLXL
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	568	static __inline long double complex
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	569	CMPLXL(long double x, long double y)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	570	{
				571	long_double_complex z;
				572
				573	REALPART(z) = x;
				574	IMAGPART(z) = y;
				575	return (z.f);
				576	}
Elliott Hughes	8cff2f9	2015-08-28 20:21:43 -0700	[diff] [blame]	577	#endif
				578
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	579	#endif /* _COMPLEX_H */
				580
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	581	/*
				582	* The rnint() family rounds to the nearest integer for a restricted range
				583	* of args (up to about 2**MANT_DIG). We assume that the current
				584	* rounding mode is FE_TONEAREST so that this can be done efficiently.
				585	* Extra precision causes more problems in practice, and we only centralize
				586	* this here to reduce those problems, and have not solved the efficiency
				587	* problems. The exp2() family uses a more delicate version of this that
				588	* requires extracting bits from the intermediate value, so it is not
				589	* centralized here and should copy any solution of the efficiency problems.
				590	*/
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	591
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	592	static inline double
				593	rnint(__double_t x)
				594	{
				595	/*
				596	* This casts to double to kill any extra precision. This depends
				597	* on the cast being applied to a double_t to avoid compiler bugs
				598	* (this is a cleaner version of STRICT_ASSIGN()). This is
				599	* inefficient if there actually is extra precision, but is hard
				600	* to improve on. We use double_t in the API to minimise conversions
				601	* for just calling here. Note that we cannot easily change the
				602	* magic number to the one that works directly with double_t, since
				603	* the rounding precision is variable at runtime on x86 so the
				604	* magic number would need to be variable. Assuming that the
				605	* rounding precision is always the default is too fragile. This
				606	* and many other complications will move when the default is
				607	* changed to FP_PE.
				608	*/
				609	return ((double)(x + 0x1.8p52) - 0x1.8p52);
				610	}
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	611
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	612	static inline float
				613	rnintf(__float_t x)
				614	{
				615	/*
				616	* As for rnint(), except we could just call that to handle the
				617	* extra precision case, usually without losing efficiency.
				618	*/
				619	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
				620	}
				621
				622	#ifdef LDBL_MANT_DIG
				623	/*
				624	* The complications for extra precision are smaller for rnintl() since it
				625	* can safely assume that the rounding precision has been increased from
				626	* its default to FP_PE on x86. We don't exploit that here to get small
				627	* optimizations from limiting the range to double. We just need it for
				628	* the magic number to work with long doubles. ld128 callers should use
				629	* rnint() instead of this if possible. ld80 callers should prefer
				630	* rnintl() since for amd64 this avoids swapping the register set, while
				631	* for i386 it makes no difference (assuming FP_PE), and for other arches
				632	* it makes little difference.
				633	*/
				634	static inline long double
				635	rnintl(long double x)
				636	{
				637	return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
				638	__CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
				639	}
				640	#endif /* LDBL_MANT_DIG */
				641
				642	/*
				643	* irint() and i64rint() give the same result as casting to their integer
				644	* return type provided their arg is a floating point integer. They can
				645	* sometimes be more efficient because no rounding is required.
				646	*/
Elliott Hughes	022e1aa	2022-07-12 17:01:46 -0700	[diff] [blame]	647	#if defined(__amd64__) || defined(__i386__)
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	648	#define irint(x) \
				649	(sizeof(x) == sizeof(float) && \
				650	sizeof(__float_t) == sizeof(long double) ? irintf(x) : \
				651	sizeof(x) == sizeof(double) && \
				652	sizeof(__double_t) == sizeof(long double) ? irintd(x) : \
				653	sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
				654	#else
				655	#define irint(x) ((int)(x))
				656	#endif
				657
				658	#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */
				659
Elliott Hughes	022e1aa	2022-07-12 17:01:46 -0700	[diff] [blame]	660	#if defined(__i386__)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	661	static __inline int
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	662	irintf(float x)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	663	{
				664	int n;
				665
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	666	__asm("fistl %0" : "=m" (n) : "t" (x));
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	667	return (n);
				668	}
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	669
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	670	static __inline int
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	671	irintd(double x)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	672	{
				673	int n;
				674
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	675	__asm("fistl %0" : "=m" (n) : "t" (x));
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	676	return (n);
				677	}
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	678	#endif
				679
Elliott Hughes	022e1aa	2022-07-12 17:01:46 -0700	[diff] [blame]	680	#if defined(__amd64__) || defined(__i386__)
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	681	static __inline int
				682	irintl(long double x)
				683	{
				684	int n;
				685
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	686	__asm("fistl %0" : "=m" (n) : "t" (x));
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	687	return (n);
				688	}
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	689	#endif
				690
Elliott Hughes	7841946	2013-06-12 16:37:58 -0700	[diff] [blame]	691	#ifdef DEBUG
				692	#if defined(__amd64__) || defined(__i386__)
				693	#define breakpoint() asm("int $3")
				694	#else
				695	#include <signal.h>
				696
				697	#define breakpoint() raise(SIGTRAP)
				698	#endif
				699	#endif
				700
				701	/* Write a pari script to test things externally. */
				702	#ifdef DOPRINT
				703	#include <stdio.h>
				704
				705	#ifndef DOPRINT_SWIZZLE
				706	#define DOPRINT_SWIZZLE 0
				707	#endif
				708
				709	#ifdef DOPRINT_LD80
				710
				711	#define DOPRINT_START(xp) do { \
				712	uint64_t __lx; \
				713	uint16_t __hx; \
				714	\
				715	/* Hack to give more-problematic args. */ \
				716	EXTRACT_LDBL80_WORDS(__hx, __lx, *xp); \
				717	__lx ^= DOPRINT_SWIZZLE; \
				718	INSERT_LDBL80_WORDS(*xp, __hx, __lx); \
				719	printf("x = %.21Lg; ", (long double)*xp); \
				720	} while (0)
				721	#define DOPRINT_END1(v) \
				722	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
				723	#define DOPRINT_END2(hi, lo) \
				724	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \
				725	(long double)(hi), (long double)(lo))
				726
				727	#elif defined(DOPRINT_D64)
				728
				729	#define DOPRINT_START(xp) do { \
				730	uint32_t __hx, __lx; \
				731	\
				732	EXTRACT_WORDS(__hx, __lx, *xp); \
				733	__lx ^= DOPRINT_SWIZZLE; \
				734	INSERT_WORDS(*xp, __hx, __lx); \
				735	printf("x = %.21Lg; ", (long double)*xp); \
				736	} while (0)
				737	#define DOPRINT_END1(v) \
				738	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
				739	#define DOPRINT_END2(hi, lo) \
				740	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \
				741	(long double)(hi), (long double)(lo))
				742
				743	#elif defined(DOPRINT_F32)
				744
				745	#define DOPRINT_START(xp) do { \
				746	uint32_t __hx; \
				747	\
				748	GET_FLOAT_WORD(__hx, *xp); \
				749	__hx ^= DOPRINT_SWIZZLE; \
				750	SET_FLOAT_WORD(*xp, __hx); \
				751	printf("x = %.21Lg; ", (long double)*xp); \
				752	} while (0)
				753	#define DOPRINT_END1(v) \
				754	printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
				755	#define DOPRINT_END2(hi, lo) \
				756	printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \
				757	(long double)(hi), (long double)(lo))
				758
				759	#else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */
				760
				761	#ifndef DOPRINT_SWIZZLE_HIGH
				762	#define DOPRINT_SWIZZLE_HIGH 0
				763	#endif
				764
				765	#define DOPRINT_START(xp) do { \
				766	uint64_t __lx, __llx; \
				767	uint16_t __hx; \
				768	\
				769	EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp); \
				770	__llx ^= DOPRINT_SWIZZLE; \
				771	__lx ^= DOPRINT_SWIZZLE_HIGH; \
				772	INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx); \
				773	printf("x = %.36Lg; ", (long double)*xp); \
				774	} while (0)
				775	#define DOPRINT_END1(v) \
				776	printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v))
				777	#define DOPRINT_END2(hi, lo) \
				778	printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n", \
				779	(long double)(hi), (long double)(lo))
				780
				781	#endif /* DOPRINT_LD80 */
				782
				783	#else /* !DOPRINT */
				784	#define DOPRINT_START(xp)
				785	#define DOPRINT_END1(v)
				786	#define DOPRINT_END2(hi, lo)
				787	#endif /* DOPRINT */
				788
				789	#define RETURNP(x) do { \
				790	DOPRINT_END1(x); \
				791	RETURNF(x); \
				792	} while (0)
				793	#define RETURNPI(x) do { \
				794	DOPRINT_END1(x); \
				795	RETURNI(x); \
				796	} while (0)
				797	#define RETURN2P(x, y) do { \
				798	DOPRINT_END2((x), (y)); \
				799	RETURNF((x) + (y)); \
				800	} while (0)
				801	#define RETURN2PI(x, y) do { \
				802	DOPRINT_END2((x), (y)); \
				803	RETURNI((x) + (y)); \
				804	} while (0)
				805	#ifdef STRUCT_RETURN
				806	#define RETURNSP(rp) do { \
				807	if (!(rp)->lo_set) \
				808	RETURNP((rp)->hi); \
				809	RETURN2P((rp)->hi, (rp)->lo); \
				810	} while (0)
				811	#define RETURNSPI(rp) do { \
				812	if (!(rp)->lo_set) \
				813	RETURNPI((rp)->hi); \
				814	RETURN2PI((rp)->hi, (rp)->lo); \
				815	} while (0)
				816	#endif
				817	#define SUM2P(x, y) ({ \
				818	const __typeof (x) __x = (x); \
				819	const __typeof (y) __y = (y); \
				820	\
				821	DOPRINT_END2(__x, __y); \
				822	__x + __y; \
				823	})
				824
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	825	/*
				826	* ieee style elementary functions
				827	*
				828	* We rename functions here to improve other sources' diffability
				829	* against fdlibm.
				830	*/
				831	#define __ieee754_sqrt sqrt
				832	#define __ieee754_acos acos
				833	#define __ieee754_acosh acosh
				834	#define __ieee754_log log
				835	#define __ieee754_log2 log2
				836	#define __ieee754_atanh atanh
				837	#define __ieee754_asin asin
				838	#define __ieee754_atan2 atan2
				839	#define __ieee754_exp exp
				840	#define __ieee754_cosh cosh
				841	#define __ieee754_fmod fmod
				842	#define __ieee754_pow pow
				843	#define __ieee754_lgamma lgamma
				844	#define __ieee754_gamma gamma
				845	#define __ieee754_lgamma_r lgamma_r
				846	#define __ieee754_gamma_r gamma_r
				847	#define __ieee754_log10 log10
				848	#define __ieee754_sinh sinh
				849	#define __ieee754_hypot hypot
				850	#define __ieee754_j0 j0
				851	#define __ieee754_j1 j1
				852	#define __ieee754_y0 y0
				853	#define __ieee754_y1 y1
				854	#define __ieee754_jn jn
				855	#define __ieee754_yn yn
				856	#define __ieee754_remainder remainder
				857	#define __ieee754_scalb scalb
				858	#define __ieee754_sqrtf sqrtf
				859	#define __ieee754_acosf acosf
				860	#define __ieee754_acoshf acoshf
				861	#define __ieee754_logf logf
				862	#define __ieee754_atanhf atanhf
				863	#define __ieee754_asinf asinf
				864	#define __ieee754_atan2f atan2f
				865	#define __ieee754_expf expf
				866	#define __ieee754_coshf coshf
				867	#define __ieee754_fmodf fmodf
				868	#define __ieee754_powf powf
				869	#define __ieee754_lgammaf lgammaf
				870	#define __ieee754_gammaf gammaf
				871	#define __ieee754_lgammaf_r lgammaf_r
				872	#define __ieee754_gammaf_r gammaf_r
				873	#define __ieee754_log10f log10f
				874	#define __ieee754_log2f log2f
				875	#define __ieee754_sinhf sinhf
				876	#define __ieee754_hypotf hypotf
				877	#define __ieee754_j0f j0f
				878	#define __ieee754_j1f j1f
				879	#define __ieee754_y0f y0f
				880	#define __ieee754_y1f y1f
				881	#define __ieee754_jnf jnf
				882	#define __ieee754_ynf ynf
				883	#define __ieee754_remainderf remainderf
				884	#define __ieee754_scalbf scalbf
				885
				886	/* fdlibm kernel function */
				887	int __kernel_rem_pio2(double*,double*,int,int,int);
				888
				889	/* double precision kernel functions */
				890	#ifndef INLINE_REM_PIO2
				891	int __ieee754_rem_pio2(double,double*);
				892	#endif
				893	double __kernel_sin(double,double,int);
				894	double __kernel_cos(double,double);
				895	double __kernel_tan(double,double,int);
				896	double __ldexp_exp(double,int);
				897	#ifdef _COMPLEX_H
				898	double complex __ldexp_cexp(double complex,int);
				899	#endif
				900
				901	/* float precision kernel functions */
				902	#ifndef INLINE_REM_PIO2F
				903	int __ieee754_rem_pio2f(float,double*);
				904	#endif
				905	#ifndef INLINE_KERNEL_SINDF
				906	float __kernel_sindf(double);
				907	#endif
				908	#ifndef INLINE_KERNEL_COSDF
				909	float __kernel_cosdf(double);
				910	#endif
				911	#ifndef INLINE_KERNEL_TANDF
				912	float __kernel_tandf(double,int);
				913	#endif
				914	float __ldexp_expf(float,int);
				915	#ifdef _COMPLEX_H
				916	float complex __ldexp_cexpf(float complex,int);
				917	#endif
				918
				919	/* long double precision kernel functions */
				920	long double __kernel_sinl(long double, long double, int);
				921	long double __kernel_cosl(long double, long double);
				922	long double __kernel_tanl(long double, long double, int);
				923
Elliott Hughes	a0ee078	2013-01-30 19:06:37 -0800	[diff] [blame]	924	#endif /* !_MATH_PRIVATE_H_ */