/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#ifndef _MATH_PRIVATE_H_
#define _MATH_PRIVATE_H_

#include <sys/types.h>
#include <machine/endian.h>

/*
 * The original fdlibm code used statements like:
 *	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
 *	ix0 = *(n0+(int*)&x);			* high word of x *
 *	ix1 = *((1-n0)+(int*)&x);		* low word of x *
 * to dig two 32 bit words out of the 64 bit IEEE floating point
 * value.  That is non-ANSI, and, moreover, the gcc instruction
 * scheduler gets it wrong.  We instead use the following macros.
 * Unlike the original code, we determine the endianness at compile
 * time, not at run time; I don't see much benefit to selecting
 * endianness at run time.
 */

#ifdef __arm__
#if defined(__VFP_FP__) || defined(__ARM_EABI__)
#define IEEE_WORD_ORDER BYTE_ORDER
#else
#define IEEE_WORD_ORDER BIG_ENDIAN
#endif
#else /* __arm__ */
#define IEEE_WORD_ORDER BYTE_ORDER
#endif

/* A union which permits us to convert between a long double and
   four 32 bit ints. */

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t mswhi;
    u_int32_t mswlo;
    u_int32_t lswhi;
    u_int32_t lswlo;
  } parts32;
  struct {
    u_int64_t msw;
    u_int64_t lsw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t lswlo;
    u_int32_t lswhi;
    u_int32_t mswlo;
    u_int32_t mswhi;
  } parts32;
  struct {
    u_int64_t lsw;
    u_int64_t msw;
  } parts64;
} ieee_quad_shape_type;

#endif

/*
 * A union which permits us to convert between a double and two 32 bit
 * ints.
 */

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t msw;
    u_int32_t lsw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;
    u_int32_t msw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

/* Get two 32 bit ints from a double. */

#define EXTRACT_WORDS(ix0,ix1,d)		\
do {						\
	ieee_double_shape_type ew_u;		\
	ew_u.value = (d);			\
	(ix0) = ew_u.parts.msw;			\
	(ix1) = ew_u.parts.lsw;			\
} while (0)

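/*
 * Example (illustrative): for d == 2.0, whose IEEE bit pattern is
 * 0x4000000000000000, EXTRACT_WORDS yields the two halves:
 *
 *	double d = 2.0;
 *	u_int32_t hi, lo;
 *	EXTRACT_WORDS(hi, lo, d);	// hi == 0x40000000, lo == 0x00000000
 */
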
/* Get a 64-bit int from a double. */
#define EXTRACT_WORD64(ix,d)			\
do {						\
	ieee_double_shape_type ew_u;		\
	ew_u.value = (d);			\
	(ix) = ew_u.xparts.w;			\
} while (0)

/* Get the more significant 32 bit int from a double. */

#define GET_HIGH_WORD(i,d)			\
do {						\
	ieee_double_shape_type gh_u;		\
	gh_u.value = (d);			\
	(i) = gh_u.parts.msw;			\
} while (0)

/* Get the less significant 32 bit int from a double. */

#define GET_LOW_WORD(i,d)			\
do {						\
	ieee_double_shape_type gl_u;		\
	gl_u.value = (d);			\
	(i) = gl_u.parts.lsw;			\
} while (0)

/* Set a double from two 32 bit ints. */

#define INSERT_WORDS(d,ix0,ix1)			\
do {						\
	ieee_double_shape_type iw_u;		\
	iw_u.parts.msw = (ix0);			\
	iw_u.parts.lsw = (ix1);			\
	(d) = iw_u.value;			\
} while (0)

/* Set a double from a 64-bit int. */
#define INSERT_WORD64(d,ix)			\
do {						\
	ieee_double_shape_type iw_u;		\
	iw_u.xparts.w = (ix);			\
	(d) = iw_u.value;			\
} while (0)

/* Set the more significant 32 bits of a double from an int. */

#define SET_HIGH_WORD(d,v)			\
do {						\
	ieee_double_shape_type sh_u;		\
	sh_u.value = (d);			\
	sh_u.parts.msw = (v);			\
	(d) = sh_u.value;			\
} while (0)

/* Set the less significant 32 bits of a double from an int. */

#define SET_LOW_WORD(d,v)			\
do {						\
	ieee_double_shape_type sl_u;		\
	sl_u.value = (d);			\
	sl_u.parts.lsw = (v);			\
	(d) = sl_u.value;			\
} while (0)

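/*
 * Example (illustrative): a branch-free fabs() can be built from these
 * macros by clearing the sign bit in the high word:
 *
 *	double x = -3.5;
 *	u_int32_t hx;
 *	GET_HIGH_WORD(hx, x);
 *	SET_HIGH_WORD(x, hx & 0x7fffffff);	// x == 3.5
 */
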
/*
 * A union which permits us to convert between a float and a 32 bit
 * int.
 */

typedef union
{
  float value;
  /* FIXME: Assumes 32 bit int. */
  unsigned int word;
} ieee_float_shape_type;

/* Get a 32 bit int from a float. */

#define GET_FLOAT_WORD(i,d)			\
do {						\
	ieee_float_shape_type gf_u;		\
	gf_u.value = (d);			\
	(i) = gf_u.word;			\
} while (0)

/* Set a float from a 32 bit int. */

#define SET_FLOAT_WORD(d,i)			\
do {						\
	ieee_float_shape_type sf_u;		\
	sf_u.word = (i);			\
	(d) = sf_u.value;			\
} while (0)

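/*
 * Example (illustrative): for f == 1.0F the word is 0x3f800000; setting
 * the sign bit gives -1.0F:
 *
 *	float f = 1.0F;
 *	unsigned int w;
 *	GET_FLOAT_WORD(w, f);			// w == 0x3f800000
 *	SET_FLOAT_WORD(f, w | 0x80000000u);	// f == -1.0F
 */
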
/*
 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
 * double.
 */

#define EXTRACT_LDBL80_WORDS(ix0,ix1,d)		\
do {						\
	union IEEEl2bits ew_u;			\
	ew_u.e = (d);				\
	(ix0) = ew_u.xbits.expsign;		\
	(ix1) = ew_u.xbits.man;			\
} while (0)

/*
 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
 * long double.
 */

#define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)	\
do {						\
	union IEEEl2bits ew_u;			\
	ew_u.e = (d);				\
	(ix0) = ew_u.xbits.expsign;		\
	(ix1) = ew_u.xbits.manh;		\
	(ix2) = ew_u.xbits.manl;		\
} while (0)

/* Get expsign as a 16 bit int from a long double. */

#define GET_LDBL_EXPSIGN(i,d)			\
do {						\
	union IEEEl2bits ge_u;			\
	ge_u.e = (d);				\
	(i) = ge_u.xbits.expsign;		\
} while (0)

/*
 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
 * mantissa.
 */

#define INSERT_LDBL80_WORDS(d,ix0,ix1)		\
do {						\
	union IEEEl2bits iw_u;			\
	iw_u.xbits.expsign = (ix0);		\
	iw_u.xbits.man = (ix1);			\
	(d) = iw_u.e;				\
} while (0)

/*
 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
 * comprising the mantissa.
 */

#define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)	\
do {						\
	union IEEEl2bits iw_u;			\
	iw_u.xbits.expsign = (ix0);		\
	iw_u.xbits.manh = (ix1);		\
	iw_u.xbits.manl = (ix2);		\
	(d) = iw_u.e;				\
} while (0)

/* Set expsign of a long double from a 16 bit int. */

#define SET_LDBL_EXPSIGN(d,v)			\
do {						\
	union IEEEl2bits se_u;			\
	se_u.e = (d);				\
	se_u.xbits.expsign = (v);		\
	(d) = se_u.e;				\
} while (0)

#ifdef __i386__
/* Long double constants are broken on i386. */
#define LD80C(m, ex, v) {						\
	.xbits.man = __CONCAT(m, ULL),					\
	.xbits.expsign = (0x3fff + (ex)) | ((v) < 0 ? 0x8000 : 0),	\
}
#else
/* The above works on non-i386 too, but we use this to check v. */
#define LD80C(m, ex, v) { .e = (v), }
#endif
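
/*
 * Example (illustrative; union IEEEl2bits comes from the per-arch
 * _fpmath.h): an ld80 constant for pi, given as a 64 bit mantissa and a
 * binary exponent:
 *
 *	static const union IEEEl2bits
 *	pi_u = LD80C(0xc90fdaa22168c235, 1, 3.14159265358979323846L);
 */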

#ifdef FLT_EVAL_METHOD
/*
 * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
 */
#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
#else
#define STRICT_ASSIGN(type, lval, rval) do {		\
	volatile type __lval;				\
							\
	if (sizeof(type) >= sizeof(long double))	\
		(lval) = (rval);			\
	else {						\
		__lval = (rval);			\
		(lval) = __lval;			\
	}						\
} while (0)
#endif
#endif /* FLT_EVAL_METHOD */
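
/*
 * Example (illustrative): on i386 with x87 arithmetic (FLT_EVAL_METHOD
 * == 2), a plain "f = x + y" may leave the sum in an 80 bit register, so
 * f temporarily carries more precision than a float.  The volatile store
 * discards the extra bits:
 *
 *	float f, x = 0x1p0F, y = 0x1p-30F;
 *	STRICT_ASSIGN(float, f, x + y);	// f == 1.0F exactly
 */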

/* Support switching the mode to FP_PE if necessary. */
#if defined(__i386__) && !defined(NO_FPSETPREC)
#define ENTERI() ENTERIT(long double)
#define ENTERIT(returntype)			\
	returntype __retval;			\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define RETURNI(x) do {				\
	__retval = (x);				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	RETURNF(__retval);			\
} while (0)
#define ENTERV()				\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define RETURNV() do {				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	return;					\
} while (0)
#else
#define ENTERI()
#define ENTERIT(x)
#define RETURNI(x) RETURNF(x)
#define ENTERV()
#define RETURNV() return
#endif

/* Default return statement if hack*_t() is not used. */
#define RETURNF(v) return (v)

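/*
 * Example (illustrative; hypothetical function): long double code brackets
 * its body with these macros so that i386's default 53 bit precision is
 * raised to FP_PE (64 bit) for the duration of the call:
 *
 *	long double
 *	dummyl(long double x)
 *	{
 *		ENTERI();
 *		RETURNI(x * x);
 *	}
 */
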
/*
 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
 * a == 0, but is slower.
 */
#define _2sum(a, b) do {				\
	__typeof(a) __s, __w;				\
							\
	__w = (a) + (b);				\
	__s = __w - (a);				\
	(b) = ((a) - (__w - __s)) + ((b) - __s);	\
	(a) = __w;					\
} while (0)

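/*
 * Example (illustrative): _2sum() splits a sum into a rounded result and
 * an exact error term.  For a == 0x1p53 and b == 1.5, the sum rounds (to
 * even) to 0x1p53 + 2, and the rounding error -0.5 is recovered in b:
 *
 *	double a = 0x1p53, b = 1.5;
 *	_2sum(a, b);		// a == 0x1p53 + 2, b == -0.5, a + b exact
 */
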
/*
 * 2sumF algorithm.
 *
 * "Normalize" the terms in the infinite-precision expression a + b for
 * the sum of 2 floating point values so that b is as small as possible
 * relative to 'a'.  (The resulting 'a' is the value of the expression in
 * the same precision as 'a' and the resulting b is the rounding error.)
 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
 * exponent overflow or underflow must not occur.  This uses a Theorem of
 * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
 * is apparently due to Shewchuk (1997).
 *
 * For this to always work, assignment of a + b to 'a' must not retain any
 * extra precision in a + b.  This is required by C standards but broken
 * in many compilers.  The brokenness cannot be worked around using
 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
 * algorithm would be destroyed by non-null strict assignments.  (The
 * compilers are correct to be broken -- the efficiency of all floating
 * point calculations would be destroyed similarly if they forced the
 * conversions.)
 *
 * Fortunately, a case that works well can usually be arranged by building
 * any extra precision into the type of 'a' -- 'a' should have type float_t,
 * double_t or long double.  b's type should be no larger than 'a's type.
 * Callers should use these types with scopes as large as possible, to
 * reduce their own extra-precision and efficiency problems.  In
 * particular, they shouldn't convert back and forth just to call here.
 */
#ifdef DEBUG
#define _2sumF(a, b) do {				\
	__typeof(a) __w;				\
	volatile __typeof(a) __ia, __ib, __r, __vw;	\
							\
	__ia = (a);					\
	__ib = (b);					\
	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \
							\
	__w = (a) + (b);				\
	(b) = ((a) - __w) + (b);			\
	(a) = __w;					\
							\
	/* The next 2 assertions are weak if (a) is already long double. */ \
	assert((long double)__ia + __ib == (long double)(a) + (b)); \
	__vw = __ia + __ib;				\
	__r = __ia - __vw;				\
	__r += __ib;					\
	assert(__vw == (a) && __r == (b));		\
} while (0)
#else /* !DEBUG */
#define _2sumF(a, b) do {	\
	__typeof(a) __w;	\
				\
	__w = (a) + (b);	\
	(b) = ((a) - __w) + (b); \
	(a) = __w;		\
} while (0)
#endif /* DEBUG */

/*
 * Set x += c, where x is represented in extra precision as a + b.
 * x must be sufficiently normalized and sufficiently larger than c,
 * and the result is then sufficiently normalized.
 *
 * The details of ordering are that |a| must be >= |c| (so that (a, c)
 * can be normalized without extra work to swap 'a' with c).  The details of
 * the normalization are that b must be small relative to the normalized 'a'.
 * Normalization of (a, c) makes the normalized c tiny relative to the
 * normalized a, so b remains small relative to 'a' in the result.  However,
 * b need not ever be tiny relative to 'a'.  For example, b might be about
 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
 * That is usually enough, and adding c (which by normalization is about
 * 2**53 times smaller than a) cannot change b significantly.  However,
 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
 * significantly relative to b.  The caller must ensure that significant
 * cancellation doesn't occur, either by having c of the same sign as 'a',
 * or by having |c| a few percent smaller than |a|.  Pre-normalization of
 * (a, b) may help.
 *
 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
 * exercise 19).  We gain considerable efficiency by requiring the terms to
 * be sufficiently normalized and sufficiently increasing.
 */
#define _3sumF(a, b, c) do {	\
	__typeof(a) __tmp;	\
				\
	__tmp = (c);		\
	_2sumF(__tmp, (a));	\
	(b) += (a);		\
	(a) = __tmp;		\
} while (0)

/*
 * Common routine to process the arguments to nan(), nanf(), and nanl().
 */
void _scan_nan(uint32_t *__words, int __num_words, const char *__s);

/*
 * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
 * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
 * because we want to never return a signaling NaN, and also because we
 * don't want the quiet bit to affect the result.  Then mix the converted
 * args using the specified operation.
 *
 * When one arg is NaN, the result is typically that arg quieted.  When both
 * args are NaNs, the result is typically the quietening of the arg whose
 * mantissa is largest after quietening.  When neither arg is NaN, the
 * result may be NaN because it is indeterminate, or finite for subsequent
 * construction of a NaN as the indeterminate 0.0L/0.0L.
 *
 * Technical complications: the result in bits after rounding to the final
 * precision might depend on the runtime precision and/or on compiler
 * optimizations, especially when different register sets are used for
 * different precisions.  Try to make the result not depend on at least the
 * runtime precision by always doing the main mixing step in long double
 * precision.  Try to reduce dependencies on optimizations by adding the
 * 0's in different precisions (unless everything is in long double
 * precision).
 */
#define nan_mix(x, y) (nan_mix_op((x), (y), +))
#define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0))

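/*
 * Example (illustrative): a caller that must return a NaN when either of
 * its args is a NaN can write:
 *
 *	if (isnan(x) || isnan(y))
 *		return (nan_mix(x, y));	// signaling args come back quiet
 */
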
#ifdef _COMPLEX_H

/*
 * C99 specifies that complex numbers have the same representation as
 * an array of two elements, where the first element is the real part
 * and the second element is the imaginary part.
 */
typedef union {
	float complex f;
	float a[2];
} float_complex;
typedef union {
	double complex f;
	double a[2];
} double_complex;
typedef union {
	long double complex f;
	long double a[2];
} long_double_complex;
#define REALPART(z) ((z).a[0])
#define IMAGPART(z) ((z).a[1])

/*
 * Inline functions that can be used to construct complex values.
 *
 * The C99 standard intends x+I*y to be used for this, but x+I*y is
 * currently unusable in general since gcc introduces many overflow,
 * underflow, sign and efficiency bugs by rewriting I*y as
 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
 * to -0.0+I*0.0.
 *
 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
 * to construct complex values.  Compilers that conform only to the C99
 * standard require the following functions to avoid the above issues.
 */

#ifndef CMPLXF
static __inline float complex
CMPLXF(float x, float y)
{
	float_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLX
static __inline double complex
CMPLX(double x, double y)
{
	double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

#ifndef CMPLXL
static __inline long double complex
CMPLXL(long double x, long double y)
{
	long_double_complex z;

	REALPART(z) = x;
	IMAGPART(z) = y;
	return (z.f);
}
#endif

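/*
 * Example (illustrative): CMPLX() builds the value directly from its
 * parts, so special values survive that x+I*y may corrupt:
 *
 *	double complex z = CMPLX(0.0, INFINITY);	// creal(z) == 0.0
 */
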
#endif /* _COMPLEX_H */

/*
 * The rnint() family rounds to the nearest integer for a restricted
 * range of args (up to about 2**MANT_DIG).  We assume that the current
 * rounding mode is FE_TONEAREST so that this can be done efficiently.
 * Extra precision causes more problems in practice, and we only centralize
 * this here to reduce those problems, and have not solved the efficiency
 * problems.  The exp2() family uses a more delicate version of this that
 * requires extracting bits from the intermediate value, so it is not
 * centralized here and should copy any solution of the efficiency problems.
 */

static inline double
rnint(__double_t x)
{
	/*
	 * This casts to double to kill any extra precision.  This depends
	 * on the cast being applied to a double_t to avoid compiler bugs
	 * (this is a cleaner version of STRICT_ASSIGN()).  This is
	 * inefficient if there actually is extra precision, but is hard
	 * to improve on.  We use double_t in the API to minimise conversions
	 * for just calling here.  Note that we cannot easily change the
	 * magic number to the one that works directly with double_t, since
	 * the rounding precision is variable at runtime on x86 so the
	 * magic number would need to be variable.  Assuming that the
	 * rounding precision is always the default is too fragile.  This
	 * and many other complications will move when the default is
	 * changed to FP_PE.
	 */
	return ((double)(x + 0x1.8p52) - 0x1.8p52);
}

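/*
 * Example (illustrative): the magic number 0x1.8p52 is 1.5 * 2**52.
 * Adding it pushes x into the binade where the ulp is exactly 1, so the
 * FE_TONEAREST addition itself does the rounding:
 *
 *	rnint(2.5) == 2.0, rnint(3.5) == 4.0 (ties round to even),
 *	rnint(-1.3) == -1.0.
 */
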
static inline float
rnintf(__float_t x)
{
	/*
	 * As for rnint(), except we could just call that to handle the
	 * extra precision case, usually without losing efficiency.
	 */
	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
}

#ifdef LDBL_MANT_DIG
/*
 * The complications for extra precision are smaller for rnintl() since it
 * can safely assume that the rounding precision has been increased from
 * its default to FP_PE on x86.  We don't exploit that here to get small
 * optimizations from limiting the range to double.  We just need it for
 * the magic number to work with long doubles.  ld128 callers should use
 * rnint() instead of this if possible.  ld80 callers should prefer
 * rnintl() since for amd64 this avoids swapping the register set, while
 * for i386 it makes no difference (assuming FP_PE), and for other arches
 * it makes little difference.
 */
static inline long double
rnintl(long double x)
{
	return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
	    __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
}
#endif /* LDBL_MANT_DIG */

/*
 * irint() and i64rint() give the same result as casting to their integer
 * return type provided their arg is a floating point integer.  They can
 * sometimes be more efficient because no rounding is required.
 */
#if defined(__amd64__) || defined(__i386__)
#define irint(x)						\
	(sizeof(x) == sizeof(float) &&				\
	sizeof(__float_t) == sizeof(long double) ? irintf(x) :	\
	sizeof(x) == sizeof(double) &&				\
	sizeof(__double_t) == sizeof(long double) ? irintd(x) :	\
	sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
#else
#define irint(x) ((int)(x))
#endif

#define i64rint(x) ((int64_t)(x))	/* only needed for ld128 so not opt. */

#if defined(__i386__)
static __inline int
irintf(float x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}

static __inline int
irintd(double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

#if defined(__amd64__) || defined(__i386__)
static __inline int
irintl(long double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

/*
 * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where
 * N is the precision of the type of x.  These macros are used in the
 * half-cycle trigonometric functions (e.g., sinpi(x)).
 */
#define FFLOORF(x, j0, ix) do {			\
	(j0) = (((ix) >> 23) & 0xff) - 0x7f;	\
	(ix) &= ~(0x007fffff >> (j0));		\
	SET_FLOAT_WORD((x), (ix));		\
} while (0)

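/*
 * Example (illustrative): FFLOORF computes floorf() for args with
 * 1 <= x < 0x1p23 by clearing the fraction bits below the binary point:
 *
 *	float x = 2.75F;
 *	int32_t ix, j0;
 *	GET_FLOAT_WORD(ix, x);
 *	FFLOORF(x, j0, ix);	// x == 2.0F, j0 == 1 (unbiased exponent)
 */
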
#define FFLOOR(x, j0, ix, lx) do {				\
	(j0) = (((ix) >> 20) & 0x7ff) - 0x3ff;			\
	if ((j0) < 20) {					\
		(ix) &= ~(0x000fffff >> (j0));			\
		(lx) = 0;					\
	} else {						\
		(lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20));	\
	}							\
	INSERT_WORDS((x), (ix), (lx));				\
} while (0)

#define FFLOORL80(x, j0, ix, lx) do {			\
	(j0) = (ix) - 0x3fff + 1;			\
	if ((j0) < 32) {				\
		(lx) = ((lx) >> 32) << 32;		\
		(lx) &= ~((((lx) << 32)-1) >> (j0));	\
	} else {					\
		uint64_t _m;				\
		_m = (uint64_t)-1 >> (j0);		\
		if ((lx) & _m) (lx) &= ~_m;		\
	}						\
	INSERT_LDBL80_WORDS((x), (ix), (lx));		\
} while (0)

#define FFLOORL128(x, ai, ar) do {			\
	union IEEEl2bits u;				\
	uint64_t m;					\
	int e;						\
	u.e = (x);					\
	e = u.bits.exp - 16383;				\
	if (e < 48) {					\
		m = ((1llu << 49) - 1) >> (e + 1);	\
		u.bits.manh &= ~m;			\
		u.bits.manl = 0;			\
	} else {					\
		m = (uint64_t)-1 >> (e - 48);		\
		u.bits.manl &= ~m;			\
	}						\
	(ai) = u.e;					\
	(ar) = (x) - (ai);				\
} while (0)

#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define breakpoint() asm("int $3")
#else
#include <signal.h>

#define breakpoint() raise(SIGTRAP)
#endif
#endif

#ifdef STRUCT_RETURN
#define RETURNSP(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNF((rp)->hi);	\
	RETURNF((rp)->hi + (rp)->lo);	\
} while (0)
#define RETURNSPI(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNI((rp)->hi);	\
	RETURNI((rp)->hi + (rp)->lo);	\
} while (0)
#endif

#define SUM2P(x, y) ({			\
	const __typeof (x) __x = (x);	\
	const __typeof (y) __y = (y);	\
	__x + __y;			\
})

/* fdlibm kernel function */
int __kernel_rem_pio2(double*,double*,int,int,int);

/* double precision kernel functions */
#ifndef INLINE_REM_PIO2
int __ieee754_rem_pio2(double,double*);
#endif
double __kernel_sin(double,double,int);
double __kernel_cos(double,double);
double __kernel_tan(double,double,int);
double __ldexp_exp(double,int);
#ifdef _COMPLEX_H
double complex __ldexp_cexp(double complex,int);
#endif

/* float precision kernel functions */
#ifndef INLINE_REM_PIO2F
int __ieee754_rem_pio2f(float,double*);
#endif
#ifndef INLINE_KERNEL_SINDF
float __kernel_sindf(double);
#endif
#ifndef INLINE_KERNEL_COSDF
float __kernel_cosdf(double);
#endif
#ifndef INLINE_KERNEL_TANDF
float __kernel_tandf(double,int);
#endif
float __ldexp_expf(float,int);
#ifdef _COMPLEX_H
float complex __ldexp_cexpf(float complex,int);
#endif

/* long double precision kernel functions */
long double __kernel_sinl(long double, long double, int);
long double __kernel_cosl(long double, long double);
long double __kernel_tanl(long double, long double, int);

#endif /* !_MATH_PRIVATE_H_ */