Elliott Hughes | a0ee078 | 2013-01-30 19:06:37 -0800 | [diff] [blame] | 1 | /*- |
| 2 | * ==================================================== |
| 3 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
| 4 | * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz. |
| 5 | * |
| 6 | * Developed at SunPro, a Sun Microsystems, Inc. business. |
| 7 | * Permission to use, copy, modify, and distribute this |
| 8 | * software is freely granted, provided that this notice |
| 9 | * is preserved. |
| 10 | * ==================================================== |
| 11 | * |
| 12 | * The argument reduction and testing for exceptional cases was |
| 13 | * written by Steven G. Kargl with input from Bruce D. Evans |
| 14 | * and David A. Schultz. |
| 15 | */ |
| 16 | |
| 17 | #include <sys/cdefs.h> |
| 18 | __FBSDID("$FreeBSD$"); |
| 19 | |
| 20 | #include <float.h> |
| 21 | #ifdef __i386__ |
| 22 | #include <ieeefp.h> |
| 23 | #endif |
| 24 | |
| 25 | #include "fpmath.h" |
| 26 | #include "math.h" |
| 27 | #include "math_private.h" |
| 28 | |
| 29 | #define BIAS (LDBL_MAX_EXP - 1) |
| 30 | |
| 31 | static const unsigned |
| 32 | B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ |
| 33 | |
| 34 | long double |
| 35 | cbrtl(long double x) |
| 36 | { |
| 37 | union IEEEl2bits u, v; |
| 38 | long double r, s, t, w; |
| 39 | double dr, dt, dx; |
| 40 | float ft, fx; |
| 41 | uint32_t hx; |
| 42 | uint16_t expsign; |
| 43 | int k; |
| 44 | |
| 45 | u.e = x; |
| 46 | expsign = u.xbits.expsign; |
| 47 | k = expsign & 0x7fff; |
| 48 | |
| 49 | /* |
| 50 | * If x = +-Inf, then cbrt(x) = +-Inf. |
| 51 | * If x = NaN, then cbrt(x) = NaN. |
| 52 | */ |
| 53 | if (k == BIAS + LDBL_MAX_EXP) |
| 54 | return (x + x); |
| 55 | |
| 56 | ENTERI(); |
| 57 | if (k == 0) { |
| 58 | /* If x = +-0, then cbrt(x) = +-0. */ |
| 59 | if ((u.bits.manh | u.bits.manl) == 0) |
| 60 | RETURNI(x); |
| 61 | /* Adjust subnormal numbers. */ |
| 62 | u.e *= 0x1.0p514; |
| 63 | k = u.bits.exp; |
| 64 | k -= BIAS + 514; |
| 65 | } else |
| 66 | k -= BIAS; |
| 67 | u.xbits.expsign = BIAS; |
| 68 | v.e = 1; |
| 69 | |
| 70 | x = u.e; |
| 71 | switch (k % 3) { |
| 72 | case 1: |
| 73 | case -2: |
| 74 | x = 2*x; |
| 75 | k--; |
| 76 | break; |
| 77 | case 2: |
| 78 | case -1: |
| 79 | x = 4*x; |
| 80 | k -= 2; |
| 81 | break; |
| 82 | } |
| 83 | v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3); |
| 84 | |
| 85 | /* |
| 86 | * The following is the guts of s_cbrtf, with the handling of |
| 87 | * special values removed and extra care for accuracy not taken, |
| 88 | * but with most of the extra accuracy not discarded. |
| 89 | */ |
| 90 | |
| 91 | /* ~5-bit estimate: */ |
| 92 | fx = x; |
| 93 | GET_FLOAT_WORD(hx, fx); |
| 94 | SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1)); |
| 95 | |
| 96 | /* ~16-bit estimate: */ |
| 97 | dx = x; |
| 98 | dt = ft; |
| 99 | dr = dt * dt * dt; |
| 100 | dt = dt * (dx + dx + dr) / (dx + dr + dr); |
| 101 | |
| 102 | /* ~47-bit estimate: */ |
| 103 | dr = dt * dt * dt; |
| 104 | dt = dt * (dx + dx + dr) / (dx + dr + dr); |
| 105 | |
| 106 | #if LDBL_MANT_DIG == 64 |
| 107 | /* |
| 108 | * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8). |
| 109 | * Round it away from zero to 32 bits (32 so that t*t is exact, and |
| 110 | * away from zero for technical reasons). |
| 111 | */ |
| 112 | volatile double vd2 = 0x1.0p32; |
| 113 | volatile double vd1 = 0x1.0p-31; |
| 114 | #define vd ((long double)vd2 + vd1) |
| 115 | |
| 116 | t = dt + vd - 0x1.0p32; |
| 117 | #elif LDBL_MANT_DIG == 113 |
| 118 | /* |
| 119 | * Round dt away from zero to 47 bits. Since we don't trust the 47, |
| 120 | * add 2 47-bit ulps instead of 1 to round up. Rounding is slow and |
| 121 | * might be avoidable in this case, since on most machines dt will |
| 122 | * have been evaluated in 53-bit precision and the technical reasons |
| 123 | * for rounding up might not apply to either case in cbrtl() since |
| 124 | * dt is much more accurate than needed. |
| 125 | */ |
| 126 | t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60; |
| 127 | #else |
| 128 | #error "Unsupported long double format" |
| 129 | #endif |
| 130 | |
| 131 | /* |
| 132 | * Final step Newton iteration to 64 or 113 bits with |
| 133 | * error < 0.667 ulps |
| 134 | */ |
| 135 | s=t*t; /* t*t is exact */ |
| 136 | r=x/s; /* error <= 0.5 ulps; |r| < |t| */ |
| 137 | w=t+t; /* t+t is exact */ |
| 138 | r=(r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ |
| 139 | t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ |
| 140 | |
| 141 | t *= v.e; |
| 142 | RETURNI(t); |
| 143 | } |