| /*- | 
 |  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG> | 
 |  * All rights reserved. | 
 |  * | 
 |  * Redistribution and use in source and binary forms, with or without | 
 |  * modification, are permitted provided that the following conditions | 
 |  * are met: | 
 |  * 1. Redistributions of source code must retain the above copyright | 
 |  *    notice, this list of conditions and the following disclaimer. | 
 |  * 2. Redistributions in binary form must reproduce the above copyright | 
 |  *    notice, this list of conditions and the following disclaimer in the | 
 |  *    documentation and/or other materials provided with the distribution. | 
 |  * | 
 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | 
 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
 |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | 
 |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 
 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 
 |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 
 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 
 |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
 |  * SUCH DAMAGE. | 
 |  * | 
 |  * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $ | 
 |  */ | 
 |  | 
 | #include <sys/cdefs.h> | 
 | #include <sys/types.h> | 
 | #include "fenv.h" | 
 |  | 
 | #define ROUND_MASK   (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO) | 
 |  | 
 | /* | 
 |  * The hardware default control word for i387's and later coprocessors is | 
 |  * 0x37F, giving: | 
 |  * | 
 |  *	round to nearest | 
 |  *	64-bit precision | 
 |  *	all exceptions masked. | 
 |  * | 
 |  * We modify the affine mode bit and precision bits in this to give: | 
 |  * | 
 |  *	affine mode for 287's (if they work at all) (1 in bitfield 1<<12) | 
 |  *	53-bit precision (2 in bitfield 3<<8) | 
 |  * | 
 |  * 64-bit precision often gives bad results with high level languages | 
 |  * because it makes the results of calculations depend on whether | 
 |  * intermediate values are stored in memory or in FPU registers. | 
 |  */ | 
 | #define	__INITIAL_NPXCW__	0x127F | 
 | #define	__INITIAL_MXCSR__	0x1F80 | 
 |  | 
 | /* | 
 |  * As compared to the x87 control word, the SSE unit's control word | 
 |  * has the rounding control bits offset by 3 and the exception mask | 
 |  * bits offset by 7. | 
 |  */ | 
 | #define _SSE_ROUND_SHIFT 3 | 
 | #define _SSE_EMASK_SHIFT 7 | 
 |  | 
 | const fenv_t __fe_dfl_env = { | 
 |   __INITIAL_NPXCW__, /*__control*/ | 
 |   0x0000,            /*__mxcsr_hi*/ | 
 |   0x0000,            /*__status*/ | 
 |   0x1f80,            /*__mxcsr_lo*/ | 
 |   0xffffffff,        /*__tag*/ | 
 |   { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 
 |     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/ | 
 | }; | 
 |  | 
 | #define __fldcw(__cw)           __asm volatile("fldcw %0" : : "m" (__cw)) | 
 | #define __fldenv(__env)         __asm volatile("fldenv %0" : : "m" (__env)) | 
 | #define __fldenvx(__env)        __asm volatile("fldenv %0" : : "m" (__env)  \ | 
 |                                 : "st", "st(1)", "st(2)", "st(3)", "st(4)",   \ | 
 |                                 "st(5)", "st(6)", "st(7)") | 
 | #define __fnclex()              __asm volatile("fnclex") | 
 | #define __fnstenv(__env)        __asm volatile("fnstenv %0" : "=m" (*(__env))) | 
 | #define __fnstcw(__cw)          __asm volatile("fnstcw %0" : "=m" (*(__cw))) | 
 | #define __fnstsw(__sw)          __asm volatile("fnstsw %0" : "=am" (*(__sw))) | 
 | #define __fwait()               __asm volatile("fwait") | 
 | #define __ldmxcsr(__csr)        __asm volatile("ldmxcsr %0" : : "m" (__csr)) | 
 | #define __stmxcsr(__csr)        __asm volatile("stmxcsr %0" : "=m" (*(__csr))) | 
 |  | 
 | /* After testing for SSE support once, we cache the result in __has_sse. */ | 
 | enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; | 
 | #ifdef __SSE__ | 
 | #define __HAS_SSE()     1 | 
 | #else | 
 | #define __HAS_SSE()     (__has_sse == __SSE_YES ||                      \ | 
 |                         (__has_sse == __SSE_UNK && __test_sse())) | 
 | #endif | 
 |  | 
 | enum __sse_support __has_sse = | 
 | #ifdef __SSE__ | 
 |   __SSE_YES; | 
 | #else | 
 |   __SSE_UNK; | 
 | #endif | 
 |  | 
 | #ifndef __SSE__ | 
 | #define getfl(x)    __asm volatile("pushfl\n\tpopl %0" : "=mr" (*(x))) | 
 | #define setfl(x)    __asm volatile("pushl %0\n\tpopfl" : : "g" (x)) | 
 | #define cpuid_dx(x) __asm volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t"  \ | 
 |                     "cpuid\n\tpopl %%ebx"          \ | 
 |                     : "=d" (*(x)) : : "eax", "ecx") | 
 |  | 
 | /* | 
 |  * Test for SSE support on this processor.  We need to do this because | 
 |  * we need to use ldmxcsr/stmxcsr to get correct results if any part | 
 |  * of the program was compiled to use SSE floating-point, but we can't | 
 |  * use SSE on older processors. | 
 |  */ | 
 | int | 
 | __test_sse(void) | 
 | { | 
 |   int flag, nflag; | 
 |   int dx_features; | 
 |  | 
 |   /* Am I a 486? */ | 
 |   getfl(&flag); | 
 |   nflag = flag ^ 0x200000; | 
 |   setfl(nflag); | 
 |   getfl(&nflag); | 
 |   if (flag != nflag) { | 
 |     /* Not a 486, so CPUID should work. */ | 
 |     cpuid_dx(&dx_features); | 
 |     if (dx_features & 0x2000000) { | 
 |       __has_sse = __SSE_YES; | 
 |       return (1); | 
 |     } | 
 |   } | 
 |   __has_sse = __SSE_NO; | 
 |   return (0); | 
 | } | 
 | #endif /* __SSE__ */ | 
 |  | 
 | int | 
 | fesetexceptflag(const fexcept_t *flagp, int excepts) | 
 | { | 
 |   fenv_t env; | 
 |   __uint32_t mxcsr; | 
 |  | 
 |   excepts &= FE_ALL_EXCEPT; | 
 |   if (excepts) { /* Do nothing if excepts is 0 */ | 
 |     __fnstenv(&env); | 
 |     env.__status &= ~excepts; | 
 |     env.__status |= *flagp & excepts; | 
 |     __fnclex(); | 
 |     __fldenv(env); | 
 |     if (__HAS_SSE()) { | 
 |       __stmxcsr(&mxcsr); | 
 |       mxcsr &= ~excepts; | 
 |       mxcsr |= *flagp & excepts; | 
 |       __ldmxcsr(mxcsr); | 
 |     } | 
 |   } | 
 |  | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | feraiseexcept(int excepts) | 
 | { | 
 |   fexcept_t ex = excepts; | 
 |  | 
 |   fesetexceptflag(&ex, excepts); | 
 |   __fwait(); | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | fegetenv(fenv_t *envp) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |  | 
 |   __fnstenv(envp); | 
 |   /* | 
 |    * fnstenv masks all exceptions, so we need to restore | 
 |    * the old control word to avoid this side effect. | 
 |    */ | 
 |   __fldcw(envp->__control); | 
 |   if (__HAS_SSE()) { | 
 |     __stmxcsr(&mxcsr); | 
 |     envp->__mxcsr_hi = mxcsr >> 16; | 
 |     envp->__mxcsr_lo = mxcsr & 0xffff; | 
 |   } | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | feholdexcept(fenv_t *envp) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |   fenv_t env; | 
 |  | 
 |   __fnstenv(&env); | 
 |   *envp = env; | 
 |   env.__status &= ~FE_ALL_EXCEPT; | 
 |   env.__control |= FE_ALL_EXCEPT; | 
 |   __fnclex(); | 
 |   __fldenv(env); | 
 |   if (__HAS_SSE()) { | 
 |     __stmxcsr(&mxcsr); | 
 |     envp->__mxcsr_hi = mxcsr >> 16; | 
 |     envp->__mxcsr_lo = mxcsr & 0xffff; | 
 |     mxcsr &= ~FE_ALL_EXCEPT; | 
 |     mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT; | 
 |     __ldmxcsr(mxcsr); | 
 |   } | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | feupdateenv(const fenv_t *envp) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |   __uint16_t status; | 
 |  | 
 |   __fnstsw(&status); | 
 |   if (__HAS_SSE()) { | 
 |     __stmxcsr(&mxcsr); | 
 |   } else { | 
 |     mxcsr = 0; | 
 |   } | 
 |   fesetenv(envp); | 
 |   feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | feenableexcept(int mask) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |   __uint16_t control, omask; | 
 |  | 
 |   mask &= FE_ALL_EXCEPT; | 
 |   __fnstcw(&control); | 
 |   if (__HAS_SSE()) { | 
 |     __stmxcsr(&mxcsr); | 
 |   } else { | 
 |     mxcsr = 0; | 
 |   } | 
 |   omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; | 
 |   if (mask) { | 
 |     control &= ~mask; | 
 |     __fldcw(control); | 
 |     if (__HAS_SSE()) { | 
 |       mxcsr &= ~(mask << _SSE_EMASK_SHIFT); | 
 |       __ldmxcsr(mxcsr); | 
 |     } | 
 |   } | 
 |   return (omask); | 
 | } | 
 |  | 
 | int | 
 | fedisableexcept(int mask) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |   __uint16_t control, omask; | 
 |  | 
 |   mask &= FE_ALL_EXCEPT; | 
 |   __fnstcw(&control); | 
 |   if (__HAS_SSE()) { | 
 |     __stmxcsr(&mxcsr); | 
 |   } else { | 
 |     mxcsr = 0; | 
 |   } | 
 |   omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; | 
 |   if (mask) { | 
 |     control |= mask; | 
 |     __fldcw(control); | 
 |     if (__HAS_SSE()) { | 
 |       mxcsr |= mask << _SSE_EMASK_SHIFT; | 
 |       __ldmxcsr(mxcsr); | 
 |     } | 
 |   } | 
 |   return (omask); | 
 | } | 
 |  | 
 | int | 
 | feclearexcept(int excepts) | 
 | { | 
 |   fenv_t env; | 
 |   __uint32_t mxcsr; | 
 |  | 
 |   excepts &= FE_ALL_EXCEPT; | 
 |   if (excepts) { /* Do nothing if excepts is 0 */ | 
 |     __fnstenv(&env); | 
 |     env.__status &= ~excepts; | 
 |     __fnclex(); | 
 |     __fldenv(env); | 
 |     if (__HAS_SSE()) { | 
 |       __stmxcsr(&mxcsr); | 
 |       mxcsr &= ~excepts; | 
 |       __ldmxcsr(mxcsr); | 
 |     } | 
 |   } | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | fegetexceptflag(fexcept_t *flagp, int excepts) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |   __uint16_t status; | 
 |  | 
 |   excepts &= FE_ALL_EXCEPT; | 
 |   __fnstsw(&status); | 
 |   if (__HAS_SSE()) { | 
 |     __stmxcsr(&mxcsr); | 
 |   } else { | 
 |     mxcsr = 0; | 
 |   } | 
 |   *flagp = (status | mxcsr) & excepts; | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | fetestexcept(int excepts) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |   __uint16_t status; | 
 |  | 
 |   excepts &= FE_ALL_EXCEPT; | 
 |   if (excepts) { /* Do nothing if excepts is 0 */ | 
 |     __fnstsw(&status); | 
 |     if (__HAS_SSE()) { | 
 |       __stmxcsr(&mxcsr); | 
 |     } else { | 
 |       mxcsr = 0; | 
 |     } | 
 |     return ((status | mxcsr) & excepts); | 
 |   } | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | fegetround(void) | 
 | { | 
 |   __uint16_t control; | 
 |  | 
 |   /* | 
 |    * We assume that the x87 and the SSE unit agree on the | 
 |    * rounding mode.  Reading the control word on the x87 turns | 
 |    * out to be about 5 times faster than reading it on the SSE | 
 |    * unit on an Opteron 244. | 
 |    */ | 
 |   __fnstcw(&control); | 
 |   return (control & ROUND_MASK); | 
 | } | 
 |  | 
 | int | 
 | fesetround(int round) | 
 | { | 
 |   __uint32_t mxcsr; | 
 |   __uint16_t control; | 
 |  | 
 |   if (round & ~ROUND_MASK) { | 
 |     return (-1); | 
 |   } else { | 
 |     __fnstcw(&control); | 
 |     control &= ~ROUND_MASK; | 
 |     control |= round; | 
 |     __fldcw(control); | 
 |     if (__HAS_SSE()) { | 
 |       __stmxcsr(&mxcsr); | 
 |       mxcsr &= ~(ROUND_MASK << _SSE_ROUND_SHIFT); | 
 |       mxcsr |= round << _SSE_ROUND_SHIFT; | 
 |       __ldmxcsr(mxcsr); | 
 |     } | 
 |     return (0); | 
 |   } | 
 | } | 
 |  | 
 | int | 
 | fesetenv(const fenv_t *envp) | 
 | { | 
 |   fenv_t env = *envp; | 
 |   __uint32_t mxcsr; | 
 |  | 
 |   mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo); | 
 |   env.__mxcsr_hi = 0xffff; | 
 |   env.__mxcsr_lo = 0xffff; | 
 |   /* | 
 |    * XXX Using fldenvx() instead of fldenv() tells the compiler that this | 
 |    * instruction clobbers the i387 register stack.  This happens because | 
 |    * we restore the tag word from the saved environment.  Normally, this | 
 |    * would happen anyway and we wouldn't care, because the ABI allows | 
 |    * function calls to clobber the i387 regs.  However, fesetenv() is | 
 |    * inlined, so we need to be more careful. | 
 |    */ | 
 |   __fldenvx(env); | 
 |   if (__HAS_SSE()) { | 
 |     __ldmxcsr(mxcsr); | 
 |   } | 
 |   return (0); | 
 | } | 
 |  | 
 | int | 
 | fegetexcept(void) | 
 | { | 
 |   __uint16_t control; | 
 |  | 
 |   /* | 
 |    * We assume that the masks for the x87 and the SSE unit are | 
 |    * the same. | 
 |    */ | 
 |   __fnstcw(&control); | 
 |   return (~control & FE_ALL_EXCEPT); | 
 | } |