Merge "AArch64: libm: Fix ARM64 fenv_t and refactor ARM64 libm implementation."
diff --git a/benchmarks/math_benchmark.cpp b/benchmarks/math_benchmark.cpp
index 3602de4..a9748cd 100644
--- a/benchmarks/math_benchmark.cpp
+++ b/benchmarks/math_benchmark.cpp
@@ -16,6 +16,7 @@
 
 #include "benchmark.h"
 
+#include <fenv.h>
 #include <math.h>
 
 // Avoid optimization.
@@ -113,10 +114,49 @@
 }
 BENCHMARK(BM_math_isinf_ZERO);
 
+static void BM_math_sin_fast(int iters) {
+  StartBenchmarkTiming();
 
+  d = 1.0;
+  for (int i = 0; i < iters; ++i) {
+    d += sin(d);
+  }
 
+  StopBenchmarkTiming();
+}
+BENCHMARK(BM_math_sin_fast);
 
+static void BM_math_sin_feupdateenv(int iters) {
+  StartBenchmarkTiming();
 
+  d = 1.0;
+  for (int i = 0; i < iters; ++i) {
+    fenv_t __libc_save_rm;
+    feholdexcept(&__libc_save_rm);
+    fesetround(FE_TONEAREST);
+    d += sin(d);
+    feupdateenv(&__libc_save_rm);
+  }
+
+  StopBenchmarkTiming();
+}
+BENCHMARK(BM_math_sin_feupdateenv);
+
+static void BM_math_sin_fesetenv(int iters) {
+  StartBenchmarkTiming();
+
+  d = 1.0;
+  for (int i = 0; i < iters; ++i) {
+    fenv_t __libc_save_rm;
+    feholdexcept(&__libc_save_rm);
+    fesetround(FE_TONEAREST);
+    d += sin(d);
+    fesetenv(&__libc_save_rm);
+  }
+
+  StopBenchmarkTiming();
+}
+BENCHMARK(BM_math_sin_fesetenv);
 
 static void BM_math_fpclassify_NORMAL(int iters) {
   StartBenchmarkTiming();
diff --git a/libm/arm64/fenv.c b/libm/arm64/fenv.c
index 9db21ef..ce560a7 100644
--- a/libm/arm64/fenv.c
+++ b/libm/arm64/fenv.c
@@ -28,114 +28,168 @@
 
 #include <fenv.h>
 
-/*
- * Hopefully the system ID byte is immutable, so it's valid to use
- * this as a default environment.
- */
-const fenv_t __fe_dfl_env = 0;
+#define FPCR_EXCEPT_SHIFT 8
+#define FPCR_EXCEPT_MASK  (FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT)
 
-int fegetenv(fenv_t* __envp) {
-  fenv_t _fpcr, _fpsr;
-  __asm__ __volatile__("mrs %0,fpcr" : "=r" (_fpcr));
-  __asm__ __volatile__("mrs %0,fpsr" : "=r" (_fpsr));
-  *__envp = (_fpcr | _fpsr);
+#define FPCR_RMODE_SHIFT 22
+
+const fenv_t __fe_dfl_env = { 0 /* control */, 0 /* status */};
+
+typedef __uint32_t fpu_control_t;   // FPCR, Floating-point Control Register.
+typedef __uint32_t fpu_status_t;    // FPSR, Floating-point Status Register.
+
+#define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr))
+#define __get_fpsr(__fpsr) __asm__ __volatile__("mrs %0,fpsr" : "=r" (__fpsr))
+#define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr))
+#define __set_fpsr(__fpsr) __asm__ __volatile__("msr fpsr,%0" : :"ri" (__fpsr))
+
+int fegetenv(fenv_t* envp) {
+  __get_fpcr(envp->__control);
+  __get_fpsr(envp->__status);
   return 0;
 }
 
-int fesetenv(const fenv_t* __envp) {
-  fenv_t _fpcr = (*__envp & FPCR_MASK);
-  fenv_t _fpsr = (*__envp & FPSR_MASK);
-  __asm__ __volatile__("msr fpcr,%0" : :"ri" (_fpcr));
-  __asm__ __volatile__("msr fpsr,%0" : :"ri" (_fpsr));
+int fesetenv(const fenv_t* envp) {
+  fpu_control_t fpcr;
+
+  __get_fpcr(fpcr);
+  if (envp->__control != fpcr) {
+    __set_fpcr(envp->__control);
+  }
+  __set_fpsr(envp->__status);
   return 0;
 }
 
-int feclearexcept(int __excepts) {
-  fexcept_t __fpscr;
-  fegetenv(&__fpscr);
-  __fpscr &= ~__excepts;
-  fesetenv(&__fpscr);
+int feclearexcept(int excepts) {
+  fpu_status_t fpsr;
+
+  excepts &= FE_ALL_EXCEPT;
+  __get_fpsr(fpsr);
+  fpsr &= ~excepts;
+  __set_fpsr(fpsr);
   return 0;
 }
 
-int fegetexceptflag(fexcept_t* __flagp, int __excepts) {
-  fexcept_t __fpscr;
-  fegetenv(&__fpscr);
-  *__flagp = __fpscr & __excepts;
+int fegetexceptflag(fexcept_t* flagp, int excepts) {
+  fpu_status_t fpsr;
+
+  excepts &= FE_ALL_EXCEPT;
+  __get_fpsr(fpsr);
+  *flagp = fpsr & excepts;
   return 0;
 }
 
-int fesetexceptflag(const fexcept_t* __flagp, int __excepts) {
-  fexcept_t __fpscr;
-  fegetenv(&__fpscr);
-  __fpscr &= ~__excepts;
-  __fpscr |= *__flagp & __excepts;
-  fesetenv(&__fpscr);
+int fesetexceptflag(const fexcept_t* flagp, int excepts) {
+  fpu_status_t fpsr;
+
+  excepts &= FE_ALL_EXCEPT;
+  __get_fpsr(fpsr);
+  fpsr &= ~excepts;
+  fpsr |= *flagp & excepts;
+  __set_fpsr(fpsr);
   return 0;
 }
 
-int feraiseexcept(int __excepts) {
-  fexcept_t __ex = __excepts;
-  fesetexceptflag(&__ex, __excepts);
+int feraiseexcept(int excepts) {
+  fexcept_t ex = excepts;
+
+  fesetexceptflag(&ex, excepts);
   return 0;
 }
 
-int fetestexcept(int __excepts) {
-  fexcept_t __fpscr;
-  fegetenv(&__fpscr);
-  return (__fpscr & __excepts);
+int fetestexcept(int excepts) {
+  fpu_status_t fpsr;
+
+  excepts &= FE_ALL_EXCEPT;
+  __get_fpsr(fpsr);
+  return (fpsr & excepts);
 }
 
 int fegetround(void) {
-  fenv_t _fpscr;
-  fegetenv(&_fpscr);
-  return ((_fpscr >> _FPSCR_RMODE_SHIFT) & 0x3);
+  fpu_control_t fpcr;
+
+  __get_fpcr(fpcr);
+  return ((fpcr >> FPCR_RMODE_SHIFT) & FE_TOWARDZERO);
 }
 
-int fesetround(int __round) {
-  fenv_t _fpscr;
-  fegetenv(&_fpscr);
-  _fpscr &= ~(0x3 << _FPSCR_RMODE_SHIFT);
-  _fpscr |= (__round << _FPSCR_RMODE_SHIFT);
-  fesetenv(&_fpscr);
+int fesetround(int round) {
+  fpu_control_t fpcr, new_fpcr;
+
+  round &= FE_TOWARDZERO;
+  __get_fpcr(fpcr);
+  new_fpcr = fpcr & ~(FE_TOWARDZERO << FPCR_RMODE_SHIFT);
+  new_fpcr |= (round << FPCR_RMODE_SHIFT);
+  if (new_fpcr != fpcr) {
+    __set_fpcr(new_fpcr);
+  }
   return 0;
 }
 
-int feholdexcept(fenv_t* __envp) {
-  fenv_t __env;
-  fegetenv(&__env);
-  *__envp = __env;
-  __env &= ~(FE_ALL_EXCEPT | _FPSCR_ENABLE_MASK);
-  fesetenv(&__env);
+int feholdexcept(fenv_t* envp) {
+  fenv_t env;
+  fpu_status_t fpsr;
+  fpu_control_t fpcr, new_fpcr;
+
+  __get_fpsr(fpsr);
+  __get_fpcr(fpcr);
+  env.__status = fpsr;
+  env.__control = fpcr;
+  *envp = env;
+
+  // Set exceptions to untrapped.
+  new_fpcr = fpcr & ~(FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT);
+  if (new_fpcr != fpcr) {
+    __set_fpcr(new_fpcr);
+  }
+
+  // Clear all exceptions.
+  fpsr &= ~FE_ALL_EXCEPT;
+  __set_fpsr(fpsr);
   return 0;
 }
 
-int feupdateenv(const fenv_t* __envp) {
-  fexcept_t __fpscr;
-  fegetenv(&__fpscr);
-  fesetenv(__envp);
-  feraiseexcept(__fpscr & FE_ALL_EXCEPT);
+int feupdateenv(const fenv_t* envp) {
+  fpu_status_t fpsr;
+  fpu_control_t fpcr;
+
+  // Set FPU Control register.
+  __get_fpcr(fpcr);
+  if (envp->__control != fpcr) {
+    __set_fpcr(envp->__control);
+  }
+
+  // Set FPU Status register to status | currently raised exceptions.
+  __get_fpsr(fpsr);
+  fpsr = envp->__status | (fpsr & FE_ALL_EXCEPT);
+  __set_fpsr(fpsr);
   return 0;
 }
 
-int feenableexcept(int __mask) {
-  fenv_t __old_fpscr, __new_fpscr;
-  fegetenv(&__old_fpscr);
-  __new_fpscr = __old_fpscr | (__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT;
-  fesetenv(&__new_fpscr);
-  return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT);
+int feenableexcept(int mask) {
+  fpu_control_t old_fpcr, new_fpcr;
+
+  __get_fpcr(old_fpcr);
+  new_fpcr = old_fpcr | ((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
+  if (new_fpcr != old_fpcr) {
+    __set_fpcr(new_fpcr);
+  }
+  return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
 }
 
-int fedisableexcept(int __mask) {
-  fenv_t __old_fpscr, __new_fpscr;
-  fegetenv(&__old_fpscr);
-  __new_fpscr = __old_fpscr & ~((__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT);
-  fesetenv(&__new_fpscr);
-  return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT);
+int fedisableexcept(int mask) {
+  fpu_control_t old_fpcr, new_fpcr;
+
+  __get_fpcr(old_fpcr);
+  new_fpcr = old_fpcr & ~((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
+  if (new_fpcr != old_fpcr) {
+    __set_fpcr(new_fpcr);
+  }
+  return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
 }
 
 int fegetexcept(void) {
-  fenv_t __fpscr;
-  fegetenv(&__fpscr);
-  return ((__fpscr & _FPSCR_ENABLE_MASK) >> _FPSCR_ENABLE_SHIFT);
+  fpu_control_t fpcr;
+
+  __get_fpcr(fpcr);
+  return ((fpcr & FPCR_EXCEPT_MASK) >> FPCR_EXCEPT_SHIFT);
 }
diff --git a/libm/include/arm64/machine/fenv.h b/libm/include/arm64/machine/fenv.h
index 2efeee3..a8568b8 100644
--- a/libm/include/arm64/machine/fenv.h
+++ b/libm/include/arm64/machine/fenv.h
@@ -27,15 +27,44 @@
  */
 
 /*
- * Rewritten for Android.
+ * In ARMv8, AArch64 state, floating-point operation is controlled by:
  *
- * The ARM FPSCR (Floating-point Status and Control Register) described here:
- * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0344b/Chdfafia.html
- * has been split into the FPCR (Floating-point Control Register) and FPSR
- * (Floating-point Status Register) on the ARMv8. These are described briefly in
- * "Procedure Call Standard for the ARM 64-bit Architecture"
- * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf
- * section 5.1.2 SIMD and Floating-Point Registers
+ *  * FPCR - 32Bit Floating-Point Control Register:
+ *      * [31:27] - Reserved, Res0;
+ *      * [26]    - AHP, Alternative half-precision control bit;
+ *      * [25]    - DN, Default NaN mode control bit;
+ *      * [24]    - FZ, Flush-to-zero mode control bit;
+ *      * [23:22] - RMode, Rounding Mode control field:
+ *            * 00  - Round to Nearest (RN) mode;
+ *            * 01  - Round towards Plus Infinity (RP) mode;
+ *            * 10  - Round towards Minus Infinity (RM) mode;
+ *            * 11  - Round towards Zero (RZ) mode.
+ *      * [21:20] - Stride, ignored during AArch64 execution;
+ *      * [19]    - Reserved, Res0;
+ *      * [18:16] - Len, ignored during AArch64 execution;
+ *      * [15]    - IDE, Input Denormal exception trap;
+ *      * [14:13] - Reserved, Res0;
+ *      * [12]    - IXE, Inexact exception trap;
+ *      * [11]    - UFE, Underflow exception trap;
+ *      * [10]    - OFE, Overflow exception trap;
+ *      * [9]     - DZE, Division by Zero exception;
+ *      * [8]     - IOE, Invalid Operation exception;
+ *      * [7:0]   - Reserved, Res0.
+ *
+ *  * FPSR - 32Bit Floating-Point Status Register:
+ *      * [31]    - N, Negative condition flag for AArch32 (AArch64 sets PSTATE.N);
+ *      * [30]    - Z, Zero condition flag for AArch32 (AArch64 sets PSTATE.Z);
+ *      * [29]    - C, Carry conditon flag for AArch32 (AArch64 sets PSTATE.C);
+ *      * [28]    - V, Overflow conditon flag for AArch32 (AArch64 sets PSTATE.V);
+ *      * [27]    - QC, Cumulative saturation bit, Advanced SIMD only;
+ *      * [26:8]  - Reserved, Res0;
+ *      * [7]     - IDC, Input Denormal cumulative exception;
+ *      * [6:5]   - Reserved, Res0;
+ *      * [4]     - IXC, Inexact cumulative exception;
+ *      * [3]     - UFC, Underflow cumulative exception;
+ *      * [2]     - OFC, Overflow cumulative exception;
+ *      * [1]     - DZC, Division by Zero cumulative exception;
+ *      * [0]     - IOC, Invalid Operation cumulative exception.
  */
 
 #ifndef _ARM64_FENV_H_
@@ -45,7 +74,11 @@
 
 __BEGIN_DECLS
 
-typedef __uint32_t fenv_t;
+typedef struct {
+  __uint32_t __control;     /* FPCR, Floating-point Control Register */
+  __uint32_t __status;      /* FPSR, Floating-point Status Register */
+} fenv_t;
+
 typedef __uint32_t fexcept_t;
 
 /* Exception flags. */
@@ -54,11 +87,9 @@
 #define FE_OVERFLOW   0x04
 #define FE_UNDERFLOW  0x08
 #define FE_INEXACT    0x10
+#define FE_DENORMAL   0x80
 #define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
-                       FE_OVERFLOW | FE_UNDERFLOW)
-
-#define _FPSCR_ENABLE_SHIFT 8
-#define _FPSCR_ENABLE_MASK  (FE_ALL_EXCEPT << _FPSCR_ENABLE_SHIFT)
+                       FE_OVERFLOW | FE_UNDERFLOW | FE_DENORMAL)
 
 /* Rounding modes. */
 #define FE_TONEAREST  0x0
@@ -66,56 +97,6 @@
 #define FE_DOWNWARD   0x2
 #define FE_TOWARDZERO 0x3
 
-#define _FPSCR_RMODE_SHIFT 22
-
-#define FPCR_IOE    (1 << 8)
-#define FPCR_DZE    (1 << 9)
-#define FPCR_OFE    (1 << 10)
-#define FPCR_UFE    (1 << 11)
-#define FPCR_IXE    (1 << 12)
-#define FPCR_IDE    (1 << 15)
-#define FPCR_LEN    (7 << 16)
-#define FPCR_STRIDE (3 << 20)
-#define FPCR_RMODE  (3 << 22)
-#define FPCR_FZ     (1 << 24)
-#define FPCR_DN     (1 << 25)
-#define FPCR_AHP    (1 << 26)
-#define FPCR_MASK   (FPCR_IOE | \
-                     FPCR_DZE | \
-                     FPCR_OFE | \
-                     FPCR_UFE | \
-                     FPCR_IXE | \
-                     FPCR_IDE | \
-                     FPCR_LEN | \
-                     FPCR_STRIDE | \
-                     FPCR_RMODE | \
-                     FPCR_FZ | \
-                     FPCR_DN | \
-                     FPCR_AHP )
-
-#define FPSR_IOC    (1 << 0)
-#define FPSR_DZC    (1 << 1)
-#define FPSR_OFC    (1 << 2)
-#define FPSR_UFC    (1 << 3)
-#define FPSR_IXC    (1 << 4)
-#define FPSR_IDC    (1 << 7)
-#define FPSR_QC     (1 << 27)
-#define FPSR_V      (1 << 28)
-#define FPSR_C      (1 << 29)
-#define FPSR_Z      (1 << 30)
-#define FPSR_N      (1 << 31)
-#define FPSR_MASK   (FPSR_IOC | \
-                     FPSR_DZC | \
-                     FPSR_OFC | \
-                     FPSR_UFC | \
-                     FPSR_IXC | \
-                     FPSR_IDC | \
-                     FPSR_QC | \
-                     FPSR_V | \
-                     FPSR_C | \
-                     FPSR_Z | \
-                     FPSR_N )
-
 __END_DECLS
 
 #endif /* !_ARM64_FENV_H_ */