Add trivial assembler implementations of sqrt()/sqrtf() for riscv64.

10x faster than the upstream FreeBSD C implementations in the usual
bionic benchmarks.

Test: treehugger
Change-Id: If121cb1ed1ff0756310f34cbaa2c14e28484f1b8
diff --git a/libm/Android.bp b/libm/Android.bp
index effd7e9..62da017 100644
--- a/libm/Android.bp
+++ b/libm/Android.bp
@@ -347,6 +347,12 @@
         riscv64: {
             srcs: [
                 "riscv64/fenv.c",
+                "riscv64/sqrt.S",
+            ],
+
+            exclude_srcs: [
+                "upstream-freebsd/lib/msun/src/e_sqrt.c",
+                "upstream-freebsd/lib/msun/src/e_sqrtf.c",
             ],
             version_script: ":libm.riscv64.map",
         },