Add optimized implementations of 18 math functions for x86 and x86_64

Change-Id: I31bf601448a9427f825517f3a0ff24de47f49bfa
Signed-off-by: Jingwei Zhang <jingwei.zhang@intel.com>
Signed-off-by: Mingwei Shi <mingwei.shi@intel.com>
diff --git a/libm/x86_64/e_exp.S b/libm/x86_64/e_exp.S
new file mode 100644
index 0000000..6882dfc
--- /dev/null
+++ b/libm/x86_64/e_exp.S
@@ -0,0 +1,636 @@
+/*
+Copyright (c) 2014, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/******************************************************************************/
+//                     ALGORITHM DESCRIPTION
+//                     ---------------------
+//
+// Description:
+//  Let K = 64 (table size).
+//        x    x/log(2)     n
+//       e  = 2          = 2 * T[j] * (1 + P(y))
+//  where
+//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
+//       m = n*K + j,           m,n - signed integers, j in [0..K-1] (j = m mod K)
+//                  j/K
+//       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
+//
+//       P(y) is a minimax polynomial approximation of exp(y)-1
+//       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
+//
+//  To avoid problems with arithmetic overflow and underflow,
+//            n                        n1  n2
+//  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
+//  where BIAS is a value of exponent bias.
+//
+// Special cases:
+//  exp(NaN) = NaN
+//  exp(+INF) = +INF
+//  exp(-INF) = 0
+//  exp(x) = 1 for subnormals
+//  for finite argument, only exp(0)=1 is exact
+//  For IEEE double
+//    if x >  709.782712893383973096 then exp(x) overflow
+//    if x < -745.133219101941108420 then exp(x) underflow
+//
+/******************************************************************************/
+
+#include <private/bionic_asm.h>
+# -- Begin  exp  // double-precision e^x; the argument arrives in %xmm0 and the result leaves in %xmm0
+ENTRY(exp)  // ENTRY/END are macros, presumably supplied by the bionic_asm.h include above
+# parameter 1: %xmm0  // the input value x
+..B1.1:
+..___tag_value_exp.1:  // unwind marker: first instruction of the function
+        subq      $24, %rsp                    // 24-byte scratch frame: (%rsp) status word, 8(%rsp) copy of x, 16(%rsp) result spill
+..___tag_value_exp.3:  // unwind marker: stack pointer has been lowered
+        movsd     %xmm0, 8(%rsp)               // keep x in memory; the special-case paths re-read its 32-bit halves
+..B1.2:
+        unpcklpd  %xmm0, %xmm0                 // xmm0 = {x, x}
+        movapd    cv(%rip), %xmm1              // xmm1 = {64/ln2, 64/ln2}
+        movapd    Shifter(%rip), %xmm6         // xmm6 = 1.5*2^52 in both lanes (round-to-integer shifter)
+        movapd    16+cv(%rip), %xmm2           // xmm2 = high part of ln2/64 (low 16 fraction bits are zero)
+        movapd    32+cv(%rip), %xmm3           // xmm3 = second, much smaller piece of ln2/64
+        pextrw    $3, %xmm0, %eax              // eax = bits 48..63 of x: sign, exponent, top 4 fraction bits
+        andl      $32767, %eax                 // drop the sign bit
+        movl      $16527, %edx                 // 16527 = 0x408F, largest top word with |x| < 1024
+        subl      %eax, %edx                   // negative when |x| >= 1024 (also Inf and NaN)
+        subl      $15504, %eax                 // 15504 = 0x3C90; negative when |x| < 2^-54
+        orl       %eax, %edx                   // sign bit set if either bound was violated
+        cmpl      $-2147483648, %edx           // unsigned compare with 0x80000000 tests that sign bit
+        jae       .L_2TAG_PACKET_0.0.2         // out of the main range: tiny, huge, Inf or NaN
+        mulpd     %xmm0, %xmm1                 // x * 64/ln2
+        addpd     %xmm6, %xmm1                 // adding the shifter rounds to an integer m kept in the low fraction bits
+        movapd    %xmm1, %xmm7                 // xmm7 = raw bits; its low dword is m
+        subpd     %xmm6, %xmm1                 // xmm1 = m as a double
+        mulpd     %xmm1, %xmm2                 // m * (ln2/64 high part)
+        movapd    64+cv(%rip), %xmm4           // polynomial coefficients, roughly {1/720, 1/24}
+        mulpd     %xmm1, %xmm3                 // m * (ln2/64 low part)
+        movapd    80+cv(%rip), %xmm5           // polynomial coefficients, roughly {1/120, 1/6}
+        subpd     %xmm2, %xmm0                 // x - m*hi
+        movd      %xmm7, %eax                  // eax = m as a 32-bit integer
+        movl      %eax, %ecx
+        andl      $63, %ecx                    // j = m & 63, the table index
+        shll      $4, %ecx                     // byte offset of entry j (16 bytes per entry)
+        sarl      $6, %eax                     // n = m >> 6 (arithmetic shift)
+        movl      %eax, %edx                   // edx = n, used for the exponent range test below
+        movdqa    mmask(%rip), %xmm6
+        pand      %xmm6, %xmm7                 // keep only n*64 from the low dword (clears j and the upper 32 bits)
+        movdqa    bias(%rip), %xmm6
+        paddq     %xmm6, %xmm7                 // (n + 1023) * 64
+        psllq     $46, %xmm7                   // shift into the exponent field (bit 52 up): bit pattern of 2^n
+        subpd     %xmm3, %xmm0                 // y = x - m*hi - m*lo, the reduced argument
+        lea       Tbl_addr(%rip), %r8
+        movapd    (%rcx,%r8), %xmm2            // table entry j: low lane = T_lo[j], high lane = fraction bits of T_hi[j]
+        mulpd     %xmm0, %xmm4                 // y * {c, c} from 64+cv
+        movapd    %xmm0, %xmm6
+        movapd    %xmm0, %xmm1                 // xmm1 = y
+        mulpd     %xmm6, %xmm6                 // xmm6 = y^2
+        mulpd     %xmm6, %xmm0                 // xmm0 = y^3
+        addpd     %xmm4, %xmm5                 // xmm5 = (80+cv coefficients) + y * (64+cv coefficients)
+        mulsd     %xmm6, %xmm0                 // low lane becomes y^5, high lane stays y^3
+        mulpd     48+cv(%rip), %xmm6           // y^2 times a coefficient just below 0.5
+        addsd     %xmm2, %xmm1                 // y + T_lo[j]
+        unpckhpd  %xmm2, %xmm2                 // move the T_hi fraction bits into the low lane
+        mulpd     %xmm5, %xmm0                 // the two high-order polynomial terms, one per lane
+        addsd     %xmm0, %xmm1                 // accumulate the low-lane term
+        orpd      %xmm7, %xmm2                 // merge exponent and fraction: xmm2 = 2^n * T_hi[j]
+        unpckhpd  %xmm0, %xmm0                 // bring the high-lane term down to the low lane
+        addsd     %xmm1, %xmm0
+        addsd     %xmm6, %xmm0                 // xmm0 = T_lo[j] + P(y)
+        addl      $894, %edx
+        cmpl      $1916, %edx                  // unsigned test for n in [-894..1022]
+        ja        .L_2TAG_PACKET_1.0.2         // n < -894 or n > 1022: scale in two steps
+        mulsd     %xmm2, %xmm0
+        addsd     %xmm2, %xmm0                 // result = T + T * (T_lo + P(y)), with T = 2^n * T_hi[j]
+        jmp       ..B1.5
+.L_2TAG_PACKET_1.0.2:  // extreme n: the scale factor is split so intermediate values stay representable
+        xorpd     %xmm3, %xmm3
+        movapd    ALLONES(%rip), %xmm4
+        movl      $-1022, %edx
+        subl      %eax, %edx                   // edx = -1022 - n
+        movd      %edx, %xmm5
+        psllq     %xmm5, %xmm4                 // all-ones shifted left by that count: mask that clears low bits
+        movl      %eax, %ecx                   // ecx = n
+        sarl      $1, %eax                     // eax = n >> 1
+        pinsrw    $3, %eax, %xmm3              // place n >> 1 in bits 48..63 of xmm3
+        movapd    ebias(%rip), %xmm6           // bit pattern of 1.0 in both lanes
+        psllq     $4, %xmm3                    // line n >> 1 up with the exponent field (bit 52)
+        psubd     %xmm3, %xmm2                 // take n >> 1 out of the exponent of the scale factor
+        mulsd     %xmm2, %xmm0                 // reduced scale * (T_lo + P(y))
+        cmpl      $52, %edx
+        jg        .L_2TAG_PACKET_2.0.2         // -1022 - n > 52
+        andpd     %xmm2, %xmm4                 // xmm4 = reduced scale with its low bits masked off
+        paddd     %xmm6, %xmm3                 // xmm3 low lane = 1.0 with n >> 1 added to its exponent, i.e. 2^(n >> 1)
+        subsd     %xmm4, %xmm2                 // xmm2 = the masked-off low bits of the reduced scale
+        addsd     %xmm2, %xmm0                 // fold those low bits into the small term
+        cmpl      $1023, %ecx
+        jge       .L_2TAG_PACKET_3.0.2         // n >= 1023: overflow side
+        pextrw    $3, %xmm0, %ecx
+        andl      $32768, %ecx                 // sign bit of the small term
+        orl       %ecx, %edx
+        cmpl      $0, %edx
+        je        .L_2TAG_PACKET_4.0.2         // shift count is zero and the small term is non-negative
+        movapd    %xmm0, %xmm6                 // keep the small term for the path at PACKET_5
+        addsd     %xmm4, %xmm0                 // high part + small term
+        mulsd     %xmm3, %xmm0                 // apply the remaining 2^(n >> 1)
+        pextrw    $3, %xmm0, %ecx
+        andl      $32752, %ecx                 // 32752 = 0x7FF0 isolates the exponent field
+        cmpl      $0, %ecx
+        je        .L_2TAG_PACKET_5.0.2         // exponent field is zero: result is subnormal or zero
+        jmp       ..B1.5
+.L_2TAG_PACKET_5.0.2:  // NOTE(review): appears to rebuild the subnormal result by adding the two scaled parts as integers - confirm
+        mulsd     %xmm3, %xmm6                 // small term * 2^(n >> 1)
+        mulsd     %xmm3, %xmm4                 // high part * 2^(n >> 1)
+        movq      %xmm6, %xmm0                 // xmm0 = scaled small term (upper lane zeroed)
+        pxor      %xmm4, %xmm6                 // bit 63 set when the two parts differ in sign
+        psrad     $31, %xmm6                   // replicate each dword sign bit through the dword
+        pshufd    $85, %xmm6, %xmm6            // broadcast dword 1: all ones if the signs differ, else zero
+        psllq     $1, %xmm0
+        psrlq     $1, %xmm0                    // shift left then right by one: clears the sign bit
+        pxor      %xmm6, %xmm0                 // complement the magnitude when the signs differ
+        psrlq     $63, %xmm6                   // 1 when the signs differ, else 0
+        paddq     %xmm6, %xmm0                 // completes the twos-complement negation in that case
+        paddq     %xmm4, %xmm0                 // integer add of the small term to the bits of the high part
+        movl      $15, (%rsp)                  // status 15 stored at (%rsp); nothing in this function reads it back
+        jmp       .L_2TAG_PACKET_6.0.2
+.L_2TAG_PACKET_4.0.2:
+        addsd     %xmm4, %xmm0                 // high part + small term
+        mulsd     %xmm3, %xmm0                 // apply the remaining 2^(n >> 1)
+        jmp       ..B1.5
+.L_2TAG_PACKET_3.0.2:  // n >= 1023
+        addsd     %xmm4, %xmm0                 // high part + small term
+        mulsd     %xmm3, %xmm0                 // apply the remaining 2^(n >> 1)
+        pextrw    $3, %xmm0, %ecx
+        andl      $32752, %ecx
+        cmpl      $32752, %ecx                 // exponent field all ones means the product overflowed
+        jnb       .L_2TAG_PACKET_7.0.2
+        jmp       ..B1.5
+.L_2TAG_PACKET_2.0.2:  // -1022 - n > 52
+        paddd     %xmm6, %xmm3                 // xmm3 low lane = 2^(n >> 1)
+        addpd     %xmm2, %xmm0                 // reduced scale + reduced scale * (T_lo + P(y))
+        mulsd     %xmm3, %xmm0                 // apply the remaining 2^(n >> 1)
+        movl      $15, (%rsp)                  // status 15 stored at (%rsp); never read back here
+        jmp       .L_2TAG_PACKET_6.0.2
+.L_2TAG_PACKET_8.0.2:  // reached with eax = high word of |x| and |x| >= 1024
+        cmpl      $2146435072, %eax            // 2146435072 = 0x7FF00000, high word of Inf
+        jae       .L_2TAG_PACKET_9.0.2         // Inf or NaN
+        movl      12(%rsp), %eax               // high word of x including the sign
+        cmpl      $-2147483648, %eax
+        jae       .L_2TAG_PACKET_10.0.2        // sign bit set: large negative x
+        movsd     XMAX(%rip), %xmm0
+        mulsd     %xmm0, %xmm0                 // largest finite double squared: overflows
+.L_2TAG_PACKET_7.0.2:
+        movl      $14, (%rsp)                  // status 14 stored at (%rsp) on the overflow paths; never read back here
+        jmp       .L_2TAG_PACKET_6.0.2
+.L_2TAG_PACKET_10.0.2:
+        movsd     XMIN(%rip), %xmm0
+        mulsd     %xmm0, %xmm0                 // smallest normal double squared: underflows
+        movl      $15, (%rsp)                  // status 15 stored at (%rsp); never read back here
+        jmp       .L_2TAG_PACKET_6.0.2
+.L_2TAG_PACKET_9.0.2:  // exponent field of x is all ones
+        movl      8(%rsp), %edx                // low word of x
+        cmpl      $2146435072, %eax
+        ja        .L_2TAG_PACKET_11.0.2        // nonzero fraction bits in the high word: NaN
+        cmpl      $0, %edx
+        jne       .L_2TAG_PACKET_11.0.2        // nonzero fraction bits in the low word: NaN
+        movl      12(%rsp), %eax               // high word again, this time with the sign
+        cmpl      $2146435072, %eax
+        jne       .L_2TAG_PACKET_12.0.2        // not +Inf, so x is -Inf
+        movsd     INF(%rip), %xmm0             // exp(+Inf) = +Inf
+        jmp       ..B1.5
+.L_2TAG_PACKET_12.0.2:
+        movsd     ZERO(%rip), %xmm0            // exp(-Inf) = 0
+        jmp       ..B1.5
+.L_2TAG_PACKET_11.0.2:
+        movsd     8(%rsp), %xmm0
+        addsd     %xmm0, %xmm0                 // NaN + NaN: returns a NaN
+        jmp       ..B1.5
+.L_2TAG_PACKET_0.0.2:  // x failed the main range test
+        movl      12(%rsp), %eax
+        andl      $2147483647, %eax            // high word of |x|
+        cmpl      $1083179008, %eax            // 1083179008 = 0x40900000, high word of 1024.0
+        jae       .L_2TAG_PACKET_8.0.2         // |x| >= 1024, Inf or NaN
+        movsd     8(%rsp), %xmm0               // otherwise |x| < 2^-54
+        addsd     ONE_val(%rip), %xmm0         // return 1.0 + x
+        jmp       ..B1.5
+.L_2TAG_PACKET_6.0.2:  // common exit for the paths that stored a status word
+        movq      %xmm0, 16(%rsp)              // spill the result ...
+..B1.3:
+        movq      16(%rsp), %xmm0              // ... and reload it unchanged
+.L_2TAG_PACKET_13.0.2:  // no jump to this label appears in this function
+..B1.5:
+        addq      $24, %rsp                    // release the scratch frame
+..___tag_value_exp.4:  // unwind marker: stack pointer restored
+        ret       // result is in %xmm0
+..___tag_value_exp.5:  // unwind marker: end of the function
+END(exp)
+# -- End  exp
+	.section .rodata, "a"	// read-only constants used by exp
+	.align 16
+	.align 16	// repeated directive; the second one has no further effect
+cv:	// 96 bytes of packed doubles, each written as low word then high word; loaded with movapd at offsets 0..80
+	.long	1697350398	// offset 0: 64/ln2 (0x40571547652B82FE), lane 0
+	.long	1079448903
+	.long	1697350398	// same value, lane 1
+	.long	1079448903
+	.long	4277796864	// offset 16: high part of ln2/64 (0x3F862E42FEFA0000), lane 0
+	.long	1065758274
+	.long	4277796864	// same value, lane 1
+	.long	1065758274
+	.long	3164486458	// offset 32: low part of ln2/64 (0x3D1CF79ABC9E3B3A), lane 0
+	.long	1025308570
+	.long	3164486458	// same value, lane 1
+	.long	1025308570
+	.long	4294967294	// offset 48: coefficient of y^2, just below 0.5 (0x3FDFFFFFFFFFFFFE), lane 0
+	.long	1071644671
+	.long	4294967294	// same value, lane 1
+	.long	1071644671
+	.long	3811088480	// offset 64, lane 0: polynomial coefficient close to 1/720
+	.long	1062650204
+	.long	1432067621	// offset 64, lane 1: polynomial coefficient close to 1/24
+	.long	1067799893
+	.long	3230715663	// offset 80, lane 0: polynomial coefficient close to 1/120
+	.long	1065423125
+	.long	1431604129	// offset 80, lane 1: polynomial coefficient close to 1/6
+	.long	1069897045
+	.type	cv,@object
+	.size	cv,96
+	.align 16
+Shifter:	// 1.5*2^52 (0x4338000000000000) in both lanes; added and then subtracted in exp to round to an integer
+	.long	0
+	.long	1127743488
+	.long	0
+	.long	1127743488
+	.type	Shifter,@object
+	.size	Shifter,16
+	.align 16
+mmask:	// 0x00000000FFFFFFC0 per lane: keeps the low dword with its 6 lowest bits cleared
+	.long	4294967232
+	.long	0
+	.long	4294967232
+	.long	0
+	.type	mmask,@object
+	.size	mmask,16
+	.align 16
+bias:	// 65472 = 1023*64 per lane: the exponent bias, pre-scaled to match the n*64 value it is added to
+	.long	65472
+	.long	0
+	.long	65472
+	.long	0
+	.type	bias,@object
+	.size	bias,16
+	.align 16	// entries are fetched with movapd, which needs 16-byte alignment
+Tbl_addr:	// 64 entries of 16 bytes, indexed by j = m & 63; each entry is T_lo[j] (a double) followed by the fraction bits of T_hi[j] = 2^(j/64)
+	.long	0	// j = 0: T_lo low word
+	.long	0	// T_lo high word
+	.long	0	// T_hi fraction, low word
+	.long	0	// T_hi fraction, high word (exponent field left zero; exp ORs in the exponent of 2^n)
+	.long	235107661	// j = 1
+	.long	1018002367
+	.long	1048019040
+	.long	11418
+	.long	896005651	// j = 2
+	.long	1015861842
+	.long	3541402996
+	.long	22960
+	.long	1642514529	// j = 3
+	.long	1012987726
+	.long	410360776
+	.long	34629
+	.long	1568897900	// j = 4
+	.long	1016568486
+	.long	1828292879
+	.long	46424
+	.long	1882168529	// j = 5
+	.long	1010744893
+	.long	852742562
+	.long	58348
+	.long	509852888	// j = 6
+	.long	1017336174
+	.long	3490863952
+	.long	70401
+	.long	653277307	// j = 7
+	.long	1017431380
+	.long	2930322911
+	.long	82586
+	.long	1649557430	// j = 8
+	.long	1017729363
+	.long	1014845818
+	.long	94904
+	.long	1058231231	// j = 9
+	.long	1015777676
+	.long	3949972341
+	.long	107355
+	.long	1044000607	// j = 10
+	.long	1016786167
+	.long	828946858
+	.long	119943
+	.long	1151779725	// j = 11
+	.long	1015705409
+	.long	2288159958
+	.long	132667
+	.long	3819481236	// j = 12
+	.long	1016499965
+	.long	1853186616
+	.long	145530
+	.long	2552227826	// j = 13
+	.long	1015039787
+	.long	1709341917
+	.long	158533
+	.long	1829350193	// j = 14
+	.long	1015216097
+	.long	4112506593
+	.long	171677
+	.long	1913391795	// j = 15
+	.long	1015756674
+	.long	2799960843
+	.long	184965
+	.long	1303423926	// j = 16
+	.long	1015238005
+	.long	171030293
+	.long	198398
+	.long	1574172746	// j = 17
+	.long	1016061241
+	.long	2992903935
+	.long	211976
+	.long	3424156969	// j = 18
+	.long	1017196428
+	.long	926591434
+	.long	225703
+	.long	1938513547	// j = 19
+	.long	1017631273
+	.long	887463926
+	.long	239579
+	.long	2804567149	// j = 20
+	.long	1015390024
+	.long	1276261410
+	.long	253606
+	.long	631083525	// j = 21
+	.long	1017690182
+	.long	569847337
+	.long	267786
+	.long	1623370770	// j = 22
+	.long	1011049453
+	.long	1617004845
+	.long	282120
+	.long	3667985273	// j = 23
+	.long	1013894369
+	.long	3049340112
+	.long	296610
+	.long	3145379760	// j = 24
+	.long	1014403278
+	.long	3577096743
+	.long	311258
+	.long	2603100681	// j = 25
+	.long	1017152460
+	.long	1990012070
+	.long	326066
+	.long	3249202951	// j = 26
+	.long	1017448880
+	.long	1453150081
+	.long	341035
+	.long	419288974	// j = 27
+	.long	1016280325
+	.long	917841882
+	.long	356167
+	.long	3793507337	// j = 28
+	.long	1016095713
+	.long	3712504873
+	.long	371463
+	.long	728023093	// j = 29
+	.long	1016345318
+	.long	363667784
+	.long	386927
+	.long	2582678538	// j = 30
+	.long	1017123460
+	.long	2956612996
+	.long	402558
+	.long	7592966	// j = 31
+	.long	1016721543
+	.long	2186617380
+	.long	418360
+	.long	228611441	// j = 32
+	.long	1016696141
+	.long	1719614412
+	.long	434334
+	.long	2261665670	// j = 33
+	.long	1017457593
+	.long	1013258798
+	.long	450482
+	.long	544148907	// j = 34
+	.long	1017323666
+	.long	3907805043
+	.long	466805
+	.long	2383914918	// j = 35
+	.long	1017143586
+	.long	1447192520
+	.long	483307
+	.long	1176412038	// j = 36
+	.long	1017267372
+	.long	1944781190
+	.long	499988
+	.long	2882956373	// j = 37
+	.long	1013312481
+	.long	919555682
+	.long	516851
+	.long	3154077648	// j = 38
+	.long	1016528543
+	.long	2571947538
+	.long	533897
+	.long	348651999	// j = 39
+	.long	1016405780
+	.long	2604962540
+	.long	551129
+	.long	3253791412	// j = 40
+	.long	1015920431
+	.long	1110089947
+	.long	568549
+	.long	1509121860	// j = 41
+	.long	1014756995
+	.long	2568320822
+	.long	586158
+	.long	2617649212	// j = 42
+	.long	1017340090
+	.long	2966275556
+	.long	603959
+	.long	553214634	// j = 43
+	.long	1016457425
+	.long	2682146383
+	.long	621954
+	.long	730975783	// j = 44
+	.long	1014083580
+	.long	2191782032
+	.long	640145
+	.long	1486499517	// j = 45
+	.long	1016818996
+	.long	2069751140
+	.long	658534
+	.long	2595788928	// j = 46
+	.long	1016407932
+	.long	2990417244
+	.long	677123
+	.long	1853053619	// j = 47
+	.long	1015310724
+	.long	1434058175
+	.long	695915
+	.long	2462790535	// j = 48
+	.long	1015814775
+	.long	2572866477
+	.long	714911
+	.long	3693944214	// j = 49
+	.long	1017259110
+	.long	3092190714
+	.long	734114
+	.long	2979333550	// j = 50
+	.long	1017188654
+	.long	4076559942
+	.long	753526
+	.long	174054861	// j = 51
+	.long	1014300631
+	.long	2420883922
+	.long	773150
+	.long	816778419	// j = 52
+	.long	1014197934
+	.long	3716502172
+	.long	792987
+	.long	3507050924	// j = 53
+	.long	1015341199
+	.long	777507147
+	.long	813041
+	.long	1821514088	// j = 54
+	.long	1013410604
+	.long	3706687593
+	.long	833312
+	.long	920623539	// j = 55
+	.long	1016295433
+	.long	1242007931
+	.long	853805
+	.long	2789017511	// j = 56
+	.long	1014276997
+	.long	3707479175
+	.long	874520
+	.long	3586233004	// j = 57
+	.long	1015962192
+	.long	64696965
+	.long	895462
+	.long	474650514	// j = 58
+	.long	1016642419
+	.long	863738718
+	.long	916631
+	.long	1614448851	// j = 59
+	.long	1014281732
+	.long	3884662774
+	.long	938030
+	.long	2450082086	// j = 60
+	.long	1016164135
+	.long	2728693977
+	.long	959663
+	.long	1101668360	// j = 61
+	.long	1015989180
+	.long	3999357479
+	.long	981531
+	.long	835814894	// j = 62
+	.long	1015702697
+	.long	1533953344
+	.long	1003638
+	.long	1301400989	// j = 63
+	.long	1014466875
+	.long	2174652632
+	.long	1025985
+	.type	Tbl_addr,@object
+	.size	Tbl_addr,1024	// 64 entries * 16 bytes
+	.align 16
+ALLONES:	// 128 set bits; exp shifts this left to build a mask that clears low bits
+	.long	4294967295
+	.long	4294967295
+	.long	4294967295
+	.long	4294967295
+	.type	ALLONES,@object
+	.size	ALLONES,16
+	.align 16
+ebias:	// bit pattern of 1.0 (0x3FF0000000000000) in both lanes
+	.long	0
+	.long	1072693248
+	.long	0
+	.long	1072693248
+	.type	ebias,@object
+	.size	ebias,16
+	.align 4
+XMAX:	// 0x7FEFFFFFFFFFFFFF, the largest finite double; squared in exp to produce an overflow
+	.long	4294967295
+	.long	2146435071
+	.type	XMAX,@object
+	.size	XMAX,8
+	.align 4
+XMIN:	// 0x0010000000000000, the smallest normal double; squared in exp to produce an underflow
+	.long	0
+	.long	1048576
+	.type	XMIN,@object
+	.size	XMIN,8
+	.align 4
+INF:	// 0x7FF0000000000000 = +Inf, returned for exp(+Inf)
+	.long	0
+	.long	2146435072
+	.type	INF,@object
+	.size	INF,8
+	.align 4
+ZERO:	// +0.0, returned for exp(-Inf)
+	.long	0
+	.long	0
+	.type	ZERO,@object
+	.size	ZERO,8
+	.align 4
+ONE_val:	// 1.0, added to x on the tiny-argument path
+	.long	0
+	.long	1072693248
+	.type	ONE_val,@object
+	.size	ONE_val,8
+	.data	// nothing is emitted here before the next .section directive
+	.section .note.GNU-stack, ""	// empty section; by convention it marks the object as not needing an executable stack
+// -- Begin DWARF2 SEGMENT .eh_frame  (hand-encoded unwind data: one CIE followed by one FDE for exp)
+	.section .eh_frame,"a",@progbits
+.eh_frame_seg:
+	.align 1
+	.4byte 0x00000014	// CIE length: 20 bytes follow
+	.8byte 0x00527a0100000000	// CIE id 0, version 1, augmentation string zR
+	.8byte 0x08070c1b01107801	// code align 1, data align -8, return-address register 16, 1 augmentation byte 0x1b (pc-relative sdata4), then DW_CFA_def_cfa r7 (rsp) offset 8
+	.4byte 0x00000190	// DW_CFA_offset r16 at CFA-8, then two DW_CFA_nop bytes of padding
+	.4byte 0x0000001c	// FDE length: 28 bytes follow
+	.4byte 0x0000001c	// distance back from this field to the start of the CIE
+	.4byte ..___tag_value_exp.1-.	// pc-relative start address of exp
+	.4byte ..___tag_value_exp.5-..___tag_value_exp.1	// size of exp in bytes
+	.2byte 0x0400	// augmentation data length 0, then DW_CFA_advance_loc4
+	.4byte ..___tag_value_exp.3-..___tag_value_exp.1	// advance past the subq at function entry
+	.2byte 0x200e	// DW_CFA_def_cfa_offset 32 (8 for the return address + 24 of frame)
+	.byte 0x04	// DW_CFA_advance_loc4
+	.4byte ..___tag_value_exp.4-..___tag_value_exp.3	// advance past the addq that releases the frame
+	.2byte 0x080e	// DW_CFA_def_cfa_offset 8
+	.byte 0x00	// DW_CFA_nop padding
+# End