| Jingwei Zhang | 5d4f0e6 | 2014-10-31 18:29:18 +0800 | [diff] [blame] | 1 | /* | 
|  | 2 | Copyright (c) 2014, Intel Corporation | 
|  | 3 | All rights reserved. | 
|  | 4 |  | 
|  | 5 | Redistribution and use in source and binary forms, with or without | 
|  | 6 | modification, are permitted provided that the following conditions are met: | 
|  | 7 |  | 
|  | 8 | * Redistributions of source code must retain the above copyright notice, | 
|  | 9 | * this list of conditions and the following disclaimer. | 
|  | 10 |  | 
|  | 11 | * Redistributions in binary form must reproduce the above copyright notice, | 
|  | 12 | * this list of conditions and the following disclaimer in the documentation | 
|  | 13 | * and/or other materials provided with the distribution. | 
|  | 14 |  | 
|  | 15 | * Neither the name of Intel Corporation nor the names of its contributors | 
|  | 16 | * may be used to endorse or promote products derived from this software | 
|  | 17 | * without specific prior written permission. | 
|  | 18 |  | 
|  | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | 
|  | 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
|  | 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
|  | 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | 
|  | 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
|  | 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
|  | 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | 
|  | 26 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
|  | 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  | 29 | */ | 
|  | 30 |  | 
|  | 31 | /******************************************************************************/ | 
|  | 32 | //                     ALGORITHM DESCRIPTION | 
|  | 33 | //                     --------------------- | 
|  | 34 | // | 
|  | 35 | // X87 version: | 
|  | 36 | // Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt. | 
|  | 37 | // | 
|  | 38 | // SSE version: | 
|  | 39 | // Swap x, y if |x|<|y| | 
|  | 40 | // For x=2^k*x, get y=y*2^(-k) | 
|  | 41 | // Get S ~ sqrt(x^2+y^2)  (leading 1 + leading 25 mantissa bits) | 
|  | 42 | // | 
|  | 43 | // Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) + | 
|  | 44 | //                               + ( y^2 - (RN(x^2+y^2)-RN(x^2)) ) | 
|  | 45 | // | 
|  | 46 | // Result is 2^k*(S + Se),  where Se = S*e | 
|  | 47 | //        S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S ) | 
|  | 48 | // | 
|  | 49 | // Return 2^k*(S+Se) | 
|  | 50 | // | 
|  | 51 | // For |y/x|<2^(-64), return x | 
|  | 52 | // | 
|  | 53 | // For cases where maximum biased exponent is either greater than 7fdh or | 
|  | 54 | // below 32, take a special path to check for special cases (0, NaN, Inf), | 
|  | 55 | // possible overflow, and more accurate computation for denormal results | 
|  | 56 | // | 
|  | 57 | // Special cases: | 
|  | 58 | //  hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent | 
|  | 59 | //  hypot(x,+-0) is equivalent to fabs(x) | 
|  | 60 | //  hypot(x,y) = y if (x==NaN or x==INF) and y==INF | 
|  | 61 | //  hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!) | 
|  | 62 | //  hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF) | 
|  | 63 | // | 
|  | 64 | /******************************************************************************/ | 
|  | 65 |  | 
|  | 66 | #include <private/bionic_asm.h> | 
|  | 67 | # -- Begin  static_func | 
# static_func: position-independent lookup of the constant table.
#   Input:   none.
#   Output:  %eax = run-time address of static_const_table.
#   Only %eax is written; call, pop, lea and ret leave the flags untouched.
# No .globl directive for this symbol is visible in this file.
# NOTE(review): __bionic_asm_align is not defined in this file (presumably it
# comes from private/bionic_asm.h) -- confirm its value there.
|  | 68 | .text | 
|  | 69 | .align __bionic_asm_align | 
|  | 70 | .type static_func, @function | 
|  | 71 | static_func: | 
|  | 72 | ..B1.1: | 
# The call pushes the address of ..L2 and lands on ..L2, so the pop that
# follows yields the run-time address of ..L2 itself.
|  | 73 | call      ..L2 | 
|  | 74 | ..L2: | 
|  | 75 | popl      %eax | 
# Add the displacement from ..L2 to the global offset table:
# %eax = run-time GOT base.
|  | 76 | lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax | 
# Add the GOT-relative offset of the table: %eax = &static_const_table.
|  | 77 | lea       static_const_table@GOTOFF(%eax), %eax | 
|  | 78 | ret | 
|  | 79 | .size   static_func,.-static_func | 
|  | 80 | # -- End  static_func | 
|  | 81 |  | 
|  | 82 | # -- Begin  hypot | 
# hypot(x, y): computes sqrt(x*x + y*y) for two doubles passed on the stack.
# Every path that reaches the ret leaves exactly one value, the result, in
# x87 st(0).
#
# Frame layout after the prologue (%esp = %ebp - 152):
#   x is at 160(%esp) (= 8 + %ebp), y is at 168(%esp) (= 16 + %ebp)
#   saved %ebx is at 96(%esp)
#   scratch: (%esp) double, 16(%esp) 80-bit value, 32(%esp) and 40(%esp) doubles
# Register roles:
#   %ebx          = address of static_const_table (from static_func)
#   %xmm0, %xmm1  = |x|, |y|
#   %eax, %edx    = top 16 bits of |x|, |y| (called hx, hy below): biased
#                   exponent in bits 4..14, top 4 mantissa bits in bits 0..3
# ENTRY and END are macros that are not defined in this file (presumably from
# private/bionic_asm.h) -- TODO confirm what they emit.
|  | 83 | ENTRY(hypot) | 
|  | 84 | # parameter 1: 8 + %ebp | 
|  | 85 | # parameter 2: 16 + %ebp | 
|  | 86 | ..B2.1: | 
|  | 87 | ..B2.2: | 
# Prologue: set up the frame pointer, reserve 152 bytes, save %ebx
# (it is restored just before the ret).
|  | 88 | pushl     %ebp | 
|  | 89 | movl      %esp, %ebp | 
|  | 90 | subl      $152, %esp | 
|  | 91 | movl      %ebx, 96(%esp) | 
# %ebx = address of static_const_table.
|  | 92 | call      static_func | 
|  | 93 | movl      %eax, %ebx | 
# The first 16 bytes of the table are a mask with every bit set except the
# sign bit of each double; ANDing with it gives %xmm0 = |x|, %xmm1 = |y|.
|  | 94 | movapd    (%ebx), %xmm3 | 
|  | 95 | movsd     160(%esp), %xmm0 | 
|  | 96 | movsd     168(%esp), %xmm1 | 
|  | 97 | andpd     %xmm3, %xmm0 | 
|  | 98 | andpd     %xmm3, %xmm1 | 
# Word 3 is bits 48..63 of the low double: hx -> %eax, hy -> %edx.
|  | 99 | pextrw    $3, %xmm0, %eax | 
|  | 100 | pextrw    $3, %xmm1, %edx | 
# Threshold 24528 = 0x5FD0 (biased exponent 0x5FD). When both hx and hy are at
# or below it the plain x87 path is taken; otherwise the operands are
# classified at .L_2TAG_PACKET_0.0.2.
|  | 101 | cmpl      $24528, %eax | 
|  | 102 | ja        .L_2TAG_PACKET_0.0.2 | 
|  | 103 | cmpl      $24528, %edx | 
|  | 104 | ja        .L_2TAG_PACKET_0.0.2 | 
# Fast path: st(0) = sqrt(x*x + y*y) evaluated on the x87 stack, returned
# as is. The fxch instructions only choose which operand sits in st(0) for
# each self-multiply.
|  | 105 | .L_2TAG_PACKET_1.0.2: | 
|  | 106 | fldl      160(%esp) | 
|  | 107 | fldl      168(%esp) | 
|  | 108 | fxch      %st(1) | 
|  | 109 | fmul      %st(0), %st | 
|  | 110 | fxch      %st(1) | 
|  | 111 | nop | 
|  | 112 | fmul      %st(0), %st | 
|  | 113 | faddp     %st, %st(1) | 
|  | 114 | fsqrt | 
|  | 115 | jmp       .L_2TAG_PACKET_2.0.2 | 
# Classification path: at least one of hx, hy is above 0x5FD0.
# Constant 32752 = 0x7FF0: an exponent field of all ones, i.e. Inf or NaN.
# The movl between cmpl and jae does not alter the flags.
|  | 116 | .L_2TAG_PACKET_0.0.2: | 
|  | 117 | cmpl      $32752, %eax | 
|  | 118 | movl      %eax, %ecx | 
|  | 119 | jae       .L_2TAG_PACKET_3.0.2 | 
# %ecx = hx - hy; then the same Inf/NaN test for y.
|  | 120 | subl      %edx, %ecx | 
|  | 121 | cmpl      $32752, %edx | 
|  | 122 | jae       .L_2TAG_PACKET_3.0.2 | 
# Unsigned range trick: (hx - hy + 928) > 1856 exactly when hx - hy lies
# outside [-928, 928]. Since 928 = 58 << 4, the operands are then about 58
# binades or more apart and the result is formed as a plain sum at
# .L_2TAG_PACKET_4.0.2. Meanwhile %eax becomes hx + hy.
|  | 123 | addl      $928, %ecx | 
|  | 124 | addl      %edx, %eax | 
|  | 125 | cmpl      $1856, %ecx | 
|  | 126 | ja        .L_2TAG_PACKET_4.0.2 | 
# With hx + hy below 49056 (= 2 * 24528) the fast path is used after all.
|  | 127 | cmpl      $49056, %eax | 
|  | 128 | jb        .L_2TAG_PACKET_1.0.2 | 
# Same square, add, sqrt sequence as the fast path, but the result falls
# through into the range check below instead of returning directly.
|  | 129 | fldl      160(%esp) | 
|  | 130 | fldl      168(%esp) | 
|  | 131 | fxch      %st(1) | 
|  | 132 | fmul      %st(0), %st | 
|  | 133 | fxch      %st(1) | 
|  | 134 | nop | 
|  | 135 | fmul      %st(0), %st | 
|  | 136 | faddp     %st, %st(1) | 
|  | 137 | fsqrt | 
# Range check on st(0). Two copies are stored: a double at (%esp) (no pop)
# and the full 80-bit value at 16(%esp) (pops the x87 stack). The 16-bit word
# at 24(%esp) is the sign and 15-bit exponent of the 80-bit copy.
# Constant 17407 = 0x43FF = 0x3FFF + 0x400, the extended exponent of 2^1024,
# which is the first power of two beyond every finite double.
# On both outcomes the double-rounded copy at (%esp) is reloaded as the result.
|  | 138 | .L_2TAG_PACKET_5.0.2: | 
|  | 139 | fstl      (%esp) | 
|  | 140 | fstpt     16(%esp) | 
|  | 141 | xorl      %eax, %eax | 
|  | 142 | movw      24(%esp), %ax | 
|  | 143 | cmpl      $17407, %eax | 
|  | 144 | jae       .L_2TAG_PACKET_6.0.2 | 
|  | 145 | fldl      (%esp) | 
|  | 146 | jmp       .L_2TAG_PACKET_7.0.2 | 
# Operands far apart in magnitude: st(0) = |x| + |y|, then the same range
# check as above.
|  | 147 | .L_2TAG_PACKET_4.0.2: | 
|  | 148 | movsd     %xmm0, 32(%esp) | 
|  | 149 | movsd     %xmm1, 40(%esp) | 
|  | 150 | fldl      32(%esp) | 
|  | 151 | faddl     40(%esp) | 
|  | 152 | jmp       .L_2TAG_PACKET_5.0.2 | 
# Overflow branch: returns the double stored at (%esp).
# NOTE(review): nothing visible in this file reads %edx (set to 46) or the
# reloaded %xmm0 and %xmm1 before the ret, and .L_2TAG_PACKET_8.0.2 has no
# visible reference. This looks like a leftover hook for an error-reporting
# call -- confirm before removing anything.
|  | 153 | .L_2TAG_PACKET_6.0.2: | 
|  | 154 | movl      $46, %edx | 
|  | 155 | .L_2TAG_PACKET_8.0.2: | 
|  | 156 | movsd     160(%esp), %xmm0 | 
|  | 157 | movsd     168(%esp), %xmm1 | 
|  | 158 | fldl      (%esp) | 
|  | 159 | jmp       .L_2TAG_PACKET_7.0.2 | 
# Inf/NaN handling (reached only when x or y has an all-ones exponent).
# After the shufpd %xmm0 holds |x| in the low lane and |y| in the high lane.
# %xmm3 is loaded with the second 16 bytes of the table (+Inf in both lanes).
# The absolute values are also spilled to 32(%esp) and 40(%esp).
|  | 160 | .L_2TAG_PACKET_3.0.2: | 
|  | 161 | shufpd    $0, %xmm1, %xmm0 | 
|  | 162 | movdqa    %xmm0, %xmm2 | 
|  | 163 | movdqa    16(%ebx), %xmm3 | 
|  | 164 | movsd     %xmm0, 32(%esp) | 
|  | 165 | movsd     %xmm1, 40(%esp) | 
# Predicate 3 (unordered) of a value against itself flags NaN lanes;
# predicate 0 (equal) against +Inf flags infinite lanes.
# %edx = NaN mask, %eax = Inf mask; bit 0 is x, bit 1 is y.
|  | 166 | cmppd     $3, %xmm0, %xmm2 | 
|  | 167 | cmppd     $0, %xmm0, %xmm3 | 
|  | 168 | movmskpd  %xmm2, %edx | 
|  | 169 | movmskpd  %xmm3, %eax | 
# No NaN present means at least one operand is Inf: go return |x| + |y|.
|  | 170 | testl     %edx, %edx | 
|  | 171 | je        .L_2TAG_PACKET_9.0.2 | 
# A NaN is present. st(0) = |x| * |y|, which is the value returned when
# neither operand is infinite. An infinite operand takes priority over the
# NaN and is returned instead (branches below).
|  | 172 | fldl      32(%esp) | 
|  | 173 | fmull     40(%esp) | 
|  | 174 | testl     $1, %eax | 
|  | 175 | jne       .L_2TAG_PACKET_10.0.2 | 
|  | 176 | testl     $2, %eax | 
|  | 177 | jne       .L_2TAG_PACKET_11.0.2 | 
|  | 178 | jmp       .L_2TAG_PACKET_2.0.2 | 
# Inf without NaN: result = |x| + |y|.
|  | 179 | .L_2TAG_PACKET_9.0.2: | 
|  | 180 | fldl      32(%esp) | 
|  | 181 | faddl     40(%esp) | 
|  | 182 | jmp       .L_2TAG_PACKET_2.0.2 | 
# x is infinite: pop the product into the slot at 40(%esp), which is no
# longer needed, and return |x| from 32(%esp).
|  | 183 | .L_2TAG_PACKET_10.0.2: | 
|  | 184 | fstpl     40(%esp) | 
|  | 185 | fldl      32(%esp) | 
|  | 186 | jmp       .L_2TAG_PACKET_7.0.2 | 
# y is infinite: pop the product into the slot at 32(%esp) and return |y|
# from 40(%esp).
|  | 187 | .L_2TAG_PACKET_11.0.2: | 
|  | 188 | fstpl     32(%esp) | 
|  | 189 | fldl      40(%esp) | 
|  | 190 | jmp       .L_2TAG_PACKET_7.0.2 | 
# Common exit: restore %ebx, tear down the frame, return with the result
# in st(0).
|  | 191 | .L_2TAG_PACKET_2.0.2: | 
|  | 192 | .L_2TAG_PACKET_7.0.2: | 
|  | 193 | movl      96(%esp), %ebx | 
|  | 194 | movl      %ebp, %esp | 
|  | 195 | popl      %ebp | 
|  | 196 | ret | 
|  | 197 | ..B2.3: | 
|  | 198 | END(hypot) | 
|  | 199 | # -- End  hypot | 
|  | 200 |  | 
|  | 201 | # Start file scope ASM | 
# Makes the name hypotl refer to hypot. The macro used here is not defined in
# this file (presumably private/bionic_asm.h) -- TODO confirm its exact effect.
# NOTE(review): sharing one body assumes long double and double have the same
# representation on this target -- verify against the ABI in use.
| Christopher Ferris | 995b813 | 2015-03-13 17:43:52 -0700 | [diff] [blame] | 202 | ALIAS_SYMBOL(hypotl, hypot); | 
| Jingwei Zhang | 5d4f0e6 | 2014-10-31 18:29:18 +0800 | [diff] [blame] | 203 | # End file scope ASM | 
# Read-only constants used by hypot: 32 bytes, 16-byte aligned. The alignment
# matters because hypot reads the table with the aligned loads movapd and
# movdqa. The .align directive appears twice; the second one is redundant.
|  | 204 | .section .rodata, "a" | 
|  | 205 | .align 16 | 
|  | 206 | .align 16 | 
|  | 207 | static_const_table: | 
# Offset 0: two copies of the 64-bit pattern 0x7FFFFFFFFFFFFFFF (low dword
# 0xFFFFFFFF first, then high dword 0x7FFFFFFF). hypot ANDs it with its
# arguments to clear the sign bit.
|  | 208 | .long	4294967295 | 
|  | 209 | .long	2147483647 | 
|  | 210 | .long	4294967295 | 
|  | 211 | .long	2147483647 | 
# Offset 16: two copies of the 64-bit pattern 0x7FF0000000000000 (low dword 0,
# high dword 0x7FF00000), the encoding of +Inf as a double. hypot compares the
# absolute values of its arguments against it.
|  | 212 | .long	0 | 
|  | 213 | .long	2146435072 | 
|  | 214 | .long	0 | 
|  | 215 | .long	2146435072 | 
|  | 216 | .type	static_const_table,@object | 
|  | 217 | .size	static_const_table,32 | 
# The switch to .data is followed at once by another section switch, so
# nothing is emitted into .data here. The empty .note.GNU-stack section is
# the conventional GNU marker that this object does not need an executable
# stack.
|  | 218 | .data | 
|  | 219 | .section .note.GNU-stack, "" | 
|  | 220 | # End |