/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

 | 31 | #ifndef STRLEN | 
| Haibo Huang | b9244ff | 2018-08-11 10:12:13 -0700 | [diff] [blame] | 32 | # define STRLEN strlen_generic | 
| Varvara Rainchik | 5a92284 | 2014-04-24 15:41:20 +0400 | [diff] [blame] | 33 | #endif | 
 | 34 |  | 
 | 35 | #ifndef L | 
 | 36 | # define L(label)	.L##label | 
 | 37 | #endif | 
 | 38 |  | 
 | 39 | #ifndef cfi_startproc | 
 | 40 | # define cfi_startproc	.cfi_startproc | 
 | 41 | #endif | 
 | 42 |  | 
 | 43 | #ifndef cfi_endproc | 
 | 44 | # define cfi_endproc	.cfi_endproc | 
 | 45 | #endif | 
 | 46 |  | 
 | 47 | #ifndef cfi_rel_offset | 
 | 48 | # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off | 
 | 49 | #endif | 
 | 50 |  | 
 | 51 | #ifndef cfi_restore | 
 | 52 | # define cfi_restore(reg)	.cfi_restore reg | 
 | 53 | #endif | 
 | 54 |  | 
 | 55 | #ifndef cfi_adjust_cfa_offset | 
 | 56 | # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off | 
 | 57 | #endif | 
 | 58 |  | 
 | 59 | #ifndef ENTRY | 
 | 60 | # define ENTRY(name)             \ | 
 | 61 | 	.type name,  @function;  \ | 
 | 62 | 	.globl name;             \ | 
 | 63 | 	.p2align 4;              \ | 
 | 64 | name:                            \ | 
 | 65 | 	cfi_startproc | 
 | 66 | #endif | 
 | 67 |  | 
 | 68 | #ifndef END | 
 | 69 | # define END(name)               \ | 
 | 70 | 	cfi_endproc;             \ | 
 | 71 | 	.size name,	.-name | 
 | 72 | #endif | 
 | 73 |  | 
 | 74 | #define CFI_PUSH(REG)                   \ | 
 | 75 | 	cfi_adjust_cfa_offset (4);      \ | 
 | 76 | 	cfi_rel_offset (REG, 0) | 
 | 77 |  | 
 | 78 | #define CFI_POP(REG)                    \ | 
 | 79 | 	cfi_adjust_cfa_offset (-4);     \ | 
 | 80 | 	cfi_restore (REG) | 
 | 81 |  | 
 | 82 | #define PUSH(REG) pushl REG; CFI_PUSH (REG) | 
 | 83 | #define POP(REG) popl REG; CFI_POP (REG) | 
 | 84 |  | 
	.section .text.sse2,"ax",@progbits
/*
 * STRLEN (strlen_generic unless STRLEN was predefined)
 *
 * Returns the number of bytes that precede the first NUL byte of the
 * string whose address is the first stack argument.
 *
 * In:     4(%esp) = s, start of the string
 * Out:    %eax    = length in bytes
 * Uses:   %ecx, %edx, %xmm0-%xmm5, flags
 *         (%edi is used briefly on one path but saved and restored)
 *
 * Register roles for the whole routine:
 *   %edx          s, never modified after the initial load
 *   %eax          16-byte aligned scan cursor
 *   %ecx          byte mask produced by pmovmskb (bit i set <=> byte i is NUL)
 *   %xmm0-%xmm3   compare registers; each one is all-zero whenever it is
 *                 about to be used, because a pcmpeqb that found no NUL
 *                 leaves its destination all-zero
 *
 * Outline:
 *   1. Examine the first (possibly unaligned) 16 bytes.
 *   2. Examine 16 aligned 16-byte blocks one by one.
 *   3. Probe up to three more blocks until the cursor is 64-byte aligned.
 *   4. Loop over 64-byte chunks, folding four blocks with pminub.
 *   5. Locate the NUL inside the chunk that stopped the loop.
 */
ENTRY (STRLEN)
	mov	4(%esp), %edx
	mov	%edx, %ecx
	and	$0x3f, %ecx		/* ecx = offset of s inside its 64-byte chunk */
	pxor	%xmm0, %xmm0
	cmp	$0x30, %ecx
	ja	L(next)

	/* Offset <= 48: an unaligned 16-byte load from s stays inside the
	   64-byte chunk containing s, so it cannot reach into another page.  */
	movdqu	(%edx), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit_less16)
	mov	%edx, %eax
	and	$-16, %eax		/* cursor = s rounded down to 16 */
	jmp	L(align16_start)

L(next):
	/* Offset > 48: read the aligned block that contains s instead and
	   discard the mask bits that belong to bytes located before s.  */
	mov	%edx, %eax
	and	$-16, %eax
	PUSH	(%edi)
	pcmpeqb	(%eax), %xmm0
	mov	$-1, %edi
	sub	%eax, %ecx		/* low 5 bits of cl == s & 15 (shl masks cl to 5 bits) */
	shl	%cl, %edi		/* edi = mask with the low (s & 15) bits cleared */
	pmovmskb %xmm0, %ecx
	and	%edi, %ecx
	POP	(%edi)			/* popl leaves the flags set by the and intact */
	jnz	L(exit_unaligned)
	pxor	%xmm0, %xmm0		/* bytes before s may have matched: re-zero */

L(align16_start):
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3

	/* Blocks at cursor+16 .. cursor+64.  */
	pcmpeqb	16(%eax), %xmm0
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	/* Same four probes, three more times.  The cursor is advanced by 64
	   right after the first load of each round, so the exitNN offsets
	   keep their meaning.  */
	pcmpeqb	80(%eax), %xmm0
	add	$64, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	pcmpeqb	80(%eax), %xmm0
	add	$64, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	pcmpeqb	80(%eax), %xmm0
	add	$64, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	/* Bring the cursor to a 64-byte boundary.  Here eax == (s & -16) + 192,
	   a multiple of 16.  When the loop is entered it may re-read a few
	   blocks that were already checked; that only costs time.  */
	test	$0x3f, %eax
	jz	L(align64_loop)

	pcmpeqb	80(%eax), %xmm0
	add	$80, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit)

	test	$0x3f, %eax
	jz	L(align64_loop)

	pcmpeqb	16(%eax), %xmm1
	add	$16, %eax
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit)

	test	$0x3f, %eax
	jz	L(align64_loop)

	pcmpeqb	16(%eax), %xmm2
	add	$16, %eax
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit)

	/* No further alignment test is needed.  The cursor is a multiple of 16
	   and the three failed tests above each excluded one residue mod 64
	   (after advances of 80 and 16 bytes), so after this last 16-byte step
	   eax is necessarily a multiple of 64.  Fall through into the loop.  */

	.p2align 4
L(align64_loop):
	/* The unsigned byte-wise minimum of four blocks has a zero byte iff
	   at least one of the 64 bytes is NUL.  xmm0 is all-zero here.  */
	movaps	(%eax),	%xmm4
	pminub	16(%eax), 	%xmm4
	movaps	32(%eax), 	%xmm5
	pminub	48(%eax), 	%xmm5
	add	$64, 	%eax
	pminub	%xmm4,	%xmm5
	pcmpeqb	%xmm0,	%xmm5
	pmovmskb %xmm5,	%ecx
	test	%ecx,	%ecx
	jz	L(align64_loop)

	/* The chunk at eax-64 holds a NUL; find the block.  After the sub,
	   eax = chunk - 16, so exit16/32/48/64 address blocks 0..3.  */
	pcmpeqb	-64(%eax), %xmm0
	sub	$80, 	%eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	/* Blocks 0..2 are clean, so the NUL is in block 3: no test needed.  */
	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$64, %eax
	ret

	/* Exit stubs.  Each returns (block address - s) + index of the lowest
	   set mask bit.  L(exit): eax is the block address itself.  */
	.p2align 4
L(exit):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	ret

	/* Mask came from the unaligned load at s: the bit index is the length.  */
L(exit_less16):
	bsf	%ecx, %eax
	ret

	/* eax = s rounded down to 16; bits for bytes before s were cleared.  */
	.p2align 4
L(exit_unaligned):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	ret

	/* L(exitNN): the NUL lies in the block at eax + NN.  */
	.p2align 4
L(exit16):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$16, %eax
	ret

	.p2align 4
L(exit32):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$32, %eax
	ret

	.p2align 4
L(exit48):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$48, %eax
	ret

	.p2align 4
L(exit64):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$64, %eax
	ret

END (STRLEN)
