/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.

 * Neither the name of Intel Corporation nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Name under which the routine is emitted.  Defaults to strlen unless the
   including file has already defined STRLEN.  */
#ifndef STRLEN
# define STRLEN strlen
#endif

/* L(foo) pastes the .L prefix onto a label name, giving .Lfoo.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Fallback definitions of the CFI helper macros.  Each one forwards to the
   assembler directive of the same name and is only defined when the
   including environment has not supplied its own version.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY (name): open a function.  Marks NAME as a global symbol of type
   @function, aligns it to 16 bytes (.p2align 4), emits the label and starts
   the CFI region.  Only defined when the includer has not provided ENTRY.  */
#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

/* END (name): close the function opened by ENTRY.  Ends the CFI region and
   records the symbol size as the distance from NAME to the current
   location.  Only defined when the includer has not provided END.  */
#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

/* CFI bookkeeping for a 4-byte push of REG: the CFA offset grows by 4 and
   REG is recorded as saved at offset 0 from the current CFA base register.  */
#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

/* CFI bookkeeping for the matching pop: the CFA offset shrinks by 4 and REG
   is marked as restored.  */
#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

/* pushl/popl paired with the CFI annotations above, so the unwind
   information stays in step with the stack pointer.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/*
 * STRLEN -- length of a NUL-terminated byte string, SSE2, 32-bit x86.
 *
 * In:    4(%esp)  pointer to the string (the only stack slot read).
 * Out:   %eax     number of bytes that precede the first zero byte.
 * Uses:  %ecx, %edx, %xmm0-%xmm5, EFLAGS.  %edi is pushed and popped
 *        around its single use, so it is preserved.
 *
 * Register roles:
 *   %edx         the original string pointer; never modified after the load.
 *   %eax         16-byte aligned scan cursor (except in L(exit_less16),
 *                where it directly receives the result).
 *   %ecx         scratch, then the pmovmskb bitmask of zero bytes.
 *   %xmm0-%xmm3  all-zero comparands.  pcmpeqb overwrites its destination
 *                with the match mask, which is all-zero again whenever no
 *                NUL was found, so a register can be reused on the
 *                fall-through path without clearing it.
 *   %xmm4,%xmm5  byte-wise minima inside the 64-byte main loop.
 *
 * Outline:
 *   1. Probe the first (possibly unaligned) 16 bytes.
 *   2. Probe sixteen aligned 16-byte blocks one at a time (unrolled).
 *   3. Step block by block until the cursor is 64-byte aligned.
 *   4. Main loop: 64 bytes per iteration using pminub to fold four blocks
 *      into one compare, then locate the exact block and byte.
 */
	.section .text.sse2,"ax",@progbits
ENTRY (STRLEN)
	mov	4(%esp), %edx
	mov	%edx, %ecx
	and	$0x3f, %ecx		/* offset inside the 64-byte block */
	pxor	%xmm0, %xmm0
	/* With an offset of at most 0x30 a 16-byte load from the string
	   pointer ends at offset 0x3f or earlier, i.e. it stays inside the
	   same 64-byte block, so the unaligned load below is used.  */
	cmp	$0x30, %ecx
	ja	L(next)
	movdqu	(%edx), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit_less16)
	mov	%edx, %eax
	and	$-16, %eax		/* cursor = pointer rounded down to 16 */
	jmp	L(align16_start)

	/* Offset is 0x31..0x3f: read the aligned 16-byte block containing
	   the pointer and discard match bits for bytes before the pointer.  */
L(next):
	mov	%edx, %eax
	and	$-16, %eax
	PUSH	(%edi)
	pcmpeqb	(%eax), %xmm0
	mov	$-1, %edi
	/* shl only uses the low five bits of %cl; after this subtraction
	   they equal (pointer & 15), the number of leading bytes to skip.  */
	sub	%eax, %ecx
	shl	%cl, %edi
	pmovmskb %xmm0, %ecx
	and	%edi, %ecx
	POP	(%edi)			/* popl leaves the flags from AND intact */
	jnz	L(exit_unaligned)
	pxor	%xmm0, %xmm0		/* may hold matches from skipped bytes */

	/* Round 1 of 4: blocks at cursor+16 .. cursor+64.  */
L(align16_start):
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3
	pcmpeqb	16(%eax), %xmm0
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	/* Round 2 of 4.  The cursor advances by 64 after the first compare
	   so that the shared exit16..exit64 stubs can be reused.  */
	pcmpeqb	80(%eax), %xmm0
	add	$64, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	/* Round 3 of 4.  */
	pcmpeqb	80(%eax), %xmm0
	add	$64, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	/* Round 4 of 4.  */
	pcmpeqb	80(%eax), %xmm0
	add	$64, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	test	%ecx, %ecx
	jnz	L(exit64)

	/* Bring the cursor to a 64-byte boundary.  Everything up to and
	   including the block at cursor+64 has been checked.  When the loop
	   is entered, the 64 bytes at the cursor may already have been
	   checked; scanning them again is harmless.  */
	test	$0x3f, %eax
	jz	L(align64_loop)

	pcmpeqb	80(%eax), %xmm0
	add	$80, %eax		/* cursor now names the block just probed */
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit)

	test	$0x3f, %eax
	jz	L(align64_loop)

	pcmpeqb	16(%eax), %xmm1
	add	$16, %eax
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit)

	test	$0x3f, %eax
	jz	L(align64_loop)

	pcmpeqb	16(%eax), %xmm2
	add	$16, %eax
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit)

	/* The cursor is always 16-byte aligned, so after the +80 step and
	   two +16 steps it is necessarily a multiple of 64 here.  No further
	   test or probe is needed: fall through into the main loop.  */

	/* Main loop: 64 bytes per iteration.  The unsigned byte minimum of
	   the four blocks contains a zero byte iff any block does.  %xmm0 is
	   all-zero on every path that reaches this label.  */
	.p2align 4
L(align64_loop):
	movaps	(%eax), %xmm4
	pminub	16(%eax), %xmm4
	movaps	32(%eax), %xmm5
	pminub	48(%eax), %xmm5
	add	$64, %eax
	pminub	%xmm4, %xmm5
	pcmpeqb	%xmm0, %xmm5
	pmovmskb %xmm5, %ecx
	test	%ecx, %ecx
	jz	L(align64_loop)

	/* A NUL lies in the 64 bytes just scanned (cursor-64 .. cursor-1).
	   Rebase the cursor by -80 so the four blocks sit at +16 .. +64 and
	   the shared exit stubs apply.  */
	pcmpeqb	-64(%eax), %xmm0
	sub	$80, %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(exit16)

	pcmpeqb	32(%eax), %xmm1
	pmovmskb %xmm1, %ecx
	test	%ecx, %ecx
	jnz	L(exit32)

	pcmpeqb	48(%eax), %xmm2
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(exit48)

	/* Not in the first three blocks, so it must be in the fourth; the
	   mask is used without a test.  */
	pcmpeqb	64(%eax), %xmm3
	pmovmskb %xmm3, %ecx
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$64, %eax
	ret

	/* Exit stubs.  Each computes
	     (cursor - string pointer) + index of lowest set mask bit + K
	   where K is the offset of the probed block from the cursor.  */

	/* K = 0: the cursor names the probed block.  */
	.p2align 4
L(exit):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	ret

	/* NUL found by the unaligned load at the string pointer itself: the
	   bit index is the length.  */
L(exit_less16):
	bsf	%ecx, %eax
	ret

	/* NUL found in the aligned block containing the string pointer;
	   cursor - pointer is zero or negative and the bit index is counted
	   from the cursor.  */
	.p2align 4
L(exit_unaligned):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	ret

	.p2align 4
L(exit16):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$16, %eax
	ret

	.p2align 4
L(exit32):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$32, %eax
	ret

	.p2align 4
L(exit48):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$48, %eax
	ret

	.p2align 4
L(exit64):
	sub	%edx, %eax
	bsf	%ecx, %ecx
	add	%ecx, %eax
	add	$64, %eax
	ret

END (STRLEN)
