/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* ENTRY/END used below are not defined in this file; presumably they come
   from this header -- confirm. */
#include <private/bionic_asm.h>

/* FOR_ATOM is defined before cache.h is included; presumably it selects the
   Atom values of SHARED_CACHE_SIZE / DATA_CACHE_SIZE, which are used below
   but not defined in this file -- confirm against cache.h. */
#define FOR_ATOM
#include "cache.h"

/* L(name) spells a file-local label: the .L prefix keeps it out of the
   exported symbol table. */
#ifndef L
# define L(label) .L##label
#endif

/* ALIGN(n) pads to a 2^n-byte boundary. */
#ifndef ALIGN
# define ALIGN(n) .p2align n
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves 4 bytes further
   from %esp and REG is recorded as saved at offset 0 from %esp. */
#define CFI_PUSH(REG) \
  .cfi_adjust_cfa_offset 4; \
  .cfi_rel_offset REG, 0

/* Unwind bookkeeping for the matching pop: undo the CFA adjustment and mark
   REG as holding its original value again. */
#define CFI_POP(REG) \
  .cfi_adjust_cfa_offset -4; \
  .cfi_restore REG

/* Push/pop a 32-bit register and keep the unwind information in step. */
#define PUSH(REG) pushl REG; CFI_PUSH(REG)
#define POP(REG) popl REG; CFI_POP(REG)

/* Stack offsets of the arguments, valid once ENTRANCE has pushed EBX:
   4 bytes of return address + 4 bytes of saved EBX = 8.
   The arguments follow in order: dst, chr, len and, for the _chk entry
   point only, the destination buffer size. */
#define PARMS 8 /* Preserve EBX. */
#define DST PARMS
#define CHR (DST+4)
#define LEN (CHR+4)
#define CHK_DST_LEN (LEN+4)
/* The return value is the original dst pointer, reloaded from the stack. */
#define SETRTNVAL movl DST(%esp), %eax

/* ENTRANCE saves EBX (BRANCH_TO_JMPTBL_ENTRY overwrites it).
   RETURN_END restores EBX and returns.
   RETURN does the same, then re-applies the "EBX pushed" unwind state for the
   code that textually follows the ret, which is still reached with EBX on
   the stack.  RETURN_END is used for the last return in the function. */
#define ENTRANCE PUSH(%ebx);
#define RETURN_END POP(%ebx); ret
#define RETURN RETURN_END; CFI_PUSH(%ebx)
/* Jump-table entry: offset of label I relative to the table base B. */
#define JMPTBL(I, B) I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.
   In:       ECX = table index (number of bytes still to write),
             EDX = address of the first byte still to write.
   Out:      EDX = EDX + ECX (one past the last byte to write).
   Clobbers: EBX, flags.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
  /* We first load PC into EBX.  */ \
  call __x86.get_pc_thunk.bx; \
  /* Get the address of the jump table.  */ \
  add $(TABLE - .), %ebx; \
  /* Get the entry and convert the relative offset to the \
     absolute address.  */ \
  add (%ebx,%ecx,4), %ebx; \
  add %ecx, %edx; \
  /* We loaded the jump table and adjusted EDX. Go.  */ \
  jmp *%ebx

/* __memset_chk_atom: memset with a destination-size check.
   Stack arguments (after the return address): dst, chr, len, dst_len.
   If len <= dst_len (unsigned compare), continue in memset_atom at
   L(memset_length_loaded) with ECX = len and EBX already pushed.
   Otherwise restore the stack and tail-jump to __memset_chk_fail, which is
   defined outside this file.  */
ENTRY(__memset_chk_atom)
  ENTRANCE

  movl LEN(%esp), %ecx
  cmpl CHK_DST_LEN(%esp), %ecx
  jna L(memset_length_loaded)

  POP(%ebx) // Undo ENTRANCE without returning.
  jmp __memset_chk_fail
END(__memset_chk_atom)

/* memset_atom: fill len bytes at dst with the low byte of chr.
   Stack arguments (after the return address): dst, chr, len.
   Returns dst in EAX.
   Register roles from L(memset_length_loaded) on:
     ECX = number of bytes still to write,
     EAX = fill byte replicated into all four bytes,
     EDX = address of the next byte to write.
   Lengths below 32 are dispatched through L(table_less_32bytes); anything
   larger goes to L(32bytesormore).  */
  .section .text.sse2,"ax",@progbits
  ALIGN(4)
ENTRY(memset_atom)
  ENTRANCE

  movl LEN(%esp), %ecx
  /* __memset_chk_atom joins here with ECX = len and EBX pushed.  */
L(memset_length_loaded):
  movzbl CHR(%esp), %eax
  movb %al, %ah
  /* Fill the whole EAX with pattern.  */
  movl %eax, %edx
  shl $16, %eax
  or %edx, %eax
  movl DST(%esp), %edx
  cmp $32, %ecx
  jae L(32bytesormore)

L(write_less32bytes):
  BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))


/* Jump table for lengths 0..31: entry N is the offset of L(write_Nbytes)
   from the start of the table.  Used by BRANCH_TO_JMPTBL_ENTRY with ECX as
   the index.  */
  .pushsection .rodata.sse2,"a",@progbits
  ALIGN(2)
L(table_less_32bytes):
  .int JMPTBL(L(write_0bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_1bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_2bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_3bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_4bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_5bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_6bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_7bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_8bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_9bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_10bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_11bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_12bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_13bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_14bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_15bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_16bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_17bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_18bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_19bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_20bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_21bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_22bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_23bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_24bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_25bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_26bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_27bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_28bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_29bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_30bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_31bytes), L(table_less_32bytes))
  .popsection

/* Tail stores for lengths 0..31, entered through L(table_less_32bytes).
   On entry EDX = one past the last byte to write and EAX = fill byte
   replicated into all four bytes.  There is one fall-through chain per value
   of (len mod 4): 4-byte stores work toward the end of the buffer, then the
   0-3 byte remainder is written, dst is loaded as the return value and the
   function returns.  */
  ALIGN(4)
  /* len mod 4 == 0 */
L(write_28bytes):
  movl %eax, -28(%edx)
L(write_24bytes):
  movl %eax, -24(%edx)
L(write_20bytes):
  movl %eax, -20(%edx)
L(write_16bytes):
  movl %eax, -16(%edx)
L(write_12bytes):
  movl %eax, -12(%edx)
L(write_8bytes):
  movl %eax, -8(%edx)
L(write_4bytes):
  movl %eax, -4(%edx)
L(write_0bytes):
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* len mod 4 == 1 */
L(write_29bytes):
  movl %eax, -29(%edx)
L(write_25bytes):
  movl %eax, -25(%edx)
L(write_21bytes):
  movl %eax, -21(%edx)
L(write_17bytes):
  movl %eax, -17(%edx)
L(write_13bytes):
  movl %eax, -13(%edx)
L(write_9bytes):
  movl %eax, -9(%edx)
L(write_5bytes):
  movl %eax, -5(%edx)
L(write_1bytes):
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* len mod 4 == 2 */
L(write_30bytes):
  movl %eax, -30(%edx)
L(write_26bytes):
  movl %eax, -26(%edx)
L(write_22bytes):
  movl %eax, -22(%edx)
L(write_18bytes):
  movl %eax, -18(%edx)
L(write_14bytes):
  movl %eax, -14(%edx)
L(write_10bytes):
  movl %eax, -10(%edx)
L(write_6bytes):
  movl %eax, -6(%edx)
L(write_2bytes):
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* len mod 4 == 3 */
L(write_31bytes):
  movl %eax, -31(%edx)
L(write_27bytes):
  movl %eax, -27(%edx)
L(write_23bytes):
  movl %eax, -23(%edx)
L(write_19bytes):
  movl %eax, -19(%edx)
L(write_15bytes):
  movl %eax, -15(%edx)
L(write_11bytes):
  movl %eax, -11(%edx)
L(write_7bytes):
  movl %eax, -7(%edx)
L(write_3bytes):
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Lengths of 32 bytes or more: build the 16-byte pattern in xmm0, bring EDX
   up to a 16-byte boundary, then either dispatch the remaining (< 128) bytes
   through L(table_16_128bytes) or continue at L(128bytesormore).  */
  ALIGN(4)
/* ECX >= 32; EDX = dst, with no alignment established yet.  */
L(32bytesormore):
  /* Fill xmm0 with the pattern.  */
  movd %eax, %xmm0
  pshufd $0, %xmm0, %xmm0
  testl $0xf, %edx
  jz L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
  /* One unaligned 16-byte store covers the head of the buffer.  */
  movdqu %xmm0, (%edx)
  /* Round EDX up to the next 16-byte boundary and shrink ECX by the number
     of bytes skipped (EAX = old EDX - new EDX, a negative value).  */
  movl %edx, %eax
  and $-16, %edx
  add $16, %edx
  sub %edx, %eax
  add %eax, %ecx
  /* EAX was used as scratch; reload the 4-byte pattern from xmm0.  */
  movd %xmm0, %eax

  ALIGN(4)
  /* EDX is 16-byte aligned from here on.  */
L(aligned_16):
  cmp $128, %ecx
  jae L(128bytesormore)

L(aligned_16_less128bytes):
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* ECX >= 128 and EDX is 16-byte aligned.  Choose a strategy by size:
     ECX >= SHARED_CACHE_SIZE -> L(128bytesormore_nt_start), entered with the
                                 extra EBX push still on the stack;
     ECX >= DATA_CACHE_SIZE   -> L(128bytes_L2_normal);
     otherwise                -> the 128-byte aligned-store loop below.  */
  ALIGN(4)
L(128bytesormore):
  PUSH(%ebx)
  mov $SHARED_CACHE_SIZE, %ebx
  cmp %ebx, %ecx
  jae L(128bytesormore_nt_start)


  POP(%ebx)
  /* Unwind directive used at L(128bytesormore_nt_start), which is reached
     with the PUSH above still in effect.  */
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
  cmp $DATA_CACHE_SIZE, %ecx

  jae L(128bytes_L2_normal)
  /* Bias ECX by -128 so the borrow from each "sub $128" below means fewer
     than 128 bytes remain after the stores that follow it.  */
  subl $128, %ecx
  /* Loop unrolled twice.  The movdqa and lea instructions leave the flags
     from "sub" intact for the conditional jumps.  */
L(128bytesormore_normal):
  sub $128, %ecx
  movdqa %xmm0, (%edx)
  movdqa %xmm0, 0x10(%edx)
  movdqa %xmm0, 0x20(%edx)
  movdqa %xmm0, 0x30(%edx)
  movdqa %xmm0, 0x40(%edx)
  movdqa %xmm0, 0x50(%edx)
  movdqa %xmm0, 0x60(%edx)
  movdqa %xmm0, 0x70(%edx)
  lea 128(%edx), %edx
  jb L(128bytesless_normal)


  sub $128, %ecx
  movdqa %xmm0, (%edx)
  movdqa %xmm0, 0x10(%edx)
  movdqa %xmm0, 0x20(%edx)
  movdqa %xmm0, 0x30(%edx)
  movdqa %xmm0, 0x40(%edx)
  movdqa %xmm0, 0x50(%edx)
  movdqa %xmm0, 0x60(%edx)
  movdqa %xmm0, 0x70(%edx)
  lea 128(%edx), %edx
  jae L(128bytesormore_normal)

L(128bytesless_normal):
  /* Undo the last subtraction: ECX = bytes still to write (0..127).  */
  add $128, %ecx
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* 128-byte store loop for DATA_CACHE_SIZE <= ECX < SHARED_CACHE_SIZE.
   Each iteration prefetches 0x380 and 0x3c0 bytes ahead of EDX, writes 128
   bytes with aligned stores and repeats while at least 128 bytes remain.
   The remaining 0..127 bytes go through L(table_16_128bytes).  */
  ALIGN(4)
L(128bytes_L2_normal):
  prefetcht0 0x380(%edx)
  prefetcht0 0x3c0(%edx)
  sub $128, %ecx
  movdqa %xmm0, (%edx)
  movaps %xmm0, 0x10(%edx)
  movaps %xmm0, 0x20(%edx)
  movaps %xmm0, 0x30(%edx)
  movaps %xmm0, 0x40(%edx)
  movaps %xmm0, 0x50(%edx)
  movaps %xmm0, 0x60(%edx)
  movaps %xmm0, 0x70(%edx)
  add $128, %edx
  cmp $128, %ecx
  jae L(128bytes_L2_normal)

L(128bytesless_L2_normal):
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* Path for ECX >= SHARED_CACHE_SIZE.  Entered from L(128bytesormore) with
   EBX = SHARED_CACHE_SIZE and one extra copy of EBX pushed on the stack.
   RESTORE_EBX_STATE records that extra push for the unwinder.
   Phase 1 writes SHARED_CACHE_SIZE rounded down to a multiple of 128 with
   ordinary aligned stores, counted in EBX.
   Phase 2 writes the rest in 128-byte blocks with non-temporal stores
   (movntdq), followed by sfence.
   The final 0..127 bytes go through L(table_16_128bytes).  */
  RESTORE_EBX_STATE
L(128bytesormore_nt_start):
  /* ECX = len - SHARED_CACHE_SIZE + (SHARED_CACHE_SIZE mod 128): the bytes
     left after phase 1, which only writes whole 128-byte blocks.  */
  sub %ebx, %ecx
  mov %ebx, %eax
  and $0x7f, %eax
  add %eax, %ecx
  /* EAX was used as scratch; reload the 4-byte pattern from xmm0.  */
  movd %xmm0, %eax
  ALIGN(4)
L(128bytesormore_shared_cache_loop):
  prefetcht0 0x3c0(%edx)
  prefetcht0 0x380(%edx)
  sub $0x80, %ebx
  movdqa %xmm0, (%edx)
  movdqa %xmm0, 0x10(%edx)
  movdqa %xmm0, 0x20(%edx)
  movdqa %xmm0, 0x30(%edx)
  movdqa %xmm0, 0x40(%edx)
  movdqa %xmm0, 0x50(%edx)
  movdqa %xmm0, 0x60(%edx)
  movdqa %xmm0, 0x70(%edx)
  add $0x80, %edx
  cmp $0x80, %ebx
  jae L(128bytesormore_shared_cache_loop)
  /* Skip phase 2 (and its sfence) when fewer than 128 bytes remain.  */
  cmp $0x80, %ecx
  jb L(shared_cache_loop_end)
  ALIGN(4)
L(128bytesormore_nt):
  sub $0x80, %ecx
  movntdq %xmm0, (%edx)
  movntdq %xmm0, 0x10(%edx)
  movntdq %xmm0, 0x20(%edx)
  movntdq %xmm0, 0x30(%edx)
  movntdq %xmm0, 0x40(%edx)
  movntdq %xmm0, 0x50(%edx)
  movntdq %xmm0, 0x60(%edx)
  movntdq %xmm0, 0x70(%edx)
  add $0x80, %edx
  cmp $0x80, %ecx
  jae L(128bytesormore_nt)
  sfence
L(shared_cache_loop_end):
  /* Drop the extra EBX pushed at L(128bytesormore).  */
  POP(%ebx)
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


/* Jump table for 0..127 remaining bytes once EDX is 16-byte aligned: entry N
   is the offset of L(aligned_16_Nbytes) from the start of the table.  Used
   by BRANCH_TO_JMPTBL_ENTRY with ECX as the index.  */
  .pushsection .rodata.sse2,"a",@progbits
  ALIGN(2)
L(table_16_128bytes):
  .int JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
  .popsection

/* Tail stores entered through L(table_16_128bytes), chains for
   (remaining mod 16) = 0..3.
   On entry EDX = one past the last byte to write, xmm0 = fill byte in all 16
   lanes, EAX = fill byte in all four bytes.  The buffer position before the
   tail was 16-byte aligned, so every movdqa target below is aligned.  Each
   chain falls through 16-byte stores toward the end of the buffer, writes
   the final 0..3 bytes, loads dst as the return value and returns.  */
  ALIGN(4)
  /* remaining mod 16 == 0 */
L(aligned_16_112bytes):
  movdqa %xmm0, -112(%edx)
L(aligned_16_96bytes):
  movdqa %xmm0, -96(%edx)
L(aligned_16_80bytes):
  movdqa %xmm0, -80(%edx)
L(aligned_16_64bytes):
  movdqa %xmm0, -64(%edx)
L(aligned_16_48bytes):
  movdqa %xmm0, -48(%edx)
L(aligned_16_32bytes):
  movdqa %xmm0, -32(%edx)
L(aligned_16_16bytes):
  movdqa %xmm0, -16(%edx)
L(aligned_16_0bytes):
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 1 */
L(aligned_16_113bytes):
  movdqa %xmm0, -113(%edx)
L(aligned_16_97bytes):
  movdqa %xmm0, -97(%edx)
L(aligned_16_81bytes):
  movdqa %xmm0, -81(%edx)
L(aligned_16_65bytes):
  movdqa %xmm0, -65(%edx)
L(aligned_16_49bytes):
  movdqa %xmm0, -49(%edx)
L(aligned_16_33bytes):
  movdqa %xmm0, -33(%edx)
L(aligned_16_17bytes):
  movdqa %xmm0, -17(%edx)
L(aligned_16_1bytes):
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 2 */
L(aligned_16_114bytes):
  movdqa %xmm0, -114(%edx)
L(aligned_16_98bytes):
  movdqa %xmm0, -98(%edx)
L(aligned_16_82bytes):
  movdqa %xmm0, -82(%edx)
L(aligned_16_66bytes):
  movdqa %xmm0, -66(%edx)
L(aligned_16_50bytes):
  movdqa %xmm0, -50(%edx)
L(aligned_16_34bytes):
  movdqa %xmm0, -34(%edx)
L(aligned_16_18bytes):
  movdqa %xmm0, -18(%edx)
L(aligned_16_2bytes):
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 3 */
L(aligned_16_115bytes):
  movdqa %xmm0, -115(%edx)
L(aligned_16_99bytes):
  movdqa %xmm0, -99(%edx)
L(aligned_16_83bytes):
  movdqa %xmm0, -83(%edx)
L(aligned_16_67bytes):
  movdqa %xmm0, -67(%edx)
L(aligned_16_51bytes):
  movdqa %xmm0, -51(%edx)
L(aligned_16_35bytes):
  movdqa %xmm0, -35(%edx)
L(aligned_16_19bytes):
  movdqa %xmm0, -19(%edx)
L(aligned_16_3bytes):
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Tail stores entered through L(table_16_128bytes), chains for
   (remaining mod 16) = 4..7.
   On entry EDX = one past the last byte to write, xmm0 = fill byte in all 16
   lanes, EAX = fill byte in all four bytes.  Each chain falls through
   16-byte aligned stores toward the end of the buffer, writes the final
   4..7 bytes with movl/movw/movb, loads dst as the return value and
   returns.  */
  ALIGN(4)
  /* remaining mod 16 == 4 */
L(aligned_16_116bytes):
  movdqa %xmm0, -116(%edx)
L(aligned_16_100bytes):
  movdqa %xmm0, -100(%edx)
L(aligned_16_84bytes):
  movdqa %xmm0, -84(%edx)
L(aligned_16_68bytes):
  movdqa %xmm0, -68(%edx)
L(aligned_16_52bytes):
  movdqa %xmm0, -52(%edx)
L(aligned_16_36bytes):
  movdqa %xmm0, -36(%edx)
L(aligned_16_20bytes):
  movdqa %xmm0, -20(%edx)
L(aligned_16_4bytes):
  movl %eax, -4(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 5 */
L(aligned_16_117bytes):
  movdqa %xmm0, -117(%edx)
L(aligned_16_101bytes):
  movdqa %xmm0, -101(%edx)
L(aligned_16_85bytes):
  movdqa %xmm0, -85(%edx)
L(aligned_16_69bytes):
  movdqa %xmm0, -69(%edx)
L(aligned_16_53bytes):
  movdqa %xmm0, -53(%edx)
L(aligned_16_37bytes):
  movdqa %xmm0, -37(%edx)
L(aligned_16_21bytes):
  movdqa %xmm0, -21(%edx)
L(aligned_16_5bytes):
  movl %eax, -5(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 6 */
L(aligned_16_118bytes):
  movdqa %xmm0, -118(%edx)
L(aligned_16_102bytes):
  movdqa %xmm0, -102(%edx)
L(aligned_16_86bytes):
  movdqa %xmm0, -86(%edx)
L(aligned_16_70bytes):
  movdqa %xmm0, -70(%edx)
L(aligned_16_54bytes):
  movdqa %xmm0, -54(%edx)
L(aligned_16_38bytes):
  movdqa %xmm0, -38(%edx)
L(aligned_16_22bytes):
  movdqa %xmm0, -22(%edx)
L(aligned_16_6bytes):
  movl %eax, -6(%edx)
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 7 */
L(aligned_16_119bytes):
  movdqa %xmm0, -119(%edx)
L(aligned_16_103bytes):
  movdqa %xmm0, -103(%edx)
L(aligned_16_87bytes):
  movdqa %xmm0, -87(%edx)
L(aligned_16_71bytes):
  movdqa %xmm0, -71(%edx)
L(aligned_16_55bytes):
  movdqa %xmm0, -55(%edx)
L(aligned_16_39bytes):
  movdqa %xmm0, -39(%edx)
L(aligned_16_23bytes):
  movdqa %xmm0, -23(%edx)
L(aligned_16_7bytes):
  movl %eax, -7(%edx)
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Tail stores entered through L(table_16_128bytes), chains for
   (remaining mod 16) = 8..11.
   On entry EDX = one past the last byte to write, xmm0 = fill byte in all 16
   lanes, EAX = fill byte in all four bytes.  Each chain falls through
   16-byte aligned stores toward the end of the buffer, writes the final
   8..11 bytes with an 8-byte movq from xmm0 plus movw/movb, loads dst as
   the return value and returns.  */
  ALIGN(4)
  /* remaining mod 16 == 8 */
L(aligned_16_120bytes):
  movdqa %xmm0, -120(%edx)
L(aligned_16_104bytes):
  movdqa %xmm0, -104(%edx)
L(aligned_16_88bytes):
  movdqa %xmm0, -88(%edx)
L(aligned_16_72bytes):
  movdqa %xmm0, -72(%edx)
L(aligned_16_56bytes):
  movdqa %xmm0, -56(%edx)
L(aligned_16_40bytes):
  movdqa %xmm0, -40(%edx)
L(aligned_16_24bytes):
  movdqa %xmm0, -24(%edx)
L(aligned_16_8bytes):
  movq %xmm0, -8(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 9 */
L(aligned_16_121bytes):
  movdqa %xmm0, -121(%edx)
L(aligned_16_105bytes):
  movdqa %xmm0, -105(%edx)
L(aligned_16_89bytes):
  movdqa %xmm0, -89(%edx)
L(aligned_16_73bytes):
  movdqa %xmm0, -73(%edx)
L(aligned_16_57bytes):
  movdqa %xmm0, -57(%edx)
L(aligned_16_41bytes):
  movdqa %xmm0, -41(%edx)
L(aligned_16_25bytes):
  movdqa %xmm0, -25(%edx)
L(aligned_16_9bytes):
  movq %xmm0, -9(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 10 */
L(aligned_16_122bytes):
  movdqa %xmm0, -122(%edx)
L(aligned_16_106bytes):
  movdqa %xmm0, -106(%edx)
L(aligned_16_90bytes):
  movdqa %xmm0, -90(%edx)
L(aligned_16_74bytes):
  movdqa %xmm0, -74(%edx)
L(aligned_16_58bytes):
  movdqa %xmm0, -58(%edx)
L(aligned_16_42bytes):
  movdqa %xmm0, -42(%edx)
L(aligned_16_26bytes):
  movdqa %xmm0, -26(%edx)
L(aligned_16_10bytes):
  movq %xmm0, -10(%edx)
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 11 */
L(aligned_16_123bytes):
  movdqa %xmm0, -123(%edx)
L(aligned_16_107bytes):
  movdqa %xmm0, -107(%edx)
L(aligned_16_91bytes):
  movdqa %xmm0, -91(%edx)
L(aligned_16_75bytes):
  movdqa %xmm0, -75(%edx)
L(aligned_16_59bytes):
  movdqa %xmm0, -59(%edx)
L(aligned_16_43bytes):
  movdqa %xmm0, -43(%edx)
L(aligned_16_27bytes):
  movdqa %xmm0, -27(%edx)
L(aligned_16_11bytes):
  movq %xmm0, -11(%edx)
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Tail stores entered through L(table_16_128bytes), chains for
   (remaining mod 16) = 12..15, followed by the end of memset_atom.
   On entry EDX = one past the last byte to write, xmm0 = fill byte in all 16
   lanes, EAX = fill byte in all four bytes.  Each chain falls through
   16-byte aligned stores toward the end of the buffer, writes the final
   12..15 bytes with movq/movl/movw/movb, loads dst as the return value and
   returns.  The last chain uses RETURN_END because no code follows it.  */
  ALIGN(4)
  /* remaining mod 16 == 12 */
L(aligned_16_124bytes):
  movdqa %xmm0, -124(%edx)
L(aligned_16_108bytes):
  movdqa %xmm0, -108(%edx)
L(aligned_16_92bytes):
  movdqa %xmm0, -92(%edx)
L(aligned_16_76bytes):
  movdqa %xmm0, -76(%edx)
L(aligned_16_60bytes):
  movdqa %xmm0, -60(%edx)
L(aligned_16_44bytes):
  movdqa %xmm0, -44(%edx)
L(aligned_16_28bytes):
  movdqa %xmm0, -28(%edx)
L(aligned_16_12bytes):
  movq %xmm0, -12(%edx)
  movl %eax, -4(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 13 */
L(aligned_16_125bytes):
  movdqa %xmm0, -125(%edx)
L(aligned_16_109bytes):
  movdqa %xmm0, -109(%edx)
L(aligned_16_93bytes):
  movdqa %xmm0, -93(%edx)
L(aligned_16_77bytes):
  movdqa %xmm0, -77(%edx)
L(aligned_16_61bytes):
  movdqa %xmm0, -61(%edx)
L(aligned_16_45bytes):
  movdqa %xmm0, -45(%edx)
L(aligned_16_29bytes):
  movdqa %xmm0, -29(%edx)
L(aligned_16_13bytes):
  movq %xmm0, -13(%edx)
  movl %eax, -5(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 14 */
L(aligned_16_126bytes):
  movdqa %xmm0, -126(%edx)
L(aligned_16_110bytes):
  movdqa %xmm0, -110(%edx)
L(aligned_16_94bytes):
  movdqa %xmm0, -94(%edx)
L(aligned_16_78bytes):
  movdqa %xmm0, -78(%edx)
L(aligned_16_62bytes):
  movdqa %xmm0, -62(%edx)
L(aligned_16_46bytes):
  movdqa %xmm0, -46(%edx)
L(aligned_16_30bytes):
  movdqa %xmm0, -30(%edx)
L(aligned_16_14bytes):
  movq %xmm0, -14(%edx)
  movl %eax, -6(%edx)
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  ALIGN(4)
  /* remaining mod 16 == 15 */
L(aligned_16_127bytes):
  movdqa %xmm0, -127(%edx)
L(aligned_16_111bytes):
  movdqa %xmm0, -111(%edx)
L(aligned_16_95bytes):
  movdqa %xmm0, -95(%edx)
L(aligned_16_79bytes):
  movdqa %xmm0, -79(%edx)
L(aligned_16_63bytes):
  movdqa %xmm0, -63(%edx)
L(aligned_16_47bytes):
  movdqa %xmm0, -47(%edx)
L(aligned_16_31bytes):
  movdqa %xmm0, -31(%edx)
L(aligned_16_15bytes):
  movq %xmm0, -15(%edx)
  movl %eax, -7(%edx)
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN_END

END(memset_atom)