/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* ENTRY()/END() are not defined in this file; they are expected to come
   from bionic_asm.h.  SHARED_CACHE_SIZE and DATA_CACHE_SIZE, used by the
   large-size paths below, are presumably provided by cache.h (confirm).  */
#include <private/bionic_asm.h>

#include "cache.h"

/* L(name) builds a label with the .L prefix so that it stays local to the
   assembler and is not exported as a symbol.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) pads the current section to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Unwind bookkeeping for one 4-byte push of REG: the CFA offset grows by 4
   and REG is recorded as saved at offset 0 from the current CFA register.  */
#define CFI_PUSH(REG)						\
    .cfi_adjust_cfa_offset 4;					\
    .cfi_rel_offset REG, 0

/* Unwind bookkeeping for one 4-byte pop of REG: the CFA offset shrinks by 4
   and REG is marked as holding its entry value again.  */
#define CFI_POP(REG)						\
    .cfi_adjust_cfa_offset -4;					\
    .cfi_restore REG

/* Push/pop a 32-bit register and keep the unwind information in sync.  */
#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Stack offsets of the arguments, valid once ENTRANCE has pushed EBX
   (one 4-byte save slot on top of the return address).  */
#define PARMS		8		/* Preserve EBX.  */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)

/* Load the return value (the original destination pointer) into EAX.  */
#define SETRTNVAL	movl DST(%esp), %eax

/* ENTRANCE saves EBX.  RETURN_END restores it and returns.  RETURN does the
   same but then re-applies CFI_PUSH(%ebx), so that the code assembled after
   the `ret` is still described to the unwinder as having EBX pushed.  */
#define ENTRANCE	PUSH(%ebx);
#define RETURN_END	POP(%ebx); ret
#define RETURN		RETURN_END; CFI_PUSH(%ebx)

/* A jump-table entry: the offset of label I relative to table base B.  */
#define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.

   In:       ECX = index into TABLE (entries are 4 bytes each).
             EDX = pointer that the target code will store relative to.
   Effect:   EDX is advanced by ECX before the jump, so the target sees
             EDX pointing ECX bytes past its value on entry to the macro.
   Clobbers: EBX, flags.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    jmp		*%ebx

/* __memset_chk_atom: length-checked entry point.

   Stack arguments: DST, CHR, LEN, CHK_DST_LEN (see the offsets above).
   LEN is loaded into ECX and compared, unsigned, with CHK_DST_LEN.  When
   LEN <= CHK_DST_LEN control joins memset_atom at L(memset_length_loaded)
   with EBX still pushed and ECX = LEN.  Otherwise EBX is restored and
   control is transferred to __memset_chk_fail with the caller's frame
   intact.  */
ENTRY(__memset_chk_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jna	L(memset_length_loaded)

	POP(%ebx)	/* Undo ENTRANCE without returning.  */
	jmp	__memset_chk_fail
END(__memset_chk_atom)

/* memset_atom: fill LEN bytes at DST with the low byte of CHR.

   Stack arguments: DST, CHR, LEN.  Returns DST in EAX (via SETRTNVAL on
   every exit path).  EBX is saved by ENTRANCE and restored by RETURN.

   Register roles from here on:
     EAX = fill byte replicated into all four bytes
     ECX = number of bytes still to write
     EDX = destination pointer  */
	.section .text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
/* __memset_chk_atom joins here with EBX pushed and ECX = LEN.  */
L(memset_length_loaded):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch on the exact length.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))


/* Jump table for lengths 0..31: entry N is the offset of L(write_Nbytes)
   relative to the start of the table.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_less_32bytes):
	.int	JMPTBL(L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Store chains for lengths 0..31, entered through table_less_32bytes.
   The dispatch macro has already added the length to EDX, so EDX points
   one past the last byte to write and every store uses a negative offset.
   Each chain is grouped by (length mod 4): entering at L(write_Nbytes)
   falls through 4-byte stores until only the 0..3 byte tail remains.  */
	ALIGN(4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* ECX >= 32.  Nothing is known about the alignment of EDX yet; it is
   tested against 16 below.  */
	ALIGN(4)
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  Write the first 16 bytes
   with an unaligned store, then round EDX up to the next 16-byte boundary
   and shrink ECX by the number of bytes that were skipped over.  */
L(not_aligned_16):
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax		/* EAX = old EDX - new EDX (negative).  */
	add	%eax, %ecx
	movd	%xmm0, %eax		/* EAX was scratch; reload the pattern.  */

/* EDX is 16 byte aligned; ECX holds the bytes still to write.  */
	ALIGN(4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* ECX >= 128 and EDX is 16 byte aligned.  Choose a strategy by size:
     ECX >= SHARED_CACHE_SIZE  -> L(128bytesormore_nt_start), entered with
                                  EBX still pushed and holding that size;
     ECX >= DATA_CACHE_SIZE    -> L(128bytes_L2_normal);
     otherwise                 -> the plain aligned-store loop below.  */
	ALIGN(4)
L(128bytesormore):
	PUSH(%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	POP(%ebx)
/* Used at L(128bytesormore_nt_start): that label is reached from the jae
   above, where EBX is still pushed, but it is assembled after code for
   which the POP above has already been recorded.  */
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
	cmp	$DATA_CACHE_SIZE, %ecx

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the borrow from each `sub` in the loop
	   reports whether another full 128-byte block remains.  */
	subl	$128, %ecx
/* Loop body unrolled twice, 128 bytes per half.  movdqa and lea leave the
   flags alone, so the jb/jae after each half still test the `sub` at its
   top.  */
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

/* Undo the bias; ECX is now the 0..127 byte remainder.  */
L(128bytesless_normal):
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* DATA_CACHE_SIZE <= ECX < SHARED_CACHE_SIZE, EDX 16 byte aligned.
   Writes 128 bytes per iteration with aligned stores while prefetching
   the lines at EDX+0x380 and EDX+0x3c0, and loops while at least 128
   bytes remain.  */
	ALIGN(4)
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

/* ECX is now the 0..127 byte remainder.  */
L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* ECX >= SHARED_CACHE_SIZE.  Entered from L(128bytesormore) with EBX
   pushed a second time and EBX = SHARED_CACHE_SIZE, hence the CFI fix-up
   before the label.

   The first loop writes EBX rounded down to a multiple of 128 bytes with
   ordinary aligned stores; ECX is set up beforehand to hold what is left
   after that.  Any further full 128-byte blocks are written with
   non-temporal stores, and the final 0..127 bytes go through the jump
   table.  */
	RESTORE_EBX_STATE
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax		/* Part of EBX the first loop will not cover.  */
	add	%eax, %ecx
	movd	%xmm0, %eax		/* EAX was scratch; reload the pattern.  */
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before anything that follows.  */
	sfence
L(shared_cache_loop_end):
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


/* Jump table for remainders 0..127 with a 16-byte aligned destination:
   entry N is the offset of L(aligned_16_Nbytes) relative to the start of
   the table.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Store chains for remainders 0..127, entered through table_16_128bytes.
   The dispatch macro has already added the remainder N to a 16-byte
   aligned EDX, so EDX points one past the last byte to write and
   (EDX - N) is 16-byte aligned.  That is why movdqa is usable at offsets
   such as -113: within one chain every movdqa target differs from
   (EDX - N) by a multiple of 16.

   There is one chain per (N mod 16).  Entering at L(aligned_16_Nbytes)
   falls through 16-byte stores until fewer than 16 bytes remain; the tail
   is then finished with movq (8 bytes, taken from XMM0), movl, movw and
   movb stores as needed.  EAX must still hold the replicated fill byte.  */
	ALIGN(4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last exit of the function: no CFI re-push is needed after it.  */
	RETURN_END

END(memset_atom)