/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

/* NOTE(review): FOR_ATOM is not referenced anywhere in this file;
   presumably it is consumed by an included header or is a leftover --
   confirm before removing.  */
#define FOR_ATOM

/* L(label): spell LABEL with the assembler's `.L' prefix, which keeps
   the label local to this object file.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n): align the location counter to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Unwind-info bookkeeping for a 4-byte push of REG: the CFA moves 4
   bytes further from ESP and REG is saved at the new top of stack.  */
#define CFI_PUSH(REG) \
	.cfi_adjust_cfa_offset 4; \
	.cfi_rel_offset REG, 0

/* Inverse of CFI_PUSH: the CFA moves 4 bytes closer to ESP and REG is
   back in its register.  */
#define CFI_POP(REG) \
	.cfi_adjust_cfa_offset -4; \
	.cfi_restore REG

/* Push/pop REG and emit the matching CFI in one step, so the stack and
   the unwind description cannot drift apart.  */
#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Stack offsets of the arguments, relative to ESP once ENTRANCE has
   pushed EBX (return address + saved EBX = 8 bytes).  They are only
   valid while exactly one copy of EBX is on the stack.  */
#define PARMS		8	/* Preserve EBX. */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)
/* memset returns its destination pointer: reload it into EAX.  */
#define SETRTNVAL	movl DST(%esp), %eax

/* ENTRANCE saves EBX, which BRANCH_TO_JMPTBL_ENTRY uses as scratch.
   RETURN_END restores EBX and returns.  RETURN does the same and then
   re-applies CFI_PUSH, because the code that follows it in the file is
   still reached with EBX pushed; RETURN_END is for the last return in
   the function, where nothing follows.  */
#define ENTRANCE	PUSH(%ebx);
#define RETURN_END	POP(%ebx); ret
#define RETURN		RETURN_END; CFI_PUSH(%ebx)
/* Jump-table entry: offset of handler I from table base B.  */
#define JMPTBL(I, B)	I - B

/* Load the address of the instruction following the call into
   register %e<x> (position-independent code helper).  */
#define SETUP_PIC_REG(x)	call __x86.get_pc_thunk.x

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.

   In:       ECX = table index (the number of bytes still to store),
             EDX = address of the first byte still to store.
   Out:      EDX = EDX + ECX, i.e. one past the last byte to store; the
             handlers store at negative offsets from it.
   Clobbers: EBX, EFLAGS.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
    /* We first load PC into EBX.  */ \
    SETUP_PIC_REG(bx); \
    /* Get the address of the jump table.  EBX holds the address of this \
       very instruction, which is what `.' evaluates to here.  */ \
    add	$(TABLE - .), %ebx; \
    /* Get the entry and convert the relative offset to the \
       absolute address.  */ \
    add	(%ebx,%ecx,4), %ebx; \
    add	%ecx, %edx; \
    /* We loaded the jump table and adjusted EDX.  Go.  */ \
    jmp	*%ebx

/* __memset_chk_atom(dst, chr, len, dst_len)

   Checked entry point.  If len <= dst_len (unsigned compare) it joins
   memset_atom at L(memset_length_loaded) with EBX already saved and
   ECX = len.  Otherwise it undoes the EBX save and tail-calls
   __memset_chk_fail with the original stack layout.  */
ENTRY(__memset_chk_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jna	L(memset_length_loaded)		/* len <= dst_len: proceed.  */

	POP(%ebx)	/* Undo ENTRANCE without returning.  */
	jmp	__memset_chk_fail
END(__memset_chk_atom)

/* memset_atom(dst, chr, len)

   Stack arguments as laid out by DST/CHR/LEN; returns dst in EAX.
   Register roles after the setup below:
     EAX = fill byte replicated into all four bytes,
     ECX = number of bytes still to store,
     EDX = address of the next byte to store,
     EBX = scratch (saved by ENTRANCE, restored by RETURN).
   Lengths below 32 are dispatched straight to the write_Nbytes
   handlers; everything else goes to L(32bytesormore).  */
	.section .text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
/* __memset_chk_atom joins here with EBX saved and ECX = len.  */
L(memset_length_loaded):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))


/* Jump table for lengths 0..31, indexed by the byte count.  Each entry
   is the 32-bit offset of the matching write_Nbytes handler from the
   start of the table (see JMPTBL).  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_less_32bytes):
	.int	JMPTBL(L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Handlers for lengths 0..31, reached only through
   BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes)).  On entry EDX points
   one past the last byte to store and EAX holds the replicated fill
   byte.  The handlers are grouped by (length mod 4): entering a chain
   at write_Nbytes falls through 4-byte stores at -N, -(N-4), ... and
   each chain ends with the 0..3 byte remainder and a return.  */
	ALIGN(4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Lengths of 32 bytes or more: broadcast the pattern into XMM0, bring
   EDX up to a 16-byte boundary, then pick a store strategy by length.  */
	ALIGN(4)
/* ECX >= 32; the alignment of EDX is not known yet.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* One unaligned 16-byte store covers everything up to the next
	   16-byte boundary (and harmlessly some bytes beyond it).  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx		/* EDX = next 16-byte boundary.  */
	sub	%edx, %eax		/* EAX = -(bytes skipped).  */
	add	%eax, %ecx		/* ECX = bytes left from the boundary.  */
	movd	%xmm0, %eax		/* EAX was scratch: restore the pattern.  */

	ALIGN(4)
/* EDX is 16-byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* ECX >= 128 and EDX is 16-byte aligned.  Pick a strategy by length:
     ECX >= __x86_shared_cache_size -> L(128bytesormore_nt_start)
     ECX >= __x86_data_cache_size   -> L(128bytes_L2_normal)
     otherwise                      -> L(128bytesormore_normal) below.
   EBX is borrowed as the GOT pointer for both comparisons, so the PIC
   thunk is called once and EBX is saved and restored once.  */
	ALIGN(4)
L(128bytesormore):
	PUSH(%ebx)
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_shared_cache_size@GOTOFF(%ebx), %ecx
	jb	L(128bytesormore_cached)
	/* L(128bytesormore_nt_start) expects EBX = shared cache size and
	   the EBX saved above still on the stack (it pops it itself).  */
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
	jmp	L(128bytesormore_nt_start)

/* CFI fix-up emitted just before L(128bytesormore_nt_start), which is
   entered with the extra EBX pushed.  */
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
L(128bytesormore_cached):
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
	POP(%ebx)		/* popl leaves EFLAGS intact for the jae.  */

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the `sub' at the top of each unrolled
	   half both counts the 128 bytes about to be stored and sets CF
	   when fewer than 128 bytes remain after them.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx		/* lea: must not disturb CF.  */
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the bias: ECX = bytes left, 0..127.  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* Length is at least __x86_data_cache_size but below
   __x86_shared_cache_size.  Each iteration prefetches the two cache
   lines at EDX+0x380 and EDX+0x3c0 and stores 128 aligned bytes;
   the loop runs while at least 128 bytes remain.  */
	ALIGN(4)
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	/* ECX = bytes left, 0..127.  */
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* Length is at least __x86_shared_cache_size.  On entry EBX holds that
   size, ECX >= EBX, and an extra saved EBX is on the stack (hence the
   CFI fix-up below).  The first EBX bytes, rounded down to a multiple
   of 128, are written with ordinary aligned stores; whatever full
   128-byte blocks remain are written with non-temporal stores; the
   final 0..127 bytes go through the jump table.  */
	RESTORE_EBX_STATE
L(128bytesormore_nt_start):
	/* ECX = length - (EBX rounded down to a multiple of 128), i.e.
	   the bytes left once the first loop below has finished.  */
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	movd	%xmm0, %eax		/* EAX was scratch: restore the pattern.  */
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)	/* No non-temporal stores needed.  */
	ALIGN(4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before anything that follows.  */
	sfence
L(shared_cache_loop_end):
	/* Drop the extra saved EBX so the stack matches what the tail
	   handlers (SETRTNVAL / RETURN) expect.  */
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


/* Jump table for tails of 0..127 bytes, indexed by the byte count.
   Each entry is the 32-bit offset of the matching aligned_16_Nbytes
   handler from the start of the table (see JMPTBL).  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tail handlers for 0..127 bytes, reached only through
   BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes)).  On entry EDX points
   one past the last byte to store, XMM0 holds the fill byte in all 16
   lanes and EAX holds it in all 4 bytes.  The first byte of the tail
   (EDX - N) is 16-byte aligned, so the movdqa stores at -N, -(N-16),
   ... are aligned.  Handlers are grouped by (N mod 16); this group
   covers remainders 0..3.  Entering a chain at aligned_16_Nbytes falls
   through the 16-byte stores and then writes the remainder.  */
	ALIGN(4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail handlers from L(table_16_128bytes) for lengths whose remainder
   modulo 16 is 4..7.  On entry EDX points one past the last byte to
   store, XMM0/EAX hold the replicated fill byte, and EDX - N is
   16-byte aligned.  Each chain falls through aligned 16-byte stores
   and finishes the remainder with a 4-byte store plus 0..3 bytes.  */
	ALIGN(4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail handlers from L(table_16_128bytes) for lengths whose remainder
   modulo 16 is 8..11.  On entry EDX points one past the last byte to
   store, XMM0/EAX hold the replicated fill byte, and EDX - N is
   16-byte aligned.  Each chain falls through aligned 16-byte stores
   and finishes the remainder with an 8-byte movq plus 0..3 bytes.  */
	ALIGN(4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail handlers from L(table_16_128bytes) for lengths whose remainder
   modulo 16 is 12..15.  On entry EDX points one past the last byte to
   store, XMM0/EAX hold the replicated fill byte, and EDX - N is
   16-byte aligned.  Each chain falls through aligned 16-byte stores
   and finishes the remainder with an 8-byte movq, a 4-byte store and
   0..3 bytes.  The last handler uses RETURN_END because no code
   follows it in the function.  */
	ALIGN(4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END(memset_atom)