/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#include "cache.h"

/* L(name) expands to .Lname, an assembler-local label that is not exported
   in the object's symbol table. */
#ifndef L
# define L(label) .L##label
#endif

/* ALIGN(n) pads the location counter to a 2^n byte boundary. */
#ifndef ALIGN
# define ALIGN(n) .p2align n
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA is now 4 bytes
   further from the stack pointer, and REG's previous value is saved at
   offset 0 from the current CFA register. */
#define CFI_PUSH(REG) \
  .cfi_adjust_cfa_offset 4; \
  .cfi_rel_offset REG, 0

/* Unwind bookkeeping for the matching pop: undo the CFA adjustment and
   record that REG holds its original value again. */
#define CFI_POP(REG) \
  .cfi_adjust_cfa_offset -4; \
  .cfi_restore REG

/* Push/pop a register together with its unwind annotation. */
#define PUSH(REG) pushl REG; CFI_PUSH(REG)
#define POP(REG) popl REG; CFI_POP(REG)

/* Stack offsets of the arguments, valid once ENTRANCE has pushed EBX:
   4 bytes of return address + 4 bytes of saved EBX = 8. */
#define PARMS 8 /* Preserve EBX. */
#define DST PARMS
#define CHR (DST+4)
#define LEN (CHR+4)
#define CHK_DST_LEN (LEN+4)
/* Load the return value (the original destination pointer) into EAX. */
#define SETRTNVAL movl DST(%esp), %eax

/* ENTRANCE saves EBX, which BRANCH_TO_JMPTBL_ENTRY uses as scratch.
   RETURN_END restores EBX and returns.  RETURN does the same and then
   re-declares EBX as pushed, so the unwind state is correct for the code
   that follows the ret in the file (which still runs with EBX saved).
   JMPTBL(I, B) is the offset of label I relative to table base B. */
#define ENTRANCE PUSH(%ebx);
#define RETURN_END POP(%ebx); ret
#define RETURN RETURN_END; CFI_PUSH(%ebx)
#define JMPTBL(I, B) I - B

/* Load an entry in a jump table into EBX and branch to it. TABLE is a
   jump table with relative offsets.
   In:    ECX = table index (the number of bytes left to write),
          EDX = current destination pointer.
   Out:   EDX = EDX + ECX, i.e. one past the last byte to write, so the
          handlers store at negative offsets from EDX.
   Clobb: EBX, flags.
   The thunk returns the address of the `add $(TABLE - .)` instruction in
   EBX, and `.` in that operand is that same address, so the add leaves
   the absolute address of TABLE in EBX without any load-time relocation. */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
  /* We first load PC into EBX. */ \
  call __x86.get_pc_thunk.bx; \
  /* Get the address of the jump table. */ \
  add $(TABLE - .), %ebx; \
  /* Get the entry and convert the relative offset to the \
     absolute address. */ \
  add (%ebx,%ecx,4), %ebx; \
  add %ecx, %edx; \
  /* We loaded the jump table and adjusted EDX. Go. */ \
  jmp *%ebx

/* __x86.get_pc_thunk.bx: copy the return address (the address of the
   instruction following the call) into EBX and return.  Used by
   BRANCH_TO_JMPTBL_ENTRY to obtain the current PC.  The symbol is global
   but hidden, and lives in its own .gnu.linkonce section. */
  .section .gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
  .globl __x86.get_pc_thunk.bx
  .hidden __x86.get_pc_thunk.bx
  ALIGN(4)
  .type __x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
  movl (%esp), %ebx
  ret

/* __memset_chk_atom: checked entry point for memset_atom.
   Stack arguments (offsets valid after ENTRANCE): DST, CHR, LEN and
   CHK_DST_LEN, the size of the destination object.
   If LEN <= CHK_DST_LEN (unsigned compare) control joins memset_atom at
   L(memset_length_loaded) with EBX already pushed and ECX = LEN.
   Otherwise EBX is restored and control tail-jumps to __memset_chk_fail
   (defined elsewhere) with the stack exactly as it was on entry. */
ENTRY(__memset_chk_atom)
  ENTRANCE

  movl LEN(%esp), %ecx
  cmpl CHK_DST_LEN(%esp), %ecx
  jna L(memset_length_loaded)

  POP(%ebx) // Undo ENTRANCE without returning.
  jmp __memset_chk_fail
END(__memset_chk_atom)

/* memset_atom: fill LEN bytes at DST with the low byte of CHR and return
   DST in EAX.  Arguments are read from the stack at DST/CHR/LEN(%esp)
   after ENTRANCE has pushed EBX.
   Register roles from L(memset_length_loaded) onwards:
     EAX = fill byte replicated into all four bytes
     ECX = number of bytes still to write
     EDX = current destination pointer
     EBX = scratch for BRANCH_TO_JMPTBL_ENTRY (caller's value is on the stack)
   L(memset_length_loaded) is also the join point for __memset_chk_atom,
   which arrives with EBX pushed and ECX = LEN. */
  .section .text.sse2,"ax",@progbits
  ALIGN(4)
ENTRY(memset_atom)
  ENTRANCE

  movl LEN(%esp), %ecx
L(memset_length_loaded):
  movzbl CHR(%esp), %eax
  movb %al, %ah
  /* Fill the whole EAX with pattern. */
  movl %eax, %edx
  shl $16, %eax
  or %edx, %eax
  movl DST(%esp), %edx
  cmp $32, %ecx
  jae L(32bytesormore)

  /* Fewer than 32 bytes: dispatch on the exact length. */
L(write_less32bytes):
  BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))


/* Jump table for fills of 0..31 bytes.  Entry n holds the offset of
   L(write_<n>bytes) relative to the start of the table;
   BRANCH_TO_JMPTBL_ENTRY indexes it with ECX and adds the table address
   back to form the branch target.  ALIGN(2) gives the 4-byte entries
   4-byte alignment. */
  .pushsection .rodata.sse2,"a",@progbits
  ALIGN(2)
L(table_less_32bytes):
  .int JMPTBL(L(write_0bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_1bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_2bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_3bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_4bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_5bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_6bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_7bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_8bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_9bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_10bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_11bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_12bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_13bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_14bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_15bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_16bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_17bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_18bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_19bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_20bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_21bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_22bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_23bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_24bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_25bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_26bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_27bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_28bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_29bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_30bytes), L(table_less_32bytes))
  .int JMPTBL(L(write_31bytes), L(table_less_32bytes))
  .popsection

/* Tail writers for 0..31 bytes, reached through L(table_less_32bytes).
   BRANCH_TO_JMPTBL_ENTRY has already added ECX to EDX, so EDX points one
   past the last byte to fill and every store uses a negative offset.
   There is one chain per value of n mod 4: the entry for n falls through
   the entries for n-4, n-8, ... and ends with the 0-, 1-, 2- or 3-byte
   remainder.  Each chain then loads DST into EAX and returns. */

  /* n = 0 (mod 4) */
  ALIGN(4)
L(write_28bytes):
  movl %eax, -28(%edx)
L(write_24bytes):
  movl %eax, -24(%edx)
L(write_20bytes):
  movl %eax, -20(%edx)
L(write_16bytes):
  movl %eax, -16(%edx)
L(write_12bytes):
  movl %eax, -12(%edx)
L(write_8bytes):
  movl %eax, -8(%edx)
L(write_4bytes):
  movl %eax, -4(%edx)
L(write_0bytes):
  SETRTNVAL
  RETURN

  /* n = 1 (mod 4) */
  ALIGN(4)
L(write_29bytes):
  movl %eax, -29(%edx)
L(write_25bytes):
  movl %eax, -25(%edx)
L(write_21bytes):
  movl %eax, -21(%edx)
L(write_17bytes):
  movl %eax, -17(%edx)
L(write_13bytes):
  movl %eax, -13(%edx)
L(write_9bytes):
  movl %eax, -9(%edx)
L(write_5bytes):
  movl %eax, -5(%edx)
L(write_1bytes):
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  /* n = 2 (mod 4) */
  ALIGN(4)
L(write_30bytes):
  movl %eax, -30(%edx)
L(write_26bytes):
  movl %eax, -26(%edx)
L(write_22bytes):
  movl %eax, -22(%edx)
L(write_18bytes):
  movl %eax, -18(%edx)
L(write_14bytes):
  movl %eax, -14(%edx)
L(write_10bytes):
  movl %eax, -10(%edx)
L(write_6bytes):
  movl %eax, -6(%edx)
L(write_2bytes):
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  /* n = 3 (mod 4) */
  ALIGN(4)
L(write_31bytes):
  movl %eax, -31(%edx)
L(write_27bytes):
  movl %eax, -27(%edx)
L(write_23bytes):
  movl %eax, -23(%edx)
L(write_19bytes):
  movl %eax, -19(%edx)
L(write_15bytes):
  movl %eax, -15(%edx)
L(write_11bytes):
  movl %eax, -11(%edx)
L(write_7bytes):
  movl %eax, -7(%edx)
L(write_3bytes):
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Medium and large fills: broadcast the pattern into XMM0, bring EDX up to
   a 16-byte boundary, then dispatch on the remaining length. */
  ALIGN(4)
/* ECX >= 32; nothing is known yet about the alignment of EDX. */
L(32bytesormore):
  /* Fill xmm0 with the pattern. */
  movd %eax, %xmm0
  pshufd $0, %xmm0, %xmm0
  testl $0xf, %edx
  jz L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned. */
L(not_aligned_16):
  /* One unaligned 16-byte store covers the head up to the next boundary;
     the aligned stores that follow overlap it harmlessly. */
  movdqu %xmm0, (%edx)
  movl %edx, %eax
  and $-16, %edx
  add $16, %edx /* EDX = first 16-byte boundary above the old EDX */
  sub %edx, %eax /* EAX = -(bytes skipped to reach that boundary) */
  add %eax, %ecx /* ECX = bytes remaining from the boundary */
  movd %xmm0, %eax /* EAX was used as scratch; reload the pattern */

  ALIGN(4)
/* EDX is 16 byte aligned; ECX bytes remain (at least 17 on this path). */
L(aligned_16):
  cmp $128, %ecx
  jae L(128bytesormore)

L(aligned_16_less128bytes):
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* ECX >= 128 bytes remain and EDX is 16 byte aligned.  Choose a strategy
   by size: ECX >= SHARED_CACHE_SIZE takes the non-temporal path,
   ECX >= DATA_CACHE_SIZE takes the prefetching loop, anything smaller
   uses the plain loop below.  (Both constants are not defined in this
   file; presumably they come from cache.h.) */
  ALIGN(4)
L(128bytesormore):
  PUSH(%ebx) /* EBX is borrowed to hold SHARED_CACHE_SIZE */
  mov $SHARED_CACHE_SIZE, %ebx
  cmp %ebx, %ecx
  jae L(128bytesormore_nt_start) /* taken with EBX still pushed */


  POP(%ebx)
/* L(128bytesormore_nt_start) is reached only by the jump above, i.e. with
   the extra EBX push still in effect, so the unwind state has to be told
   about that push again at that label. */
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
  cmp $DATA_CACHE_SIZE, %ecx

  jae L(128bytes_L2_normal)
  /* Pre-bias ECX by -128 so that a borrow from the `sub $128` in the loop
     means fewer than 128 bytes are left after the current chunk. */
  subl $128, %ecx
L(128bytesormore_normal):
  sub $128, %ecx
  movdqa %xmm0, (%edx)
  movdqa %xmm0, 0x10(%edx)
  movdqa %xmm0, 0x20(%edx)
  movdqa %xmm0, 0x30(%edx)
  movdqa %xmm0, 0x40(%edx)
  movdqa %xmm0, 0x50(%edx)
  movdqa %xmm0, 0x60(%edx)
  movdqa %xmm0, 0x70(%edx)
  lea 128(%edx), %edx /* lea (not add) keeps the flags of the sub above */
  jb L(128bytesless_normal)


  /* Second copy of the loop body (unrolled by two). */
  sub $128, %ecx
  movdqa %xmm0, (%edx)
  movdqa %xmm0, 0x10(%edx)
  movdqa %xmm0, 0x20(%edx)
  movdqa %xmm0, 0x30(%edx)
  movdqa %xmm0, 0x40(%edx)
  movdqa %xmm0, 0x50(%edx)
  movdqa %xmm0, 0x60(%edx)
  movdqa %xmm0, 0x70(%edx)
  lea 128(%edx), %edx
  jae L(128bytesormore_normal)

L(128bytesless_normal):
  add $128, %ecx /* undo the bias: ECX = bytes left, now below 128 */
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* Fills of at least DATA_CACHE_SIZE but below SHARED_CACHE_SIZE: write
   128 bytes per iteration with aligned stores, prefetching the lines at
   EDX+0x380 and EDX+0x3c0 ahead of the stores.  The loop exits with
   ECX < 128 and the tail is finished through L(table_16_128bytes). */
  ALIGN(4)
L(128bytes_L2_normal):
  prefetcht0 0x380(%edx)
  prefetcht0 0x3c0(%edx)
  sub $128, %ecx
  movdqa %xmm0, (%edx)
  movaps %xmm0, 0x10(%edx)
  movaps %xmm0, 0x20(%edx)
  movaps %xmm0, 0x30(%edx)
  movaps %xmm0, 0x40(%edx)
  movaps %xmm0, 0x50(%edx)
  movaps %xmm0, 0x60(%edx)
  movaps %xmm0, 0x70(%edx)
  add $128, %edx
  cmp $128, %ecx
  jae L(128bytes_L2_normal)

L(128bytesless_L2_normal):
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* Fills of at least SHARED_CACHE_SIZE bytes.  On entry EBX holds
   SHARED_CACHE_SIZE (the caller's EBX was pushed a second time by
   L(128bytesormore)), ECX >= EBX and EDX is 16 byte aligned.
   Phase 1 writes SHARED_CACHE_SIZE rounded down to a multiple of 128 with
   ordinary aligned stores (assuming SHARED_CACHE_SIZE >= 128); phase 2
   writes the remaining whole 128-byte chunks with non-temporal stores;
   the last ECX < 128 bytes go through L(table_16_128bytes). */
  RESTORE_EBX_STATE
L(128bytesormore_nt_start):
  /* ECX = bytes that will be left after phase 1:
     len - (SHARED_CACHE_SIZE - SHARED_CACHE_SIZE % 128). */
  sub %ebx, %ecx
  mov %ebx, %eax
  and $0x7f, %eax
  add %eax, %ecx
  movd %xmm0, %eax /* EAX was used as scratch; reload the pattern */
  ALIGN(4)
L(128bytesormore_shared_cache_loop):
  prefetcht0 0x3c0(%edx)
  prefetcht0 0x380(%edx)
  sub $0x80, %ebx
  movdqa %xmm0, (%edx)
  movdqa %xmm0, 0x10(%edx)
  movdqa %xmm0, 0x20(%edx)
  movdqa %xmm0, 0x30(%edx)
  movdqa %xmm0, 0x40(%edx)
  movdqa %xmm0, 0x50(%edx)
  movdqa %xmm0, 0x60(%edx)
  movdqa %xmm0, 0x70(%edx)
  add $0x80, %edx
  cmp $0x80, %ebx
  jae L(128bytesormore_shared_cache_loop)
  cmp $0x80, %ecx
  jb L(shared_cache_loop_end) /* no full chunk left: skip phase 2 */
  ALIGN(4)
L(128bytesormore_nt):
  sub $0x80, %ecx
  movntdq %xmm0, (%edx)
  movntdq %xmm0, 0x10(%edx)
  movntdq %xmm0, 0x20(%edx)
  movntdq %xmm0, 0x30(%edx)
  movntdq %xmm0, 0x40(%edx)
  movntdq %xmm0, 0x50(%edx)
  movntdq %xmm0, 0x60(%edx)
  movntdq %xmm0, 0x70(%edx)
  add $0x80, %edx
  cmp $0x80, %ecx
  jae L(128bytesormore_nt)
  /* Non-temporal stores are weakly ordered; fence them before the
     ordinary tail stores and the return. */
  sfence
L(shared_cache_loop_end):
  POP(%ebx) /* drop the extra push made in L(128bytesormore) */
  BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


/* Jump table for tails of 0..127 bytes that start on a 16-byte boundary.
   Entry n holds the offset of L(aligned_16_<n>bytes) relative to the
   start of the table; BRANCH_TO_JMPTBL_ENTRY indexes it with ECX and adds
   the table address back to form the branch target. */
  .pushsection .rodata.sse2,"a",@progbits
  ALIGN(2)
L(table_16_128bytes):
  .int JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
  .int JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
  .popsection

/* Tail writers for n = 0..3 (mod 16), reached through
   L(table_16_128bytes).  BRANCH_TO_JMPTBL_ENTRY has already added ECX to
   EDX, so EDX points one past the last byte and every store uses a
   negative offset.  The tail starts on a 16-byte boundary at every
   dispatch site, so EDX - n is 16 byte aligned and the movdqa operands
   below are aligned even though the displacements are odd.  The entry for
   n falls through n-16, n-32, ... and ends with the sub-16-byte
   remainder, written from EAX (the replicated fill byte). */

  /* n = 0 (mod 16) */
  ALIGN(4)
L(aligned_16_112bytes):
  movdqa %xmm0, -112(%edx)
L(aligned_16_96bytes):
  movdqa %xmm0, -96(%edx)
L(aligned_16_80bytes):
  movdqa %xmm0, -80(%edx)
L(aligned_16_64bytes):
  movdqa %xmm0, -64(%edx)
L(aligned_16_48bytes):
  movdqa %xmm0, -48(%edx)
L(aligned_16_32bytes):
  movdqa %xmm0, -32(%edx)
L(aligned_16_16bytes):
  movdqa %xmm0, -16(%edx)
L(aligned_16_0bytes):
  SETRTNVAL
  RETURN

  /* n = 1 (mod 16) */
  ALIGN(4)
L(aligned_16_113bytes):
  movdqa %xmm0, -113(%edx)
L(aligned_16_97bytes):
  movdqa %xmm0, -97(%edx)
L(aligned_16_81bytes):
  movdqa %xmm0, -81(%edx)
L(aligned_16_65bytes):
  movdqa %xmm0, -65(%edx)
L(aligned_16_49bytes):
  movdqa %xmm0, -49(%edx)
L(aligned_16_33bytes):
  movdqa %xmm0, -33(%edx)
L(aligned_16_17bytes):
  movdqa %xmm0, -17(%edx)
L(aligned_16_1bytes):
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  /* n = 2 (mod 16) */
  ALIGN(4)
L(aligned_16_114bytes):
  movdqa %xmm0, -114(%edx)
L(aligned_16_98bytes):
  movdqa %xmm0, -98(%edx)
L(aligned_16_82bytes):
  movdqa %xmm0, -82(%edx)
L(aligned_16_66bytes):
  movdqa %xmm0, -66(%edx)
L(aligned_16_50bytes):
  movdqa %xmm0, -50(%edx)
L(aligned_16_34bytes):
  movdqa %xmm0, -34(%edx)
L(aligned_16_18bytes):
  movdqa %xmm0, -18(%edx)
L(aligned_16_2bytes):
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  /* n = 3 (mod 16) */
  ALIGN(4)
L(aligned_16_115bytes):
  movdqa %xmm0, -115(%edx)
L(aligned_16_99bytes):
  movdqa %xmm0, -99(%edx)
L(aligned_16_83bytes):
  movdqa %xmm0, -83(%edx)
L(aligned_16_67bytes):
  movdqa %xmm0, -67(%edx)
L(aligned_16_51bytes):
  movdqa %xmm0, -51(%edx)
L(aligned_16_35bytes):
  movdqa %xmm0, -35(%edx)
L(aligned_16_19bytes):
  movdqa %xmm0, -19(%edx)
L(aligned_16_3bytes):
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Tail writers for n = 4..7 (mod 16), reached through
   L(table_16_128bytes).  EDX points one past the last byte to fill and
   EDX - n is 16 byte aligned, so the movdqa operands are aligned.  The
   entry for n falls through n-16, n-32, ... and ends with a 4-byte store
   from EAX plus a 0..3 byte remainder. */

  /* n = 4 (mod 16) */
  ALIGN(4)
L(aligned_16_116bytes):
  movdqa %xmm0, -116(%edx)
L(aligned_16_100bytes):
  movdqa %xmm0, -100(%edx)
L(aligned_16_84bytes):
  movdqa %xmm0, -84(%edx)
L(aligned_16_68bytes):
  movdqa %xmm0, -68(%edx)
L(aligned_16_52bytes):
  movdqa %xmm0, -52(%edx)
L(aligned_16_36bytes):
  movdqa %xmm0, -36(%edx)
L(aligned_16_20bytes):
  movdqa %xmm0, -20(%edx)
L(aligned_16_4bytes):
  movl %eax, -4(%edx)
  SETRTNVAL
  RETURN

  /* n = 5 (mod 16) */
  ALIGN(4)
L(aligned_16_117bytes):
  movdqa %xmm0, -117(%edx)
L(aligned_16_101bytes):
  movdqa %xmm0, -101(%edx)
L(aligned_16_85bytes):
  movdqa %xmm0, -85(%edx)
L(aligned_16_69bytes):
  movdqa %xmm0, -69(%edx)
L(aligned_16_53bytes):
  movdqa %xmm0, -53(%edx)
L(aligned_16_37bytes):
  movdqa %xmm0, -37(%edx)
L(aligned_16_21bytes):
  movdqa %xmm0, -21(%edx)
L(aligned_16_5bytes):
  movl %eax, -5(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  /* n = 6 (mod 16) */
  ALIGN(4)
L(aligned_16_118bytes):
  movdqa %xmm0, -118(%edx)
L(aligned_16_102bytes):
  movdqa %xmm0, -102(%edx)
L(aligned_16_86bytes):
  movdqa %xmm0, -86(%edx)
L(aligned_16_70bytes):
  movdqa %xmm0, -70(%edx)
L(aligned_16_54bytes):
  movdqa %xmm0, -54(%edx)
L(aligned_16_38bytes):
  movdqa %xmm0, -38(%edx)
L(aligned_16_22bytes):
  movdqa %xmm0, -22(%edx)
L(aligned_16_6bytes):
  movl %eax, -6(%edx)
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  /* n = 7 (mod 16) */
  ALIGN(4)
L(aligned_16_119bytes):
  movdqa %xmm0, -119(%edx)
L(aligned_16_103bytes):
  movdqa %xmm0, -103(%edx)
L(aligned_16_87bytes):
  movdqa %xmm0, -87(%edx)
L(aligned_16_71bytes):
  movdqa %xmm0, -71(%edx)
L(aligned_16_55bytes):
  movdqa %xmm0, -55(%edx)
L(aligned_16_39bytes):
  movdqa %xmm0, -39(%edx)
L(aligned_16_23bytes):
  movdqa %xmm0, -23(%edx)
L(aligned_16_7bytes):
  movl %eax, -7(%edx)
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Tail writers for n = 8..11 (mod 16), reached through
   L(table_16_128bytes).  EDX points one past the last byte to fill and
   EDX - n is 16 byte aligned, so the movdqa operands are aligned.  The
   entry for n falls through n-16, n-32, ... and ends with an 8-byte movq
   from XMM0 plus a 0..3 byte remainder from EAX. */

  /* n = 8 (mod 16) */
  ALIGN(4)
L(aligned_16_120bytes):
  movdqa %xmm0, -120(%edx)
L(aligned_16_104bytes):
  movdqa %xmm0, -104(%edx)
L(aligned_16_88bytes):
  movdqa %xmm0, -88(%edx)
L(aligned_16_72bytes):
  movdqa %xmm0, -72(%edx)
L(aligned_16_56bytes):
  movdqa %xmm0, -56(%edx)
L(aligned_16_40bytes):
  movdqa %xmm0, -40(%edx)
L(aligned_16_24bytes):
  movdqa %xmm0, -24(%edx)
L(aligned_16_8bytes):
  movq %xmm0, -8(%edx)
  SETRTNVAL
  RETURN

  /* n = 9 (mod 16) */
  ALIGN(4)
L(aligned_16_121bytes):
  movdqa %xmm0, -121(%edx)
L(aligned_16_105bytes):
  movdqa %xmm0, -105(%edx)
L(aligned_16_89bytes):
  movdqa %xmm0, -89(%edx)
L(aligned_16_73bytes):
  movdqa %xmm0, -73(%edx)
L(aligned_16_57bytes):
  movdqa %xmm0, -57(%edx)
L(aligned_16_41bytes):
  movdqa %xmm0, -41(%edx)
L(aligned_16_25bytes):
  movdqa %xmm0, -25(%edx)
L(aligned_16_9bytes):
  movq %xmm0, -9(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  /* n = 10 (mod 16) */
  ALIGN(4)
L(aligned_16_122bytes):
  movdqa %xmm0, -122(%edx)
L(aligned_16_106bytes):
  movdqa %xmm0, -106(%edx)
L(aligned_16_90bytes):
  movdqa %xmm0, -90(%edx)
L(aligned_16_74bytes):
  movdqa %xmm0, -74(%edx)
L(aligned_16_58bytes):
  movdqa %xmm0, -58(%edx)
L(aligned_16_42bytes):
  movdqa %xmm0, -42(%edx)
L(aligned_16_26bytes):
  movdqa %xmm0, -26(%edx)
L(aligned_16_10bytes):
  movq %xmm0, -10(%edx)
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  /* n = 11 (mod 16) */
  ALIGN(4)
L(aligned_16_123bytes):
  movdqa %xmm0, -123(%edx)
L(aligned_16_107bytes):
  movdqa %xmm0, -107(%edx)
L(aligned_16_91bytes):
  movdqa %xmm0, -91(%edx)
L(aligned_16_75bytes):
  movdqa %xmm0, -75(%edx)
L(aligned_16_59bytes):
  movdqa %xmm0, -59(%edx)
L(aligned_16_43bytes):
  movdqa %xmm0, -43(%edx)
L(aligned_16_27bytes):
  movdqa %xmm0, -27(%edx)
L(aligned_16_11bytes):
  movq %xmm0, -11(%edx)
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

/* Tail writers for n = 12..15 (mod 16), reached through
   L(table_16_128bytes).  EDX points one past the last byte to fill and
   EDX - n is 16 byte aligned, so the movdqa operands are aligned.  The
   entry for n falls through n-16, n-32, ... and ends with an 8-byte movq
   from XMM0, a 4-byte store from EAX and a 0..3 byte remainder.
   The last chain uses RETURN_END because no code follows it, and
   END(memset_atom) closes the function opened by ENTRY(memset_atom). */

  /* n = 12 (mod 16) */
  ALIGN(4)
L(aligned_16_124bytes):
  movdqa %xmm0, -124(%edx)
L(aligned_16_108bytes):
  movdqa %xmm0, -108(%edx)
L(aligned_16_92bytes):
  movdqa %xmm0, -92(%edx)
L(aligned_16_76bytes):
  movdqa %xmm0, -76(%edx)
L(aligned_16_60bytes):
  movdqa %xmm0, -60(%edx)
L(aligned_16_44bytes):
  movdqa %xmm0, -44(%edx)
L(aligned_16_28bytes):
  movdqa %xmm0, -28(%edx)
L(aligned_16_12bytes):
  movq %xmm0, -12(%edx)
  movl %eax, -4(%edx)
  SETRTNVAL
  RETURN

  /* n = 13 (mod 16) */
  ALIGN(4)
L(aligned_16_125bytes):
  movdqa %xmm0, -125(%edx)
L(aligned_16_109bytes):
  movdqa %xmm0, -109(%edx)
L(aligned_16_93bytes):
  movdqa %xmm0, -93(%edx)
L(aligned_16_77bytes):
  movdqa %xmm0, -77(%edx)
L(aligned_16_61bytes):
  movdqa %xmm0, -61(%edx)
L(aligned_16_45bytes):
  movdqa %xmm0, -45(%edx)
L(aligned_16_29bytes):
  movdqa %xmm0, -29(%edx)
L(aligned_16_13bytes):
  movq %xmm0, -13(%edx)
  movl %eax, -5(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN

  /* n = 14 (mod 16) */
  ALIGN(4)
L(aligned_16_126bytes):
  movdqa %xmm0, -126(%edx)
L(aligned_16_110bytes):
  movdqa %xmm0, -110(%edx)
L(aligned_16_94bytes):
  movdqa %xmm0, -94(%edx)
L(aligned_16_78bytes):
  movdqa %xmm0, -78(%edx)
L(aligned_16_62bytes):
  movdqa %xmm0, -62(%edx)
L(aligned_16_46bytes):
  movdqa %xmm0, -46(%edx)
L(aligned_16_30bytes):
  movdqa %xmm0, -30(%edx)
L(aligned_16_14bytes):
  movq %xmm0, -14(%edx)
  movl %eax, -6(%edx)
  movw %ax, -2(%edx)
  SETRTNVAL
  RETURN

  /* n = 15 (mod 16) */
  ALIGN(4)
L(aligned_16_127bytes):
  movdqa %xmm0, -127(%edx)
L(aligned_16_111bytes):
  movdqa %xmm0, -111(%edx)
L(aligned_16_95bytes):
  movdqa %xmm0, -95(%edx)
L(aligned_16_79bytes):
  movdqa %xmm0, -79(%edx)
L(aligned_16_63bytes):
  movdqa %xmm0, -63(%edx)
L(aligned_16_47bytes):
  movdqa %xmm0, -47(%edx)
L(aligned_16_31bytes):
  movdqa %xmm0, -31(%edx)
L(aligned_16_15bytes):
  movq %xmm0, -15(%edx)
  movl %eax, -7(%edx)
  movw %ax, -3(%edx)
  movb %al, -1(%edx)
  SETRTNVAL
  RETURN_END

END(memset_atom)