blob: e03cd1a1c3b8fa54f2168e3466456e4865c7aada [file] [log] [blame]
Bruce Beare8ff1a272010-03-04 11:03:37 -08001/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
Elliott Hughes81d6a182016-03-03 16:10:33 -080031#include <private/bionic_asm.h>
32
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040033#include "cache.h"
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040034
/* L(name) spells an assembler-local label: the .L prefix marks it as
   non-exported.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) pads to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Fallback definitions of the CFI helpers, used only when the including
   environment has not already provided them.  */
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* Unwind annotations for a 4-byte push of REG: the CFA moves 4 bytes
   further from %esp and REG is saved at offset 0.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

/* Unwind annotations for the matching 4-byte pop of REG.  */
#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

/* Push/pop a register together with its unwind annotations.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
65
/* Stack offsets of the arguments relative to %esp after ENTRANCE has run.
   PARMS (defined below, per build mode) is the offset of the first
   argument; the remaining arguments follow in 4-byte slots.
   CHK_DST_LEN is only read by __memset_chk.  */
#define DST		PARMS
#define CHR		DST+4
#define LEN		CHR+4
#define CHK_DST_LEN	(LEN+4)
/* Load the return value, the DST argument, into %eax.  */
#define SETRTNVAL	movl DST(%esp), %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -080071
#if (defined SHARED || defined __PIC__)
/* Position-independent build.  EBX is needed as a scratch register for
   the jump-table dispatch, so it is saved on entry and restored on every
   return.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, put the
   unwind state back to "EBX pushed" for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Return address + saved EBX.  */
/* Table entries are offsets relative to the start of the table.  */
# define JMPTBL(I, B)	I - B

/* Load an entry of a jump table into EBX and branch to it.  TABLE is a
   jump table of relative offsets, indexed by ECX.  ECX is also added to
   EDX, so the branch target sees EDX advanced by ECX bytes.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* Load the PC (the address of the next add) into EBX.  */	\
    call	__x86.get_pc_thunk.bx;				\
    /* Turn it into the address of the jump table.  */		\
    addl	$(TABLE - .), %ebx;				\
    /* Add the selected relative entry to the table address.  */ \
    addl	(%ebx,%ecx,4), %ebx;				\
    addl	%ecx, %edx;					\
    /* EBX holds the target and EDX has been adjusted.  Go.  */	\
    jmp		*%ebx

/* PC thunk: returns its own return address in EBX.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
/* Non-PIC build.  EBX is not touched on entry, so there is nothing to
   save and the first argument sits just above the return address.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
/* Table entries are absolute addresses.  */
# define JMPTBL(I, B)	I

/* Branch to an entry of a jump table.  TABLE is a jump table of absolute
   addresses, indexed by ECX.  ECX is also added to EDX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    addl	%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif
114
/* __memset_chk: memset with a destination-size check.
   Stack arguments (see the DST/CHR/LEN/CHK_DST_LEN offsets): destination,
   fill byte, byte count, and the size of the destination object.
   If the count fits, control continues inside memset with the count
   already loaded in ECX; otherwise it tail-jumps to __memset_chk_fail.  */
ENTRY(__memset_chk)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx		/* Unsigned compare.  */
	jbe	L(memset_length_loaded)		/* Count fits: go fill.  */

	/* Count exceeds the destination size.  Undo ENTRANCE before the
	   tail jump, so that __memset_chk_fail is entered with the return
	   address on top of the stack and the caller's EBX restored.
	   ENTRANCE only pushes EBX in position-independent builds.  */
#if (defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	jmp	__memset_chk_fail
END(__memset_chk)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400123
Bruce Beare8ff1a272010-03-04 11:03:37 -0800124 .section .text.sse2,"ax",@progbits
125 ALIGN (4)
Elliott Hughes81d6a182016-03-03 16:10:33 -0800126ENTRY (memset)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800127 ENTRANCE
128
129 movl LEN(%esp), %ecx
Elliott Hughes204990c2016-03-24 22:34:47 -0700130L(memset_length_loaded):
Bruce Beare8ff1a272010-03-04 11:03:37 -0800131 movzbl CHR(%esp), %eax
132 movb %al, %ah
133 /* Fill the whole EAX with pattern. */
134 movl %eax, %edx
135 shl $16, %eax
136 or %edx, %eax
Elliott Hughes81d6a182016-03-03 16:10:33 -0800137 movl DST(%esp), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800138 cmp $32, %ecx
139 jae L(32bytesormore)
140
/* ECX < 32: advance EDX by ECX and dispatch on the exact count.  The
   targets store at negative offsets from the advanced EDX.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


/* One entry per byte count 0..31.  */
	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection
181
/* Tail writers for counts 0..31.  On entry EDX points one past the last
   byte to fill and EAX holds the 4-byte pattern.  Each ladder is entered
   at the label matching the count and falls through, storing 4 bytes per
   step; the final step writes the count modulo 4.  */

	ALIGN (4)
/* Count is a multiple of 4.  */
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
/* Count is 1 modulo 4.  */
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* Count is 2 modulo 4.  */
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* Count is 3 modulo 4.  */
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
261
262 ALIGN (4)
263/* ECX > 32 and EDX is 4 byte aligned. */
264L(32bytesormore):
265 /* Fill xmm0 with the pattern. */
Bruce Beare8ff1a272010-03-04 11:03:37 -0800266 movd %eax, %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800267 pshufd $0, %xmm0, %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800268 testl $0xf, %edx
269 jz L(aligned_16)
270/* ECX > 32 and EDX is not 16 byte aligned. */
271L(not_aligned_16):
272 movdqu %xmm0, (%edx)
273 movl %edx, %eax
274 and $-16, %edx
275 add $16, %edx
276 sub %edx, %eax
277 add %eax, %ecx
278 movd %xmm0, %eax
279
280 ALIGN (4)
281L(aligned_16):
282 cmp $128, %ecx
283 jae L(128bytesormore)
284
285L(aligned_16_less128bytes):
286 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
287
288 ALIGN (4)
289L(128bytesormore):
290#ifdef SHARED_CACHE_SIZE
291 PUSH (%ebx)
292 mov $SHARED_CACHE_SIZE, %ebx
293#else
Nick Kralevich0aa82892011-11-11 15:47:24 -0800294# if (defined SHARED || defined __PIC__)
Varvara Rainchik5a922842014-04-24 15:41:20 +0400295 call __x86.get_pc_thunk.bx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800296 add $_GLOBAL_OFFSET_TABLE_, %ebx
297 mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
298# else
299 PUSH (%ebx)
300 mov __x86_shared_cache_size, %ebx
301# endif
302#endif
303 cmp %ebx, %ecx
304 jae L(128bytesormore_nt_start)
305
306
307#ifdef DATA_CACHE_SIZE
308 POP (%ebx)
Bruce Beare124a5422010-10-11 12:24:41 -0700309# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800310 cmp $DATA_CACHE_SIZE, %ecx
311#else
Nick Kralevich0aa82892011-11-11 15:47:24 -0800312# if (defined SHARED || defined __PIC__)
Bruce Beare124a5422010-10-11 12:24:41 -0700313# define RESTORE_EBX_STATE
Varvara Rainchik5a922842014-04-24 15:41:20 +0400314 call __x86.get_pc_thunk.bx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800315 add $_GLOBAL_OFFSET_TABLE_, %ebx
316 cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
317# else
318 POP (%ebx)
Bruce Beare124a5422010-10-11 12:24:41 -0700319# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800320 cmp __x86_data_cache_size, %ecx
321# endif
322#endif
323
324 jae L(128bytes_L2_normal)
325 subl $128, %ecx
326L(128bytesormore_normal):
327 sub $128, %ecx
328 movdqa %xmm0, (%edx)
329 movdqa %xmm0, 0x10(%edx)
330 movdqa %xmm0, 0x20(%edx)
331 movdqa %xmm0, 0x30(%edx)
332 movdqa %xmm0, 0x40(%edx)
333 movdqa %xmm0, 0x50(%edx)
334 movdqa %xmm0, 0x60(%edx)
335 movdqa %xmm0, 0x70(%edx)
336 lea 128(%edx), %edx
337 jb L(128bytesless_normal)
338
339
340 sub $128, %ecx
341 movdqa %xmm0, (%edx)
342 movdqa %xmm0, 0x10(%edx)
343 movdqa %xmm0, 0x20(%edx)
344 movdqa %xmm0, 0x30(%edx)
345 movdqa %xmm0, 0x40(%edx)
346 movdqa %xmm0, 0x50(%edx)
347 movdqa %xmm0, 0x60(%edx)
348 movdqa %xmm0, 0x70(%edx)
349 lea 128(%edx), %edx
350 jae L(128bytesormore_normal)
351
352L(128bytesless_normal):
Bruce Beare124a5422010-10-11 12:24:41 -0700353 add $128, %ecx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800354 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
355
356 ALIGN (4)
357L(128bytes_L2_normal):
358 prefetcht0 0x380(%edx)
359 prefetcht0 0x3c0(%edx)
360 sub $128, %ecx
361 movdqa %xmm0, (%edx)
362 movaps %xmm0, 0x10(%edx)
363 movaps %xmm0, 0x20(%edx)
364 movaps %xmm0, 0x30(%edx)
365 movaps %xmm0, 0x40(%edx)
366 movaps %xmm0, 0x50(%edx)
367 movaps %xmm0, 0x60(%edx)
368 movaps %xmm0, 0x70(%edx)
369 add $128, %edx
370 cmp $128, %ecx
371 jae L(128bytes_L2_normal)
372
373L(128bytesless_L2_normal):
374 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
375
Bruce Beare124a5422010-10-11 12:24:41 -0700376 RESTORE_EBX_STATE
Bruce Beare8ff1a272010-03-04 11:03:37 -0800377L(128bytesormore_nt_start):
378 sub %ebx, %ecx
Bruce Beare124a5422010-10-11 12:24:41 -0700379 mov %ebx, %eax
380 and $0x7f, %eax
381 add %eax, %ecx
382 movd %xmm0, %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -0800383 ALIGN (4)
384L(128bytesormore_shared_cache_loop):
385 prefetcht0 0x3c0(%edx)
386 prefetcht0 0x380(%edx)
387 sub $0x80, %ebx
388 movdqa %xmm0, (%edx)
389 movdqa %xmm0, 0x10(%edx)
390 movdqa %xmm0, 0x20(%edx)
391 movdqa %xmm0, 0x30(%edx)
392 movdqa %xmm0, 0x40(%edx)
393 movdqa %xmm0, 0x50(%edx)
394 movdqa %xmm0, 0x60(%edx)
395 movdqa %xmm0, 0x70(%edx)
396 add $0x80, %edx
397 cmp $0x80, %ebx
398 jae L(128bytesormore_shared_cache_loop)
399 cmp $0x80, %ecx
400 jb L(shared_cache_loop_end)
401 ALIGN (4)
402L(128bytesormore_nt):
403 sub $0x80, %ecx
404 movntdq %xmm0, (%edx)
405 movntdq %xmm0, 0x10(%edx)
406 movntdq %xmm0, 0x20(%edx)
407 movntdq %xmm0, 0x30(%edx)
408 movntdq %xmm0, 0x40(%edx)
409 movntdq %xmm0, 0x50(%edx)
410 movntdq %xmm0, 0x60(%edx)
411 movntdq %xmm0, 0x70(%edx)
412 add $0x80, %edx
413 cmp $0x80, %ecx
414 jae L(128bytesormore_nt)
415 sfence
416L(shared_cache_loop_end):
Nick Kralevich0aa82892011-11-11 15:47:24 -0800417#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800418 POP (%ebx)
419#endif
420 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
421
422
/* Dispatch table for the 16-byte-aligned tail: one entry per remaining
   byte count 0..127.  */
	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection
555
/* Tail writers for remaining counts 0..127.  On entry EDX points one past
   the last byte to fill, XMM0 holds the 16-byte pattern and EAX the
   4-byte pattern.  There is one ladder per value of count modulo 16: it
   is entered at the label matching the count, falls through one 16-byte
   movdqa per step, and finishes the last (count mod 16) bytes with
   8/4/2/1-byte stores.  */

	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no code follows, so the unwind
	   state does not need to be re-established.  */
	RETURN_END

END (memset)