blob: 30fb3f1332d0713927dadec11133e77ad1012f5a [file] [log] [blame]
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
Elliott Hughes81d6a182016-03-03 16:10:33 -080031#include <private/bionic_asm.h>
32
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040033#include "cache.h"
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040034
/* Local (non-exported) label helper: L(foo) becomes .Lfoo.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN (n) aligns to a 2^n byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers around the assembler's CFI directives; each one is only
   defined here when an included header has not provided it already.  */
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves by 4 and
   REG is recorded as saved at offset 0.  */
#define CFI_PUSH(REG)						\
	cfi_adjust_cfa_offset (4);				\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching 4-byte pop of REG.  */
#define CFI_POP(REG)						\
	cfi_adjust_cfa_offset (-4);				\
	cfi_restore (REG)

/* Push/pop REG together with the corresponding CFI annotations.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
65
/* Stack offsets of the arguments, relative to %esp once ENTRANCE has run.
   PARMS (defined below, per build flavour) is the offset of the first
   argument; the remaining arguments follow in 4-byte slots.  */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)

/* Load the return value (the original destination pointer) into EAX.  */
#define SETRTNVAL	movl DST(%esp), %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -080071
#if (defined SHARED || defined __PIC__)
/* Position-independent build: EBX is needed to compute addresses, so it
   is saved on entry and restored on every return path.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is for returns in the middle of the function: after the ret it
   re-declares EBX as pushed for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index; it is also
   added to EDX before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    jmp		*%ebx

/* PC thunk: returns with EBX holding the caller's return address, i.e.
   the address of the instruction following the call.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
/* Non-PIC build: nothing is saved on entry and the jump tables hold
   absolute addresses.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the table index; it is also added to EDX
   before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif
114
/* __memset_chk: length-checked front end for memset.

   Stack arguments, in order: dst, chr, len, dst_len (see the DST, CHR,
   LEN and CHK_DST_LEN offsets).  If len <= dst_len (unsigned) control is
   transferred to memset with the stack untouched, so memset sees a normal
   call frame; otherwise it tail-jumps to __memset_chk_fail.

   ENTRANCE has NOT run here, so nothing but the return address sits in
   front of the arguments.  The DST/CHR/LEN offsets are relative to PARMS,
   which in PIC builds already accounts for a pushed EBX; rebase them to
   the un-pushed frame (first argument at 4(%esp)) by subtracting PARMS
   and adding 4.

   Clobbers: ECX, flags.  */
ENTRY(__memset_chk)
	movl	(LEN - PARMS + 4)(%esp), %ecx		/* ECX = len  */
	/* AT&T operand order: flags are set from ECX - dst_len.  */
	cmpl	(CHK_DST_LEN - PARMS + 4)(%esp), %ecx
	jbe	memset					/* len <= dst_len: OK  */

	jmp	__memset_chk_fail			/* len > dst_len  */
END(__memset_chk)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400122
Bruce Beare8ff1a272010-03-04 11:03:37 -0800123 .section .text.sse2,"ax",@progbits
124 ALIGN (4)
Elliott Hughes81d6a182016-03-03 16:10:33 -0800125ENTRY (memset)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800126 ENTRANCE
127
128 movl LEN(%esp), %ecx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800129 movzbl CHR(%esp), %eax
130 movb %al, %ah
131 /* Fill the whole EAX with pattern. */
132 movl %eax, %edx
133 shl $16, %eax
134 or %edx, %eax
Elliott Hughes81d6a182016-03-03 16:10:33 -0800135 movl DST(%esp), %edx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800136 cmp $32, %ecx
137 jae L(32bytesormore)
138
139L(write_less32bytes):
140 BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
141
142
/* Jump table for lengths 0..31, indexed by the byte count in ECX.
   Each entry is 4 bytes (hence the 4-byte alignment).  */
	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection
179
/* Tails for lengths 0..31.  On arrival EDX points one past the last byte
   to write (BRANCH_TO_JMPTBL_ENTRY added ECX to it) and EAX holds the
   replicated fill byte.  Each chain is grouped by length mod 4: entering
   at write_Nbytes stores dwords at -N, -(N-4), ... and falls through to
   the 0..3 byte remainder.  */

	/* Lengths that are a multiple of 4.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	/* Lengths of the form 4k + 1.  */
	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Lengths of the form 4k + 2.  */
	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Lengths of the form 4k + 3.  */
	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
259
260 ALIGN (4)
261/* ECX > 32 and EDX is 4 byte aligned. */
262L(32bytesormore):
263 /* Fill xmm0 with the pattern. */
Bruce Beare8ff1a272010-03-04 11:03:37 -0800264 movd %eax, %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800265 pshufd $0, %xmm0, %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800266 testl $0xf, %edx
267 jz L(aligned_16)
268/* ECX > 32 and EDX is not 16 byte aligned. */
269L(not_aligned_16):
270 movdqu %xmm0, (%edx)
271 movl %edx, %eax
272 and $-16, %edx
273 add $16, %edx
274 sub %edx, %eax
275 add %eax, %ecx
276 movd %xmm0, %eax
277
278 ALIGN (4)
279L(aligned_16):
280 cmp $128, %ecx
281 jae L(128bytesormore)
282
283L(aligned_16_less128bytes):
284 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
285
286 ALIGN (4)
287L(128bytesormore):
288#ifdef SHARED_CACHE_SIZE
289 PUSH (%ebx)
290 mov $SHARED_CACHE_SIZE, %ebx
291#else
Nick Kralevich0aa82892011-11-11 15:47:24 -0800292# if (defined SHARED || defined __PIC__)
Varvara Rainchik5a922842014-04-24 15:41:20 +0400293 call __x86.get_pc_thunk.bx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800294 add $_GLOBAL_OFFSET_TABLE_, %ebx
295 mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
296# else
297 PUSH (%ebx)
298 mov __x86_shared_cache_size, %ebx
299# endif
300#endif
301 cmp %ebx, %ecx
302 jae L(128bytesormore_nt_start)
303
304
305#ifdef DATA_CACHE_SIZE
306 POP (%ebx)
Bruce Beare124a5422010-10-11 12:24:41 -0700307# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800308 cmp $DATA_CACHE_SIZE, %ecx
309#else
Nick Kralevich0aa82892011-11-11 15:47:24 -0800310# if (defined SHARED || defined __PIC__)
Bruce Beare124a5422010-10-11 12:24:41 -0700311# define RESTORE_EBX_STATE
Varvara Rainchik5a922842014-04-24 15:41:20 +0400312 call __x86.get_pc_thunk.bx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800313 add $_GLOBAL_OFFSET_TABLE_, %ebx
314 cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
315# else
316 POP (%ebx)
Bruce Beare124a5422010-10-11 12:24:41 -0700317# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800318 cmp __x86_data_cache_size, %ecx
319# endif
320#endif
321
322 jae L(128bytes_L2_normal)
323 subl $128, %ecx
324L(128bytesormore_normal):
325 sub $128, %ecx
326 movdqa %xmm0, (%edx)
327 movdqa %xmm0, 0x10(%edx)
328 movdqa %xmm0, 0x20(%edx)
329 movdqa %xmm0, 0x30(%edx)
330 movdqa %xmm0, 0x40(%edx)
331 movdqa %xmm0, 0x50(%edx)
332 movdqa %xmm0, 0x60(%edx)
333 movdqa %xmm0, 0x70(%edx)
334 lea 128(%edx), %edx
335 jb L(128bytesless_normal)
336
337
338 sub $128, %ecx
339 movdqa %xmm0, (%edx)
340 movdqa %xmm0, 0x10(%edx)
341 movdqa %xmm0, 0x20(%edx)
342 movdqa %xmm0, 0x30(%edx)
343 movdqa %xmm0, 0x40(%edx)
344 movdqa %xmm0, 0x50(%edx)
345 movdqa %xmm0, 0x60(%edx)
346 movdqa %xmm0, 0x70(%edx)
347 lea 128(%edx), %edx
348 jae L(128bytesormore_normal)
349
350L(128bytesless_normal):
Bruce Beare124a5422010-10-11 12:24:41 -0700351 add $128, %ecx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800352 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
353
354 ALIGN (4)
355L(128bytes_L2_normal):
356 prefetcht0 0x380(%edx)
357 prefetcht0 0x3c0(%edx)
358 sub $128, %ecx
359 movdqa %xmm0, (%edx)
360 movaps %xmm0, 0x10(%edx)
361 movaps %xmm0, 0x20(%edx)
362 movaps %xmm0, 0x30(%edx)
363 movaps %xmm0, 0x40(%edx)
364 movaps %xmm0, 0x50(%edx)
365 movaps %xmm0, 0x60(%edx)
366 movaps %xmm0, 0x70(%edx)
367 add $128, %edx
368 cmp $128, %ecx
369 jae L(128bytes_L2_normal)
370
371L(128bytesless_L2_normal):
372 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
373
Bruce Beare124a5422010-10-11 12:24:41 -0700374 RESTORE_EBX_STATE
Bruce Beare8ff1a272010-03-04 11:03:37 -0800375L(128bytesormore_nt_start):
376 sub %ebx, %ecx
Bruce Beare124a5422010-10-11 12:24:41 -0700377 mov %ebx, %eax
378 and $0x7f, %eax
379 add %eax, %ecx
380 movd %xmm0, %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -0800381 ALIGN (4)
382L(128bytesormore_shared_cache_loop):
383 prefetcht0 0x3c0(%edx)
384 prefetcht0 0x380(%edx)
385 sub $0x80, %ebx
386 movdqa %xmm0, (%edx)
387 movdqa %xmm0, 0x10(%edx)
388 movdqa %xmm0, 0x20(%edx)
389 movdqa %xmm0, 0x30(%edx)
390 movdqa %xmm0, 0x40(%edx)
391 movdqa %xmm0, 0x50(%edx)
392 movdqa %xmm0, 0x60(%edx)
393 movdqa %xmm0, 0x70(%edx)
394 add $0x80, %edx
395 cmp $0x80, %ebx
396 jae L(128bytesormore_shared_cache_loop)
397 cmp $0x80, %ecx
398 jb L(shared_cache_loop_end)
399 ALIGN (4)
400L(128bytesormore_nt):
401 sub $0x80, %ecx
402 movntdq %xmm0, (%edx)
403 movntdq %xmm0, 0x10(%edx)
404 movntdq %xmm0, 0x20(%edx)
405 movntdq %xmm0, 0x30(%edx)
406 movntdq %xmm0, 0x40(%edx)
407 movntdq %xmm0, 0x50(%edx)
408 movntdq %xmm0, 0x60(%edx)
409 movntdq %xmm0, 0x70(%edx)
410 add $0x80, %edx
411 cmp $0x80, %ecx
412 jae L(128bytesormore_nt)
413 sfence
414L(shared_cache_loop_end):
Nick Kralevich0aa82892011-11-11 15:47:24 -0800415#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800416 POP (%ebx)
417#endif
418 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
419
420
/* Jump table for tails of 0..127 bytes after a 16-byte aligned pointer,
   indexed by the byte count in ECX.  Each entry is 4 bytes.  */
	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection
553
/* Tails for 0..127 bytes starting at a 16-byte aligned address.  On
   arrival EDX points one past the last byte to write
   (BRANCH_TO_JMPTBL_ENTRY added ECX to it), XMM0 holds the broadcast
   pattern and EAX its low 32 bits.  Chains are grouped by length mod 16:
   entering at aligned_16_Nbytes issues aligned 16-byte stores at -N,
   -(N-16), ... and falls through to the 0..15 byte remainder, which is
   written with 8/4/2/1-byte stores.  */

	/* Remainder 0.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* Remainder 1.  */
	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 2.  */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 3.  */
	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 4.  */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 5.  */
	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 6.  */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 7.  */
	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 8.  */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 9.  */
	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 10.  */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 11.  */
	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 12.  */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 13.  */
	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 14.  */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 15.  This is the last return in the function, so it
	   uses RETURN_END (no CFI state is re-established afterwards).  */
	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END (memset)