blob: 04de18f747dc8f651a8211c4f8314c39cd13c946 [file] [log] [blame]
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
Elliott Hughes81d6a182016-03-03 16:10:33 -080031#include <private/bionic_asm.h>
32
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040033#include "cache.h"
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040034
/* L(name) names an assembler-local label (.Lname).  The #ifndef guard lets
   a definition supplied earlier take precedence.  */
#ifndef L
# define L(label)       .L##label
#endif

/* ALIGN(n) pads the current location to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)       .p2align n
#endif

/* Function-style spellings of the assembler's CFI directives, each one
   defined only if nothing earlier has provided it.  */
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)       .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)               .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)     .cfi_adjust_cfa_offset off
#endif

/* Unwind annotation for a 4-byte push of REG: the CFA offset grows by 4
   and REG is recorded as saved at offset 0 from the stack pointer.  */
#define CFI_PUSH(REG)                   \
    cfi_adjust_cfa_offset (4);          \
    cfi_rel_offset (REG, 0)

/* Unwind annotation for the matching pop: the CFA offset shrinks by 4
   and REG is marked as holding its original value again.  */
#define CFI_POP(REG)                    \
    cfi_adjust_cfa_offset (-4);         \
    cfi_restore (REG)

/* Push/pop a 32-bit register together with its unwind annotation.  */
#define PUSH(REG)       pushl REG; CFI_PUSH (REG)
#define POP(REG)        popl REG; CFI_POP (REG)
65
/* Stack offsets of the incoming arguments, relative to ESP once ENTRANCE
   has run.  PARMS is the offset of the first argument and is defined per
   build mode further down.  Every derived offset is parenthesized so it
   expands as a single displacement, as CHK_DST_LEN already was.  */
#define DST             PARMS
#define CHR             (DST+4)
#define LEN             (CHR+4)
#define CHK_DST_LEN     (LEN+4)
/* Load the original destination pointer into EAX as the return value.  */
#define SETRTNVAL       movl DST(%esp), %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -080071
/* Build-mode specific entry/exit sequences, argument base offset and
   jump-table dispatch.  */
#if (defined SHARED || defined __PIC__)
/* Position-independent build: the dispatch macro below uses EBX as its
   PC-relative base, so EBX is pushed on entry and popped before each ret.  */
# define ENTRANCE       PUSH (%ebx);
# define RETURN_END     POP (%ebx); ret
/* RETURN is for exits that are followed by more code: after the ret it
   re-declares the "EBX is pushed" unwind state that POP just cancelled.  */
# define RETURN         RETURN_END; CFI_PUSH (%ebx)
# define PARMS          8       /* Return address + saved EBX.  */
/* Table entries are stored as offsets from the start of table B.  */
# define JMPTBL(I, B)   I - B

/* Dispatch through TABLE, a jump table of table-relative offsets, using
   ECX as the index.  Clobbers EBX and adds ECX to EDX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)                                  \
    /* EBX = address of the instruction following the call.  */         \
    call        __x86.get_pc_thunk.bx;                                  \
    /* EBX = address of TABLE.  */                                      \
    addl        $(TABLE - .), %ebx;                                     \
    /* EBX = TABLE + TABLE[ECX], i.e. the absolute target.  */          \
    addl        (%ebx,%ecx,4), %ebx;                                    \
    /* EDX += ECX; the targets store at negative offsets from EDX.  */  \
    addl        %ecx, %edx;                                             \
    /* The jump table is resolved and EDX is adjusted.  Go.  */         \
    jmp         *%ebx

/* PC thunk: returns its own return address in EBX.  */
        .section        .gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
        .globl  __x86.get_pc_thunk.bx
        .hidden __x86.get_pc_thunk.bx
        ALIGN (4)
        .type   __x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
        movl    (%esp), %ebx
        ret
#else
/* Non-PIC build: nothing is saved on entry, so the first argument sits
   directly above the return address.  */
# define ENTRANCE
# define RETURN_END     ret
# define RETURN         RETURN_END
# define PARMS          4
/* Table entries are absolute addresses.  */
# define JMPTBL(I, B)   I

/* Dispatch through TABLE, a jump table of absolute addresses, using ECX
   as the index.  Adds ECX to EDX first.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)          \
    addl        %ecx, %edx;                     \
    jmp         *TABLE(,%ecx,4)
#endif
114
/* __memset_chk: length-checked front end for memset.

   Reads the stack arguments at LEN(%esp) and CHK_DST_LEN(%esp)
   (presumably the requested fill length and the known destination size --
   confirm against the C prototype).  If LEN <= CHK_DST_LEN (unsigned) it
   continues inside memset at L(memset_length_loaded) with ECX = LEN;
   otherwise it tail-jumps to __memset_chk_fail.

   ENTRANCE only pushes EBX in the SHARED/__PIC__ build, so the failure
   path must undo that push only in that build.  Popping unconditionally
   would, in a non-PIC build, pull the return address into EBX and leave
   the handler with a corrupted stack.  */
ENTRY(__memset_chk)
        ENTRANCE

        movl    LEN(%esp), %ecx
        cmpl    CHK_DST_LEN(%esp), %ecx
        jna     L(memset_length_loaded)         /* LEN <= CHK_DST_LEN: OK.  */

#if (defined SHARED || defined __PIC__)
        POP (%ebx)                              /* Undo ENTRANCE without returning.  */
#endif
        jmp     __memset_chk_fail
END(__memset_chk)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400125
/* memset entry point.  Arguments are read from the stack at DST(%esp),
   CHR(%esp) and LEN(%esp); every exit path loads DST into EAX before
   returning.

   Register roles after this prologue:
     EAX  the fill byte replicated into all four bytes
     ECX  number of bytes still to write
     EDX  destination cursor  */
        .section        .text.sse2,"ax",@progbits
        ALIGN (4)
ENTRY (memset)
        ENTRANCE

        movl    LEN(%esp), %ecx
/* __memset_chk joins here with ECX already holding the length.  */
L(memset_length_loaded):
        movzbl  CHR(%esp), %eax
        movb    %al, %ah                /* AX  = byte:byte  */
        movl    %eax, %edx
        shll    $16, %eax
        orl     %edx, %eax              /* EAX = byte x 4  */
        movl    DST(%esp), %edx
        cmpl    $32, %ecx
        jae     L(32bytesormore)

/* ECX < 32: jump straight to the stub that writes exactly ECX bytes.  */
L(write_less32bytes):
        BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
145
146
/* Jump table for fills of 0..31 bytes: entry N leads to L(write_Nbytes).  */
        .pushsection    .rodata.sse2,"a",@progbits
        ALIGN (2)
L(table_less_32bytes):
        .int    JMPTBL (L(write_0bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_1bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_2bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_3bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_4bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_5bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_6bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_7bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_8bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_9bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_10bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_11bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_12bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_13bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_14bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_15bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_16bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_17bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_18bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_19bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_20bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_21bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_22bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_23bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_24bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_25bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_26bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_27bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_28bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_29bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_30bytes), L(table_less_32bytes))
        .int    JMPTBL (L(write_31bytes), L(table_less_32bytes))
        .popsection
183
/* Tail stubs for fills of 0..31 bytes, reached through
   L(table_less_32bytes).  The dispatch has already added the count to EDX,
   so each stub stores at negative offsets from EDX.  The stubs are grouped
   by count modulo 4; entering at L(write_Nbytes) falls through the
   remaining dword stores and then writes the 0..3 leftover bytes.  */

/* Counts with N % 4 == 0.  */
        ALIGN (4)
L(write_28bytes):
        movl    %eax, -28(%edx)
L(write_24bytes):
        movl    %eax, -24(%edx)
L(write_20bytes):
        movl    %eax, -20(%edx)
L(write_16bytes):
        movl    %eax, -16(%edx)
L(write_12bytes):
        movl    %eax, -12(%edx)
L(write_8bytes):
        movl    %eax, -8(%edx)
L(write_4bytes):
        movl    %eax, -4(%edx)
L(write_0bytes):
        SETRTNVAL
        RETURN

/* Counts with N % 4 == 1: dwords, then one byte.  */
        ALIGN (4)
L(write_29bytes):
        movl    %eax, -29(%edx)
L(write_25bytes):
        movl    %eax, -25(%edx)
L(write_21bytes):
        movl    %eax, -21(%edx)
L(write_17bytes):
        movl    %eax, -17(%edx)
L(write_13bytes):
        movl    %eax, -13(%edx)
L(write_9bytes):
        movl    %eax, -9(%edx)
L(write_5bytes):
        movl    %eax, -5(%edx)
L(write_1bytes):
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN

/* Counts with N % 4 == 2: dwords, then one word.  */
        ALIGN (4)
L(write_30bytes):
        movl    %eax, -30(%edx)
L(write_26bytes):
        movl    %eax, -26(%edx)
L(write_22bytes):
        movl    %eax, -22(%edx)
L(write_18bytes):
        movl    %eax, -18(%edx)
L(write_14bytes):
        movl    %eax, -14(%edx)
L(write_10bytes):
        movl    %eax, -10(%edx)
L(write_6bytes):
        movl    %eax, -6(%edx)
L(write_2bytes):
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

/* Counts with N % 4 == 3: dwords, then one word and one byte.  */
        ALIGN (4)
L(write_31bytes):
        movl    %eax, -31(%edx)
L(write_27bytes):
        movl    %eax, -27(%edx)
L(write_23bytes):
        movl    %eax, -23(%edx)
L(write_19bytes):
        movl    %eax, -19(%edx)
L(write_15bytes):
        movl    %eax, -15(%edx)
L(write_11bytes):
        movl    %eax, -11(%edx)
L(write_7bytes):
        movl    %eax, -7(%edx)
L(write_3bytes):
        movw    %ax, -3(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN
263
/* ECX >= 32 here (the caller branched with jae after cmp $32).  Nothing is
   known yet about the alignment of EDX.  */
        ALIGN (4)
L(32bytesormore):
        /* Broadcast the 4-byte pattern in EAX across all of XMM0.  */
        movd    %eax, %xmm0
        pshufd  $0, %xmm0, %xmm0
        testl   $0xf, %edx
        jz      L(aligned_16)
/* EDX is not 16-byte aligned: cover the unaligned head with one unaligned
   16-byte store, then round EDX up to the next 16-byte boundary and take
   the skipped bytes off the count.  */
L(not_aligned_16):
        movdqu  %xmm0, (%edx)
        movl    %edx, %eax              /* EAX = old cursor  */
        andl    $-16, %edx
        addl    $16, %edx               /* EDX = next 16-byte boundary  */
        subl    %edx, %eax              /* EAX = old - new (negative)  */
        addl    %eax, %ecx              /* ECX -= bytes just covered  */
        movd    %xmm0, %eax             /* Reload the pattern into EAX.  */

/* EDX is 16-byte aligned from here on.  */
        ALIGN (4)
L(aligned_16):
        cmpl    $128, %ecx
        jae     L(128bytesormore)

/* ECX < 128: dispatch to the stub that writes exactly ECX bytes.  */
L(aligned_16_less128bytes):
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
289
/* ECX >= 128 and EDX is 16-byte aligned.  Pick a store strategy by
   comparing the count against the shared and data cache sizes.  Those come
   either from the SHARED_CACHE_SIZE / DATA_CACHE_SIZE macros or from the
   __x86_shared_cache_size / __x86_data_cache_size variables.

   NOTE(review): the extra PUSH of EBX below is keyed on SHARED_CACHE_SIZE
   while the matching POP (here and at L(shared_cache_loop_end)) is keyed
   on DATA_CACHE_SIZE.  The stack stays balanced only if both macros are
   defined or both are undefined -- presumably cache.h guarantees that;
   confirm.  */
        ALIGN (4)
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
        PUSH (%ebx)
        movl    $SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
        call    __x86.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx
        movl    __x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
        PUSH (%ebx)
        movl    __x86_shared_cache_size, %ebx
# endif
#endif
        /* Count >= shared cache size: take the non-temporal path, which
           expects that size in EBX.  */
        cmpl    %ebx, %ecx
        jae     L(128bytesormore_nt_start)


        /* RESTORE_EBX_STATE re-declares the "EBX is pushed" unwind state
           for L(128bytesormore_nt_start) in the variants that pop EBX
           here; it is empty in the variant that never pushed it.  */
#ifdef DATA_CACHE_SIZE
        POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
        cmpl    $DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
# define RESTORE_EBX_STATE
        call    __x86.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx
        cmpl    __x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
        POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
        cmpl    __x86_data_cache_size, %ecx
# endif
#endif

        jae     L(128bytes_L2_normal)
        /* Bias ECX by -128 so that the borrow from each "subl $128" below
           tells whether another full 128-byte block remains after the one
           being written.  */
        subl    $128, %ecx
/* Loop unrolled twice, 128 bytes per half.  The stores and the leal leave
   the flags from subl intact for the conditional jump that follows.  */
L(128bytesormore_normal):
        subl    $128, %ecx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        leal    128(%edx), %edx
        jb      L(128bytesless_normal)


        subl    $128, %ecx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        leal    128(%edx), %edx
        jae     L(128bytesormore_normal)

/* Fewer than 128 bytes remain: undo the last subtraction to get the true
   remainder in ECX, then dispatch to the matching tail stub.  */
L(128bytesless_normal):
        addl    $128, %ecx
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
357
/* Entered when the count is at least the data cache size but below the
   shared cache size.  Writes 128 bytes per iteration with aligned stores
   while prefetching 0x380 and 0x3c0 bytes ahead of the cursor.  ECX >= 128
   on entry to every iteration.  */
        ALIGN (4)
L(128bytes_L2_normal):
        prefetcht0      0x380(%edx)
        prefetcht0      0x3c0(%edx)
        subl    $128, %ecx
        movdqa  %xmm0, (%edx)
        movaps  %xmm0, 0x10(%edx)
        movaps  %xmm0, 0x20(%edx)
        movaps  %xmm0, 0x30(%edx)
        movaps  %xmm0, 0x40(%edx)
        movaps  %xmm0, 0x50(%edx)
        movaps  %xmm0, 0x60(%edx)
        movaps  %xmm0, 0x70(%edx)
        addl    $128, %edx
        cmpl    $128, %ecx
        jae     L(128bytes_L2_normal)

/* ECX < 128: hand the remainder to the tail stubs.  */
L(128bytesless_L2_normal):
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
377
/* Large-fill path: ECX >= EBX, where EBX holds the shared cache size.
   This code follows an unconditional jump, so first re-declare the unwind
   state that is in force on the branch that reaches it.  */
        RESTORE_EBX_STATE
L(128bytesormore_nt_start):
        /* The first loop below writes EBX rounded down to a multiple of
           128.  Precompute what is left after it:
           ECX = ECX - EBX + (EBX & 0x7f).  */
        subl    %ebx, %ecx
        movl    %ebx, %eax
        andl    $0x7f, %eax
        addl    %eax, %ecx
        movd    %xmm0, %eax             /* Reload the pattern into EAX.  */
        ALIGN (4)
/* Ordinary aligned stores, 128 bytes per iteration, counted down in EBX.  */
L(128bytesormore_shared_cache_loop):
        prefetcht0      0x3c0(%edx)
        prefetcht0      0x380(%edx)
        subl    $0x80, %ebx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        addl    $0x80, %edx
        cmpl    $0x80, %ebx
        jae     L(128bytesormore_shared_cache_loop)
        cmpl    $0x80, %ecx
        jb      L(shared_cache_loop_end)
        ALIGN (4)
/* Whatever remains is written with non-temporal stores, 128 bytes per
   iteration, followed by a store fence.  */
L(128bytesormore_nt):
        subl    $0x80, %ecx
        movntdq %xmm0, (%edx)
        movntdq %xmm0, 0x10(%edx)
        movntdq %xmm0, 0x20(%edx)
        movntdq %xmm0, 0x30(%edx)
        movntdq %xmm0, 0x40(%edx)
        movntdq %xmm0, 0x50(%edx)
        movntdq %xmm0, 0x60(%edx)
        movntdq %xmm0, 0x70(%edx)
        addl    $0x80, %edx
        cmpl    $0x80, %ecx
        jae     L(128bytesormore_nt)
        sfence
/* ECX < 128.  Drop the EBX pushed at L(128bytesormore) in the variants
   that pushed it, then dispatch to the tail stubs.  */
L(shared_cache_loop_end):
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
        POP (%ebx)
#endif
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
423
424
/* Jump table for the final 0..127 bytes of a fill whose cursor is 16-byte
   aligned: entry N leads to L(aligned_16_Nbytes).  */
        .pushsection    .rodata.sse2,"a",@progbits
        ALIGN (2)
L(table_16_128bytes):
        .int    JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
        .popsection
557
/* Tail stubs reached through L(table_16_128bytes) for counts N with
   N % 16 in 0..3.  The dispatch has already added N to the 16-byte aligned
   cursor, so EDX - N is aligned and every movdqa below hits an aligned
   address.  Entering at L(aligned_16_Nbytes) falls through the remaining
   16-byte stores and then writes the N % 16 leftover bytes from EAX.  */

/* N % 16 == 0.  */
        ALIGN (4)
L(aligned_16_112bytes):
        movdqa  %xmm0, -112(%edx)
L(aligned_16_96bytes):
        movdqa  %xmm0, -96(%edx)
L(aligned_16_80bytes):
        movdqa  %xmm0, -80(%edx)
L(aligned_16_64bytes):
        movdqa  %xmm0, -64(%edx)
L(aligned_16_48bytes):
        movdqa  %xmm0, -48(%edx)
L(aligned_16_32bytes):
        movdqa  %xmm0, -32(%edx)
L(aligned_16_16bytes):
        movdqa  %xmm0, -16(%edx)
L(aligned_16_0bytes):
        SETRTNVAL
        RETURN

/* N % 16 == 1: leftover is one byte.  */
        ALIGN (4)
L(aligned_16_113bytes):
        movdqa  %xmm0, -113(%edx)
L(aligned_16_97bytes):
        movdqa  %xmm0, -97(%edx)
L(aligned_16_81bytes):
        movdqa  %xmm0, -81(%edx)
L(aligned_16_65bytes):
        movdqa  %xmm0, -65(%edx)
L(aligned_16_49bytes):
        movdqa  %xmm0, -49(%edx)
L(aligned_16_33bytes):
        movdqa  %xmm0, -33(%edx)
L(aligned_16_17bytes):
        movdqa  %xmm0, -17(%edx)
L(aligned_16_1bytes):
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 2: leftover is one word.  */
        ALIGN (4)
L(aligned_16_114bytes):
        movdqa  %xmm0, -114(%edx)
L(aligned_16_98bytes):
        movdqa  %xmm0, -98(%edx)
L(aligned_16_82bytes):
        movdqa  %xmm0, -82(%edx)
L(aligned_16_66bytes):
        movdqa  %xmm0, -66(%edx)
L(aligned_16_50bytes):
        movdqa  %xmm0, -50(%edx)
L(aligned_16_34bytes):
        movdqa  %xmm0, -34(%edx)
L(aligned_16_18bytes):
        movdqa  %xmm0, -18(%edx)
L(aligned_16_2bytes):
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 3: leftover is one word and one byte.  */
        ALIGN (4)
L(aligned_16_115bytes):
        movdqa  %xmm0, -115(%edx)
L(aligned_16_99bytes):
        movdqa  %xmm0, -99(%edx)
L(aligned_16_83bytes):
        movdqa  %xmm0, -83(%edx)
L(aligned_16_67bytes):
        movdqa  %xmm0, -67(%edx)
L(aligned_16_51bytes):
        movdqa  %xmm0, -51(%edx)
L(aligned_16_35bytes):
        movdqa  %xmm0, -35(%edx)
L(aligned_16_19bytes):
        movdqa  %xmm0, -19(%edx)
L(aligned_16_3bytes):
        movw    %ax, -3(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN
637
/* Tail stubs reached through L(table_16_128bytes) for counts N with
   N % 16 in 4..7.  EDX already points one past the end of the region and
   EDX - N is 16-byte aligned, so each movdqa address is aligned.  The
   16-byte stores fall through to a dword store plus 0..3 more bytes.  */

/* N % 16 == 4: leftover is one dword.  */
        ALIGN (4)
L(aligned_16_116bytes):
        movdqa  %xmm0, -116(%edx)
L(aligned_16_100bytes):
        movdqa  %xmm0, -100(%edx)
L(aligned_16_84bytes):
        movdqa  %xmm0, -84(%edx)
L(aligned_16_68bytes):
        movdqa  %xmm0, -68(%edx)
L(aligned_16_52bytes):
        movdqa  %xmm0, -52(%edx)
L(aligned_16_36bytes):
        movdqa  %xmm0, -36(%edx)
L(aligned_16_20bytes):
        movdqa  %xmm0, -20(%edx)
L(aligned_16_4bytes):
        movl    %eax, -4(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 5: dword + byte.  */
        ALIGN (4)
L(aligned_16_117bytes):
        movdqa  %xmm0, -117(%edx)
L(aligned_16_101bytes):
        movdqa  %xmm0, -101(%edx)
L(aligned_16_85bytes):
        movdqa  %xmm0, -85(%edx)
L(aligned_16_69bytes):
        movdqa  %xmm0, -69(%edx)
L(aligned_16_53bytes):
        movdqa  %xmm0, -53(%edx)
L(aligned_16_37bytes):
        movdqa  %xmm0, -37(%edx)
L(aligned_16_21bytes):
        movdqa  %xmm0, -21(%edx)
L(aligned_16_5bytes):
        movl    %eax, -5(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 6: dword + word.  */
        ALIGN (4)
L(aligned_16_118bytes):
        movdqa  %xmm0, -118(%edx)
L(aligned_16_102bytes):
        movdqa  %xmm0, -102(%edx)
L(aligned_16_86bytes):
        movdqa  %xmm0, -86(%edx)
L(aligned_16_70bytes):
        movdqa  %xmm0, -70(%edx)
L(aligned_16_54bytes):
        movdqa  %xmm0, -54(%edx)
L(aligned_16_38bytes):
        movdqa  %xmm0, -38(%edx)
L(aligned_16_22bytes):
        movdqa  %xmm0, -22(%edx)
L(aligned_16_6bytes):
        movl    %eax, -6(%edx)
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 7: dword + word + byte.  */
        ALIGN (4)
L(aligned_16_119bytes):
        movdqa  %xmm0, -119(%edx)
L(aligned_16_103bytes):
        movdqa  %xmm0, -103(%edx)
L(aligned_16_87bytes):
        movdqa  %xmm0, -87(%edx)
L(aligned_16_71bytes):
        movdqa  %xmm0, -71(%edx)
L(aligned_16_55bytes):
        movdqa  %xmm0, -55(%edx)
L(aligned_16_39bytes):
        movdqa  %xmm0, -39(%edx)
L(aligned_16_23bytes):
        movdqa  %xmm0, -23(%edx)
L(aligned_16_7bytes):
        movl    %eax, -7(%edx)
        movw    %ax, -3(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN
721
/* Tail stubs reached through L(table_16_128bytes) for counts N with
   N % 16 in 8..11.  EDX already points one past the end of the region and
   EDX - N is 16-byte aligned, so each movdqa address is aligned.  The
   16-byte stores fall through to an 8-byte store from XMM0 plus 0..3 more
   bytes from EAX.  */

/* N % 16 == 8: leftover is one qword.  */
        ALIGN (4)
L(aligned_16_120bytes):
        movdqa  %xmm0, -120(%edx)
L(aligned_16_104bytes):
        movdqa  %xmm0, -104(%edx)
L(aligned_16_88bytes):
        movdqa  %xmm0, -88(%edx)
L(aligned_16_72bytes):
        movdqa  %xmm0, -72(%edx)
L(aligned_16_56bytes):
        movdqa  %xmm0, -56(%edx)
L(aligned_16_40bytes):
        movdqa  %xmm0, -40(%edx)
L(aligned_16_24bytes):
        movdqa  %xmm0, -24(%edx)
L(aligned_16_8bytes):
        movq    %xmm0, -8(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 9: qword + byte.  */
        ALIGN (4)
L(aligned_16_121bytes):
        movdqa  %xmm0, -121(%edx)
L(aligned_16_105bytes):
        movdqa  %xmm0, -105(%edx)
L(aligned_16_89bytes):
        movdqa  %xmm0, -89(%edx)
L(aligned_16_73bytes):
        movdqa  %xmm0, -73(%edx)
L(aligned_16_57bytes):
        movdqa  %xmm0, -57(%edx)
L(aligned_16_41bytes):
        movdqa  %xmm0, -41(%edx)
L(aligned_16_25bytes):
        movdqa  %xmm0, -25(%edx)
L(aligned_16_9bytes):
        movq    %xmm0, -9(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 10: qword + word.  */
        ALIGN (4)
L(aligned_16_122bytes):
        movdqa  %xmm0, -122(%edx)
L(aligned_16_106bytes):
        movdqa  %xmm0, -106(%edx)
L(aligned_16_90bytes):
        movdqa  %xmm0, -90(%edx)
L(aligned_16_74bytes):
        movdqa  %xmm0, -74(%edx)
L(aligned_16_58bytes):
        movdqa  %xmm0, -58(%edx)
L(aligned_16_42bytes):
        movdqa  %xmm0, -42(%edx)
L(aligned_16_26bytes):
        movdqa  %xmm0, -26(%edx)
L(aligned_16_10bytes):
        movq    %xmm0, -10(%edx)
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 11: qword + word + byte.  */
        ALIGN (4)
L(aligned_16_123bytes):
        movdqa  %xmm0, -123(%edx)
L(aligned_16_107bytes):
        movdqa  %xmm0, -107(%edx)
L(aligned_16_91bytes):
        movdqa  %xmm0, -91(%edx)
L(aligned_16_75bytes):
        movdqa  %xmm0, -75(%edx)
L(aligned_16_59bytes):
        movdqa  %xmm0, -59(%edx)
L(aligned_16_43bytes):
        movdqa  %xmm0, -43(%edx)
L(aligned_16_27bytes):
        movdqa  %xmm0, -27(%edx)
L(aligned_16_11bytes):
        movq    %xmm0, -11(%edx)
        movw    %ax, -3(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN
805
/* Tail stubs reached through L(table_16_128bytes) for counts N with
   N % 16 in 12..15, followed by the end of memset.  EDX already points one
   past the end of the region and EDX - N is 16-byte aligned, so each
   movdqa address is aligned.  The 16-byte stores fall through to a qword
   store, a dword store and 0..3 more bytes.  */

/* N % 16 == 12: qword + dword.  */
        ALIGN (4)
L(aligned_16_124bytes):
        movdqa  %xmm0, -124(%edx)
L(aligned_16_108bytes):
        movdqa  %xmm0, -108(%edx)
L(aligned_16_92bytes):
        movdqa  %xmm0, -92(%edx)
L(aligned_16_76bytes):
        movdqa  %xmm0, -76(%edx)
L(aligned_16_60bytes):
        movdqa  %xmm0, -60(%edx)
L(aligned_16_44bytes):
        movdqa  %xmm0, -44(%edx)
L(aligned_16_28bytes):
        movdqa  %xmm0, -28(%edx)
L(aligned_16_12bytes):
        movq    %xmm0, -12(%edx)
        movl    %eax, -4(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 13: qword + dword + byte.  */
        ALIGN (4)
L(aligned_16_125bytes):
        movdqa  %xmm0, -125(%edx)
L(aligned_16_109bytes):
        movdqa  %xmm0, -109(%edx)
L(aligned_16_93bytes):
        movdqa  %xmm0, -93(%edx)
L(aligned_16_77bytes):
        movdqa  %xmm0, -77(%edx)
L(aligned_16_61bytes):
        movdqa  %xmm0, -61(%edx)
L(aligned_16_45bytes):
        movdqa  %xmm0, -45(%edx)
L(aligned_16_29bytes):
        movdqa  %xmm0, -29(%edx)
L(aligned_16_13bytes):
        movq    %xmm0, -13(%edx)
        movl    %eax, -5(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 14: qword + dword + word.  */
        ALIGN (4)
L(aligned_16_126bytes):
        movdqa  %xmm0, -126(%edx)
L(aligned_16_110bytes):
        movdqa  %xmm0, -110(%edx)
L(aligned_16_94bytes):
        movdqa  %xmm0, -94(%edx)
L(aligned_16_78bytes):
        movdqa  %xmm0, -78(%edx)
L(aligned_16_62bytes):
        movdqa  %xmm0, -62(%edx)
L(aligned_16_46bytes):
        movdqa  %xmm0, -46(%edx)
L(aligned_16_30bytes):
        movdqa  %xmm0, -30(%edx)
L(aligned_16_14bytes):
        movq    %xmm0, -14(%edx)
        movl    %eax, -6(%edx)
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

/* N % 16 == 15: qword + dword + word + byte.  This is the last exit in
   the function, so it uses RETURN_END: no code follows that would need
   the unwind state re-declared.  */
        ALIGN (4)
L(aligned_16_127bytes):
        movdqa  %xmm0, -127(%edx)
L(aligned_16_111bytes):
        movdqa  %xmm0, -111(%edx)
L(aligned_16_95bytes):
        movdqa  %xmm0, -95(%edx)
L(aligned_16_79bytes):
        movdqa  %xmm0, -79(%edx)
L(aligned_16_63bytes):
        movdqa  %xmm0, -63(%edx)
L(aligned_16_47bytes):
        movdqa  %xmm0, -47(%edx)
L(aligned_16_31bytes):
        movdqa  %xmm0, -31(%edx)
L(aligned_16_15bytes):
        movq    %xmm0, -15(%edx)
        movl    %eax, -7(%edx)
        movw    %ax, -3(%edx)
        movb    %al, -1(%edx)
        SETRTNVAL
        RETURN_END

END (memset)