blob: f5182baafcbc4f7dc27d4e9c5e04f31ee338b17d [file] [log] [blame]
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
Elliott Hughes81d6a182016-03-03 16:10:33 -080031#include <private/bionic_asm.h>
Varvara Rainchik5a922842014-04-24 15:41:20 +040032
Elliott Hughes81d6a182016-03-03 16:10:33 -080033#include "cache.h"
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040034
/* Fallback definitions: each helper is only defined here when an earlier
   header has not already provided it.  */

/* L(name) expands to the assembler-local label .Lname.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) aligns the location counter to a 2^n byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers around the assembler's call-frame-information directives.  */
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves 4 bytes
   further from ESP and REG is recorded as saved at offset 0 from ESP.  */
#define CFI_PUSH(REG)						\
	cfi_adjust_cfa_offset (4);				\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching pop: the CFA moves back by 4 bytes
   and REG is marked as holding its original value again.  */
#define CFI_POP(REG)						\
	cfi_adjust_cfa_offset (-4);				\
	cfi_restore (REG)

/* Push/pop a register together with the unwind annotations above.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
65
/* Stack offsets of the arguments relative to ESP.  PARMS is the offset of
   the first argument and depends on whether ENTRANCE saved EBX (below).
   DST, CHR and LEN are the three consecutive 4-byte arguments read by
   memset; CHK_DST_LEN is the fourth argument, read only by __memset_chk.  */
#define DST		PARMS
#define CHR		DST+4
#define LEN		CHR+4
#define CHK_DST_LEN	(LEN+4)

/* Load the original destination pointer into EAX as the return value.  */
#define SETRTNVAL	movl DST(%esp), %eax

#if (defined SHARED || defined __PIC__)
/* Position-independent build: EBX is saved on entry because the jump-table
   dispatch below uses it as a scratch/PC register.  */
# define ENTRANCE	PUSH (%ebx);
/* RETURN_END restores EBX and returns.  RETURN additionally re-applies the
   unwind state of the saved EBX for the code that follows the `ret'.  */
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
/* Jump-table entries are stored as offsets relative to the table base.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index; it is also
   added to EDX before the branch.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
    jmp		*%ebx

/* PC thunk: returns with EBX holding the caller's return address, i.e. the
   address of the instruction following the `call'.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
/* Non-PIC build: EBX is not touched on entry, so nothing is saved and the
   first argument sits directly above the return address.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
/* Jump-table entries are stored as absolute addresses.  */
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the table index; it is also added to EDX
   before the branch.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif
114
/* __memset_chk: length-checked entry point for memset.
   Reads the same three stack arguments as memset plus a fourth one
   (CHK_DST_LEN).  When LEN is (unsigned) greater than CHK_DST_LEN control
   is transferred to __memset_chk_fail; otherwise execution joins memset at
   L(memset_length_loaded) with LEN already in ECX and ENTRANCE already
   performed.  */
ENTRY(__memset_chk)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	/* Unsigned LEN <= CHK_DST_LEN: the fill fits, continue in memset.  */
	jna	L(memset_length_loaded)

	/* Failure path.  This is a tail jump, so the stack must look exactly
	   as it did on entry: undo the EBX save that ENTRANCE performs in
	   PIC builds (ENTRANCE is empty otherwise) before leaving.  */
#if (defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	jmp	__memset_chk_fail
END(__memset_chk)
123
/* memset: fill LEN bytes at DST with the low byte of CHR and return DST in
   EAX.  The three arguments are read from the stack at the DST/CHR/LEN
   offsets.  This first part handles lengths below 16 with scalar stores.  */
	.section	.text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY(memset)
	ENTRANCE

	movl	LEN(%esp), %ecx
L(memset_length_loaded):
	/* __memset_chk jumps here with the length already in ECX.  */
	test	%ecx, %ecx
	jnz	L(1byteormore)
	/* Zero length: nothing to store.  */
	SETRTNVAL
	RETURN

L(1byteormore):
	/* Replicate the fill byte into all four bytes of EAX.  */
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	/* EDX = destination, ECX = length, EAX = 4-byte pattern.  */
	movl	DST(%esp), %edx
	cmp	$1, %ecx
	je	L(1byte)
	cmp	$16, %ecx
	jae	L(16bytesormore)

	/* 2..15 bytes: store from both ends of the buffer; the stores may
	   overlap in the middle, which is harmless for a constant fill.  */
	cmp	$4, %ecx
	jb	L(4bytesless)
	movl	%eax, (%edx)
	movl	%eax, -4(%edx, %ecx)
	cmp	$8, %ecx
	jb	L(8bytesless)
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx, %ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

L(4bytesless):
	/* 2 or 3 bytes: two 16-bit stores, overlapping when LEN is 3.  */
	movw	%ax, (%edx)
	movw	%ax, -2(%edx, %ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN
171
/* 16 bytes or more: broadcast the 4-byte pattern in EAX to all of XMM0.
   Lengths of 16..64 are then covered by unaligned 16-byte stores issued
   from both ends of the buffer (overlap in the middle is harmless).  */
	ALIGN (4)
L(16bytesormore):
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmp	$64, %ecx
	ja	L(64bytesmore)
	/* First and last 16 bytes.  */
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx, %ecx)
	cmp	$32, %ecx
	jbe	L(32bytesless)
	/* 33..64 bytes: also the second and second-to-last 16 bytes.  */
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx, %ecx)
L(32bytesless):
	SETRTNVAL
	RETURN
188
L(64bytesmore):
	/* More than 64 bytes: bring EDX to a 16-byte boundary first so the
	   remaining code can use aligned stores.  */
	testl	$0xf, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Fill the first 16 bytes with one unaligned store, then advance EDX
	   to the next 16-byte boundary and shrink ECX by the number of bytes
	   skipped.  EAX is used as scratch for that (negative) adjustment.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	/* Restore the 4-byte fill pattern in EAX; the tail code stores it.  */
	movd	%xmm0, %eax

	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	/* Fewer than 128 bytes left: dispatch on ECX through the jump table.
	   The macro also advances EDX by ECX, so the tail code stores at
	   negative offsets from the end of the buffer.  */
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
208
/* 128 bytes or more, EDX 16-byte aligned, ECX = remaining length.
   The length is compared against two thresholds, each loaded into EBX:
     - at or above the shared-cache size  -> L(128bytesormore_nt_start)
     - at or above the data-cache size    -> L(128bytes_L2_normal)
     - otherwise                          -> the plain store loop below.
   The thresholds come from the SHARED_CACHE_SIZE / DATA_CACHE_SIZE macros
   when defined, else from the __x86_shared_cache_size /
   __x86_data_cache_size symbols (assumed to be variables holding the sizes,
   as in glibc -- confirm against their definition).

   EBX handling: with a macro threshold, or in a non-PIC build, EBX is
   pushed before being overwritten and must be popped again.  In a PIC
   build that loads the threshold through the GOT nothing is pushed here,
   because ENTRANCE already saved EBX.  Every POP below is therefore
   guarded by the same condition as its matching PUSH.  */
	ALIGN (4)
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* Load the value of the variable, not its address.  */
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Drop the shared-cache threshold; only pop when it was pushed.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif

#ifdef DATA_CACHE_SIZE
	PUSH (%ebx)
	mov	$DATA_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* Load the value of the variable, not its address.  */
	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_data_cache_size, %ebx
# endif
#endif

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)

	/* Plain loop, unrolled twice, 128 bytes per half.  ECX is biased by
	   -128 so that the borrow from each `sub' tells whether another full
	   128-byte chunk remains.  The SSE stores and `lea' leave the flags
	   from the preceding `sub' intact for the conditional jump.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the last subtraction: ECX = bytes still to fill (< 128).  */
	lea	128(%ecx), %ecx
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
278
/* Loop used when the length is at least the data-cache threshold but below
   the shared-cache threshold: 128 bytes of aligned stores per iteration,
   with non-temporal prefetch hints for two lines 0x380/0x3c0 bytes ahead.
   Runs while at least 128 bytes remain in ECX.  */
	ALIGN (4)
L(128bytes_L2_normal):
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	/* Restore EBX if the data-cache threshold load pushed it, then finish
	   the remaining ECX (< 128) bytes through the jump table.  */
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
301
L(128bytesormore_nt_start):
	/* Entered with EBX = shared-cache threshold and ECX >= EBX.
	   Split the work: EBX bytes go through the ordinary-store loop,
	   ECX is reduced to the bytes beyond that.  */
	sub	%ebx, %ecx
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	/* Ordinary aligned stores, 128 bytes per iteration, counted in EBX.  */
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	/* Skip the non-temporal loop when fewer than 128 bytes are left.  */
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	/* Non-temporal stores, 128 bytes per iteration, counted in ECX.  */
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before anything that follows.  */
	sfence
L(shared_cache_loop_end):
	/* Restore EBX if the shared-cache threshold load pushed it, then
	   finish the remaining ECX (< 128) bytes through the jump table.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
342
343
/* Jump table indexed by the number of bytes (0..127) still to fill.
   Entry N refers to L(aligned_16_Nbytes).  JMPTBL stores each entry either
   relative to the table base (PIC) or as an absolute address (non-PIC).  */
	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection
476
/* Tail code reached through the jump table, for remainders N with
   N mod 16 in 0..7.  On entry EDX points just past the end of the region
   (the dispatch macro added ECX to it), XMM0 holds the 16-byte pattern and
   EAX the 4-byte pattern.  Each chain falls through 16-byte aligned stores
   and then finishes the last N mod 16 bytes with scalar stores.  The
   movdqa addresses are 16-byte aligned because EDX minus N is the aligned
   position the main code stopped at.  */

	/* N mod 16 == 0.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* N mod 16 == 1: final 1 byte.  */
	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 2: final 2 bytes.  */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 3: final 2 + 1 bytes.  */
	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 4: final 4 bytes.  */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 5: final 4 + 1 bytes.  */
	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 6: final 4 + 2 bytes.  */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 7: final 4 + 2 + 1 bytes.  */
	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
640
/* Tail code reached through the jump table, for remainders N with
   N mod 16 in 8..15.  Same entry state as the other tails: EDX points just
   past the end of the region, XMM0 holds the 16-byte pattern and EAX the
   4-byte pattern.  After the aligned 16-byte stores, the last N mod 16
   bytes are written with an 8-byte movq plus scalar stores.  */

	/* N mod 16 == 8: final 8 bytes.  */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 9: final 8 + 1 bytes.  */
	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 10: final 8 + 2 bytes.  */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 11: final 8 + 2 + 1 bytes.  */
	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 12: final 8 + 4 bytes.  */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 13: final 8 + 4 + 1 bytes.  */
	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 14: final 8 + 4 + 2 bytes.  */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 15: final 8 + 4 + 2 + 1 bytes.  This is the last
	   return in the function, so RETURN_END is used: no unwind state
	   needs to be re-established after it.  */
	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END(memset)