blob: 0718fa7e64eb6f4ef4257e0bad214617cc4ae32d [file] [log] [blame]
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
Elliott Hughes81d6a182016-03-03 16:10:33 -080031#include <private/bionic_asm.h>
Varvara Rainchik5a922842014-04-24 15:41:20 +040032
Elliott Hughes81d6a182016-03-03 16:10:33 -080033#include "cache.h"
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040034
/* L(name) expands to the assembler-local label .Lname.  */
#if !defined L
# define L(label) .L##label
#endif

/* ALIGN(n) pads the location counter to a 2^n-byte boundary.  */
#if !defined ALIGN
# define ALIGN(n) .p2align n
#endif

/* Thin wrappers around the GAS .cfi_* unwind directives.  Each one is
   only defined here if nothing earlier already provided it.  */
#if !defined cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif

#if !defined cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif

#if !defined cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif

/* Unwind annotations that accompany a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG) cfi_adjust_cfa_offset (4); cfi_rel_offset (REG, 0)
#define CFI_POP(REG) cfi_adjust_cfa_offset (-4); cfi_restore (REG)

/* Push / pop REG and emit the matching unwind annotation.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
65
/* Offsets of the stack arguments from %esp.  PARMS (defined below, per
   build flavour) is the distance from %esp to the first argument once
   ENTRANCE has run.  CHK_DST_LEN is the extra fourth argument taken by
   __memset_chk.  */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)

/* Load the return value (the original destination pointer) into EAX.  */
#define SETRTNVAL	movl DST(%esp), %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -080071
#if defined(SHARED) || defined(__PIC__)
/* Position-independent build.  EBX is used as a scratch/PC register by
   BRANCH_TO_JMPTBL_ENTRY, so it is saved on entry and restored on every
   return.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, tell the
   unwinder that EBX is still pushed for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Return address + saved EBX.  */
/* Jump-table entries are stored relative to the table base B.  */
# define JMPTBL(I, B)	I - B

/* Dispatch through TABLE, a jump table of table-relative offsets,
   using ECX as the index.  Also advances EDX by ECX.  Clobbers EBX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
	/* EBX = address of the instruction after the call.  */	\
	call	__x86.get_pc_thunk.bx;				\
	/* EBX = address of the jump table.  */			\
	addl	$(TABLE - .), %ebx;				\
	/* Turn the selected relative entry into an absolute	\
	   address.  */						\
	addl	(%ebx,%ecx,4), %ebx;				\
	addl	%ecx, %edx;					\
	/* Table entry resolved and EDX adjusted.  Go.  */	\
	jmp	*%ebx

/* Helper that returns its own return address in EBX.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
/* Non-PIC build.  Nothing is saved on entry, and the jump table holds
   absolute addresses.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4		/* Return address only.  */
# define JMPTBL(I, B)	I

/* Dispatch through TABLE, a jump table of absolute addresses, using
   ECX as the index.  Also advances EDX by ECX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
	addl	%ecx, %edx;					\
	jmp	*TABLE(,%ecx,4)
#endif
114
/*
 * __memset_chk(dst, chr, len, dst_len)
 *
 * Fortify front end for memset.  Arguments are on the stack (cdecl):
 * dst, chr, len, and dst_len (the known size of the destination).
 * If len <= dst_len, tail-jumps to memset with the stack untouched so
 * that memset sees its usual three arguments; otherwise tail-jumps to
 * __memset_chk_fail.
 *
 * Clobbers: ECX, flags.
 */
ENTRY(__memset_chk)
	/* ENTRANCE has not run here, so nothing but the return address
	   sits below the arguments.  LEN and CHK_DST_LEN are defined
	   relative to PARMS (which may include a saved-EBX slot), so
	   rebase them to "return address only".  */
	movl	(LEN - PARMS + 4)(%esp), %ecx
	/* Flags are computed from len - dst_len.  */
	cmpl	(CHK_DST_LEN - PARMS + 4)(%esp), %ecx
	/* Unsigned len <= dst_len: the write fits.  */
	jbe	memset

	jmp	__memset_chk_fail
END(__memset_chk)
122
/*
 * memset(dst, chr, len)
 *
 * Arguments are on the stack at DST/CHR/LEN(%esp) (offsets are valid
 * once ENTRANCE has run).  Returns the original dst in EAX.
 *
 * Register roles after the initial setup:
 *   EAX  - fill byte replicated into all four bytes
 *   ECX  - number of bytes still to be stored
 *   EDX  - destination cursor
 *   XMM0 - fill byte replicated into all sixteen bytes
 *   EBX  - scratch: cache-size threshold and, in PIC builds, PC base.
 *          Always restored before returning.
 *
 * Strategy: lengths up to 64 use overlapping head/tail stores; longer
 * fills align EDX to 16, store 128-byte chunks, and finish the last
 * 0..127 bytes through the L(table_16_128bytes) jump table.
 */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY(memset)
	ENTRANCE

	movl	LEN(%esp), %ecx
	testl	%ecx, %ecx
	jnz	L(1byteormore)
	/* len == 0: nothing to store.  */
	SETRTNVAL
	RETURN

L(1byteormore):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$1, %ecx
	je	L(1byte)
	cmp	$16, %ecx
	jae	L(16bytesormore)

	/* 2..15 bytes: stores anchored at both ends, which may overlap.  */
	cmp	$4, %ecx
	jb	L(4bytesless)
	movl	%eax, (%edx)
	movl	%eax, -4(%edx, %ecx)
	cmp	$8, %ecx
	jb	L(8bytesless)
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx, %ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

	/* 2 or 3 bytes.  */
L(4bytesless):
	movw	%ax, (%edx)
	movw	%ax, -2(%edx, %ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(16bytesormore):
	/* Broadcast the 4-byte pattern to all of XMM0.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmp	$64, %ecx
	ja	L(64bytesmore)
	/* 16..64 bytes: unaligned 16-byte stores from both ends.  */
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx, %ecx)
	cmp	$32, %ecx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx, %ecx)
L(32bytesless):
	SETRTNVAL
	RETURN

L(64bytesmore):
	testl	$0xf, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned store, then round
	   EDX up to the next 16-byte boundary and shrink ECX by the
	   number of bytes skipped.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax		/* EAX = -(bytes skipped).  */
	add	%eax, %ecx
	/* EAX was used as a temporary; reload the pattern.  */
	movd	%xmm0, %eax

	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
L(128bytesormore):
	/* EBX = shared cache size threshold.  EBX is pushed here except
	   in the PIC runtime-variable case, which relies on the save
	   already made by ENTRANCE.  */
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* Load the value of the variable, not its address.  */
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	/* Load the value of the variable, not its address.  */
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Undo the push above, but only in the configurations that made
	   one; otherwise this would discard the EBX saved by ENTRANCE.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif

	/* EBX = data cache size threshold, with the same push rule.  */
#ifdef DATA_CACHE_SIZE
	PUSH (%ebx)
	mov	$DATA_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	/* Load the value of the variable, not its address.  */
	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	/* Load the value of the variable, not its address.  */
	mov	__x86_data_cache_size, %ebx
# endif
#endif

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the borrow from each "sub $128" below
	   signals that fewer than 128 bytes remain after the chunk
	   currently being stored.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	/* The stores and the lea below leave the flags from the sub
	   intact for the conditional jump.  */
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the bias: ECX = bytes remaining (0..127).  */
	lea	128(%ecx), %ecx
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
L(128bytes_L2_normal):
	/* Length is at least the data cache threshold: same 128-byte
	   chunks, with prefetch hints ahead of the cursor.  */
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

L(128bytesormore_nt_start):
	/* Length is at least the shared cache threshold held in EBX.
	   ECX = bytes beyond the first EBX bytes.  */
	sub	%ebx, %ecx
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	/* Store the first EBX bytes with ordinary stores.
	   NOTE(review): this stores only whole 128-byte chunks of EBX;
	   it appears to assume the threshold is a multiple of 128 --
	   confirm against the values in cache.h.  */
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	/* Store the rest in 128-byte chunks with non-temporal stores.  */
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before any later stores.  */
	sfence
L(shared_cache_loop_end):
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
340
341
/* Jump table used by BRANCH_TO_JMPTBL_ENTRY: entry N (0..127) is the
   handler L(aligned_16_Nbytes) that stores the final N bytes ending at
   EDX.  JMPTBL decides whether entries are absolute or table-relative.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	/* 0..15 */
	.long	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	/* 16..31 */
	.long	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	/* 32..47 */
	.long	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	/* 48..63 */
	.long	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	/* 64..79 */
	.long	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	/* 80..95 */
	.long	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	/* 96..111 */
	.long	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	/* 112..127 */
	.long	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.long	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection
474
/* Tail handlers reached through L(table_16_128bytes).  On entry EDX
   points one past the last byte to store, EAX and XMM0 hold the fill
   pattern, and the handler L(aligned_16_Nbytes) stores the N bytes that
   end at EDX.  Handlers with the same N mod 16 share one fall-through
   chain: aligned 16-byte stores first, then the 0..15 byte remainder.  */

	/* N mod 16 == 0 */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* N mod 16 == 1 */
	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 2 */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 3 */
	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 4 */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 5 */
	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 6 */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 7 */
	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 8 */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 9 */
	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 10 */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 11 */
	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 12 */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 13 */
	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 14 */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 15 */
	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no code follows that would need
	   the "EBX still pushed" unwind annotation.  */
	RETURN_END

END(memset)