/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#include "cache.h"

/* L(name) expands to the assembler-local label .Lname.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) aligns the location counter to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA is now 4 bytes
   further from %esp and REG is saved at offset 0 from %esp.  */
#define CFI_PUSH(REG) \
	.cfi_adjust_cfa_offset 4; \
	.cfi_rel_offset REG, 0

/* Unwind bookkeeping for the matching pop: undo the CFA adjustment and
   record that REG holds its original value again.  */
#define CFI_POP(REG) \
	.cfi_adjust_cfa_offset -4; \
	.cfi_restore REG

/* Push/pop a register while keeping the unwind information in sync.  */
#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Argument offsets from %esp once ENTRANCE has pushed %ebx:
      0  saved %ebx
      4  return address
      8  DST          destination pointer
     12  CHR          fill value (only the low byte is used)
     16  LEN          number of bytes to fill
     20  CHK_DST_LEN  bound that __memset_chk compares LEN against  */
#define PARMS		8		/* Preserve EBX.  */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)
/* Reload the original destination pointer into %eax as the return value.  */
#define SETRTNVAL	movl DST(%esp), %eax

/* ENTRANCE saves %ebx; RETURN_END restores it and returns.  RETURN is
   RETURN_END followed by CFI_PUSH(%ebx) so that the unwind state for the
   code that textually follows the ret again says "%ebx is pushed".
   JMPTBL(I, B) is a jump-table entry: the offset of label I from base B.  */
# define ENTRANCE	PUSH(%ebx);
# define RETURN_END	POP(%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH(%ebx)
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.

   In:       %ecx = index into TABLE (entries are 4 bytes wide)
             %edx = pointer; it is advanced by %ecx before the jump
   Clobbers: %ebx and the flags.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
	/* We first load PC into EBX.  */ \
	call	__x86.get_pc_thunk.bx; \
	/* Get the address of the jump table.  */ \
	add	$(TABLE - .), %ebx; \
	/* Get the entry and convert the relative offset to the \
	   absolute address.  */ \
	add	(%ebx,%ecx,4), %ebx; \
	add	%ecx, %edx; \
	/* We loaded the jump table and adjusted EDX.  Go.  */ \
	jmp	*%ebx

/* __x86.get_pc_thunk.bx: copy the caller's return address (the word at the
   top of the stack) into %ebx and return.  BRANCH_TO_JMPTBL_ENTRY uses it
   to obtain the address of the instruction following its call.
   Out: %ebx = return address.  No other register is modified.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN(4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

/* __memset_chk: bounds-checked entry point.
   Stack arguments: DST, CHR, LEN, CHK_DST_LEN.
   If LEN <= CHK_DST_LEN (unsigned) control continues inside memset at
   L(memset_length_loaded) with %ecx = LEN and %ebx already pushed;
   otherwise %ebx is restored and control tail-jumps to __memset_chk_fail
   with the stack exactly as the caller left it.  */
ENTRY(__memset_chk)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	/* jna == jbe: unsigned "LEN is not above the bound".  */
	jna	L(memset_length_loaded)

	POP(%ebx) // Undo ENTRANCE without returning.
	jmp	__memset_chk_fail
END(__memset_chk)

/* memset: fill LEN bytes at DST with the low byte of CHR; returns DST in
   %eax.  Stack arguments: DST, CHR, LEN.

   Register roles from here on:
     %ecx  = number of bytes still to fill
     %eax  = fill byte replicated into all four bytes
     %edx  = destination pointer
     %ebx  = saved by ENTRANCE, restored by RETURN / RETURN_END

   This first part handles LEN of 0..15 bytes using pairs of possibly
   overlapping stores anchored at the start and at the end of the buffer.  */
	.section .text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset)
	ENTRANCE

	movl	LEN(%esp), %ecx
	/* __memset_chk joins here with %ecx = LEN and %ebx pushed.  */
L(memset_length_loaded):
	cmp	$0, %ecx
	ja	L(1byteormore)
	/* LEN == 0: nothing to store.  */
	SETRTNVAL
	RETURN

L(1byteormore):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$1, %ecx
	je	L(1byte)
	cmp	$16, %ecx
	jae	L(16bytesormore)

	cmp	$4, %ecx
	jb	L(4bytesless)
	/* 4..15 bytes: first and last dword cover 4..8 bytes.  */
	movl	%eax, (%edx)
	movl	%eax, -4(%edx, %ecx)
	cmp	$8, %ecx
	jb	L(8bytesless)
	/* 8..15 bytes: second and second-to-last dword cover the rest.  */
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx, %ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

	/* 2..3 bytes: first and last word (they overlap when LEN == 3).  */
L(4bytesless):
	movw	%ax, (%edx)
	movw	%ax, -2(%edx, %ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN

/* LEN >= 16.  Broadcast the 4-byte pattern in %eax to all of %xmm0.
   For 16..64 bytes, unaligned 16-byte stores anchored at both ends of the
   buffer cover it (first + last for <= 32 bytes, plus second and
   second-to-last for 33..64).  Larger sizes go to L(64bytesmore).  */
	ALIGN(4)
L(16bytesormore):
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmp	$64, %ecx
	ja	L(64bytesmore)
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx, %ecx)
	cmp	$32, %ecx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx, %ecx)
L(32bytesless):
	SETRTNVAL
	RETURN

/* LEN > 64.  Make %edx 16-byte aligned, then either dispatch through the
   jump table (fewer than 128 bytes left) or enter the bulk loops.  */
L(64bytesmore):
	testl	$0xf, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Fill the first 16 bytes with an unaligned store, then round %edx up
	   to the next 16-byte boundary.  %eax is used as scratch to compute
	   old - new (a negative value), which is added to %ecx so that it
	   shrinks by the number of bytes skipped.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	/* Restore the 4-byte pattern in %eax for the tail stores.  */
	movd	%xmm0, %eax

	ALIGN(4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	/* %ecx < 128 indexes the table; %edx is advanced to the end.  */
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* At least 128 bytes remain and %edx is 16-byte aligned.  Choose a loop by
   comparing %ecx (unsigned) against SHARED_CACHE_SIZE and DATA_CACHE_SIZE
   (presumably provided by cache.h -- confirm):
     %ecx >= SHARED_CACHE_SIZE -> L(128bytesormore_nt_start)
     %ecx >= DATA_CACHE_SIZE   -> L(128bytes_L2_normal)
     otherwise                 -> L(128bytesormore_normal) below.
   On every path %ebx has been pushed once more and holds the constant that
   was compared; each path pops it before dispatching to the jump table.  */
	ALIGN(4)
L(128bytesormore):
	PUSH(%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	POP(%ebx)

	PUSH(%ebx)
	mov	$DATA_CACHE_SIZE, %ebx

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)
	/* Bias %ecx by -128 so that the borrow from the "sub $128" inside the
	   loop means "fewer than 128 bytes remain after this chunk".  */
	subl	$128, %ecx
	/* Loop body is unrolled twice; each half stores 128 bytes.  The
	   pointer is advanced with lea, and the stores do not write flags,
	   so the flags tested by jb/jae are still those of the sub.  */
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the bias: %ecx = bytes remaining (0..127).  */
	lea	128(%ecx), %ecx
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* Bulk loop used when DATA_CACHE_SIZE <= %ecx < SHARED_CACHE_SIZE.
   Each iteration issues two prefetchnta hints for addresses 0x380 and
   0x3c0 bytes ahead, then stores 128 bytes with aligned stores.  Loops
   while at least 128 bytes remain, then pops the %ebx pushed at
   L(128bytesormore) and finishes the remaining 0..127 bytes via the
   jump table.  */
	ALIGN(4)
L(128bytes_L2_normal):
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

/* Very large fills (%ecx >= SHARED_CACHE_SIZE).
   On entry %ebx = SHARED_CACHE_SIZE (set at L(128bytesormore)).
   Phase 1: %ebx counts down while that many bytes are written with
            ordinary aligned stores, 128 per iteration, with prefetchnta
            hints ahead of the write pointer.
   Phase 2: whatever is left in %ecx beyond that is written 128 bytes at a
            time with non-temporal movntdq stores, followed by an sfence.
   The final 0..127 bytes go through the jump table.  */
L(128bytesormore_nt_start):
	/* %ecx = bytes left over after phase 1.  */
	sub	%ebx, %ecx
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	/* Skip phase 2 (and its sfence) if fewer than 128 bytes are left.  */
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


/* Jump table indexed by the number of bytes left to fill (0..127).  Entry N
   holds the offset of L(aligned_16_Nbytes) relative to the start of the
   table; BRANCH_TO_JMPTBL_ENTRY adds the table address back at run time.
   The table is emitted into .rodata.sse2 and 4-byte aligned.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tail for remainders of 112, 96, ..., 16 and 0 bytes.  Reached through
   L(table_16_128bytes); %edx points just past the end of the region
   (BRANCH_TO_JMPTBL_ENTRY added %ecx to it).  Each label stores one aligned
   16-byte chunk and falls through to the next.  */
	ALIGN(4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 1 bytes (113 down to 1).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final byte comes from %al.  The movdqa addresses are
   16-byte aligned when %edx was aligned before the table dispatch.  */
	ALIGN(4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 2 bytes (114 down to 2).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 2 bytes come from %ax.  */
	ALIGN(4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 3 bytes (115 down to 3).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 3 bytes are a word plus a byte.  */
	ALIGN(4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 4 bytes (116 down to 4).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 4 bytes come from %eax.  */
	ALIGN(4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 5 bytes (117 down to 5).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 5 bytes are a dword plus a byte.  */
	ALIGN(4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 6 bytes (118 down to 6).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 6 bytes are a dword plus a word.  */
	ALIGN(4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 7 bytes (119 down to 7).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 7 bytes are a dword, a word and a byte.  */
	ALIGN(4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 8 bytes (120 down to 8).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 8 bytes are the low quadword of %xmm0.  */
	ALIGN(4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 9 bytes (121 down to 9).  %edx points just
   past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 9 bytes are a quadword plus a byte.  */
	ALIGN(4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 10 bytes (122 down to 10).  %edx points
   just past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 10 bytes are a quadword plus a word.  */
	ALIGN(4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 11 bytes (123 down to 11).  %edx points
   just past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 11 bytes are a quadword, a word and a byte.  */
	ALIGN(4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 12 bytes (124 down to 12).  %edx points
   just past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 12 bytes are a quadword plus a dword.  */
	ALIGN(4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 13 bytes (125 down to 13).  %edx points
   just past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 13 bytes are a quadword, a dword and a byte.  */
	ALIGN(4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 14 bytes (126 down to 14).  %edx points
   just past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 14 bytes are a quadword, a dword and a word.  */
	ALIGN(4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* Tail for remainders of 16*k + 15 bytes (127 down to 15).  %edx points
   just past the end of the region.  Each label stores one 16-byte chunk and
   falls through; the final 15 bytes are a quadword, a dword, a word and a
   byte.  This is the last return in memset, so it uses RETURN_END, which
   does not re-push the CFI state after the ret.  */
	ALIGN(4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END(memset)