/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "cache.h"

/* Name under which the routine is exported.  The default applies only when
   the including file has not already defined MEMSET.  */
#ifndef MEMSET
# define MEMSET		memset
#endif

/* File-local label; the .L prefix marks it as non-exported.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Align the location counter to 2^n bytes.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers around the assembler's .cfi_* unwind directives.  Each one
   is defined only if the includer has not supplied its own version.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* Open a global function symbol: mark it as a function, export it, align
   it to 16 bytes and start its unwind information.  */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/* Close the function opened by ENTRY and record its size.  */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves by 4 and REG
   is saved at the new top of stack.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching 4-byte pop of REG.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop a 32-bit register and keep the unwind information in step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offsets of the three stack-passed arguments relative to ESP once ENTRANCE
   has run.  PARMS is defined below: it skips the return address and, in
   PIC builds, the EBX slot pushed by ENTRANCE.  */
#define DEST		PARMS
#define CHR		DEST+4
#define LEN		CHR+4
/* Load the return value (the destination pointer) into EAX.  */
#define SETRTNVAL	movl DEST(%esp), %eax

#if (defined SHARED || defined __PIC__)
/* PIC build: EBX is needed to address the jump table, so it is saved on
   entry and restored on every return path.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, re-assert
   for the code that follows that EBX is still on the stack.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
/* Jump-table entries are stored as offsets from the table base B.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the entry index; it is also
   added to EDX before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
    jmp		*%ebx

/* Helper that returns its own return address (the caller's PC) in EBX.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
/* Non-PIC build: nothing is saved on entry and the table holds absolute
   addresses.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the entry index; it is also added to EDX
   before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif

/* MEMSET (dest, chr, len): i386, SSE2, AT&T syntax.
   Arguments are read from the stack at DEST/CHR/LEN(%esp); the destination
   pointer is returned in EAX.

   Register roles from L(1byteormore) onwards:
     ECX = number of bytes still to write
     EAX = fill byte replicated into all four bytes
     EDX = destination pointer

   This first part handles lengths 0..15 with overlapping head/tail stores
   and hands longer requests to L(16bytesormore).  */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	ENTRANCE

	movl	LEN(%esp), %ecx
	/* len == 0: nothing to store, just return dest.  */
	test	%ecx, %ecx
	jnz	L(1byteormore)
	SETRTNVAL
	RETURN

L(1byteormore):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DEST(%esp), %edx
	cmp	$1, %ecx
	je	L(1byte)
	cmp	$16, %ecx
	jae	L(16bytesormore)

	/* 2 <= len <= 15.  */
	cmp	$4, %ecx
	jb	L(4bytesless)
	/* 4 <= len <= 15: one dword at each end; they overlap when
	   len < 8.  */
	movl	%eax, (%edx)
	movl	%eax, -4(%edx, %ecx)
	cmp	$8, %ecx
	jb	L(8bytesless)
	/* 8 <= len <= 15: a second dword at each end closes the gap.  */
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx, %ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

	/* len is 2 or 3: one word at each end (overlapping for len 3).  */
L(4bytesless):
	movw	%ax, (%edx)
	movw	%ax, -2(%edx, %ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN

/* len >= 16.  Broadcast the 4-byte pattern in EAX to all of XMM0, then:
     16..64 bytes  -> two or four unaligned 16-byte stores, taken from both
		      ends so that they overlap in the middle;
     > 64 bytes    -> align EDX to 16 and continue with aligned stores.  */
	ALIGN (4)
L(16bytesormore):
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmp	$64, %ecx
	ja	L(64bytesmore)
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx, %ecx)
	cmp	$32, %ecx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx, %ecx)
L(32bytesless):
	SETRTNVAL
	RETURN

L(64bytesmore):
	testl	$0xf, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store, round
	   EDX up to the next 16-byte boundary and shrink ECX by the number
	   of bytes skipped (EAX = old EDX - new EDX, a negative value).  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the pattern for the tail code.  */
	movd	%xmm0, %eax

	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

	/* Fewer than 128 bytes left: the jump table finishes the job.  */
L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* At least 128 bytes remain and EDX is 16-byte aligned.  Compare the
   remaining length with the cache sizes to choose a store strategy:
     len >= shared cache size -> L(128bytesormore_nt_start)
     len >= data cache size   -> L(128bytes_L2_normal)
     otherwise                -> the unrolled loop below.
   EBX holds the size being compared.  Wherever EBX is not already free
   (saved by ENTRANCE in PIC builds) it is pushed first, and each exit path
   pops it again under the matching preprocessor condition.

   NOTE(review): __x86_shared_cache_size and __x86_data_cache_size are
   treated here as variables holding a size in bytes, so their contents are
   loaded.  They are not defined in this file -- confirm against their
   definition.  */
	ALIGN (4)
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Undo the PUSH above.  It was only executed in these
	   configurations, so the POP must carry the same guard (this is the
	   guard L(shared_cache_loop_end) uses for the same push).  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif

#ifdef DATA_CACHE_SIZE
	PUSH (%ebx)
	mov	$DATA_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	call	__x86.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_data_cache_size, %ebx
# endif
#endif

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the "sub $128" at the top of each half of
	   the loop borrows exactly when fewer than 128 bytes will remain
	   after that half's stores.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	/* lea leaves the flags from the sub intact for the jb below.  */
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the bias: ECX is again the number of bytes left (< 128).  */
	lea	128(%ecx), %ecx
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* Length is at least the data cache size but below the shared cache size:
   128 bytes per iteration with ordinary aligned stores, prefetching ahead
   with a non-temporal hint.  ECX = bytes left, EDX = aligned cursor.  */
	ALIGN (4)
L(128bytes_L2_normal):
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* Length is at least the shared cache size (held in EBX on entry).  The
   first EBX bytes are written with ordinary stores, 128 at a time; whatever
   is left is written with non-temporal stores, and the final < 128 bytes go
   through the jump table.  */
L(128bytesormore_nt_start):
	/* ECX = bytes beyond the first EBX bytes.  */
	sub	%ebx, %ecx
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	/* EBX now holds the part of the cache-size budget that was not
	   written (size mod 128).  Give those bytes back to ECX so they are
	   not skipped; this adds zero when the size is a multiple of 128.  */
	add	%ebx, %ecx
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Fence the non-temporal stores before the remaining ordinary
	   stores and the return.  */
	sfence
L(shared_cache_loop_end):
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


/* Jump table used by BRANCH_TO_JMPTBL_ENTRY: entry N (0..127) points at
   L(aligned_16_Nbytes), the code that writes the last N bytes.  JMPTBL
   stores each entry either as an absolute address or as an offset from the
   table base, depending on the build.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tail stores, reached only through L(table_16_128bytes).

   On entry to L(aligned_16_Nbytes):
     N    = number of bytes still to write (0..127)
     EDX  = one past the last byte to write (BRANCH_TO_JMPTBL_ENTRY added N
	    to the 16-byte-aligned cursor), so EDX - N is 16-byte aligned
     XMM0 = fill byte in all 16 lanes
     EAX  = fill byte in all 4 bytes

   The labels are grouped by N mod 16.  Each group is a fall-through chain:
   one aligned 16-byte store per full 16 bytes, starting at EDX - N, then
   the last N mod 16 bytes written with movq/movl/movw/movb, then return
   dest.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: nothing follows, so the unwind state
	   need not be re-established (RETURN_END rather than RETURN).  */
	RETURN_END

END (MEMSET)