blob: c30bf746431eb82d125708ba29de7e0d8f25a82f [file] [log] [blame]
Bruce Beare8ff1a272010-03-04 11:03:37 -08001/*
Varvara Rainchik5a922842014-04-24 15:41:20 +04002Copyright (c) 2014, Intel Corporation
Bruce Beare8ff1a272010-03-04 11:03:37 -08003All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
Liubov Dmitrieva0a490662012-01-17 12:55:46 +040031#include "cache.h"
Varvara Rainchik5a922842014-04-24 15:41:20 +040032
/* Name of the exported routine.  A file that includes this one may
   predefine MEMSET to build the same code under another symbol.  */
#ifndef MEMSET
# define MEMSET	memset
#endif

/* Spell LABEL with the assembler-local ".L" prefix.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Pad to a 2^n byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers around the assembler's CFI directives.  Each one can be
   replaced by defining the macro before this point.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* Open a global, 16-byte aligned function NAME and start its CFI
   region.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.globl name;			\
	.type name, @function;		\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* Close the CFI region and record the size of NAME.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves by 4 and
   REG is saved at the new top of stack.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching 4-byte pop of REG.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop REG and keep the unwind information in step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
90
/* Offsets of the stack arguments relative to ESP.  PARMS (defined below)
   is the offset of the first argument.  The bzero flavour takes
   (dest, len) and SETRTNVAL expands to nothing; the memset flavour takes
   (dest, chr, len) and SETRTNVAL reloads DEST into EAX.  */
#define DEST		PARMS
#ifdef USE_AS_BZERO
# define LEN		DEST+4
# define SETRTNVAL
#else
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#if !(defined SHARED || defined __PIC__)
/* Non-PIC build: EBX is not saved on entry and the jump table holds
   absolute addresses.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX selects the entry and is also added to EDX
   before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#else
/* PIC build: EBX is saved on entry (hence PARMS is 8) and restored by
   every return.  RETURN is for returns that are followed by more code of
   the same function: after the ret it re-applies CFI_PUSH so the unwind
   information for the following code still describes EBX as saved.
   RETURN_END is the plain form without that re-application.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX selects the entry and is also
   added to EDX before the jump.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
    jmp		*%ebx

/* Helper that copies its own return address into EBX.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN (4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#endif
144
145 .section .text.sse2,"ax",@progbits
146 ALIGN (4)
Liubov Dmitrieva0a490662012-01-17 12:55:46 +0400147ENTRY (MEMSET)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800148 ENTRANCE
149
150 movl LEN(%esp), %ecx
Varvara Rainchik5a922842014-04-24 15:41:20 +0400151 cmp $0, %ecx
152 ja L(1byteormore)
153 SETRTNVAL
154 RETURN
155
156L(1byteormore):
Bruce Beare8ff1a272010-03-04 11:03:37 -0800157#ifdef USE_AS_BZERO
158 xor %eax, %eax
159#else
160 movzbl CHR(%esp), %eax
161 movb %al, %ah
162 /* Fill the whole EAX with pattern. */
163 movl %eax, %edx
Varvara Rainchik5a922842014-04-24 15:41:20 +0400164 shl $16, %eax
Bruce Beare8ff1a272010-03-04 11:03:37 -0800165 or %edx, %eax
166#endif
167 movl DEST(%esp), %edx
Varvara Rainchik5a922842014-04-24 15:41:20 +0400168 cmp $1, %ecx
169 je L(1byte)
170 cmp $16, %ecx
171 jae L(16bytesormore)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800172
Varvara Rainchik5a922842014-04-24 15:41:20 +0400173 cmp $4, %ecx
174 jb L(4bytesless)
175 movl %eax, (%edx)
176 movl %eax, -4(%edx, %ecx)
177 cmp $8, %ecx
178 jb L(8bytesless)
179 movl %eax, 4(%edx)
180 movl %eax, -8(%edx, %ecx)
181L(8bytesless):
182 SETRTNVAL
183 RETURN
Bruce Beare8ff1a272010-03-04 11:03:37 -0800184
Varvara Rainchik5a922842014-04-24 15:41:20 +0400185L(4bytesless):
186 movw %ax, (%edx)
187 movw %ax, -2(%edx, %ecx)
188 SETRTNVAL
189 RETURN
Bruce Beare8ff1a272010-03-04 11:03:37 -0800190
Varvara Rainchik5a922842014-04-24 15:41:20 +0400191L(1byte):
192 movb %al, (%edx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800193 SETRTNVAL
194 RETURN
195
196 ALIGN (4)
Varvara Rainchik5a922842014-04-24 15:41:20 +0400197L(16bytesormore):
Bruce Beare8ff1a272010-03-04 11:03:37 -0800198#ifdef USE_AS_BZERO
199 pxor %xmm0, %xmm0
200#else
201 movd %eax, %xmm0
Bruce Beare8ff1a272010-03-04 11:03:37 -0800202 pshufd $0, %xmm0, %xmm0
203#endif
Varvara Rainchik5a922842014-04-24 15:41:20 +0400204
205 cmp $64, %ecx
206 ja L(64bytesmore)
207 movdqu %xmm0, (%edx)
208 movdqu %xmm0, -16(%edx, %ecx)
209 cmp $32, %ecx
210 jbe L(32bytesless)
211 movdqu %xmm0, 16(%edx)
212 movdqu %xmm0, -32(%edx, %ecx)
213L(32bytesless):
214 SETRTNVAL
215 RETURN
216
217L(64bytesmore):
Bruce Beare8ff1a272010-03-04 11:03:37 -0800218 testl $0xf, %edx
219 jz L(aligned_16)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800220L(not_aligned_16):
221 movdqu %xmm0, (%edx)
222 movl %edx, %eax
223 and $-16, %edx
224 add $16, %edx
225 sub %edx, %eax
226 add %eax, %ecx
227 movd %xmm0, %eax
228
229 ALIGN (4)
230L(aligned_16):
231 cmp $128, %ecx
232 jae L(128bytesormore)
233
234L(aligned_16_less128bytes):
235 BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
236
237 ALIGN (4)
238L(128bytesormore):
239#ifdef SHARED_CACHE_SIZE
240 PUSH (%ebx)
241 mov $SHARED_CACHE_SIZE, %ebx
242#else
Nick Kralevich0aa82892011-11-11 15:47:24 -0800243# if (defined SHARED || defined __PIC__)
Varvara Rainchik5a922842014-04-24 15:41:20 +0400244 call __x86.get_pc_thunk.bx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800245 add $_GLOBAL_OFFSET_TABLE_, %ebx
Varvara Rainchik5a922842014-04-24 15:41:20 +0400246 mov $__x86_shared_cache_size@GOTOFF(%ebx), %ebx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800247# else
248 PUSH (%ebx)
Varvara Rainchik5a922842014-04-24 15:41:20 +0400249 mov $__x86_shared_cache_size, %ebx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800250# endif
251#endif
252 cmp %ebx, %ecx
253 jae L(128bytesormore_nt_start)
254
Varvara Rainchik5a922842014-04-24 15:41:20 +0400255 POP (%ebx)
Bruce Beare8ff1a272010-03-04 11:03:37 -0800256
257#ifdef DATA_CACHE_SIZE
Varvara Rainchik5a922842014-04-24 15:41:20 +0400258 PUSH (%ebx)
259 mov $DATA_CACHE_SIZE, %ebx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800260#else
Nick Kralevich0aa82892011-11-11 15:47:24 -0800261# if (defined SHARED || defined __PIC__)
Varvara Rainchik5a922842014-04-24 15:41:20 +0400262 call __x86.get_pc_thunk.bx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800263 add $_GLOBAL_OFFSET_TABLE_, %ebx
Varvara Rainchik5a922842014-04-24 15:41:20 +0400264 mov $__x86_data_cache_size@GOTOFF(%ebx), %ebx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800265# else
Varvara Rainchik5a922842014-04-24 15:41:20 +0400266 PUSH (%ebx)
267 mov $__x86_data_cache_size, %ebx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800268# endif
269#endif
270
Varvara Rainchik5a922842014-04-24 15:41:20 +0400271 cmp %ebx, %ecx
Bruce Beare8ff1a272010-03-04 11:03:37 -0800272 jae L(128bytes_L2_normal)
273 subl $128, %ecx
/* Store 128 bytes of XMM0 at (%edx): the first 16 bytes with instruction
   FIRST, the remaining 112 with instruction REST.  None of the store
   instructions used with it modify the flags.  */
#ifndef MEMSET_STORE_128
# define MEMSET_STORE_128(first, rest)	\
	first	%xmm0, (%edx);		\
	rest	%xmm0, 0x10(%edx);	\
	rest	%xmm0, 0x20(%edx);	\
	rest	%xmm0, 0x30(%edx);	\
	rest	%xmm0, 0x40(%edx);	\
	rest	%xmm0, 0x50(%edx);	\
	rest	%xmm0, 0x60(%edx);	\
	rest	%xmm0, 0x70(%edx)
#endif

/* Loop for lengths below the data cache threshold, unrolled twice.
   ECX arrives biased by -128, so a borrow out of the subtraction means
   fewer than 128 bytes remain once the current 128-byte chunk is
   stored.  The conditional jumps consume the flags of the preceding
   sub: the stores and the lea in between leave the flags alone.  */
L(128bytesormore_normal):
	sub	$128, %ecx
	MEMSET_STORE_128 (movdqa, movaps)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)

	sub	$128, %ecx
	MEMSET_STORE_128 (movdqa, movaps)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the -128 bias: ECX = bytes left, below 128.  */
	lea	128(%ecx), %ecx
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Loop for lengths at or above the data cache threshold: same aligned
   stores, preceded by two prefetchnta hints ahead of the write
   pointer.  */
L(128bytes_L2_normal):
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	MEMSET_STORE_128 (movdqa, movaps)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* Lengths at or above the shared cache threshold.  EBX holds the
   threshold on entry.  The first loop fills that many bytes with
   ordinary stores in 128-byte steps, counting EBX down.  The rest
   (ECX = length - threshold) is written with non-temporal stores while
   at least 128 bytes remain.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	MEMSET_STORE_128 (movdqa, movaps)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	MEMSET_STORE_128 (movntdq, movntdq)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Fence after the non-temporal stores, before the ordinary tail
	   stores.  */
	sfence
L(shared_cache_loop_end):
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
370
371
/* Jump table used by BRANCH_TO_JMPTBL_ENTRY: entry N (0 <= N <= 127)
   refers to L(aligned_16_Nbytes).  JMPTBL decides whether an entry is an
   absolute address or an offset from the start of the table.  */
#define MEMSET_TABLE_ENTRY(n)	\
	.int	JMPTBL (L(aligned_16_##n##bytes), L(table_16_128bytes))

	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	MEMSET_TABLE_ENTRY (0);   MEMSET_TABLE_ENTRY (1);   MEMSET_TABLE_ENTRY (2);   MEMSET_TABLE_ENTRY (3)
	MEMSET_TABLE_ENTRY (4);   MEMSET_TABLE_ENTRY (5);   MEMSET_TABLE_ENTRY (6);   MEMSET_TABLE_ENTRY (7)
	MEMSET_TABLE_ENTRY (8);   MEMSET_TABLE_ENTRY (9);   MEMSET_TABLE_ENTRY (10);  MEMSET_TABLE_ENTRY (11)
	MEMSET_TABLE_ENTRY (12);  MEMSET_TABLE_ENTRY (13);  MEMSET_TABLE_ENTRY (14);  MEMSET_TABLE_ENTRY (15)
	MEMSET_TABLE_ENTRY (16);  MEMSET_TABLE_ENTRY (17);  MEMSET_TABLE_ENTRY (18);  MEMSET_TABLE_ENTRY (19)
	MEMSET_TABLE_ENTRY (20);  MEMSET_TABLE_ENTRY (21);  MEMSET_TABLE_ENTRY (22);  MEMSET_TABLE_ENTRY (23)
	MEMSET_TABLE_ENTRY (24);  MEMSET_TABLE_ENTRY (25);  MEMSET_TABLE_ENTRY (26);  MEMSET_TABLE_ENTRY (27)
	MEMSET_TABLE_ENTRY (28);  MEMSET_TABLE_ENTRY (29);  MEMSET_TABLE_ENTRY (30);  MEMSET_TABLE_ENTRY (31)
	MEMSET_TABLE_ENTRY (32);  MEMSET_TABLE_ENTRY (33);  MEMSET_TABLE_ENTRY (34);  MEMSET_TABLE_ENTRY (35)
	MEMSET_TABLE_ENTRY (36);  MEMSET_TABLE_ENTRY (37);  MEMSET_TABLE_ENTRY (38);  MEMSET_TABLE_ENTRY (39)
	MEMSET_TABLE_ENTRY (40);  MEMSET_TABLE_ENTRY (41);  MEMSET_TABLE_ENTRY (42);  MEMSET_TABLE_ENTRY (43)
	MEMSET_TABLE_ENTRY (44);  MEMSET_TABLE_ENTRY (45);  MEMSET_TABLE_ENTRY (46);  MEMSET_TABLE_ENTRY (47)
	MEMSET_TABLE_ENTRY (48);  MEMSET_TABLE_ENTRY (49);  MEMSET_TABLE_ENTRY (50);  MEMSET_TABLE_ENTRY (51)
	MEMSET_TABLE_ENTRY (52);  MEMSET_TABLE_ENTRY (53);  MEMSET_TABLE_ENTRY (54);  MEMSET_TABLE_ENTRY (55)
	MEMSET_TABLE_ENTRY (56);  MEMSET_TABLE_ENTRY (57);  MEMSET_TABLE_ENTRY (58);  MEMSET_TABLE_ENTRY (59)
	MEMSET_TABLE_ENTRY (60);  MEMSET_TABLE_ENTRY (61);  MEMSET_TABLE_ENTRY (62);  MEMSET_TABLE_ENTRY (63)
	MEMSET_TABLE_ENTRY (64);  MEMSET_TABLE_ENTRY (65);  MEMSET_TABLE_ENTRY (66);  MEMSET_TABLE_ENTRY (67)
	MEMSET_TABLE_ENTRY (68);  MEMSET_TABLE_ENTRY (69);  MEMSET_TABLE_ENTRY (70);  MEMSET_TABLE_ENTRY (71)
	MEMSET_TABLE_ENTRY (72);  MEMSET_TABLE_ENTRY (73);  MEMSET_TABLE_ENTRY (74);  MEMSET_TABLE_ENTRY (75)
	MEMSET_TABLE_ENTRY (76);  MEMSET_TABLE_ENTRY (77);  MEMSET_TABLE_ENTRY (78);  MEMSET_TABLE_ENTRY (79)
	MEMSET_TABLE_ENTRY (80);  MEMSET_TABLE_ENTRY (81);  MEMSET_TABLE_ENTRY (82);  MEMSET_TABLE_ENTRY (83)
	MEMSET_TABLE_ENTRY (84);  MEMSET_TABLE_ENTRY (85);  MEMSET_TABLE_ENTRY (86);  MEMSET_TABLE_ENTRY (87)
	MEMSET_TABLE_ENTRY (88);  MEMSET_TABLE_ENTRY (89);  MEMSET_TABLE_ENTRY (90);  MEMSET_TABLE_ENTRY (91)
	MEMSET_TABLE_ENTRY (92);  MEMSET_TABLE_ENTRY (93);  MEMSET_TABLE_ENTRY (94);  MEMSET_TABLE_ENTRY (95)
	MEMSET_TABLE_ENTRY (96);  MEMSET_TABLE_ENTRY (97);  MEMSET_TABLE_ENTRY (98);  MEMSET_TABLE_ENTRY (99)
	MEMSET_TABLE_ENTRY (100); MEMSET_TABLE_ENTRY (101); MEMSET_TABLE_ENTRY (102); MEMSET_TABLE_ENTRY (103)
	MEMSET_TABLE_ENTRY (104); MEMSET_TABLE_ENTRY (105); MEMSET_TABLE_ENTRY (106); MEMSET_TABLE_ENTRY (107)
	MEMSET_TABLE_ENTRY (108); MEMSET_TABLE_ENTRY (109); MEMSET_TABLE_ENTRY (110); MEMSET_TABLE_ENTRY (111)
	MEMSET_TABLE_ENTRY (112); MEMSET_TABLE_ENTRY (113); MEMSET_TABLE_ENTRY (114); MEMSET_TABLE_ENTRY (115)
	MEMSET_TABLE_ENTRY (116); MEMSET_TABLE_ENTRY (117); MEMSET_TABLE_ENTRY (118); MEMSET_TABLE_ENTRY (119)
	MEMSET_TABLE_ENTRY (120); MEMSET_TABLE_ENTRY (121); MEMSET_TABLE_ENTRY (122); MEMSET_TABLE_ENTRY (123)
	MEMSET_TABLE_ENTRY (124); MEMSET_TABLE_ENTRY (125); MEMSET_TABLE_ENTRY (126); MEMSET_TABLE_ENTRY (127)
	.popsection

#undef MEMSET_TABLE_ENTRY
504
/* Tail code reached through L(table_16_128bytes).  On arrival EDX points
   one past the last byte to fill, and entering at L(aligned_16_Nbytes)
   stores exactly the N bytes that end there.  Each chain falls through
   a run of 16-byte aligned stores (N = 16*k + r, largest k first) into
   the code for the remainder r, which uses 8/4/2/1-byte stores.  */

/* One link of a chain: the label for N bytes plus the 16-byte store that
   starts N bytes below EDX.  */
#define MEMSET_TAIL_STEP(n)	\
	L(aligned_16_##n##bytes): movdqa %xmm0, -n(%edx)

/* Seven consecutive links on a 16-byte aligned start, for the sizes
   A > B > ... > G that share one remainder.  */
#define MEMSET_TAIL_CHAIN(a, b, c, d, e, f, g)	\
	ALIGN (4);				\
	MEMSET_TAIL_STEP (a);			\
	MEMSET_TAIL_STEP (b);			\
	MEMSET_TAIL_STEP (c);			\
	MEMSET_TAIL_STEP (d);			\
	MEMSET_TAIL_STEP (e);			\
	MEMSET_TAIL_STEP (f);			\
	MEMSET_TAIL_STEP (g)

	/* Remainder 0.  */
	MEMSET_TAIL_CHAIN (112, 96, 80, 64, 48, 32, 16)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* Remainder 1.  */
	MEMSET_TAIL_CHAIN (113, 97, 81, 65, 49, 33, 17)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 2.  */
	MEMSET_TAIL_CHAIN (114, 98, 82, 66, 50, 34, 18)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 3.  */
	MEMSET_TAIL_CHAIN (115, 99, 83, 67, 51, 35, 19)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 4.  */
	MEMSET_TAIL_CHAIN (116, 100, 84, 68, 52, 36, 20)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 5.  */
	MEMSET_TAIL_CHAIN (117, 101, 85, 69, 53, 37, 21)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 6.  */
	MEMSET_TAIL_CHAIN (118, 102, 86, 70, 54, 38, 22)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 7.  */
	MEMSET_TAIL_CHAIN (119, 103, 87, 71, 55, 39, 23)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 8.  */
	MEMSET_TAIL_CHAIN (120, 104, 88, 72, 56, 40, 24)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 9.  */
	MEMSET_TAIL_CHAIN (121, 105, 89, 73, 57, 41, 25)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 10.  */
	MEMSET_TAIL_CHAIN (122, 106, 90, 74, 58, 42, 26)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 11.  */
	MEMSET_TAIL_CHAIN (123, 107, 91, 75, 59, 43, 27)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 12.  */
	MEMSET_TAIL_CHAIN (124, 108, 92, 76, 60, 44, 28)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 13.  */
	MEMSET_TAIL_CHAIN (125, 109, 93, 77, 61, 45, 29)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 14.  */
	MEMSET_TAIL_CHAIN (126, 110, 94, 78, 62, 46, 30)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* Remainder 15.  This is the last return of the function, so it
	   uses RETURN_END.  */
	MEMSET_TAIL_CHAIN (127, 111, 95, 79, 63, 47, 31)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END (MEMSET)

#undef MEMSET_TAIL_CHAIN
#undef MEMSET_TAIL_STEP