/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "cache.h"

#ifndef MEMMOVE
# define MEMMOVE memmove
#endif
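
/* These #ifndef fallbacks (including MEMMOVE above) let the file assemble
   standalone; a build may predefine MEMMOVE to give the implementation a
   different symbol name, or supply its own L, cfi_* and ENTRY/END helpers. */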

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

#define DEST		PARMS
#define SRC		DEST+4
#define LEN		SRC+4

#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#define PARMS		8	/* Preserve EBX. */
#define ENTRANCE	PUSH (%ebx);
#define RETURN_END	POP (%ebx); ret
#define RETURN		RETURN_END; CFI_PUSH (%ebx)

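/* After ENTRANCE pushes %ebx the stack is: saved %ebx, return address,
   dst, src, len -- hence PARMS == 8 and the DEST/SRC/LEN offsets above.
   RETURN ends with CFI_PUSH so the unwind info stays correct for code
   that follows a mid-function return. */
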
	.section .text.sse2,"ax",@progbits
ENTRY (MEMMOVE)
	ENTRANCE
	movl	LEN(%esp), %ecx
	movl	SRC(%esp), %eax
	movl	DEST(%esp), %edx

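/* Register roles from here on: %eax = src, %edx = dst (also the return
   value), %ecx = len. */
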
/* Check whether we should copy backward or forward.  When dst > src the
   regions may overlap such that a forward copy would clobber not-yet-read
   source bytes, so copy backward; equal pointers need no copy at all. */
	cmp	%eax, %edx
	je	L(mm_return)
	jg	L(mm_len_0_or_more_backward)

/* Now do checks for lengths.  We do [0..16], [16..32], [32..64] and
   [64..128] separately. */
	cmp	$16, %ecx
	jbe	L(mm_len_0_16_bytes_forward)

	cmpl	$32, %ecx
	ja	L(mm_len_32_or_more_forward)

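/* The small-size paths below share one trick: load both the head and the
   tail of the region before storing anything, letting the stores overlap
   in the middle.  A few unaligned accesses thus cover every length in the
   range without a per-size branch. */
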
/* Copy [17..32] bytes and return. */
	movdqu	(%eax), %xmm0
	movdqu	-16(%eax, %ecx), %xmm1
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, -16(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_32_or_more_forward):
	cmpl	$64, %ecx
	ja	L(mm_len_64_or_more_forward)

/* Copy [33..64] bytes and return. */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	-16(%eax, %ecx), %xmm2
	movdqu	-32(%eax, %ecx), %xmm3
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, -16(%edx, %ecx)
	movdqu	%xmm3, -32(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_64_or_more_forward):
	cmpl	$128, %ecx
	ja	L(mm_len_128_or_more_forward)

/* Copy [65..128] bytes and return. */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqu	-64(%eax, %ecx), %xmm4
	movdqu	-48(%eax, %ecx), %xmm5
	movdqu	-32(%eax, %ecx), %xmm6
	movdqu	-16(%eax, %ecx), %xmm7
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqu	%xmm4, -64(%edx, %ecx)
	movdqu	%xmm5, -48(%edx, %ecx)
	movdqu	%xmm6, -32(%edx, %ecx)
	movdqu	%xmm7, -16(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_128_or_more_forward):
	PUSH (%esi)
	PUSH (%edi)

/* Align the destination to 64 bytes.  The first 64 source bytes are loaded
   up front: with overlapping regions the head stores below could otherwise
   overwrite source data that has not been read yet. */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3

	leal	64(%edx), %edi
	andl	$-64, %edi
	subl	%edx, %eax

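/* From here on %eax holds src - dst, so (%eax, %edi) addresses the source
   byte corresponding to destination address %edi. */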
	movdqu	(%eax, %edi), %xmm4
	movdqu	16(%eax, %edi), %xmm5
	movdqu	32(%eax, %edi), %xmm6
	movdqu	48(%eax, %edi), %xmm7

	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqa	%xmm4, (%edi)
	movaps	%xmm5, 16(%edi)
	movaps	%xmm6, 32(%edi)
	movaps	%xmm7, 48(%edi)
	addl	$64, %edi

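/* %ebx = last 64-byte-aligned address inside the destination; the main
   loop copies whole 64-byte blocks while %edi is below it. */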
	leal	(%edx, %ecx), %ebx
	andl	$-64, %ebx
	cmp	%edi, %ebx
	jbe	L(mm_copy_remaining_forward)

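/* Copies of at least half the shared cache size take the non-temporal
   path: movntdq stores bypass the cache, so a huge copy does not evict
   the rest of the working set. */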
	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
	jae	L(mm_large_page_loop_forward)

	.p2align 4
L(mm_main_loop_forward):

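/* Each iteration copies 64 bytes; prefetch the source two iterations
   ahead. */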
	prefetcht0 128(%eax, %edi)

	movdqu	(%eax, %edi), %xmm0
	movdqu	16(%eax, %edi), %xmm1
	movdqu	32(%eax, %edi), %xmm2
	movdqu	48(%eax, %edi), %xmm3
	movdqa	%xmm0, (%edi)
	movaps	%xmm1, 16(%edi)
	movaps	%xmm2, 32(%edi)
	movaps	%xmm3, 48(%edi)
	leal	64(%edi), %edi
	cmp	%edi, %ebx
	ja	L(mm_main_loop_forward)

L(mm_copy_remaining_forward):
	addl	%edx, %ecx
	subl	%edi, %ecx
/* Everything up to %edi in the destination has been copied; %ecx now holds
   the number of bytes left.  Point %esi at the matching source position. */
	leal	(%edi, %eax), %esi

L(mm_remaining_0_64_bytes_forward):
	cmp	$32, %ecx
	ja	L(mm_remaining_33_64_bytes_forward)
	cmp	$16, %ecx
	ja	L(mm_remaining_17_32_bytes_forward)
	testl	%ecx, %ecx
	.p2align 4,,2
	je	L(mm_return_pop_all)

	cmpb	$8, %cl
	ja	L(mm_remaining_9_16_bytes_forward)
	cmpb	$4, %cl
	.p2align 4,,5
	ja	L(mm_remaining_5_8_bytes_forward)
	cmpb	$2, %cl
	.p2align 4,,1
	ja	L(mm_remaining_3_4_bytes_forward)
	movzbl	-1(%esi,%ecx), %eax
	movzbl	(%esi), %ebx
	movb	%al, -1(%edi,%ecx)
	movb	%bl, (%edi)
	jmp	L(mm_return_pop_all)

L(mm_remaining_33_64_bytes_forward):
	movdqu	(%esi), %xmm0
	movdqu	16(%esi), %xmm1
	movdqu	-32(%esi, %ecx), %xmm2
	movdqu	-16(%esi, %ecx), %xmm3
	movdqu	%xmm0, (%edi)
	movdqu	%xmm1, 16(%edi)
	movdqu	%xmm2, -32(%edi, %ecx)
	movdqu	%xmm3, -16(%edi, %ecx)
	jmp	L(mm_return_pop_all)

L(mm_remaining_17_32_bytes_forward):
	movdqu	(%esi), %xmm0
	movdqu	-16(%esi, %ecx), %xmm1
	movdqu	%xmm0, (%edi)
	movdqu	%xmm1, -16(%edi, %ecx)
	jmp	L(mm_return_pop_all)

L(mm_remaining_9_16_bytes_forward):
	movq	(%esi), %xmm0
	movq	-8(%esi, %ecx), %xmm1
	movq	%xmm0, (%edi)
	movq	%xmm1, -8(%edi, %ecx)
	jmp	L(mm_return_pop_all)

L(mm_remaining_5_8_bytes_forward):
	movl	(%esi), %eax
	movl	-4(%esi,%ecx), %ebx
	movl	%eax, (%edi)
	movl	%ebx, -4(%edi,%ecx)
	jmp	L(mm_return_pop_all)

L(mm_remaining_3_4_bytes_forward):
	movzwl	-2(%esi,%ecx), %eax
	movzwl	(%esi), %ebx
	movw	%ax, -2(%edi,%ecx)
	movw	%bx, (%edi)
	jmp	L(mm_return_pop_all)

L(mm_len_0_16_bytes_forward):
	testb	$24, %cl
	jne	L(mm_len_9_16_bytes_forward)
	testb	$4, %cl
	.p2align 4,,5
	jne	L(mm_len_5_8_bytes_forward)
	testl	%ecx, %ecx
	.p2align 4,,2
	je	L(mm_return)
	testb	$2, %cl
	.p2align 4,,1
	jne	L(mm_len_2_4_bytes_forward)
	movzbl	-1(%eax,%ecx), %ebx
	movzbl	(%eax), %eax
	movb	%bl, -1(%edx,%ecx)
	movb	%al, (%edx)
	jmp	L(mm_return)

L(mm_len_2_4_bytes_forward):
	movzwl	-2(%eax,%ecx), %ebx
	movzwl	(%eax), %eax
	movw	%bx, -2(%edx,%ecx)
	movw	%ax, (%edx)
	jmp	L(mm_return)

L(mm_len_5_8_bytes_forward):
	movl	(%eax), %ebx
	movl	-4(%eax,%ecx), %eax
	movl	%ebx, (%edx)
	movl	%eax, -4(%edx,%ecx)
	jmp	L(mm_return)

L(mm_len_9_16_bytes_forward):
	movq	(%eax), %xmm0
	movq	-8(%eax, %ecx), %xmm1
	movq	%xmm0, (%edx)
	movq	%xmm1, -8(%edx, %ecx)
	jmp	L(mm_return)

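/* Nothing falls through to mm_recalc_len; these bare CFI_POPs only bring
   the unwind info back in sync, since %edi and %esi have already been
   popped on every path that jumps here. */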
	CFI_POP (%edi)
	CFI_POP (%esi)

L(mm_recalc_len):
/* Compute in %ecx how many bytes are left to copy after
   the main loop stops. */
	movl	%ebx, %ecx
	subl	%edx, %ecx
/* The code for copying backwards. */
L(mm_len_0_or_more_backward):

/* Now do checks for lengths.  We do [0..16], [16..32], [32..64] and
   [64..128] separately. */
	cmp	$16, %ecx
	jbe	L(mm_len_0_16_bytes_backward)

	cmpl	$32, %ecx
	jg	L(mm_len_32_or_more_backward)

/* Copy [17..32] bytes and return. */
	movdqu	(%eax), %xmm0
	movdqu	-16(%eax, %ecx), %xmm1
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, -16(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_32_or_more_backward):
	cmpl	$64, %ecx
	jg	L(mm_len_64_or_more_backward)

/* Copy [33..64] bytes and return. */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	-16(%eax, %ecx), %xmm2
	movdqu	-32(%eax, %ecx), %xmm3
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, -16(%edx, %ecx)
	movdqu	%xmm3, -32(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_64_or_more_backward):
	cmpl	$128, %ecx
	jg	L(mm_len_128_or_more_backward)

/* Copy [65..128] bytes and return. */
	movdqu	(%eax), %xmm0
	movdqu	16(%eax), %xmm1
	movdqu	32(%eax), %xmm2
	movdqu	48(%eax), %xmm3
	movdqu	-64(%eax, %ecx), %xmm4
	movdqu	-48(%eax, %ecx), %xmm5
	movdqu	-32(%eax, %ecx), %xmm6
	movdqu	-16(%eax, %ecx), %xmm7
	movdqu	%xmm0, (%edx)
	movdqu	%xmm1, 16(%edx)
	movdqu	%xmm2, 32(%edx)
	movdqu	%xmm3, 48(%edx)
	movdqu	%xmm4, -64(%edx, %ecx)
	movdqu	%xmm5, -48(%edx, %ecx)
	movdqu	%xmm6, -32(%edx, %ecx)
	movdqu	%xmm7, -16(%edx, %ecx)
	jmp	L(mm_return)

L(mm_len_128_or_more_backward):
	PUSH (%esi)
	PUSH (%edi)

/* Align the destination end to 64 bytes.  The last 64 bytes of the source
   are loaded up front: with overlapping regions the stores below could
   otherwise overwrite source data that has not been read yet. */
	movdqu	-16(%eax, %ecx), %xmm0
	movdqu	-32(%eax, %ecx), %xmm1
	movdqu	-48(%eax, %ecx), %xmm2
	movdqu	-64(%eax, %ecx), %xmm3

	leal	(%edx, %ecx), %edi
	andl	$-64, %edi

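/* %esi = src - dst, so (%edi, %esi) addresses the source byte
   corresponding to destination address %edi. */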
	movl	%eax, %esi
	subl	%edx, %esi

	movdqu	-16(%edi, %esi), %xmm4
	movdqu	-32(%edi, %esi), %xmm5
	movdqu	-48(%edi, %esi), %xmm6
	movdqu	-64(%edi, %esi), %xmm7

	movdqu	%xmm0, -16(%edx, %ecx)
	movdqu	%xmm1, -32(%edx, %ecx)
	movdqu	%xmm2, -48(%edx, %ecx)
	movdqu	%xmm3, -64(%edx, %ecx)
	movdqa	%xmm4, -16(%edi)
	movdqa	%xmm5, -32(%edi)
	movdqa	%xmm6, -48(%edi)
	movdqa	%xmm7, -64(%edi)
	leal	-64(%edi), %edi

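/* %ebx = first 64-byte-aligned address past the start of the destination;
   the backward loop copies whole 64-byte blocks while %edi is above it. */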
	leal	64(%edx), %ebx
	andl	$-64, %ebx

	cmp	%edi, %ebx
	jae	L(mm_main_loop_backward_end)

	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
	jae	L(mm_large_page_loop_backward)

	.p2align 4
L(mm_main_loop_backward):

	prefetcht0 -128(%edi, %esi)

	movdqu	-64(%edi, %esi), %xmm0
	movdqu	-48(%edi, %esi), %xmm1
	movdqu	-32(%edi, %esi), %xmm2
	movdqu	-16(%edi, %esi), %xmm3
	movdqa	%xmm0, -64(%edi)
	movdqa	%xmm1, -48(%edi)
	movdqa	%xmm2, -32(%edi)
	movdqa	%xmm3, -16(%edi)
	leal	-64(%edi), %edi
	cmp	%edi, %ebx
	jb	L(mm_main_loop_backward)
L(mm_main_loop_backward_end):
	POP (%edi)
	POP (%esi)
	jmp	L(mm_recalc_len)

/* Copy [0..16] bytes and return. */
L(mm_len_0_16_bytes_backward):
	testb	$24, %cl
	jnz	L(mm_len_9_16_bytes_backward)
	testb	$4, %cl
	.p2align 4,,5
	jnz	L(mm_len_5_8_bytes_backward)
	testl	%ecx, %ecx
	.p2align 4,,2
	je	L(mm_return)
	testb	$2, %cl
	.p2align 4,,1
	jne	L(mm_len_3_4_bytes_backward)
	movzbl	-1(%eax,%ecx), %ebx
	movzbl	(%eax), %eax
	movb	%bl, -1(%edx,%ecx)
	movb	%al, (%edx)
	jmp	L(mm_return)

L(mm_len_3_4_bytes_backward):
	movzwl	-2(%eax,%ecx), %ebx
	movzwl	(%eax), %eax
	movw	%bx, -2(%edx,%ecx)
	movw	%ax, (%edx)
	jmp	L(mm_return)

L(mm_len_9_16_bytes_backward):
	PUSH (%esi)
	movl	-4(%eax,%ecx), %ebx
	movl	-8(%eax,%ecx), %esi
	movl	%ebx, -4(%edx,%ecx)
	movl	%esi, -8(%edx,%ecx)
	subl	$8, %ecx
	POP (%esi)
	jmp	L(mm_len_0_16_bytes_backward)

L(mm_len_5_8_bytes_backward):
	movl	(%eax), %ebx
	movl	-4(%eax,%ecx), %eax
	movl	%ebx, (%edx)
	movl	%eax, -4(%edx,%ecx)

L(mm_return):
	movl	%edx, %eax
	RETURN

L(mm_return_pop_all):
	movl	%edx, %eax
	POP (%edi)
	POP (%esi)
	RETURN

/* Forward copy for very large lengths, using non-temporal stores. */

	.p2align 4
L(mm_large_page_loop_forward):
	movdqu	(%eax, %edi), %xmm0
	movdqu	16(%eax, %edi), %xmm1
	movdqu	32(%eax, %edi), %xmm2
	movdqu	48(%eax, %edi), %xmm3
	movntdq	%xmm0, (%edi)
	movntdq	%xmm1, 16(%edi)
	movntdq	%xmm2, 32(%edi)
	movntdq	%xmm3, 48(%edi)
	leal	64(%edi), %edi
	cmp	%edi, %ebx
	ja	L(mm_large_page_loop_forward)
	sfence
	jmp	L(mm_copy_remaining_forward)

/* Backward copy for very large lengths, using non-temporal stores. */
	.p2align 4
L(mm_large_page_loop_backward):
	movdqu	-64(%edi, %esi), %xmm0
	movdqu	-48(%edi, %esi), %xmm1
	movdqu	-32(%edi, %esi), %xmm2
	movdqu	-16(%edi, %esi), %xmm3
	movntdq	%xmm0, -64(%edi)
	movntdq	%xmm1, -48(%edi)
	movntdq	%xmm2, -32(%edi)
	movntdq	%xmm3, -16(%edi)
	leal	-64(%edi), %edi
	cmp	%edi, %ebx
	jb	L(mm_large_page_loop_backward)
	sfence
	POP (%edi)
	POP (%esi)
	jmp	L(mm_recalc_len)

END (MEMMOVE)