/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "cache.h"

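/* The exported symbol name can be overridden by the including build;
	it defaults to memmove. */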
#ifndef MEMMOVE
# define MEMMOVE	memmove
#endif

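/* The .L prefix keeps branch labels local to this object file. */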
#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef ALIAS_SYMBOL
# define ALIAS_SYMBOL(alias, original) \
	.globl alias; \
	.equ alias, original
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

/* A push/pop moves the stack pointer by 8 bytes on x86-64. */
#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (8); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-8); \
	cfi_restore (REG)

#define PUSH(REG)	push REG;
#define POP(REG)	pop REG;

#define ENTRANCE	PUSH (%rbx);
#define RETURN_END	POP (%rbx); ret
#define RETURN		RETURN_END;
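/* %rbx is callee-saved in the SysV AMD64 ABI; ENTRANCE/RETURN preserve
	it because the main loops below use it as the block-copy bound. */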

	.section .text.sse2,"ax",@progbits
ENTRY (MEMMOVE)
	ENTRANCE
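/* memmove returns the destination pointer, so keep it in %rax. */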
	mov	%rdi, %rax

/* Check whether we should copy backward or forward. */
	cmp	%rsi, %rdi
	je	L(mm_return)
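/* dst > src: if the regions overlap, a forward copy would overwrite
	source bytes before they have been read, so go backward. */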
	jg	L(mm_len_0_or_more_backward)

/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
	separately. */
	cmp	$16, %rdx
	jbe	L(mm_len_0_16_bytes_forward)

	cmp	$32, %rdx
	ja	L(mm_len_32_or_more_forward)

/* Copy [16..32] bytes and return. */
	movdqu	(%rsi), %xmm0
	movdqu	-16(%rsi, %rdx), %xmm1
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, -16(%rdi, %rdx)
	jmp	L(mm_return)

L(mm_len_32_or_more_forward):
	cmp	$64, %rdx
	ja	L(mm_len_64_or_more_forward)

/* Copy [32..64] bytes and return. */
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm1
	movdqu	-16(%rsi, %rdx), %xmm2
	movdqu	-32(%rsi, %rdx), %xmm3
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 16(%rdi)
	movdqu	%xmm2, -16(%rdi, %rdx)
	movdqu	%xmm3, -32(%rdi, %rdx)
	jmp	L(mm_return)

L(mm_len_64_or_more_forward):
	cmp	$128, %rdx
	ja	L(mm_len_128_or_more_forward)

/* Copy [64..128] bytes and return. */
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm1
	movdqu	32(%rsi), %xmm2
	movdqu	48(%rsi), %xmm3
	movdqu	-64(%rsi, %rdx), %xmm4
	movdqu	-48(%rsi, %rdx), %xmm5
	movdqu	-32(%rsi, %rdx), %xmm6
	movdqu	-16(%rsi, %rdx), %xmm7
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 16(%rdi)
	movdqu	%xmm2, 32(%rdi)
	movdqu	%xmm3, 48(%rdi)
	movdqu	%xmm4, -64(%rdi, %rdx)
	movdqu	%xmm5, -48(%rdi, %rdx)
	movdqu	%xmm6, -32(%rdi, %rdx)
	movdqu	%xmm7, -16(%rdi, %rdx)
	jmp	L(mm_return)

L(mm_len_128_or_more_forward):
/* Align the destination address. */
/* Save the first 64 (unaligned) bytes before any stores can clobber them. */
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm1
	movdqu	32(%rsi), %xmm2
	movdqu	48(%rsi), %xmm3

	lea	64(%rdi), %r8
	and	$-64, %r8	/* r8 now aligned to next 64 byte boundary */
	sub	%rdi, %rsi	/* rsi = src - dst = diff */

	movdqu	(%r8, %rsi), %xmm4
	movdqu	16(%r8, %rsi), %xmm5
	movdqu	32(%r8, %rsi), %xmm6
	movdqu	48(%r8, %rsi), %xmm7

	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 16(%rdi)
	movdqu	%xmm2, 32(%rdi)
	movdqu	%xmm3, 48(%rdi)
	movdqa	%xmm4, (%r8)
	movaps	%xmm5, 16(%r8)
	movaps	%xmm6, 32(%r8)
	movaps	%xmm7, 48(%r8)
	add	$64, %r8

	lea	(%rdi, %rdx), %rbx
	and	$-64, %rbx
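/* %rbx = end of dst rounded down to a 64-byte boundary; the main
	loop copies aligned 64-byte blocks while %r8 stays below it. */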
	cmp	%r8, %rbx
	jbe	L(mm_copy_remaining_forward)

	cmp	$SHARED_CACHE_SIZE_HALF, %rdx
	jae	L(mm_large_page_loop_forward)
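/* Copies of at least half the shared cache size take the non-temporal
	path below so a huge copy does not evict the entire cache. */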

	.p2align 4
L(mm_main_loop_forward):

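/* Prefetch the source two 64-byte iterations ahead of the loads. */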
	prefetcht0 128(%r8, %rsi)

	movdqu	(%r8, %rsi), %xmm0
	movdqu	16(%r8, %rsi), %xmm1
	movdqu	32(%r8, %rsi), %xmm2
	movdqu	48(%r8, %rsi), %xmm3
	movdqa	%xmm0, (%r8)
	movaps	%xmm1, 16(%r8)
	movaps	%xmm2, 32(%r8)
	movaps	%xmm3, 48(%r8)
	lea	64(%r8), %r8
	cmp	%r8, %rbx
	ja	L(mm_main_loop_forward)

L(mm_copy_remaining_forward):
	add	%rdi, %rdx
	sub	%r8, %rdx
/* Everything below %r8 in dst has been copied.  %rdx now holds the
	number of bytes left to copy; compute the matching source
	position in %r9. */
	lea	(%r8, %rsi), %r9

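/* Dispatch on the remaining length; each case copies the head and the
	tail of the range, letting the two accesses overlap. */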
L(mm_remaining_0_64_bytes_forward):
	cmp	$32, %rdx
	ja	L(mm_remaining_33_64_bytes_forward)
	cmp	$16, %rdx
	ja	L(mm_remaining_17_32_bytes_forward)
	test	%rdx, %rdx
	.p2align 4,,2
	je	L(mm_return)

	cmpb	$8, %dl
	ja	L(mm_remaining_9_16_bytes_forward)
	cmpb	$4, %dl
	.p2align 4,,5
	ja	L(mm_remaining_5_8_bytes_forward)
	cmpb	$2, %dl
	.p2align 4,,1
	ja	L(mm_remaining_3_4_bytes_forward)
	movzbl	-1(%r9,%rdx), %esi
	movzbl	(%r9), %ebx
	movb	%sil, -1(%r8,%rdx)
	movb	%bl, (%r8)
	jmp	L(mm_return)

L(mm_remaining_33_64_bytes_forward):
	movdqu	(%r9), %xmm0
	movdqu	16(%r9), %xmm1
	movdqu	-32(%r9, %rdx), %xmm2
	movdqu	-16(%r9, %rdx), %xmm3
	movdqu	%xmm0, (%r8)
	movdqu	%xmm1, 16(%r8)
	movdqu	%xmm2, -32(%r8, %rdx)
	movdqu	%xmm3, -16(%r8, %rdx)
	jmp	L(mm_return)

L(mm_remaining_17_32_bytes_forward):
	movdqu	(%r9), %xmm0
	movdqu	-16(%r9, %rdx), %xmm1
	movdqu	%xmm0, (%r8)
	movdqu	%xmm1, -16(%r8, %rdx)
	jmp	L(mm_return)

L(mm_remaining_5_8_bytes_forward):
	movl	(%r9), %esi
	movl	-4(%r9,%rdx), %ebx
	movl	%esi, (%r8)
	movl	%ebx, -4(%r8,%rdx)
	jmp	L(mm_return)

L(mm_remaining_9_16_bytes_forward):
	mov	(%r9), %rsi
	mov	-8(%r9, %rdx), %rbx
	mov	%rsi, (%r8)
	mov	%rbx, -8(%r8, %rdx)
	jmp	L(mm_return)

L(mm_remaining_3_4_bytes_forward):
	movzwl	-2(%r9,%rdx), %esi
	movzwl	(%r9), %ebx
	movw	%si, -2(%r8,%rdx)
	movw	%bx, (%r8)
	jmp	L(mm_return)

L(mm_len_0_16_bytes_forward):
	testb	$24, %dl
	jne	L(mm_len_9_16_bytes_forward)
	testb	$4, %dl
	.p2align 4,,5
	jne	L(mm_len_5_8_bytes_forward)
	test	%rdx, %rdx
	.p2align 4,,2
	je	L(mm_return)
	testb	$2, %dl
	.p2align 4,,1
	jne	L(mm_len_2_4_bytes_forward)
	movzbl	-1(%rsi,%rdx), %ebx
	movzbl	(%rsi), %esi
	movb	%bl, -1(%rdi,%rdx)
	movb	%sil, (%rdi)
	jmp	L(mm_return)

L(mm_len_2_4_bytes_forward):
	movzwl	-2(%rsi,%rdx), %ebx
	movzwl	(%rsi), %esi
	movw	%bx, -2(%rdi,%rdx)
	movw	%si, (%rdi)
	jmp	L(mm_return)

L(mm_len_5_8_bytes_forward):
	movl	(%rsi), %ebx
	movl	-4(%rsi,%rdx), %esi
	movl	%ebx, (%rdi)
	movl	%esi, -4(%rdi,%rdx)
	jmp	L(mm_return)

L(mm_len_9_16_bytes_forward):
	mov	(%rsi), %rbx
	mov	-8(%rsi, %rdx), %rsi
	mov	%rbx, (%rdi)
	mov	%rsi, -8(%rdi, %rdx)
	jmp	L(mm_return)

L(mm_recalc_len):
/* Compute in %rdx how many bytes are left to copy after
	the main loop stops. */
	mov	%rbx, %rdx
	sub	%rdi, %rdx
/* The code for copying backwards. */
L(mm_len_0_or_more_backward):

/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
	separately. */
	cmp	$16, %rdx
	jbe	L(mm_len_0_16_bytes_backward)

	cmp	$32, %rdx
	ja	L(mm_len_32_or_more_backward)

/* Copy [16..32] bytes and return. */
	movdqu	(%rsi), %xmm0
	movdqu	-16(%rsi, %rdx), %xmm1
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, -16(%rdi, %rdx)
	jmp	L(mm_return)

L(mm_len_32_or_more_backward):
	cmp	$64, %rdx
	ja	L(mm_len_64_or_more_backward)

/* Copy [32..64] bytes and return. */
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm1
	movdqu	-16(%rsi, %rdx), %xmm2
	movdqu	-32(%rsi, %rdx), %xmm3
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 16(%rdi)
	movdqu	%xmm2, -16(%rdi, %rdx)
	movdqu	%xmm3, -32(%rdi, %rdx)
	jmp	L(mm_return)

L(mm_len_64_or_more_backward):
	cmp	$128, %rdx
	ja	L(mm_len_128_or_more_backward)

/* Copy [64..128] bytes and return. */
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm1
	movdqu	32(%rsi), %xmm2
	movdqu	48(%rsi), %xmm3
	movdqu	-64(%rsi, %rdx), %xmm4
	movdqu	-48(%rsi, %rdx), %xmm5
	movdqu	-32(%rsi, %rdx), %xmm6
	movdqu	-16(%rsi, %rdx), %xmm7
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm1, 16(%rdi)
	movdqu	%xmm2, 32(%rdi)
	movdqu	%xmm3, 48(%rdi)
	movdqu	%xmm4, -64(%rdi, %rdx)
	movdqu	%xmm5, -48(%rdi, %rdx)
	movdqu	%xmm6, -32(%rdi, %rdx)
	movdqu	%xmm7, -16(%rdi, %rdx)
	jmp	L(mm_return)

L(mm_len_128_or_more_backward):
/* Align the destination address. We must load the last 64 bytes of
	the source before the aligned stores can overwrite them. */
	movdqu	-16(%rsi, %rdx), %xmm0
	movdqu	-32(%rsi, %rdx), %xmm1
	movdqu	-48(%rsi, %rdx), %xmm2
	movdqu	-64(%rsi, %rdx), %xmm3

	lea	(%rdi, %rdx), %r9
	and	$-64, %r9	/* r9 = aligned dst */

	mov	%rsi, %r8
	sub	%rdi, %r8	/* r8 = src - dst, diff */

	movdqu	-16(%r9, %r8), %xmm4
	movdqu	-32(%r9, %r8), %xmm5
	movdqu	-48(%r9, %r8), %xmm6
	movdqu	-64(%r9, %r8), %xmm7

	movdqu	%xmm0, -16(%rdi, %rdx)
	movdqu	%xmm1, -32(%rdi, %rdx)
	movdqu	%xmm2, -48(%rdi, %rdx)
	movdqu	%xmm3, -64(%rdi, %rdx)
	movdqa	%xmm4, -16(%r9)
	movaps	%xmm5, -32(%r9)
	movaps	%xmm6, -48(%r9)
	movaps	%xmm7, -64(%r9)
	lea	-64(%r9), %r9

	lea	64(%rdi), %rbx
	and	$-64, %rbx
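/* %rbx = first 64-byte boundary past the start of dst; the backward
	loop copies aligned 64-byte blocks while %r9 stays above it. */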

	cmp	%r9, %rbx
	jae	L(mm_recalc_len)

	cmp	$SHARED_CACHE_SIZE_HALF, %rdx
	jae	L(mm_large_page_loop_backward)
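/* Same non-temporal threshold as in the forward path. */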

	.p2align 4
L(mm_main_loop_backward):

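/* Prefetch the source two 64-byte iterations ahead, moving downward. */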
	prefetcht0 -128(%r9, %r8)

	movdqu	-64(%r9, %r8), %xmm0
	movdqu	-48(%r9, %r8), %xmm1
	movdqu	-32(%r9, %r8), %xmm2
	movdqu	-16(%r9, %r8), %xmm3
	movdqa	%xmm0, -64(%r9)
	movaps	%xmm1, -48(%r9)
	movaps	%xmm2, -32(%r9)
	movaps	%xmm3, -16(%r9)
	lea	-64(%r9), %r9
	cmp	%r9, %rbx
	jb	L(mm_main_loop_backward)
	jmp	L(mm_recalc_len)

/* Copy [0..16] and return. */
L(mm_len_0_16_bytes_backward):
	testb	$24, %dl
	jnz	L(mm_len_9_16_bytes_backward)
	testb	$4, %dl
	.p2align 4,,5
	jnz	L(mm_len_5_8_bytes_backward)
	test	%rdx, %rdx
	.p2align 4,,2
	je	L(mm_return)
	testb	$2, %dl
	.p2align 4,,1
	jne	L(mm_len_3_4_bytes_backward)
	movzbl	-1(%rsi,%rdx), %ebx
	movzbl	(%rsi), %ecx
	movb	%bl, -1(%rdi,%rdx)
	movb	%cl, (%rdi)
	jmp	L(mm_return)

L(mm_len_3_4_bytes_backward):
	movzwl	-2(%rsi,%rdx), %ebx
	movzwl	(%rsi), %ecx
	movw	%bx, -2(%rdi,%rdx)
	movw	%cx, (%rdi)
	jmp	L(mm_return)

L(mm_len_9_16_bytes_backward):
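/* Copy the last 8 bytes as two dwords, shrink the length by 8 and
	re-dispatch to finish the remaining 1..8 bytes. */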
	movl	-4(%rsi,%rdx), %ebx
	movl	-8(%rsi,%rdx), %ecx
	movl	%ebx, -4(%rdi,%rdx)
	movl	%ecx, -8(%rdi,%rdx)
	sub	$8, %rdx
	jmp	L(mm_len_0_16_bytes_backward)

L(mm_len_5_8_bytes_backward):
	movl	(%rsi), %ebx
	movl	-4(%rsi,%rdx), %ecx
	movl	%ebx, (%rdi)
	movl	%ecx, -4(%rdi,%rdx)
477
478L(mm_return):
479 RETURN
480
481/* Big length copy forward part. */
482
Varvara Rainchika020a242014-04-29 17:44:56 +0400483 .p2align 4
L(mm_large_page_loop_forward):
	movdqu	(%r8, %rsi), %xmm0
	movdqu	16(%r8, %rsi), %xmm1
	movdqu	32(%r8, %rsi), %xmm2
	movdqu	48(%r8, %rsi), %xmm3
	movntdq	%xmm0, (%r8)
	movntdq	%xmm1, 16(%r8)
	movntdq	%xmm2, 32(%r8)
	movntdq	%xmm3, 48(%r8)
	lea	64(%r8), %r8
	cmp	%r8, %rbx
	ja	L(mm_large_page_loop_forward)
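/* movntdq stores are weakly ordered; sfence makes them globally
	visible before the tail is copied with ordinary stores. */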
	sfence
	jmp	L(mm_copy_remaining_forward)

/* Big length copy backward part. */
	.p2align 4
L(mm_large_page_loop_backward):
	movdqu	-64(%r9, %r8), %xmm0
	movdqu	-48(%r9, %r8), %xmm1
	movdqu	-32(%r9, %r8), %xmm2
	movdqu	-16(%r9, %r8), %xmm3
	movntdq	%xmm0, -64(%r9)
	movntdq	%xmm1, -48(%r9)
	movntdq	%xmm2, -32(%r9)
	movntdq	%xmm3, -16(%r9)
	lea	-64(%r9), %r9
	cmp	%r9, %rbx
	jb	L(mm_large_page_loop_backward)
	sfence
	jmp	L(mm_recalc_len)

END (MEMMOVE)
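/* memcpy never has to deal with overlapping buffers, so it can simply
	alias the memmove implementation. */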
ALIAS_SYMBOL(memcpy, MEMMOVE)