/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright notice,
  * this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.

  * Neither the name of Intel Corporation nor the names of its contributors
  * may be used to endorse or promote products derived from this software
  * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#define FOR_SILVERMONT

#ifndef MEMMOVE
# define MEMMOVE memmove
#endif

#ifndef L
# define L(label) .L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name) \
        .type name, @function; \
        .globl name; \
        .p2align 4; \
name: \
        cfi_startproc
#endif

#ifndef END
# define END(name) \
        cfi_endproc; \
        .size name, .-name
#endif

#define DEST PARMS
#define SRC DEST+4
#define LEN SRC+4

#define CFI_PUSH(REG) \
        cfi_adjust_cfa_offset (4); \
        cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
        cfi_adjust_cfa_offset (-4); \
        cfi_restore (REG)

#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
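
/* CFI_PUSH/CFI_POP keep the DWARF call-frame information in step with
   the actual pushes and pops, so stack unwinding (from a debugger or
   profiler) stays correct at every point in the function. */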

#define PARMS 8 /* Preserve EBX. */
#define ENTRANCE PUSH (%ebx);
#define RETURN_END POP (%ebx); ret
#define RETURN RETURN_END; CFI_PUSH (%ebx)
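
/* Stack layout inside MEMMOVE once ENTRANCE has pushed %ebx
   (illustrative):
     0(%esp)   saved %ebx
     4(%esp)   return address
     8(%esp)   dst  (PARMS; see DEST/SRC/LEN above)
    12(%esp)   src
    16(%esp)   len
   RETURN re-asserts the %ebx CFI state after the ret because more code
   sharing the same frame follows it in the file. */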

#define SETUP_PIC_REG(x) call __x86.get_pc_thunk.x

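/* __x86.get_pc_thunk.x (provided elsewhere) loads the address of the
   instruction after the call into the named register; adding
   $_GLOBAL_OFFSET_TABLE_ to it yields the GOT base, so this
   position-independent code can read globals via @GOTOFF. */
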
        .section .text.sse2,"ax",@progbits
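
/* __memcpy_chk(dst, src, len, dst_buf_size) is the FORTIFY entry point:
   if len exceeds the compiler-known size of the destination buffer it
   diverts to __memcpy_chk_fail (defined elsewhere in bionic); otherwise
   it falls through into memmove below. */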
ENTRY (__memcpy_chk)
/* NOTE: We can't use LEN here because ebx has not been pushed yet. */
        movl 12(%esp), %ecx
        cmpl 16(%esp), %ecx
        ja __memcpy_chk_fail
/* Fall through to memcpy/memmove. */
END (__memcpy_chk)
ENTRY (MEMMOVE)
        ENTRANCE
        movl LEN(%esp), %ecx
        movl SRC(%esp), %eax
        movl DEST(%esp), %edx

/* Check whether we should copy backward or forward. */
        cmp %eax, %edx
        je L(mm_return)
        jg L(mm_len_0_or_more_backward)
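
/* Rough C sketch of the dispatch above (illustrative only):
     if (dst == src) return dst;
     if (dst > src)  copy_backward();  // overlap could clobber src's tail
     else            copy_forward();   // overlap could clobber src's head
*/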

/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
   separately. */
        cmp $16, %ecx
        jbe L(mm_len_0_16_bytes_forward)

        cmpl $32, %ecx
        ja L(mm_len_32_or_more_forward)

/* Copy [0..32] and return. */
        movdqu (%eax), %xmm0
        movdqu -16(%eax, %ecx), %xmm1
        movdqu %xmm0, (%edx)
        movdqu %xmm1, -16(%edx, %ecx)
        jmp L(mm_return)

L(mm_len_32_or_more_forward):
        cmpl $64, %ecx
        ja L(mm_len_64_or_more_forward)

/* Copy [0..64] and return. */
        movdqu (%eax), %xmm0
        movdqu 16(%eax), %xmm1
        movdqu -16(%eax, %ecx), %xmm2
        movdqu -32(%eax, %ecx), %xmm3
        movdqu %xmm0, (%edx)
        movdqu %xmm1, 16(%edx)
        movdqu %xmm2, -16(%edx, %ecx)
        movdqu %xmm3, -32(%edx, %ecx)
        jmp L(mm_return)

L(mm_len_64_or_more_forward):
        cmpl $128, %ecx
        ja L(mm_len_128_or_more_forward)

/* Copy [0..128] and return. */
        movdqu (%eax), %xmm0
        movdqu 16(%eax), %xmm1
        movdqu 32(%eax), %xmm2
        movdqu 48(%eax), %xmm3
        movdqu -64(%eax, %ecx), %xmm4
        movdqu -48(%eax, %ecx), %xmm5
        movdqu -32(%eax, %ecx), %xmm6
        movdqu -16(%eax, %ecx), %xmm7
        movdqu %xmm0, (%edx)
        movdqu %xmm1, 16(%edx)
        movdqu %xmm2, 32(%edx)
        movdqu %xmm3, 48(%edx)
        movdqu %xmm4, -64(%edx, %ecx)
        movdqu %xmm5, -48(%edx, %ecx)
        movdqu %xmm6, -32(%edx, %ecx)
        movdqu %xmm7, -16(%edx, %ecx)
        jmp L(mm_return)

L(mm_len_128_or_more_forward):
        PUSH (%esi)
        PUSH (%edi)

/* Align the destination address to a 64-byte boundary. */
        movdqu (%eax), %xmm0
        movdqu 16(%eax), %xmm1
        movdqu 32(%eax), %xmm2
        movdqu 48(%eax), %xmm3

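/* Round the destination up to the next 64-byte boundary:
     %edi = (dst + 64) & -64, e.g. dst = 0x1005 -> %edi = 0x1040,
   and keep the src - dst delta in %eax, so that (%eax, %edi) addresses
   the source byte corresponding to destination %edi. The 64 head bytes
   loaded above cover everything up to that boundary. */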
        leal 64(%edx), %edi
        andl $-64, %edi
        subl %edx, %eax

        movdqu (%eax, %edi), %xmm4
        movdqu 16(%eax, %edi), %xmm5
        movdqu 32(%eax, %edi), %xmm6
        movdqu 48(%eax, %edi), %xmm7

        movdqu %xmm0, (%edx)
        movdqu %xmm1, 16(%edx)
        movdqu %xmm2, 32(%edx)
        movdqu %xmm3, 48(%edx)
        movdqa %xmm4, (%edi)
        movaps %xmm5, 16(%edi)
        movaps %xmm6, 32(%edi)
        movaps %xmm7, 48(%edi)
        addl $64, %edi

        leal (%edx, %ecx), %ebx
        andl $-64, %ebx
        cmp %edi, %ebx
        jbe L(mm_copy_remaining_forward)

        PUSH(%ebx)
        SETUP_PIC_REG(bx)
        add $_GLOBAL_OFFSET_TABLE_, %ebx
        cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
        /* Restore ebx. The pop can go before the jump because it does
           not affect the flags. */
        POP(%ebx)

        jae L(mm_large_page_loop_forward)

        .p2align 4
L(mm_main_loop_forward):

        prefetcht0 128(%eax, %edi)

        movdqu (%eax, %edi), %xmm0
        movdqu 16(%eax, %edi), %xmm1
        movdqu 32(%eax, %edi), %xmm2
        movdqu 48(%eax, %edi), %xmm3
        movdqa %xmm0, (%edi)
        movaps %xmm1, 16(%edi)
        movaps %xmm2, 32(%edi)
        movaps %xmm3, 48(%edi)
        leal 64(%edi), %edi
        cmp %edi, %ebx
        ja L(mm_main_loop_forward)

L(mm_copy_remaining_forward):
        addl %edx, %ecx
        subl %edi, %ecx
/* Everything up to %edi in the destination has been copied. %ecx now
   holds the number of bytes left to copy. Advance %esi to match. */
        leal (%edi, %eax), %esi

L(mm_remaining_0_64_bytes_forward):
        cmp $32, %ecx
        ja L(mm_remaining_33_64_bytes_forward)
        cmp $16, %ecx
        ja L(mm_remaining_17_32_bytes_forward)
        testl %ecx, %ecx
        .p2align 4,,2
        je L(mm_return_pop_all)

        cmpb $8, %cl
        ja L(mm_remaining_9_16_bytes_forward)
        cmpb $4, %cl
        .p2align 4,,5
        ja L(mm_remaining_5_8_bytes_forward)
        cmpb $2, %cl
        .p2align 4,,1
        ja L(mm_remaining_3_4_bytes_forward)
        movzbl -1(%esi,%ecx), %eax
        movzbl (%esi), %ebx
        movb %al, -1(%edi,%ecx)
        movb %bl, (%edi)
        jmp L(mm_return_pop_all)

L(mm_remaining_33_64_bytes_forward):
        movdqu (%esi), %xmm0
        movdqu 16(%esi), %xmm1
        movdqu -32(%esi, %ecx), %xmm2
        movdqu -16(%esi, %ecx), %xmm3
        movdqu %xmm0, (%edi)
        movdqu %xmm1, 16(%edi)
        movdqu %xmm2, -32(%edi, %ecx)
        movdqu %xmm3, -16(%edi, %ecx)
        jmp L(mm_return_pop_all)

L(mm_remaining_17_32_bytes_forward):
        movdqu (%esi), %xmm0
        movdqu -16(%esi, %ecx), %xmm1
        movdqu %xmm0, (%edi)
        movdqu %xmm1, -16(%edi, %ecx)
        jmp L(mm_return_pop_all)

L(mm_remaining_9_16_bytes_forward):
        movq (%esi), %xmm0
        movq -8(%esi, %ecx), %xmm1
        movq %xmm0, (%edi)
        movq %xmm1, -8(%edi, %ecx)
        jmp L(mm_return_pop_all)

L(mm_remaining_5_8_bytes_forward):
        movl (%esi), %eax
        movl -4(%esi,%ecx), %ebx
        movl %eax, (%edi)
        movl %ebx, -4(%edi,%ecx)
        jmp L(mm_return_pop_all)

L(mm_remaining_3_4_bytes_forward):
        movzwl -2(%esi,%ecx), %eax
        movzwl (%esi), %ebx
        movw %ax, -2(%edi,%ecx)
        movw %bx, (%edi)
        jmp L(mm_return_pop_all)

L(mm_len_0_16_bytes_forward):
        testb $24, %cl
        jne L(mm_len_9_16_bytes_forward)
        testb $4, %cl
        .p2align 4,,5
        jne L(mm_len_5_8_bytes_forward)
        testl %ecx, %ecx
        .p2align 4,,2
        je L(mm_return)
        testb $2, %cl
        .p2align 4,,1
        jne L(mm_len_2_4_bytes_forward)
        movzbl -1(%eax,%ecx), %ebx
        movzbl (%eax), %eax
        movb %bl, -1(%edx,%ecx)
        movb %al, (%edx)
        jmp L(mm_return)

L(mm_len_2_4_bytes_forward):
        movzwl -2(%eax,%ecx), %ebx
        movzwl (%eax), %eax
        movw %bx, -2(%edx,%ecx)
        movw %ax, (%edx)
        jmp L(mm_return)

L(mm_len_5_8_bytes_forward):
        movl (%eax), %ebx
        movl -4(%eax,%ecx), %eax
        movl %ebx, (%edx)
        movl %eax, -4(%edx,%ecx)
        jmp L(mm_return)

L(mm_len_9_16_bytes_forward):
        movq (%eax), %xmm0
        movq -8(%eax, %ecx), %xmm1
        movq %xmm0, (%edx)
        movq %xmm1, -8(%edx, %ecx)
        jmp L(mm_return)

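/* CFI-only directives, no actual pops: the forward 128+ path above left
   the unwind state showing %edi/%esi as saved, but L(mm_recalc_len) is
   only reached after they have been restored, so roll the state back. */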
        CFI_POP (%edi)
        CFI_POP (%esi)

L(mm_recalc_len):
/* Compute in %ecx how many bytes are left to copy after
   the main loop stops. */
        movl %ebx, %ecx
        subl %edx, %ecx
/* The code for copying backwards. */
L(mm_len_0_or_more_backward):

/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
   separately. */
        cmp $16, %ecx
        jbe L(mm_len_0_16_bytes_backward)

        cmpl $32, %ecx
        jg L(mm_len_32_or_more_backward)

/* Copy [0..32] and return. */
        movdqu (%eax), %xmm0
        movdqu -16(%eax, %ecx), %xmm1
        movdqu %xmm0, (%edx)
        movdqu %xmm1, -16(%edx, %ecx)
        jmp L(mm_return)

L(mm_len_32_or_more_backward):
        cmpl $64, %ecx
        jg L(mm_len_64_or_more_backward)

/* Copy [0..64] and return. */
        movdqu (%eax), %xmm0
        movdqu 16(%eax), %xmm1
        movdqu -16(%eax, %ecx), %xmm2
        movdqu -32(%eax, %ecx), %xmm3
        movdqu %xmm0, (%edx)
        movdqu %xmm1, 16(%edx)
        movdqu %xmm2, -16(%edx, %ecx)
        movdqu %xmm3, -32(%edx, %ecx)
        jmp L(mm_return)

L(mm_len_64_or_more_backward):
        cmpl $128, %ecx
        jg L(mm_len_128_or_more_backward)

/* Copy [0..128] and return. */
        movdqu (%eax), %xmm0
        movdqu 16(%eax), %xmm1
        movdqu 32(%eax), %xmm2
        movdqu 48(%eax), %xmm3
        movdqu -64(%eax, %ecx), %xmm4
        movdqu -48(%eax, %ecx), %xmm5
        movdqu -32(%eax, %ecx), %xmm6
        movdqu -16(%eax, %ecx), %xmm7
        movdqu %xmm0, (%edx)
        movdqu %xmm1, 16(%edx)
        movdqu %xmm2, 32(%edx)
        movdqu %xmm3, 48(%edx)
        movdqu %xmm4, -64(%edx, %ecx)
        movdqu %xmm5, -48(%edx, %ecx)
        movdqu %xmm6, -32(%edx, %ecx)
        movdqu %xmm7, -16(%edx, %ecx)
        jmp L(mm_return)

L(mm_len_128_or_more_backward):
        PUSH (%esi)
        PUSH (%edi)

/* Align the destination address to a 64-byte boundary. We first save
   the last 64 bytes of the source so that the aligned stores below
   cannot overwrite them when the regions overlap. */
        movdqu -16(%eax, %ecx), %xmm0
        movdqu -32(%eax, %ecx), %xmm1
        movdqu -48(%eax, %ecx), %xmm2
        movdqu -64(%eax, %ecx), %xmm3

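/* %edi = (dst + len) & -64 rounds the end of the destination down to a
   64-byte boundary, e.g. dst + len = 0x2049 -> %edi = 0x2040; %esi keeps
   the src - dst delta so that (%edi, %esi) addresses the matching source
   position. The 64 tail bytes saved above cover the unaligned end. */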
        leal (%edx, %ecx), %edi
        andl $-64, %edi

        movl %eax, %esi
        subl %edx, %esi

        movdqu -16(%edi, %esi), %xmm4
        movdqu -32(%edi, %esi), %xmm5
        movdqu -48(%edi, %esi), %xmm6
        movdqu -64(%edi, %esi), %xmm7

        movdqu %xmm0, -16(%edx, %ecx)
        movdqu %xmm1, -32(%edx, %ecx)
        movdqu %xmm2, -48(%edx, %ecx)
        movdqu %xmm3, -64(%edx, %ecx)
        movdqa %xmm4, -16(%edi)
        movdqa %xmm5, -32(%edi)
        movdqa %xmm6, -48(%edi)
        movdqa %xmm7, -64(%edi)
        leal -64(%edi), %edi

        leal 64(%edx), %ebx
        andl $-64, %ebx

        cmp %edi, %ebx
        jae L(mm_main_loop_backward_end)

        PUSH(%ebx)
        SETUP_PIC_REG(bx)
        add $_GLOBAL_OFFSET_TABLE_, %ebx
        cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
        /* Restore ebx. The pop can go before the jump because it does
           not affect the flags. */
        POP(%ebx)

        jae L(mm_large_page_loop_backward)

        .p2align 4
L(mm_main_loop_backward):

        prefetcht0 -128(%edi, %esi)

        movdqu -64(%edi, %esi), %xmm0
        movdqu -48(%edi, %esi), %xmm1
        movdqu -32(%edi, %esi), %xmm2
        movdqu -16(%edi, %esi), %xmm3
        movdqa %xmm0, -64(%edi)
        movdqa %xmm1, -48(%edi)
        movdqa %xmm2, -32(%edi)
        movdqa %xmm3, -16(%edi)
        leal -64(%edi), %edi
        cmp %edi, %ebx
        jb L(mm_main_loop_backward)
L(mm_main_loop_backward_end):
        POP (%edi)
        POP (%esi)
        jmp L(mm_recalc_len)

/* Copy [0..16] and return. */
L(mm_len_0_16_bytes_backward):
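/* Same bit-test dispatch as the forward [0..16] path above. */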
        testb $24, %cl
        jnz L(mm_len_9_16_bytes_backward)
        testb $4, %cl
        .p2align 4,,5
        jnz L(mm_len_5_8_bytes_backward)
        testl %ecx, %ecx
        .p2align 4,,2
        je L(mm_return)
        testb $2, %cl
        .p2align 4,,1
        jne L(mm_len_3_4_bytes_backward)
        movzbl -1(%eax,%ecx), %ebx
        movzbl (%eax), %eax
        movb %bl, -1(%edx,%ecx)
        movb %al, (%edx)
        jmp L(mm_return)

L(mm_len_3_4_bytes_backward):
        movzwl -2(%eax,%ecx), %ebx
        movzwl (%eax), %eax
        movw %bx, -2(%edx,%ecx)
        movw %ax, (%edx)
        jmp L(mm_return)

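/* Copy the last 8 bytes, shrink the length by 8, and re-dispatch to
   handle the remaining 0..8 bytes. */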
L(mm_len_9_16_bytes_backward):
        PUSH (%esi)
        movl -4(%eax,%ecx), %ebx
        movl -8(%eax,%ecx), %esi
        movl %ebx, -4(%edx,%ecx)
        movl %esi, -8(%edx,%ecx)
        subl $8, %ecx
        POP (%esi)
        jmp L(mm_len_0_16_bytes_backward)

L(mm_len_5_8_bytes_backward):
        movl (%eax), %ebx
        movl -4(%eax,%ecx), %eax
        movl %ebx, (%edx)
        movl %eax, -4(%edx,%ecx)

L(mm_return):
        movl %edx, %eax
        RETURN

L(mm_return_pop_all):
        movl %edx, %eax
        POP (%edi)
        POP (%esi)
        RETURN

/* Big length copy forward part. */
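/* movntdq performs non-temporal stores that bypass the cache, which
   avoids evicting useful data on copies larger than half the shared
   cache; the sfence after each loop orders these weakly-ordered stores
   before any later ones. */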

        .p2align 4
L(mm_large_page_loop_forward):
        movdqu (%eax, %edi), %xmm0
        movdqu 16(%eax, %edi), %xmm1
        movdqu 32(%eax, %edi), %xmm2
        movdqu 48(%eax, %edi), %xmm3
        movntdq %xmm0, (%edi)
        movntdq %xmm1, 16(%edi)
        movntdq %xmm2, 32(%edi)
        movntdq %xmm3, 48(%edi)
        leal 64(%edi), %edi
        cmp %edi, %ebx
        ja L(mm_large_page_loop_forward)
        sfence
        jmp L(mm_copy_remaining_forward)

/* Big length copy backward part. */
        .p2align 4
L(mm_large_page_loop_backward):
        movdqu -64(%edi, %esi), %xmm0
        movdqu -48(%edi, %esi), %xmm1
        movdqu -32(%edi, %esi), %xmm2
        movdqu -16(%edi, %esi), %xmm3
        movntdq %xmm0, -64(%edi)
        movntdq %xmm1, -48(%edi)
        movntdq %xmm2, -32(%edi)
        movntdq %xmm3, -16(%edi)
        leal -64(%edi), %edi
        cmp %edi, %ebx
        jb L(mm_large_page_loop_backward)
        sfence
        POP (%edi)
        POP (%esi)
        jmp L(mm_recalc_len)

END (MEMMOVE)

// N.B., `private/bionic_asm.h` provides ALIAS_SYMBOL, but that file provides
// conflicting definitions for some macros in this file. Since ALIAS_SYMBOL is
// small, inline it here.
.globl memcpy;
.equ memcpy, MEMMOVE