/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright notice,
  * this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.

  * Neither the name of Intel Corporation nor the names of its contributors
  * may be used to endorse or promote products derived from this software
  * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#define FOR_SILVERMONT

#ifndef MEMMOVE
# define MEMMOVE memmove
#endif

#ifndef L
# define L(label) .L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
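/* These cfi_* wrappers emit DWARF call-frame information so unwinders and
   debuggers can track the stack through this hand-written assembly; the
   #ifndef guards let an including build supply its own definitions. */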

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif
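/* For reference, ENTRY(memmove) therefore expands to:
     .type memmove, @function; .globl memmove; .p2align 4;
   memmove: .cfi_startproc
   and END(memmove) closes the function with .cfi_endproc and sets its
   ELF symbol size. */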

#define DEST PARMS
#define SRC DEST+4
#define LEN SRC+4

#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)

#define PARMS 8 /* Preserve EBX. */
#define ENTRANCE PUSH (%ebx);
#define RETURN_END POP (%ebx); ret
#define RETURN RETURN_END; CFI_PUSH (%ebx)
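/* Stack layout right after ENTRANCE (pushl %ebx) has run:
     0(%esp)   saved %ebx
     4(%esp)   return address
     8(%esp)   dst  (DEST = PARMS)
    12(%esp)   src  (SRC = DEST+4)
    16(%esp)   len  (LEN = SRC+4)
   RETURN_END pops %ebx and returns; RETURN additionally re-asserts the
   CFI push state for code that continues past the return point. */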

#define SETUP_PIC_REG(x) call __x86.get_pc_thunk.x
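/* The thunk is the standard compiler-provided PIC helper, effectively
     movl (%esp), %ebx; ret
   i.e. it returns its own return address, leaving the current PC in the
   named register so that adding $_GLOBAL_OFFSET_TABLE_ yields the GOT
   base for @GOTOFF accesses. */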

	.section .text.sse2,"ax",@progbits
ENTRY (MEMMOVE)
	ENTRANCE
	movl LEN(%esp), %ecx
	movl SRC(%esp), %eax
	movl DEST(%esp), %edx

/* Check whether we should copy backward or forward. */
	cmp %eax, %edx
	je L(mm_return)
	jg L(mm_len_0_or_more_backward)
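/* dst == src means there is nothing to move. dst > src means dst may lie
   inside [src, src+len), so the copy proceeds highest-address-first to
   avoid overwriting source bytes before they are read; otherwise a
   forward copy is safe and we fall through. */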

/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
   separately. */
	cmp $16, %ecx
	jbe L(mm_len_0_16_bytes_forward)

	cmpl $32, %ecx
	ja L(mm_len_32_or_more_forward)

/* Copy [17..32] bytes and return. */
	movdqu (%eax), %xmm0
	movdqu -16(%eax, %ecx), %xmm1
	movdqu %xmm0, (%edx)
	movdqu %xmm1, -16(%edx, %ecx)
	jmp L(mm_return)
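/* The head load (first 16 bytes) and the tail load (last 16 bytes)
   overlap whenever len < 32; since both loads complete before either
   store, one pair of movdqu's covers every length in [17..32] with no
   per-length branching. The wider blocks below apply the same idea. */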

L(mm_len_32_or_more_forward):
	cmpl $64, %ecx
	ja L(mm_len_64_or_more_forward)

/* Copy [33..64] bytes and return. */
	movdqu (%eax), %xmm0
	movdqu 16(%eax), %xmm1
	movdqu -16(%eax, %ecx), %xmm2
	movdqu -32(%eax, %ecx), %xmm3
	movdqu %xmm0, (%edx)
	movdqu %xmm1, 16(%edx)
	movdqu %xmm2, -16(%edx, %ecx)
	movdqu %xmm3, -32(%edx, %ecx)
	jmp L(mm_return)

L(mm_len_64_or_more_forward):
	cmpl $128, %ecx
	ja L(mm_len_128_or_more_forward)

/* Copy [65..128] bytes and return. */
	movdqu (%eax), %xmm0
	movdqu 16(%eax), %xmm1
	movdqu 32(%eax), %xmm2
	movdqu 48(%eax), %xmm3
	movdqu -64(%eax, %ecx), %xmm4
	movdqu -48(%eax, %ecx), %xmm5
	movdqu -32(%eax, %ecx), %xmm6
	movdqu -16(%eax, %ecx), %xmm7
	movdqu %xmm0, (%edx)
	movdqu %xmm1, 16(%edx)
	movdqu %xmm2, 32(%edx)
	movdqu %xmm3, 48(%edx)
	movdqu %xmm4, -64(%edx, %ecx)
	movdqu %xmm5, -48(%edx, %ecx)
	movdqu %xmm6, -32(%edx, %ecx)
	movdqu %xmm7, -16(%edx, %ecx)
	jmp L(mm_return)

L(mm_len_128_or_more_forward):
	PUSH (%esi)
	PUSH (%edi)

/* Aligning the address of destination. The first 64 bytes are copied with
   unaligned stores; the main loop then continues from the next 64-byte
   boundary of dst with aligned stores. */
	movdqu (%eax), %xmm0
	movdqu 16(%eax), %xmm1
	movdqu 32(%eax), %xmm2
	movdqu 48(%eax), %xmm3

	leal 64(%edx), %edi
	andl $-64, %edi
	subl %edx, %eax
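/* From here on %eax holds (src - dst), so (%eax, %edi) addresses the
   source byte corresponding to destination address %edi; %edi itself is
   the first 64-byte boundary strictly above dst. */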

	movdqu (%eax, %edi), %xmm4
	movdqu 16(%eax, %edi), %xmm5
	movdqu 32(%eax, %edi), %xmm6
	movdqu 48(%eax, %edi), %xmm7

	movdqu %xmm0, (%edx)
	movdqu %xmm1, 16(%edx)
	movdqu %xmm2, 32(%edx)
	movdqu %xmm3, 48(%edx)
	movdqa %xmm4, (%edi)
	movaps %xmm5, 16(%edi)
	movaps %xmm6, 32(%edi)
	movaps %xmm7, 48(%edi)
	addl $64, %edi

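/* %ebx = last 64-byte boundary at or below dst+len; the aligned main
   loop below runs while %edi < %ebx, and the tail (at most 64 bytes) is
   handled by the remainder code. */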
	leal (%edx, %ecx), %ebx
	andl $-64, %ebx
	cmp %edi, %ebx
	jbe L(mm_copy_remaining_forward)

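/* __x86_shared_cache_size_half is a bionic-provided variable (assumed
   here to hold half the shared last-level cache size); it is read via
   the GOT because this code is position-independent. Copies that large
   or larger take the non-temporal path to avoid sweeping the cache. */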
	PUSH(%ebx)
	SETUP_PIC_REG(bx)
	add $_GLOBAL_OFFSET_TABLE_, %ebx
	cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
	/* Restore %ebx. The pop can sit before the jump because it doesn't
	   affect any flags. */
	POP(%ebx)

	jae L(mm_large_page_loop_forward)

	.p2align 4
L(mm_main_loop_forward):

	prefetcht0 128(%eax, %edi)

	movdqu (%eax, %edi), %xmm0
	movdqu 16(%eax, %edi), %xmm1
	movdqu 32(%eax, %edi), %xmm2
	movdqu 48(%eax, %edi), %xmm3
	movdqa %xmm0, (%edi)
	movaps %xmm1, 16(%edi)
	movaps %xmm2, 32(%edi)
	movaps %xmm3, 48(%edi)
	leal 64(%edi), %edi
	cmp %edi, %ebx
	ja L(mm_main_loop_forward)
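/* The loop above moves 64 bytes per iteration: unaligned loads through
   the (src - dst) offset, aligned stores, with the source prefetched 128
   bytes ahead. movaps is used interchangeably with movdqa; it is the
   same aligned 16-byte store with a shorter encoding. */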

L(mm_copy_remaining_forward):
	addl %edx, %ecx
	subl %edi, %ecx
/* Everything below %edi in dst has been copied; %ecx now holds the
   number of bytes left. Advance %esi to the matching source position. */
	leal (%edi, %eax), %esi

L(mm_remaining_0_64_bytes_forward):
	cmp $32, %ecx
	ja L(mm_remaining_33_64_bytes_forward)
	cmp $16, %ecx
	ja L(mm_remaining_17_32_bytes_forward)
	testl %ecx, %ecx
	.p2align 4,,2
	je L(mm_return_pop_all)

	cmpb $8, %cl
	ja L(mm_remaining_9_16_bytes_forward)
	cmpb $4, %cl
	.p2align 4,,5
	ja L(mm_remaining_5_8_bytes_forward)
	cmpb $2, %cl
	.p2align 4,,1
	ja L(mm_remaining_3_4_bytes_forward)
	movzbl -1(%esi,%ecx), %eax
	movzbl (%esi), %ebx
	movb %al, -1(%edi,%ecx)
	movb %bl, (%edi)
	jmp L(mm_return_pop_all)
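/* len is 1 or 2 at the byte case above: the two loads fetch the last and
   first byte (the same byte when len == 1) before either store runs, so
   no extra branch is needed. */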

L(mm_remaining_33_64_bytes_forward):
	movdqu (%esi), %xmm0
	movdqu 16(%esi), %xmm1
	movdqu -32(%esi, %ecx), %xmm2
	movdqu -16(%esi, %ecx), %xmm3
	movdqu %xmm0, (%edi)
	movdqu %xmm1, 16(%edi)
	movdqu %xmm2, -32(%edi, %ecx)
	movdqu %xmm3, -16(%edi, %ecx)
	jmp L(mm_return_pop_all)

L(mm_remaining_17_32_bytes_forward):
	movdqu (%esi), %xmm0
	movdqu -16(%esi, %ecx), %xmm1
	movdqu %xmm0, (%edi)
	movdqu %xmm1, -16(%edi, %ecx)
	jmp L(mm_return_pop_all)

L(mm_remaining_9_16_bytes_forward):
	movq (%esi), %xmm0
	movq -8(%esi, %ecx), %xmm1
	movq %xmm0, (%edi)
	movq %xmm1, -8(%edi, %ecx)
	jmp L(mm_return_pop_all)

L(mm_remaining_5_8_bytes_forward):
	movl (%esi), %eax
	movl -4(%esi,%ecx), %ebx
	movl %eax, (%edi)
	movl %ebx, -4(%edi,%ecx)
	jmp L(mm_return_pop_all)

L(mm_remaining_3_4_bytes_forward):
	movzwl -2(%esi,%ecx), %eax
	movzwl (%esi), %ebx
	movw %ax, -2(%edi,%ecx)
	movw %bx, (%edi)
	jmp L(mm_return_pop_all)

L(mm_len_0_16_bytes_forward):
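/* Dispatch on bits of the length (len <= 16 here):
     testb $24 nonzero -> len in [8..16]  (bit 3 or bit 4 set)
     testb $4  nonzero -> len in [4..7]
     len == 0          -> done
     testb $2  nonzero -> len in [2..3]
     otherwise         -> len == 1
   (The _9_16 and _5_8 label names understate their ranges by one; the
   code handles 8 and 4 correctly.) */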
	testb $24, %cl
	jne L(mm_len_9_16_bytes_forward)
	testb $4, %cl
	.p2align 4,,5
	jne L(mm_len_5_8_bytes_forward)
	testl %ecx, %ecx
	.p2align 4,,2
	je L(mm_return)
	testb $2, %cl
	.p2align 4,,1
	jne L(mm_len_2_4_bytes_forward)
	movzbl -1(%eax,%ecx), %ebx
	movzbl (%eax), %eax
	movb %bl, -1(%edx,%ecx)
	movb %al, (%edx)
	jmp L(mm_return)

L(mm_len_2_4_bytes_forward):
	movzwl -2(%eax,%ecx), %ebx
	movzwl (%eax), %eax
	movw %bx, -2(%edx,%ecx)
	movw %ax, (%edx)
	jmp L(mm_return)

L(mm_len_5_8_bytes_forward):
	movl (%eax), %ebx
	movl -4(%eax,%ecx), %eax
	movl %ebx, (%edx)
	movl %eax, -4(%edx,%ecx)
	jmp L(mm_return)

L(mm_len_9_16_bytes_forward):
	movq (%eax), %xmm0
	movq -8(%eax, %ecx), %xmm1
	movq %xmm0, (%edx)
	movq %xmm1, -8(%edx, %ecx)
	jmp L(mm_return)

	CFI_POP (%edi)
	CFI_POP (%esi)

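/* The stand-alone CFI_POP annotations above keep the unwind info
   consistent: the code that follows is reached with %edi/%esi already
   popped (or never pushed), so the earlier CFI_PUSH state must not
   carry over. */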
L(mm_recalc_len):
/* Compute in %ecx how many bytes are left to copy after
   the main loop stops. */
	movl %ebx, %ecx
	subl %edx, %ecx
/* The code for copying backwards. */
L(mm_len_0_or_more_backward):

/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
   separately. */
	cmp $16, %ecx
	jbe L(mm_len_0_16_bytes_backward)

	cmpl $32, %ecx
	jg L(mm_len_32_or_more_backward)

/* Copy [17..32] bytes and return. */
	movdqu (%eax), %xmm0
	movdqu -16(%eax, %ecx), %xmm1
	movdqu %xmm0, (%edx)
	movdqu %xmm1, -16(%edx, %ecx)
	jmp L(mm_return)

L(mm_len_32_or_more_backward):
	cmpl $64, %ecx
	jg L(mm_len_64_or_more_backward)

/* Copy [33..64] bytes and return. */
	movdqu (%eax), %xmm0
	movdqu 16(%eax), %xmm1
	movdqu -16(%eax, %ecx), %xmm2
	movdqu -32(%eax, %ecx), %xmm3
	movdqu %xmm0, (%edx)
	movdqu %xmm1, 16(%edx)
	movdqu %xmm2, -16(%edx, %ecx)
	movdqu %xmm3, -32(%edx, %ecx)
	jmp L(mm_return)

L(mm_len_64_or_more_backward):
	cmpl $128, %ecx
	jg L(mm_len_128_or_more_backward)

/* Copy [65..128] bytes and return. */
	movdqu (%eax), %xmm0
	movdqu 16(%eax), %xmm1
	movdqu 32(%eax), %xmm2
	movdqu 48(%eax), %xmm3
	movdqu -64(%eax, %ecx), %xmm4
	movdqu -48(%eax, %ecx), %xmm5
	movdqu -32(%eax, %ecx), %xmm6
	movdqu -16(%eax, %ecx), %xmm7
	movdqu %xmm0, (%edx)
	movdqu %xmm1, 16(%edx)
	movdqu %xmm2, 32(%edx)
	movdqu %xmm3, 48(%edx)
	movdqu %xmm4, -64(%edx, %ecx)
	movdqu %xmm5, -48(%edx, %ecx)
	movdqu %xmm6, -32(%edx, %ecx)
	movdqu %xmm7, -16(%edx, %ecx)
	jmp L(mm_return)

L(mm_len_128_or_more_backward):
	PUSH (%esi)
	PUSH (%edi)

/* Aligning the address of destination. We need to save the last 64 bytes
   of the source first, so that the aligned stores below cannot overwrite
   them. */
	movdqu -16(%eax, %ecx), %xmm0
	movdqu -32(%eax, %ecx), %xmm1
	movdqu -48(%eax, %ecx), %xmm2
	movdqu -64(%eax, %ecx), %xmm3

	leal (%edx, %ecx), %edi
	andl $-64, %edi

	movl %eax, %esi
	subl %edx, %esi
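/* Mirror of the forward setup: %esi = (src - dst), so (%edi, %esi)
   addresses the source byte for destination %edi; %edi is the last
   64-byte boundary at or below dst+len. */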

	movdqu -16(%edi, %esi), %xmm4
	movdqu -32(%edi, %esi), %xmm5
	movdqu -48(%edi, %esi), %xmm6
	movdqu -64(%edi, %esi), %xmm7

	movdqu %xmm0, -16(%edx, %ecx)
	movdqu %xmm1, -32(%edx, %ecx)
	movdqu %xmm2, -48(%edx, %ecx)
	movdqu %xmm3, -64(%edx, %ecx)
	movdqa %xmm4, -16(%edi)
	movdqa %xmm5, -32(%edi)
	movdqa %xmm6, -48(%edi)
	movdqa %xmm7, -64(%edi)
	leal -64(%edi), %edi

	leal 64(%edx), %ebx
	andl $-64, %ebx

	cmp %edi, %ebx
	jae L(mm_main_loop_backward_end)

	PUSH(%ebx)
	SETUP_PIC_REG(bx)
	add $_GLOBAL_OFFSET_TABLE_, %ebx
	cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
	/* Restore %ebx. The pop can sit before the jump because it doesn't
	   affect any flags. */
	POP(%ebx)

	jae L(mm_large_page_loop_backward)

	.p2align 4
L(mm_main_loop_backward):

	prefetcht0 -128(%edi, %esi)

	movdqu -64(%edi, %esi), %xmm0
	movdqu -48(%edi, %esi), %xmm1
	movdqu -32(%edi, %esi), %xmm2
	movdqu -16(%edi, %esi), %xmm3
	movdqa %xmm0, -64(%edi)
	movdqa %xmm1, -48(%edi)
	movdqa %xmm2, -32(%edi)
	movdqa %xmm3, -16(%edi)
	leal -64(%edi), %edi
	cmp %edi, %ebx
	jb L(mm_main_loop_backward)
L(mm_main_loop_backward_end):
	POP (%edi)
	POP (%esi)
	jmp L(mm_recalc_len)

/* Copy [0..16] and return. */
L(mm_len_0_16_bytes_backward):
	testb $24, %cl
	jnz L(mm_len_9_16_bytes_backward)
	testb $4, %cl
	.p2align 4,,5
	jnz L(mm_len_5_8_bytes_backward)
	testl %ecx, %ecx
	.p2align 4,,2
	je L(mm_return)
	testb $2, %cl
	.p2align 4,,1
	jne L(mm_len_3_4_bytes_backward)
	movzbl -1(%eax,%ecx), %ebx
	movzbl (%eax), %eax
	movb %bl, -1(%edx,%ecx)
	movb %al, (%edx)
	jmp L(mm_return)

L(mm_len_3_4_bytes_backward):
	movzwl -2(%eax,%ecx), %ebx
	movzwl (%eax), %eax
	movw %bx, -2(%edx,%ecx)
	movw %ax, (%edx)
	jmp L(mm_return)

L(mm_len_9_16_bytes_backward):
	PUSH (%esi)
	movl -4(%eax,%ecx), %ebx
	movl -8(%eax,%ecx), %esi
	movl %ebx, -4(%edx,%ecx)
	movl %esi, -8(%edx,%ecx)
	subl $8, %ecx
	POP (%esi)
	jmp L(mm_len_0_16_bytes_backward)

L(mm_len_5_8_bytes_backward):
	movl (%eax), %ebx
	movl -4(%eax,%ecx), %eax
	movl %ebx, (%edx)
	movl %eax, -4(%edx,%ecx)

L(mm_return):
	movl %edx, %eax
	RETURN

L(mm_return_pop_all):
	movl %edx, %eax
	POP (%edi)
	POP (%esi)
	RETURN

/* Big length copy forward part. */
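/* movntdq is a non-temporal (write-combining) store: for copies of at
   least __x86_shared_cache_size_half bytes it bypasses the cache rather
   than evicting useful lines. The sfence after each loop orders these
   weakly-ordered stores before any later access to the destination. */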

	.p2align 4
L(mm_large_page_loop_forward):
	movdqu (%eax, %edi), %xmm0
	movdqu 16(%eax, %edi), %xmm1
	movdqu 32(%eax, %edi), %xmm2
	movdqu 48(%eax, %edi), %xmm3
	movntdq %xmm0, (%edi)
	movntdq %xmm1, 16(%edi)
	movntdq %xmm2, 32(%edi)
	movntdq %xmm3, 48(%edi)
	leal 64(%edi), %edi
	cmp %edi, %ebx
	ja L(mm_large_page_loop_forward)
	sfence
	jmp L(mm_copy_remaining_forward)

/* Big length copy backward part. */
	.p2align 4
L(mm_large_page_loop_backward):
	movdqu -64(%edi, %esi), %xmm0
	movdqu -48(%edi, %esi), %xmm1
	movdqu -32(%edi, %esi), %xmm2
	movdqu -16(%edi, %esi), %xmm3
	movntdq %xmm0, -64(%edi)
	movntdq %xmm1, -48(%edi)
	movntdq %xmm2, -32(%edi)
	movntdq %xmm3, -16(%edi)
	leal -64(%edi), %edi
	cmp %edi, %ebx
	jb L(mm_large_page_loop_backward)
	sfence
	POP (%edi)
	POP (%esi)
	jmp L(mm_recalc_len)

END (MEMMOVE)

// N.B., `private/bionic_asm.h` provides ALIAS_SYMBOL, but that file defines
// conflicting versions of some macros used here. Since ALIAS_SYMBOL is
// small, inline it here.
.globl memcpy;
.equ memcpy, MEMMOVE
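// Aliasing memcpy to memmove is safe in this direction: memcpy's contract
// (non-overlapping buffers) only constrains the caller further, so every
// valid memcpy call is also a valid memmove call.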