/*
 * Copyright (C) 2017 The Android Open Source Project
 * All rights reserved.
 *
 * Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
30
31#include <private/bionic_asm.h>
32
/* Prefetch look-ahead distance, counted in PLDSIZE-byte blocks. */
#define PLDOFFS (16)
/* Bytes per prefetch block: the L2 cache line size. */
#define PLDSIZE (128)

        /* Assemble everything below as 32-bit ARM (not Thumb) instructions. */
        .arm
/*
 * __memcpy_chk: bounds-checked front end for memcpy.
 *
 * In:  r0, r1 = passed through untouched to memcpy (or to the failure
 *               handler)
 *      r2     = byte count
 *      r3     = limit the count is checked against (presumably the size
 *               of the destination object -- confirm against the C
 *               prototype)
 *
 * If r2 <= r3 (unsigned) control is transferred to memcpy with all
 * argument registers and lr intact, so memcpy returns directly to our
 * caller. Otherwise __memcpy_chk_fail is called; no instruction follows
 * that call, so it is expected not to return.
 */
ENTRY(__memcpy_chk)
        cmp         r2, r3
        bls         memcpy              // count fits: tail-call memcpy

        // Preserve lr for backtrace. r3 is pushed with it purely as
        // padding so that sp stays 8-byte aligned at the call below, as
        // AAPCS requires at public interfaces; r0-r3 are not modified.
        push        {r3, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset lr, 4

        bl          __memcpy_chk_fail
END(__memcpy_chk)
48
/*
 * memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0.
 *
 * In:    r0 = destination, r1 = source, r2 = byte count (unsigned)
 * Out:   r0 = destination pointer as passed in (saved on entry and
 *             restored at .Lneon_exit)
 * Clobb: r1, r2, r3, r12, q0-q3, q8-q11, flags
 * Stack: 4 bytes (the saved r0)
 *
 * Strategy, by remaining size:
 *   >= 128 : 128-byte NEON blocks, with prefetch while more than PLDOFFS
 *            blocks remain
 *   >= 32  : 32-byte NEON blocks
 *   >= 16  : one 16-byte NEON copy
 *   <  16  : 8/4/2/1-byte pieces selected by the low four bits of r2
 *
 * The entry size checks use unsigned conditions (blo): the count is an
 * unsigned quantity, and a signed test would treat any count with bit 31
 * set as negative and fall into the sub-4-byte tail.
 */
ENTRY(memcpy)
        push        {r0}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset r0, 0
        cmp         r2, #4
        blo         .Lneon_lt4
        cmp         r2, #16
        blo         .Lneon_lt16
        cmp         r2, #32
        blo         .Lneon_16
        cmp         r2, #128
        blo         .Lneon_copy_32_a
        /* Copy blocks of 128-bytes (word-aligned) at a time*/
        /* Code below is optimized for PLDSIZE=128 only */
        mov         r12, r2, lsr #7     // r12 = number of 128-byte blocks (>= 1)
        cmp         r12, #PLDOFFS
        ble         .Lneon_copy_128_loop_nopld
        // More than PLDOFFS blocks: run the prefetching loop for all but
        // the final PLDOFFS blocks.
        sub         r12, #PLDOFFS
        pld         [r1, #(PLDOFFS-1)*PLDSIZE]
.Lneon_copy_128_loop_outer:
        // Prefetch both 64-byte halves of the block PLDOFFS blocks ahead.
        pld         [r1, #(PLDOFFS*PLDSIZE)]
        pld         [r1, #(PLDOFFS)*(PLDSIZE)+64]
        vld1.32     {q0, q1}, [r1]!
        vld1.32     {q2, q3}, [r1]!
        vld1.32     {q8, q9}, [r1]!
        vld1.32     {q10, q11}, [r1]!
        subs        r12, r12, #1        // flags consumed by the bne below
        vst1.32     {q0, q1}, [r0]!
        vst1.32     {q2, q3}, [r0]!
        vst1.32     {q8, q9}, [r0]!
        vst1.32     {q10, q11}, [r0]!
        bne         .Lneon_copy_128_loop_outer
        mov         r12, #PLDOFFS       // remaining blocks are copied without prefetch
.Lneon_copy_128_loop_nopld:
        vld1.32     {q0, q1}, [r1]!
        vld1.32     {q2, q3}, [r1]!
        vld1.32     {q8, q9}, [r1]!
        vld1.32     {q10, q11}, [r1]!
        subs        r12, r12, #1        // flags consumed by the bne below
        vst1.32     {q0, q1}, [r0]!
        vst1.32     {q2, q3}, [r0]!
        vst1.32     {q8, q9}, [r0]!
        vst1.32     {q10, q11}, [r0]!
        bne         .Lneon_copy_128_loop_nopld
        ands        r2, r2, #0x7f       // r2 = bytes left after the 128-byte blocks
        beq         .Lneon_exit
        cmp         r2, #32             // r2 <= 127 here, so the signed test is safe
        blt         .Lneon_16
        nop
        /* Copy blocks of 32-bytes (word aligned) at a time*/
.Lneon_copy_32_a:
        mov         r12, r2, lsr #5     // r12 = number of 32-byte blocks (>= 1 here)
.Lneon_copy_32_loop_a:
        vld1.32     {q0,q1}, [r1]!
        subs        r12, r12, #1
        vst1.32     {q0,q1}, [r0]!
        bne         .Lneon_copy_32_loop_a
        ands        r2, r2, #0x1f       // r2 = bytes left after the 32-byte blocks
        beq         .Lneon_exit
.Lneon_16:
        // r2 < 32 here. If fewer than 16 bytes remain, r2 goes negative,
        // but subtracting 16 leaves its low four bits intact and the
        // tail code below only looks at those bits.
        subs        r2, r2, #16
        blt         .Lneon_lt16
        vld1.32     {q8}, [r1]!
        vst1.32     {q8}, [r0]!
        beq         .Lneon_exit         // flags still from the subs: exactly 16 bytes were left
.Lneon_lt16:
        // Shift bit 3 of r2 into C and bit 2 into N: C => copy 8 bytes,
        // N => copy 4 bytes.
        movs        r12, r2, lsl #29
        bcc         .Lneon_skip8
        ldr         r3, [r1], #4
        ldr         r12, [r1], #4
        str         r3, [r0], #4
        str         r12, [r0], #4
.Lneon_skip8:
        bpl         .Lneon_lt4          // N is untouched by the ldr/str above
        ldr         r3, [r1], #4
        str         r3, [r0], #4
.Lneon_lt4:
        // Shift bit 1 of r2 into C and bit 0 into N: C => copy 2 bytes,
        // N => copy 1 byte.
        movs        r2, r2, lsl #31
        bcc         .Lneon_lt2
        ldrh        r3, [r1], #2
        strh        r3, [r0], #2
.Lneon_lt2:
        bpl         .Lneon_exit
        ldrb        r12, [r1]
        strb        r12, [r0]
.Lneon_exit:
        pop         {r0}                // return value: the original destination
        .cfi_def_cfa_offset 0
        .cfi_restore r0
        bx          lr

END(memcpy)