/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>

        /*
         * Optimized memset() for ARM.
         *
         * memset() returns its first argument.
         */

        .cpu        cortex-a15
        .fpu        neon
        .syntax     unified
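
        /*
         * For reference, the C-level entry points implemented below (standard
         * prototypes; __memset_chk is the _FORTIFY_SOURCE helper used when
         * the compiler knows the destination buffer's size):
         *
         *   void *memset(void *dst, int c, size_t n);
         *   void  bzero(void *dst, size_t n);
         *   void *__memset_chk(void *dst, int c, size_t n, size_t dst_len);
         */
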
ENTRY(__memset_chk)
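        // Fortify check: r0 = dst, r1 = fill byte, r2 = count, r3 = dst_len.
        // If the write fits, branch to .L_done, which falls straight through
        // into memset below; otherwise report the overflow and abort.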
        cmp         r2, r3
        bls         .L_done

        // Preserve lr for backtrace.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0
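
        // Materialize the address of error_string PC-relatively: the word at
        // error_message holds error_string-(1b+8), and pc reads as "address
        // of the add plus 8" in ARM state, so the add leaves r0 pointing at
        // the string.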
        ldr         r0, error_message
1:
        add         r0, pc
        bl          __fortify_fatal
error_message:
        .word       error_string-(1b+8)
END(__memset_chk)

ENTRY(bzero)
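        // bzero(dst, n): on entry r0 = dst, r1 = n.  Shuffle into memset's
        // register layout (r1 = fill byte, r2 = count) and fall through.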
        mov         r2, r1
        mov         r1, #0
.L_done:
        // Fall through to memset...
END(bzero)

ENTRY(memset)
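        // memset(dst, c, n): r0 = dst (also the return value), r1 = fill byte,
        // r2 = count.  Preload the destination for writing and advance through
        // r3 so r0 stays intact for the return.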
        pldw        [r0]
        mov         r3, r0

        // Duplicate the low byte of r1
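        // into all four byte lanes so word and NEON stores write the pattern.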
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        cmp         r2, #16
        blo         .L_less_than_16

        // This section handles regions 16 bytes or larger
        //
        // Use aligned vst1.8 and vstm when possible.  Register values will be:
        //   ip is scratch
        //   q0, q1, and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the advancing destination pointer
        vdup.32     q0, r1
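
        // ip = dst & 15, the current misalignment; skip the head fix-up if
        // the destination is already 16-byte aligned.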
        ands        ip, r3, 0xF
        beq         .L_memset_aligned

        // Align dest pointer to 16-byte boundary.
        pldw        [r0, #64]
        rsb         ip, ip, #16
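        // ip is now 16 - (dst & 15): the number of head bytes (1-15) to store
        // before the pointer is 16-byte aligned.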

        // Pre-adjust the byte count to reflect the post-alignment value.
        // Expecting 8-byte alignment to be rather common so we special case
        // that one.
        sub         r2, r2, ip

        /* set 1 byte */
        tst         ip, #1
        it          ne
        strbne      r1, [r3], #1
        /* set 2 bytes */
        tst         ip, #2
        it          ne
        strhne      r1, [r3], #2
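        // The shift below moves bit 2 of ip into N and bit 3 into C, so the
        // 4- and 8-byte head stores can be predicated without more compares.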
        /* set 4 bytes */
        movs        ip, ip, lsl #29
        it          mi
        strmi       r1, [r3], #4
        /* set 8 bytes */
        itt         cs
        strcs       r1, [r3], #4
        strcs       r1, [r3], #4

.L_memset_aligned:
        // Destination is now 16-byte aligned.  Determine how to handle
        // remaining bytes.
        vmov        q1, q0
        cmp         r2, #128
        blo         .L_less_than_128

        // We need to set a larger block of memory.  Use four Q regs to
        // set a full cache line in one instruction.  Pre-decrement
        // r2 to simplify end-of-loop detection.
        vmov        q2, q0
        vmov        q3, q0
        pldw        [r0, #128]
        sub         r2, r2, #128
        .align 4
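        // Each vstm below writes 64 bytes (four 16-byte Q registers); two per
        // iteration give the 128-byte stride that subs/bhs counts down.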
.L_memset_loop_128:
        pldw        [r3, #192]
        vstm        r3!, {q0, q1, q2, q3}
        vstm        r3!, {q0, q1, q2, q3}
        subs        r2, r2, #128
        bhs         .L_memset_loop_128

        // Un-bias r2 so it contains the number of bytes left.  Early
        // exit if we are done.
        adds        r2, r2, #128
        beq         2f

        .align 4
.L_less_than_128:
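        // Fewer than 128 bytes remain.  Each "movs ip, r2, lsl #n" below moves
        // one low bit of the count into C and the next lower bit into N so the
        // stores can be predicated; Z set means no smaller chunks remain.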
        // set 64 bytes
        movs        ip, r2, lsl #26
        bcc         1f
        vst1.8      {q0, q1}, [r3, :128]!
        vst1.8      {q0, q1}, [r3, :128]!
        beq         2f
1:
        // set 32 bytes
        bpl         1f
        vst1.8      {q0, q1}, [r3, :128]!
1:
        // set 16 bytes
        movs        ip, r2, lsl #28
        bcc         1f
        vst1.8      {q0}, [r3, :128]!
        beq         2f
1:
        // set 8 bytes
        bpl         1f
        vst1.8      {d0}, [r3, :64]!
1:
        // set 4 bytes
        tst         r2, #4
        it          ne
        strne       r1, [r3], #4
1:
        // set 2 bytes
        movs        ip, r2, lsl #31
        it          cs
        strhcs      r1, [r3], #2
        // set 1 byte
        it          mi
        strbmi      r1, [r3]
2:
        bx          lr

.L_less_than_16:
        // Store up to 15 bytes without worrying about byte alignment
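        // (lsl #29 puts bit 3 of the count in C and bit 2 in N; lsl #31 later
        // does the same for bits 1 and 0, so predicated stores cover every
        // remaining size).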
        movs        ip, r2, lsl #29
        bcc         1f
        str         r1, [r3], #4
        str         r1, [r3], #4
        beq         2f
1:
        it          mi
        strmi       r1, [r3], #4
        movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]
2:
        bx          lr
END(memset)

        .data
error_string:
        .string     "memset: prevented write past end of buffer"