/* Copyright (c) 2012-2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 *
 */

#include <private/bionic_asm.h>

/* By default we assume that the DC instruction can be used to zero
   data blocks more efficiently.  In some circumstances this might be
   unsafe, for example in an asymmetric multiprocessor environment with
   different DC clear lengths (neither the upper nor lower lengths are
   safe to use).

   If code may be run in a virtualized environment, then define
   MAYBE_VIRT.  This will cause the code to cache the system register
   values rather than re-reading them each call.  */

/* Register aliases (AAPCS64 argument registers plus caller-saved
   scratch x3-x7).  dstin doubles as the return value.  */
#define dstin		x0	/* arg 0: destination pointer (returned unchanged) */
#define val		x1	/* arg 1: fill value, 64-bit view */
#define valw		w1	/* arg 1: fill value, 32-bit view */
#define count		x2	/* arg 2: number of bytes to set */
#define dst		x3	/* running destination cursor */
#define dstend		x4	/* dstin + count: one past the last byte */
#define tmp1		x5
#define tmp1w		w5
#define tmp2		x6
#define tmp2w		w6
#define zva_len	x7	/* DC ZVA block size in bytes (zva_other path) */
#define zva_lenw	w7

/* L(x) expands to the assembler-local label .Lx (not exported).  */
#define L(l) .L ## l

ENTRY(memset)
	/* void *memset(void *dst, int c, size_t n)
	 *
	 * ABI:      AAPCS64.
	 * In:       dstin (x0) = dst, valw (w1) = c, count (x2) = n.
	 * Out:      x0 = dst, unchanged (no instruction below writes x0).
	 * Clobbers: x3-x7, v0, NZCV flags.
	 *
	 * Strategy: sizes <= 96 use overlapping unaligned stores from both
	 * ends; larger sizes use an stp loop, switching to DC ZVA for
	 * zero-fills of >= 256 bytes when the hardware permits it.  */

	dup	v0.16B, valw		/* Replicate the fill byte into all 16 lanes.  */
	add	dstend, dstin, count

	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	mov	val, v0.D[0]		/* val = fill byte replicated across 8 bytes.  */

	/* Set 0..15 bytes.  Head/tail stores may overlap; that is benign
	   since both write the same value.  */
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	nop
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 17..96 bytes.  */
L(set_medium):
	str	q0, [dstin]
	tbnz	count, 6, L(set96)
	str	q0, [dstend, -16]
	tbz	count, 5, 1f
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
L(set96):
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 3
	nop
L(set_long):
	and	valw, valw, 255		/* Reduce to the byte actually stored.  */
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16 bytes.  */
	str	q0, [dstin]
	cmp	count, 256
	ccmp	valw, 0, 0, cs		/* EQ only if count >= 256 AND value == 0.  */
	b.eq	L(try_zva)
L(no_zva):
	sub	count, dstend, dst	/* Count is 16 too large.  */
	add	dst, dst, 16
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
1:	stp	q0, q0, [dst], 64
	stp	q0, q0, [dst, -32]
L(tail64):
	subs	count, count, 64
	b.hi	1b
2:	stp	q0, q0, [dstend, -64]	/* Final 64 bytes, anchored at the end
					   (may overlap the loop's stores).  */
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 3
L(try_zva):
	mrs	tmp1, dczid_el0
	tbnz	tmp1w, 4, L(no_zva)	/* DCZID_EL0 bit 4 (DZP) set => DC ZVA prohibited.  */
	and	tmp1w, tmp1w, 15	/* BS field: log2(block size in words).  */
	cmp	tmp1w, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(zva_128)

	/* Write the first and last 64 byte aligned block using stp rather
	   than using DC ZVA.  This is faster on some cores.
	 */
L(zva_64):
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63		/* Align dst down to the 64-byte ZVA granule.  */
	stp	q0, q0, [dst, 64]
	stp	q0, q0, [dst, 96]
	sub	count, dstend, dst	/* Count is now 128 too large.  */
	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
	add	dst, dst, 128
	nop
1:	dc	zva, dst		/* Zero one 64-byte block.  */
	add	dst, dst, 64
	subs	count, count, 64
	b.hi	1b
	stp	q0, q0, [dst, 0]
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 3
L(zva_128):
	cmp	tmp1w, 5		/* ZVA size is 128 bytes.  */
	b.ne	L(zva_other)

	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]
	stp	q0, q0, [dst, 96]
	bic	dst, dst, 127		/* Align dst down to the 128-byte ZVA granule.  */
	sub	count, dstend, dst	/* Count is now 128 too large.  */
	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
	add	dst, dst, 128
1:	dc	zva, dst		/* Zero one 128-byte block.  */
	add	dst, dst, 128
	subs	count, count, 128
	b.hi	1b
	stp	q0, q0, [dstend, -128]
	stp	q0, q0, [dstend, -96]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

L(zva_other):
	/* General ZVA block size: zva_len = 4 << BS bytes.  */
	mov	tmp2w, 4
	lsl	zva_lenw, tmp2w, tmp1w
	add	tmp1, zva_len, 64	/* Max alignment bytes written.  */
	cmp	count, tmp1
	blo	L(no_zva)		/* Too small to be worth aligning for DC ZVA.  */

	sub	tmp2, zva_len, 1	/* Alignment mask.  */
	add	tmp1, dst, zva_len
	add	dst, dst, 16
	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
	beq	2f
1:	stp	q0, q0, [dst], 64	/* Fill up to the first aligned block.  */
	stp	q0, q0, [dst, -32]
	subs	count, count, 64
	b.hi	1b
2:	mov	dst, tmp1
	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
	subs	count, count, zva_len
	b.lo	4f
3:	dc	zva, dst		/* Zero one zva_len-byte block.  */
	add	dst, dst, zva_len
	subs	count, count, zva_len
	b.hs	3b
4:	add	count, count, zva_len	/* Undo the bias; finish with the stp tail.  */
	b	L(tail64)

END(memset)
NOTE_GNU_PROPERTY()