/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

/*
 * Optimized memset() for ARM (AArch32), tuned for NVIDIA Denver / Cortex-A15.
 *
 * memset() returns its first argument.
 */

        // Target/feature directives: allow cortex-a15 instructions (pldw)
        // and NEON stores; unified syntax is required for IT blocks below.
        .cpu    cortex-a15
        .fpu    neon
        .syntax unified

//-----------------------------------------------------------------------
// void* __memset_chk(void* dst, int ch, size_t n, size_t dst_len)
// FORTIFY entry point for memset.
// In:    r0 = dst, r1 = ch, r2 = n, r3 = dst_len (compile-time buffer size)
// Out:   tail-calls memset (returns dst) when n <= dst_len;
//        otherwise aborts via __memset_chk_fail (does not return).
//-----------------------------------------------------------------------
ENTRY(__memset_chk_denver)
        cmp     r2, r3
        bls     memset                  // n fits in the buffer: plain memset

        // Overflow detected. Preserve lr (with CFI) so the abort
        // backtrace shows our caller.
        push    {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        bl      __memset_chk_fail
END(__memset_chk_denver)
Shu Zhang5b5d6e72014-03-12 11:18:41 +080053
//-----------------------------------------------------------------------
// void* memset(void* dst, int ch, size_t n)
// In:    r0 = dst, r1 = ch (only low byte used), r2 = n
// Out:   r0 = dst (r0 is never modified; r3 is the advancing pointer)
// Clobb: r1, r2, r3, ip, q0-q3, flags
//-----------------------------------------------------------------------
ENTRY(memset_denver)
        pldw    [r0]                    // prefetch destination for writing
        mov     r3, r0                  // r3 advances; r0 stays as return value

        // Duplicate the low byte of r1 across all four bytes of the word.
        mov     r1, r1, lsl #24
        orr     r1, r1, r1, lsr #8
        orr     r1, r1, r1, lsr #16

        cmp     r2, #16
        blo     .L_less_than_16

        // This section handles regions 16 bytes or larger
        //
        // Use aligned vst1.8 and vstm when possible. Register values will be:
        //   ip is scratch
        //   q0, q1, and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the advancing destination pointer
        vdup.32 q0, r1

        ands    ip, r3, 0xF             // ip = misalignment within 16 bytes
        beq     .L_memset_aligned

        // Align dest pointer to 16-byte boundary.
        pldw    [r0, #64]
        rsb     ip, ip, #16             // ip = bytes needed to reach alignment

        // Pre-adjust the byte count to reflect post-alignment value. Expecting
        // 8-byte alignment to be rather common so we special case that one.
        sub     r2, r2, ip

        /* set 1 byte */
        tst     ip, #1
        it      ne
        strbne  r1, [r3], #1
        /* set 2 bytes */
        tst     ip, #2
        it      ne
        strhne  r1, [r3], #2
        /* set 4 bytes: lsl #29 puts bit 2 in N, bit 3 in C */
        movs    ip, ip, lsl #29
        it      mi
        strmi   r1, [r3], #4
        /* set 8 bytes */
        itt     cs
        strcs   r1, [r3], #4
        strcs   r1, [r3], #4

.L_memset_aligned:
        // Destination is now 16-byte aligned. Determine how to handle
        // remaining bytes.
        vmov    q1, q0
        cmp     r2, #128
        blo     .L_less_than_128

        // We need to set a larger block of memory. Use four Q regs to
        // set a full cache line in one instruction. Pre-decrement
        // r2 to simplify end-of-loop detection.
        vmov    q2, q0
        vmov    q3, q0
        pldw    [r0, #128]
        sub     r2, r2, #128
        .align  4
.L_memset_loop_128:
        pldw    [r3, #192]
        vstm    r3!, {q0, q1, q2, q3}   // 64 bytes
        vstm    r3!, {q0, q1, q2, q3}   // 64 bytes
        subs    r2, r2, #128
        bhs     .L_memset_loop_128

        // Un-bias r2 so it contains the number of bytes left. Early
        // exit if we are done.
        adds    r2, r2, #128
        beq     2f

        .align  4
.L_less_than_128:
        // Remaining count < 128; dispatch on its bits via shifted flags.
        // set 64 bytes (lsl #26: C = bit 6, N = bit 5, Z = no bits 0-5 set)
        movs    ip, r2, lsl #26
        bcc     1f
        vst1.8  {q0, q1}, [r3, :128]!
        vst1.8  {q0, q1}, [r3, :128]!
        beq     2f
1:
        // set 32 bytes
        bpl     1f
        vst1.8  {q0, q1}, [r3, :128]!
1:
        // set 16 bytes (lsl #28: C = bit 4, N = bit 3, Z = no bits 0-3 set)
        movs    ip, r2, lsl #28
        bcc     1f
        vst1.8  {q0}, [r3, :128]!
        beq     2f
1:
        // set 8 bytes
        bpl     1f
        vst1.8  {d0}, [r3, :64]!
1:
        // set 4 bytes
        tst     r2, #4
        it      ne
        strne   r1, [r3], #4
1:
        // set 2 bytes (lsl #31: C = bit 1, N = bit 0)
        movs    ip, r2, lsl #31
        it      cs
        strhcs  r1, [r3], #2
        // set 1 byte
        it      mi
        strbmi  r1, [r3]
2:
        bx      lr                      // r0 still holds the original dst

.L_less_than_16:
        // Store up to 15 bytes without worrying about byte alignment.
        // lsl #29: C = bit 3 (8s), N = bit 2 (4s), Z = count < 8 with bits 0-2 clear
        movs    ip, r2, lsl #29
        bcc     1f
        str     r1, [r3], #4            // 8 bytes via two word stores
        str     r1, [r3], #4
        beq     2f
1:
        it      mi
        strmi   r1, [r3], #4            // 4 bytes
        movs    ip, r2, lsl #31         // C = bit 1, N = bit 0
        it      mi
        strbmi  r1, [r3], #1            // 1 byte
        itt     cs
        strbcs  r1, [r3], #1            // 2 bytes
        strbcs  r1, [r3]
2:
        bx      lr                      // r0 still holds the original dst
END(memset_denver)