/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29
#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>

        /*
         * Optimized memset() for ARM.
         *
         * memset() returns its first argument.
         */

        // Assembler configuration for everything below:
        //   .cpu    - instruction set selection; the code uses pldw.
        //   .fpu    - enables the NEON mnemonics (vdup/vmov/vst1/vstm).
        //   .syntax - unified syntax, so the it/itt blocks and the
        //             condition-suffixed stores (strbne, strhcs, ...) are
        //             accepted.
        .cpu        cortex-a15
        .fpu        neon
        .syntax     unified
43
ENTRY(__memset_chk)
        // Checked front end for memset.
        //
        // In:  r0, r1, r2 are forwarded untouched to memset on the
        //      success path.
        //      r2 = number of bytes to set
        //      r3 = limit that r2 is checked against
        //           (presumably the destination buffer size - confirm
        //           against the callers)
        //
        // If r2 <= r3 (unsigned) control transfers to .L_done, a label that
        // lives in bzero and falls straight through into memset. Otherwise
        // the failure handler is called with the message address in r0 and
        // the event code in r1.
        cmp         r2, r3
        bls         .L_done

        // Preserve lr for backtrace.
        // NOTE(review): a single-register push leaves sp only 4-byte
        // aligned at the call below; this appears to be tolerated because
        // the callee is not expected to return - confirm.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // r0 = PC-relative offset of error_string, r1 = event code. Both
        // are loaded from the literal words placed after the call.
        ldr         r0, error_message
        ldr         r1, error_code
1:
        // Turn the offset into an absolute address. Reading pc here yields
        // the address of label 1 plus 8 (ARM-state read-ahead), which is
        // exactly the bias subtracted in the error_message literal.
        // NOTE(review): the +8 bias is only right when this is assembled
        // as ARM (not Thumb) code - confirm the build mode.
        add         r0, pc
        // No instructions follow this call, only data, so the callee is
        // presumably noreturn.
        bl          __fortify_chk_fail
error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        .word       error_string-(1b+8)
END(__memset_chk)
64
ENTRY(bzero)
        // bzero: zero a region by re-using memset.
        //
        // In:  r0 = destination, r1 = byte count.
        // The arguments are shuffled into memset's register layout
        // (r1 = fill value, r2 = byte count) and execution then runs off
        // the end of this function into memset.
        // NOTE(review): the fall-through relies on memset being emitted
        // directly after this function with no padding in between -
        // confirm against the ENTRY()/END() macro definitions.
        mov         r2, r1
        mov         r1, #0
.L_done:
        // Also the landing point for __memset_chk once its size check has
        // passed.
        // Fall through to memset...
END(bzero)
71
ENTRY(memset)
        // memset: fill r2 bytes starting at r0 with the low byte of r1.
        //
        // In:   r0 = destination, r1 = fill value, r2 = byte count
        // Out:  r0 = destination (never modified; r3 is the moving pointer)
        // Uses: r1, r2, r3, ip, flags; q0-q3 on the >= 16 byte path
        //
        // Several paths below branch on flags produced by
        // "movs ip, <reg>, lsl #n". That shift leaves bit (32-n) of the
        // source in C and bit (31-n) in N, and sets Z when every lower bit
        // is clear, so one instruction tests two size bits plus "nothing
        // left". None of the stores alter the flags.
        pldw        [r0]
        mov         r3, r0

        // Duplicate the low byte of r1 into all four bytes of r1.
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        cmp         r2, #16
        blo         .L_less_than_16

        // This section handles regions 16 bytes or larger
        //
        // Use aligned vst1.8 and vstm when possible. Register values will be:
        //   ip is scratch
        //   q0, q1, and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the advancing destination pointer
        vdup.32     q0, r1

        ands        ip, r3, 0xF
        beq         .L_memset_aligned

        // Align dest pointer to 16-byte boundary.
        // ip becomes the number of bytes (1..15) needed to reach it.
        pldw        [r0, #64]
        rsb         ip, ip, #16

        // Pre-adjust the byte count to reflect the post-alignment value.
        // The count is at least 16 here, so it cannot go negative.
        sub         r2, r2, ip

        // Peel off 1, 2, 4 and 8 bytes according to bits 0..3 of ip. Each
        // store leaves r3 aligned for the next, wider one.
        /* set 1 byte */
        tst         ip, #1
        it          ne
        strbne      r1, [r3], #1
        /* set 2 bytes */
        tst         ip, #2
        it          ne
        strhne      r1, [r3], #2
        /* set 4 bytes */
        // After the shift: N = bit 2 of ip (4 bytes), C = bit 3 (8 bytes).
        movs        ip, ip, lsl #29
        it          mi
        strmi       r1, [r3], #4
        /* set 8 bytes */
        itt         cs
        strcs       r1, [r3], #4
        strcs       r1, [r3], #4

.L_memset_aligned:
        // Destination is now 16-byte aligned. Determine how to handle
        // remaining bytes.
        vmov        q1, q0
        cmp         r2, #128
        blo         .L_less_than_128

        // We need to set a larger block of memory. Use four Q regs to
        // store 64 bytes per vstm, 128 bytes per loop iteration.
        // Pre-decrement r2 to simplify end-of-loop detection.
        vmov        q2, q0
        vmov        q3, q0
        pldw        [r0, #128]
        sub         r2, r2, #128
        // On ARM, GNU as treats the .align operand as a power of two, so
        // this requests 16-byte alignment for the loop head.
        .align      4
.L_memset_loop_128:
        pldw        [r3, #192]
        vstm        r3!, {q0, q1, q2, q3}
        vstm        r3!, {q0, q1, q2, q3}
        // r2 is biased by -128: a borrow here (carry clear) means fewer
        // than 128 bytes remain after this iteration.
        subs        r2, r2, #128
        bhs         .L_memset_loop_128

        // Un-bias r2 so it contains the number of bytes left. Early
        // exit if we are done.
        adds        r2, r2, #128
        beq         2f

        .align      4
.L_less_than_128:
        // r2 < 128 and r3 is 16-byte aligned. Walk down the size bits of r2.
        // set 64 bytes
        // C = bit 6 (64 bytes), N = bit 5 (32 bytes), Z = bits 5..0 clear.
        movs        ip, r2, lsl #26
        bcc         1f
        vst1.8      {q0, q1}, [r3, :128]!
        vst1.8      {q0, q1}, [r3, :128]!
        beq         2f
1:
        // set 32 bytes
        bpl         1f
        vst1.8      {q0, q1}, [r3, :128]!
1:
        // set 16 bytes
        // C = bit 4 (16 bytes), N = bit 3 (8 bytes), Z = bits 3..0 clear.
        movs        ip, r2, lsl #28
        bcc         1f
        vst1.8      {q0}, [r3, :128]!
        beq         2f
1:
        // set 8 bytes
        bpl         1f
        vst1.8      {d0}, [r3, :64]!
1:
        // set 4 bytes
        tst         r2, #4
        it          ne
        strne       r1, [r3], #4
1:
        // set 2 bytes
        // C = bit 1 (2 bytes), N = bit 0 (1 byte).
        movs        ip, r2, lsl #31
        it          cs
        strhcs      r1, [r3], #2
        // set 1 byte
        it          mi
        strbmi      r1, [r3]
2:
        bx          lr

.L_less_than_16:
        // Store up to 15 bytes without worrying about byte alignment
        // NOTE(review): the word stores below may hit unaligned addresses;
        // this presumably relies on unaligned word access being permitted
        // on the target - confirm.
        // C = bit 3 (8 bytes), N = bit 2 (4 bytes), Z = bits 2..0 clear.
        movs        ip, r2, lsl #29
        bcc         1f
        str         r1, [r3], #4
        str         r1, [r3], #4
        beq         2f
1:
        it          mi
        strmi       r1, [r3], #4
        // C = bit 1 (2 bytes, written as two byte stores), N = bit 0.
        movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]
2:
        bx          lr
END(memset)
205
// Message text whose address __memset_chk computes PC-relatively (through
// its error_message literal) before calling the failure handler.
// NOTE(review): nothing in this file writes to the string, so a read-only
// section would presumably work as well - confirm before moving it.
        .data
error_string:
        .string     "memset: prevented write past end of buffer"