/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Unaligned accesses
 *
 */

#include <private/bionic_asm.h>

/* By default we assume that the DC instruction can be used to zero
   data blocks more efficiently.  In some circumstances this might be
   unsafe, for example in an asymmetric multiprocessor environment with
   different DC clear lengths (neither the upper nor lower lengths are
   safe to use).

   If code may be run in a virtualized environment, then define
   MAYBE_VIRT.  This will cause the code to cache the system register
   values rather than re-reading them each call.  */
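
/* A note on discovery, based on the architectural layout of DCZID_EL0:
   bit 4 (DZP) is set when use of DC ZVA is prohibited, and bits [3:0] give
   log2 of the block size in words, so the code below computes the block
   size as zva_len = 4 << DCZID_EL0[3:0] bytes (typically 64).  */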

#define dstin       x0
#define val         w1
#define count       x2
#define dst_count   x3 /* for __memset_chk */
#define tmp1        x3
#define tmp1w       w3
#define tmp2        x4
#define tmp2w       w4
#define zva_len_x   x5
#define zva_len     w5
#define zva_bits_x  x6

#define A_l         x7
#define A_lw        w7
#define dst         x8
#define tmp3w       w9

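/* __memset_chk is the FORTIFY entry point.  It takes the same arguments as
   memset plus the real size of the destination buffer in dst_count (x3):
   if the write fits (count <= dst_count, unsigned) it tail-calls memset,
   otherwise it reports the overflow via __memset_chk_fail.  */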
ENTRY(__memset_chk)
  cmp count, dst_count
  bls memset

  // Preserve for accurate backtrace.
  stp x29, x30, [sp, -16]!
  .cfi_def_cfa_offset 16
  .cfi_rel_offset x29, 0
  .cfi_rel_offset x30, 8

  bl __memset_chk_fail
END(__memset_chk)

ENTRY(memset)

        mov     dst, dstin              /* Preserve return value.  */
        ands    A_lw, val, #255
        b.eq    .Lzero_mem
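        /* Replicate the low byte of val across all 64 bits of A_l
         * (e.g. 0x2a becomes 0x2a2a2a2a2a2a2a2a), so every stp below
         * stores 16 copies of the fill byte at once.  */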
        orr     A_lw, A_lw, A_lw, lsl #8
        orr     A_lw, A_lw, A_lw, lsl #16
        orr     A_l, A_l, A_l, lsl #32
.Ltail_maybe_long:
        cmp     count, #64
        b.ge    .Lnot_short
.Ltail_maybe_tiny:
        cmp     count, #15
        b.le    .Ltail15tiny
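        /* At most 63 bytes remain.  Bits 5:4 of count select how many whole
         * 16-byte stores are still needed (0, 16, 32 or 48 bytes); .Ltail15
         * then covers the final 0..15 bytes with one 16-byte store that may
         * overlap bytes which have already been set.  */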
.Ltail63:
        ands    tmp1, count, #0x30
        b.eq    .Ltail15
        add     dst, dst, tmp1
        cmp     tmp1w, #0x20
        b.eq    1f
        b.lt    2f
        stp     A_l, A_l, [dst, #-48]
1:
        stp     A_l, A_l, [dst, #-32]
2:
        stp     A_l, A_l, [dst, #-16]

.Ltail15:
        and     count, count, #15
        add     dst, dst, count
        stp     A_l, A_l, [dst, #-16]   /* Repeat some/all of last store.  */
        ret

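        /* 0..15 bytes, and nothing has necessarily been written yet, so the
         * overlapping-store trick cannot be used.  Test each bit of count
         * and emit an 8/4/2/1-byte store for every bit that is set.  */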
.Ltail15tiny:
        /* Set up to 15 bytes.  Does not assume earlier memory
           being set.  */
        tbz     count, #3, 1f
        str     A_l, [dst], #8
1:
        tbz     count, #2, 1f
        str     A_lw, [dst], #4
1:
        tbz     count, #1, 1f
        strh    A_lw, [dst], #2
1:
        tbz     count, #0, 1f
        strb    A_lw, [dst]
1:
        ret

        /* Critical loop.  Start at a new cache line boundary.  Assuming
         * 64 bytes per line, this ensures the entire loop is in one line.  */
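        /* The body first brings dst up to 16-byte alignment with a single
         * (possibly overlapping) 16-byte store, then pre-biases dst by -16 so
         * the unrolled loop can use the write-back form of its last store
         * while still writing 64 bytes per iteration.  */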
        .p2align 6
.Lnot_short:
        neg     tmp2, dst
        ands    tmp2, tmp2, #15
        b.eq    2f
        /* Bring DST to 128-bit (16-byte) alignment.  We know that there's
         * more than that to set, so we simply store 16 bytes and advance by
         * the amount required to reach alignment.  */
        sub     count, count, tmp2
        stp     A_l, A_l, [dst]
        add     dst, dst, tmp2
        /* There may be less than 63 bytes to go now.  */
        cmp     count, #63
        b.le    .Ltail63
2:
        sub     dst, dst, #16           /* Pre-bias.  */
        sub     count, count, #64
1:
        stp     A_l, A_l, [dst, #16]
        stp     A_l, A_l, [dst, #32]
        stp     A_l, A_l, [dst, #48]
        stp     A_l, A_l, [dst, #64]!
        subs    count, count, #64
        b.ge    1b
        tst     count, #0x3f
        add     dst, dst, #16
        b.ne    .Ltail63
        ret

        /* For zeroing memory, check to see if we can use the ZVA feature to
         * zero entire 'cache' lines.  */
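        /* Small zero fills still use the ordinary store loops above; only
         * when at least 128 bytes remain after 16-byte alignment is it worth
         * reading DCZID_EL0 and aligning to the DC ZVA block size.  */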
.Lzero_mem:
        mov     A_l, #0
        cmp     count, #63
        b.le    .Ltail_maybe_tiny
        neg     tmp2, dst
        ands    tmp2, tmp2, #15
        b.eq    1f
        sub     count, count, tmp2
        stp     A_l, A_l, [dst]
        add     dst, dst, tmp2
        cmp     count, #63
        b.le    .Ltail63
1:
        /* For zeroing small amounts of memory, it's not worth setting up
         * the line-clear code.  */
        cmp     count, #128
        b.lt    .Lnot_short
#ifdef MAYBE_VIRT
        /* For efficiency when virtualized, we cache the ZVA capability.  */
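        /* The cached word encodes three states: 0 means the length has not
         * been probed yet, a value with bit 31 set means DC ZVA must not be
         * used, and anything else is the block length in bytes.  */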
        adrp    tmp2, .Lcache_clear
        ldr     zva_len, [tmp2, #:lo12:.Lcache_clear]
        tbnz    zva_len, #31, .Lnot_short
        cbnz    zva_len, .Lzero_by_line
        mrs     tmp1, dczid_el0
        tbz     tmp1, #4, 1f
        /* ZVA not available.  Remember this for next time.  */
        mov     zva_len, #~0
        str     zva_len, [tmp2, #:lo12:.Lcache_clear]
        b       .Lnot_short
1:
        mov     tmp3w, #4
        and     zva_len, tmp1w, #15     /* Safety: other bits reserved.  */
        lsl     zva_len, tmp3w, zva_len
        str     zva_len, [tmp2, #:lo12:.Lcache_clear]
#else
        mrs     tmp1, dczid_el0
        tbnz    tmp1, #4, .Lnot_short
        mov     tmp3w, #4
        and     zva_len, tmp1w, #15     /* Safety: other bits reserved.  */
        lsl     zva_len, tmp3w, zva_len
#endif

.Lzero_by_line:
        /* Compute how far we need to go to become suitably aligned.  We're
         * already at quad-word alignment.  */
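        /* DC ZVA is only used when count covers at least one full block and,
         * if dst is not yet block-aligned, when at least 64 bytes and one
         * full block remain after the alignment stores (the ccmp below);
         * otherwise we fall back to the plain store loop.  */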
        cmp     count, zva_len_x
        b.lt    .Lnot_short             /* Not enough to reach alignment.  */
        sub     zva_bits_x, zva_len_x, #1
        neg     tmp2, dst
        ands    tmp2, tmp2, zva_bits_x
        b.eq    1f                      /* Already aligned.  */
        /* Not aligned, check that there's enough to copy after alignment.  */
        sub     tmp1, count, tmp2
        cmp     tmp1, #64
        ccmp    tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
        b.lt    .Lnot_short
        /* We know that there's at least 64 bytes to zero and that it's safe
         * to overrun by 64 bytes.  */
        mov     count, tmp1
2:
        stp     A_l, A_l, [dst]
        stp     A_l, A_l, [dst, #16]
        stp     A_l, A_l, [dst, #32]
        subs    tmp2, tmp2, #64
        stp     A_l, A_l, [dst, #48]
        add     dst, dst, #64
        b.ge    2b
        /* We've overrun a bit, so adjust dst downwards.  */
        add     dst, dst, tmp2
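        /* dst is now block-aligned.  count is pre-decremented by one block so
         * the "b.ge" loop zeroes a whole block per DC ZVA while at least one
         * block remains; whatever is left afterwards (count & zva_bits_x) is
         * finished by the ordinary store tails via .Ltail_maybe_long.  */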
1:
        sub     count, count, zva_len_x
3:
        dc      zva, dst
        add     dst, dst, zva_len_x
        subs    count, count, zva_len_x
        b.ge    3b
        ands    count, count, zva_bits_x
        b.ne    .Ltail_maybe_long
        ret
END(memset)

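/* Backing store for the cached ZVA length used above: one 32-bit word in
   .bss, which is zero-initialized at program start so the first call probes
   DCZID_EL0.  */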
#ifdef MAYBE_VIRT
        .bss
        .p2align 2
.Lcache_clear:
        .space 4
#endif