Add AArch64 optimized bzero based on memset

Change-Id: I62cf25bfe23b9d811e00af7307bbd19d89937792
Signed-off-by: Bernhard Rosenkraenzer <Bernhard.Rosenkranzer@linaro.org>
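
The code only reacts to a BZERO preprocessor define, so the bzero entry point
is produced by assembling memset.S a second time with that macro set. A minimal
sketch of one way to wire this up (the companion file name is an assumption,
not part of this patch):

	/* bzero.S (illustrative companion file): assemble the shared memset
	 * source with BZERO defined so the same code is emitted under the
	 * bzero symbol instead of memset. */
	#define BZERO
	#include "memset.S"

Since bzero(void *s, size_t n) takes no fill value, its length arrives in x1
under AAPCS64 rather than x2, and the fill value is implicitly zero; hence the
BZERO build remaps count and branches straight to the zeroing path without
testing val.
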
diff --git a/libc/arch-arm64/generic/bionic/memset.S b/libc/arch-arm64/generic/bionic/memset.S
index 4750fe3..7c204b4 100644
--- a/libc/arch-arm64/generic/bionic/memset.S
+++ b/libc/arch-arm64/generic/bionic/memset.S
@@ -38,15 +38,19 @@
    data blocks more efficiently.  In some circumstances this might be
    unsafe, for example in an asymmetric multiprocessor environment with
    different DC clear lengths (neither the upper nor lower lengths are
-   safe to use).  The feature can be disabled by defining DONT_USE_DC.
+   safe to use).
 
    If code may be run in a virtualized environment, then define
    MAYBE_VIRT.  This will cause the code to cache the system register
    values rather than re-reading them each call.  */
 
 #define dstin		x0
-#define val		w1
+#ifdef BZERO
+#define count		x1
+#else
 #define count		x2
+#endif
+#define val		w1
 #define tmp1		x3
 #define tmp1w		w3
 #define tmp2		x4
@@ -60,13 +64,18 @@
 #define dst		x8
 #define tmp3w		w9
 
+#ifdef BZERO
+ENTRY(bzero)
+#else
 ENTRY(memset)
+#endif
 
 	mov	dst, dstin		/* Preserve return value.  */
-	ands	A_lw, val, #255
-#ifndef DONT_USE_DC
-	b.eq	.Lzero_mem
+#ifdef BZERO
+	b	.Lzero_mem
 #endif
+	ands	A_lw, val, #255
+	b.eq	.Lzero_mem
 	orr	A_lw, A_lw, A_lw, lsl #8
 	orr	A_lw, A_lw, A_lw, lsl #16
 	orr	A_l, A_l, A_l, lsl #32
@@ -143,7 +152,6 @@
 	b.ne	.Ltail63
 	ret
 
-#ifndef DONT_USE_DC
 	/* For zeroing memory, check to see if we can use the ZVA feature to
 	 * zero entire 'cache' lines.  */
 .Lzero_mem:
@@ -225,7 +233,11 @@
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
+#ifdef BZERO
+END(bzero)
+#else
 END(memset)
+#endif
 
 #ifdef MAYBE_VIRT
 	.bss
@@ -233,4 +245,3 @@
 .Lcache_clear:
 	.space 4
 #endif
-#endif /* DONT_USE_DC */
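
For reference, a sketch of the two resulting call sites; the buffer pointer in
x19 and the 4096-byte length are arbitrary example values, not taken from this
patch:

	/* Zero 4096 bytes at the address held in x19 (example values only).
	 * bzero takes the length as its second argument, so it lands in x1. */
		mov	x0, x19
		mov	x1, #4096
		bl	bzero

	/* The equivalent memset call passes the fill value in w1 and the
	 * length in x2, matching the non-BZERO register assignments above. */
		mov	x0, x19
		mov	w1, #0
		mov	x2, #4096
		bl	memset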