Merge "Use unified syntax to compile with both llvm and gcc."
diff --git a/libc/Android.mk b/libc/Android.mk
index 6c2c4ae..54047c8 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -941,10 +941,6 @@
 LOCAL_CFLAGS := $(libc_common_cflags) \
     -Wframe-larger-than=2048 \
 
-# memcpy.S, memchr.S, etc. do not compile with Clang.
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
-
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast
 LOCAL_C_INCLUDES := $(libc_common_c_includes) bionic/libstdc++/include
@@ -972,10 +968,6 @@
 LOCAL_CFLAGS := $(libc_common_cflags) \
     -Wframe-larger-than=2048 \
 
-# memcpy.S, memchr.S, etc. do not compile with Clang.
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
-
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast
 LOCAL_C_INCLUDES := $(libc_common_c_includes) bionic/libstdc++/include
@@ -1024,10 +1016,6 @@
 LOCAL_CFLAGS := $(libc_common_cflags) \
     -Wframe-larger-than=2048 \
 
-# memcpy.S, memchr.S, etc. do not compile with Clang.
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
-
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags) -Wold-style-cast
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index 299f5a2..8ee6ac2 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -35,6 +35,7 @@
  */
 
     .fpu    neon
+    .syntax unified
 
 ENTRY(__memset_chk)
         cmp         r2, r3
@@ -100,9 +101,9 @@
 1:      bge         2f
         vst1.32     {d0[0]}, [r0]!
 2:      movs        ip, r2, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
+        strbmi      r1, [r0], #1
+        strbcs      r1, [r0], #1
+        strbcs      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
 END(memset)
@@ -131,11 +132,11 @@
         orr         r1, r1, r1, lsr #16
 
         movs        r12, r3, lsl #31
-        strcsb      r1, [r0], #1    /* can't use strh (alignment unknown) */
-        strcsb      r1, [r0], #1
-        strmib      r1, [r0], #1
+        strbcs      r1, [r0], #1    /* can't use strh (alignment unknown) */
+        strbcs      r1, [r0], #1
+        strbmi      r1, [r0], #1
         subs        r2, r2, r3
-        ldmlsfd     sp!, {r0, r4-r7, lr}   /* return */
+        popls       {r0, r4-r7, lr}   /* return */
         bxls        lr
 
         /* align the destination to a cache-line */
@@ -155,9 +156,9 @@
 
         /* conditionally writes 0 to 7 words (length in r3) */
         movs        r3, r3, lsl #28
-        stmcsia     r0!, {r1, lr}
-        stmcsia     r0!, {r1, lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmmi       r0!, {r1, lr}
         movs        r3, r3, lsl #2
         strcs       r1, [r0], #4
 
@@ -172,13 +173,13 @@
 
         /* conditionally stores 0 to 31 bytes */
         movs        r2, r2, lsl #28
-        stmcsia     r0!, {r1,r3,r12,lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1,r3,r12,lr}
+        stmmi       r0!, {r1, lr}
         movs        r2, r2, lsl #2
         strcs       r1, [r0], #4
-        strmih      r1, [r0], #2
+        strhmi      r1, [r0], #2
         movs        r2, r2, lsl #2
-        strcsb      r1, [r0]
+        strbcs      r1, [r0]
         ldmfd       sp!, {r0, r4-r7, lr}
         bx          lr
 END(__memset_large_copy)
diff --git a/libc/arch-arm/generic/bionic/memcmp.S b/libc/arch-arm/generic/bionic/memcmp.S
index 70a2a58..c78dbd4 100644
--- a/libc/arch-arm/generic/bionic/memcmp.S
+++ b/libc/arch-arm/generic/bionic/memcmp.S
@@ -40,6 +40,8 @@
  * Optimized memcmp() for Cortex-A9.
  */
 
+.syntax unified
+
 ENTRY(memcmp)
         pld         [r0, #(CACHE_LINE_SIZE * 0)]
         pld         [r0, #(CACHE_LINE_SIZE * 1)]
@@ -161,25 +163,25 @@
         eors        r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         ldreq       r0, [r4], #4
         ldreq       lr, [r1, #4]!
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         ldreq       r0, [r4], #4
         ldreq       lr, [r1, #4]!
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         ldreq       r0, [r4], #4
         ldreq       lr, [r1, #4]!
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         ldreq       r0, [r4], #4
         ldreq       ip, [r1, #4]!
-        eoreqs      r0, r0, lr
+        eorseq      r0, r0, lr
         bne         2f
         subs        r2, r2, #32
         bhs         0b
@@ -263,17 +265,17 @@
         ldreq       lr, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, lr, lsl #16
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         moveq       ip, lr, lsr #16
         ldreq       lr, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, lr, lsl #16
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         moveq       ip, lr, lsr #16
         ldreq       lr, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, lr, lsl #16
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         bne         7f
         subs        r2, r2, #16
         bhs         6b
@@ -317,7 +319,7 @@
         ldreq       r7, [r1], #4
         ldreq       r0, [r4], #4
         orreq       ip, ip, r7, lsl r6
-        eoreqs      r0, r0, ip
+        eorseq      r0, r0, ip
         bne         7f
         subs        r2, r2, #8
         bhs         6b
diff --git a/libc/arch-arm/generic/bionic/memcpy.S b/libc/arch-arm/generic/bionic/memcpy.S
index b0c79ab..ea5a399 100644
--- a/libc/arch-arm/generic/bionic/memcpy.S
+++ b/libc/arch-arm/generic/bionic/memcpy.S
@@ -37,6 +37,8 @@
          * so we have to preserve R0.
          */
 
+         .syntax unified
+
 ENTRY(__memcpy_chk)
         cmp         r2, r3
         bhi         __memcpy_chk_fail
@@ -81,12 +83,12 @@
          */
         movs        r12, r3, lsl #31
         sub         r2, r2, r3      /* we know that r3 <= r2 because r2 >= 4 */
-        ldrmib      r3, [r1], #1
-        ldrcsb      r4, [r1], #1
-        ldrcsb      r12,[r1], #1
-        strmib      r3, [r0], #1
-        strcsb      r4, [r0], #1
-        strcsb      r12,[r0], #1
+        ldrbmi      r3, [r1], #1
+        ldrbcs      r4, [r1], #1
+        ldrbcs      r12,[r1], #1
+        strbmi      r3, [r0], #1
+        strbcs      r4, [r0], #1
+        strbcs      r12,[r0], #1
 
 .Lsrc_aligned:
 
@@ -109,10 +111,10 @@
 
         /* conditionally copies 0 to 7 words (length in r3) */
         movs        r12, r3, lsl #28
-        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
-        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
-        stmcsia     r0!, {r4, r5, r6, r7}
-        stmmiia     r0!, {r8, r9}
+        ldmcs       r1!, {r4, r5, r6, r7}   /* 16 bytes */
+        ldmmi       r1!, {r8, r9}           /*  8 bytes */
+        stmcs       r0!, {r4, r5, r6, r7}
+        stmmi       r0!, {r8, r9}
         tst         r3, #0x4
         ldrne       r10,[r1], #4            /*  4 bytes */
         strne       r10,[r0], #4
@@ -177,18 +179,18 @@
 
         /* conditionally copies 0 to 31 bytes */
         movs        r12, r2, lsl #28
-        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
-        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
-        stmcsia     r0!, {r4, r5, r6, r7}
-        stmmiia     r0!, {r8, r9}
+        ldmcs       r1!, {r4, r5, r6, r7}   /* 16 bytes */
+        ldmmi       r1!, {r8, r9}           /*  8 bytes */
+        stmcs       r0!, {r4, r5, r6, r7}
+        stmmi       r0!, {r8, r9}
         movs        r12, r2, lsl #30
         ldrcs       r3, [r1], #4            /*  4 bytes */
-        ldrmih      r4, [r1], #2            /*  2 bytes */
+        ldrhmi      r4, [r1], #2            /*  2 bytes */
         strcs       r3, [r0], #4
-        strmih      r4, [r0], #2
+        strhmi      r4, [r0], #2
         tst         r2, #0x1
-        ldrneb      r3, [r1]                /*  last byte  */
-        strneb      r3, [r0]
+        ldrbne      r3, [r1]                /*  last byte  */
+        strbne      r3, [r0]
 
         /* we're done! restore everything and return */
 1:      ldmfd       sp!, {r5-r11}
@@ -228,11 +230,11 @@
          * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
          */
         movs        r5, r5, lsl #31
-        strmib      r3, [r0], #1
+        strbmi      r3, [r0], #1
         movmi       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
         movcs       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
         movcs       r3, r3, lsr #8
 
         cmp         r2, #4
@@ -363,23 +365,23 @@
 .Lpartial_word_tail:
         /* we have a partial word in the input buffer */
         movs        r5, lr, lsl #(31-3)
-        strmib      r3, [r0], #1
+        strbmi      r3, [r0], #1
         movmi       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
         movcs       r3, r3, lsr #8
-        strcsb      r3, [r0], #1
+        strbcs      r3, [r0], #1
 
         /* Refill spilled registers from the stack. Don't update sp. */
         ldmfd       sp, {r5-r11}
 
 .Lcopy_last_3_and_return:
         movs        r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
-        ldrmib      r2, [r1], #1
-        ldrcsb      r3, [r1], #1
-        ldrcsb      r12,[r1]
-        strmib      r2, [r0], #1
-        strcsb      r3, [r0], #1
-        strcsb      r12,[r0]
+        ldrbmi      r2, [r1], #1
+        ldrbcs      r3, [r1], #1
+        ldrbcs      r12,[r1]
+        strbmi      r2, [r0], #1
+        strbcs      r3, [r0], #1
+        strbcs      r12,[r0]
 
         /* we're done! restore sp and spilled registers and return */
         add         sp,  sp, #28
diff --git a/libc/arch-arm/generic/bionic/memset.S b/libc/arch-arm/generic/bionic/memset.S
index be35de9..d17a9c4 100644
--- a/libc/arch-arm/generic/bionic/memset.S
+++ b/libc/arch-arm/generic/bionic/memset.S
@@ -35,6 +35,8 @@
          * memset() returns its first argument.
          */
 
+         .syntax unified
+
 ENTRY(__memset_chk)
         cmp         r2, r3
         bls         done
@@ -76,11 +78,11 @@
         orr         r1, r1, r1, lsr #16
 
         movs        r12, r3, lsl #31
-        strcsb      r1, [r0], #1    /* can't use strh (alignment unknown) */
-        strcsb      r1, [r0], #1
-        strmib      r1, [r0], #1
+        strbcs      r1, [r0], #1    /* can't use strh (alignment unknown) */
+        strbcs      r1, [r0], #1
+        strbmi      r1, [r0], #1
         subs        r2, r2, r3
-        ldmlsfd     sp!, {r0, r4-r7, lr}    /* return */
+        popls       {r0, r4-r7, lr}    /* return */
         bxls        lr
 
         /* align the destination to a cache-line */
@@ -100,9 +102,9 @@
 
         /* conditionally writes 0 to 7 words (length in r3) */
         movs        r3, r3, lsl #28
-        stmcsia     r0!, {r1, lr}
-        stmcsia     r0!, {r1, lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmcs       r0!, {r1, lr}
+        stmmi       r0!, {r1, lr}
         movs        r3, r3, lsl #2
         strcs       r1, [r0], #4
 
@@ -117,13 +119,13 @@
 
         /* conditionally stores 0 to 31 bytes */
         movs        r2, r2, lsl #28
-        stmcsia     r0!, {r1,r3,r12,lr}
-        stmmiia     r0!, {r1, lr}
+        stmcs       r0!, {r1,r3,r12,lr}
+        stmmi       r0!, {r1, lr}
         movs        r2, r2, lsl #2
         strcs       r1, [r0], #4
-        strmih      r1, [r0], #2
+        strhmi      r1, [r0], #2
         movs        r2, r2, lsl #2
-        strcsb      r1, [r0]
+        strbcs      r1, [r0]
         ldmfd       sp!, {r0, r4-r7, lr}
         bx          lr
 END(memset)
diff --git a/libc/arch-arm/generic/bionic/strcpy.S b/libc/arch-arm/generic/bionic/strcpy.S
index 802a62d..89ea098 100644
--- a/libc/arch-arm/generic/bionic/strcpy.S
+++ b/libc/arch-arm/generic/bionic/strcpy.S
@@ -32,6 +32,8 @@
 #include <machine/cpu-features.h>
 #include <private/bionic_asm.h>
 
+.syntax unified
+
 ENTRY(strcpy)
 	pld	[r1, #0]
 	eor	r2, r0, r1
@@ -108,15 +110,15 @@
 #ifdef __ARMEB__
 	tst	r2, #0xff00
 	iteet	ne
-	strneh	r2, [ip], #2
+	strhne	r2, [ip], #2
 	lsreq	r2, r2, #8
-	streqb	r2, [ip]
+	strbeq	r2, [ip]
 	tstne	r2, #0xff
 #else
 	tst	r2, #0xff
 	itet	ne
-	strneh	r2, [ip], #2
-	streqb	r2, [ip]
+	strhne	r2, [ip], #2
+	strbeq	r2, [ip]
 	tstne	r2, #0xff00
 #endif
 	bne	5b
diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S
index e9f6431..a4fbe17 100644
--- a/libc/arch-arm/krait/bionic/memset.S
+++ b/libc/arch-arm/krait/bionic/memset.S
@@ -37,6 +37,7 @@
  */
 
     .fpu    neon
+    .syntax unified
 
 ENTRY(__memset_chk)
         cmp         r2, r3
@@ -98,9 +99,9 @@
 1:      bge         2f
         vst1.32     {d0[0]}, [r0]!
 2:      movs        ip, r2, lsl #31
-        strmib      r1, [r0], #1
-        strcsb      r1, [r0], #1
-        strcsb      r1, [r0], #1
+        strbmi      r1, [r0], #1
+        strbcs      r1, [r0], #1
+        strbcs      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
 END(memset)
diff --git a/libc/arch-arm64/generic/bionic/memchr.S b/libc/arch-arm64/generic/bionic/memchr.S
index e5ea57d..a00dd8d 100644
--- a/libc/arch-arm64/generic/bionic/memchr.S
+++ b/libc/arch-arm64/generic/bionic/memchr.S
@@ -101,7 +101,7 @@
 	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
 	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
 	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
-	mov	synd, vend.2d[0]
+	mov	synd, vend.d[0]
 	/* Clear the soff*2 lower bits */
 	lsl	tmp, soff, #1
 	lsr	synd, synd, tmp
@@ -121,7 +121,7 @@
 	/* Use a fast check for the termination condition */
 	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
 	addp	vend.2d, vend.2d, vend.2d
-	mov	synd, vend.2d[0]
+	mov	synd, vend.d[0]
 	/* We're not out of data, loop if we haven't found the character */
 	cbz	synd, .Lloop
 
@@ -131,7 +131,7 @@
 	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
 	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
 	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
-	mov	synd, vend.2d[0]
+	mov	synd, vend.d[0]
 	/* Only do the clear for the last possible block */
 	b.hi	.Ltail
 
diff --git a/libc/arch-arm64/generic/bionic/strchr.S b/libc/arch-arm64/generic/bionic/strchr.S
index 469b83c..b54106d 100644
--- a/libc/arch-arm64/generic/bionic/strchr.S
+++ b/libc/arch-arm64/generic/bionic/strchr.S
@@ -109,7 +109,7 @@
 	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
 	lsr	tmp1, tmp3, tmp1
 
-	mov	tmp3, vend1.2d[0]
+	mov	tmp3, vend1.d[0]
 	bic	tmp1, tmp3, tmp1	// Mask padding bits.
 	cbnz	tmp1, .Ltail
 
@@ -124,7 +124,7 @@
 	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
 	orr	vend1.16b, vend1.16b, vend2.16b
 	addp	vend1.2d, vend1.2d, vend1.2d
-	mov	tmp1, vend1.2d[0]
+	mov	tmp1, vend1.d[0]
 	cbz	tmp1, .Lloop
 
 	/* Termination condition found.  Now need to establish exactly why
@@ -138,7 +138,7 @@
 	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
 	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
 
-	mov	tmp1, vend1.2d[0]
+	mov	tmp1, vend1.d[0]
 .Ltail:
 	/* Count the trailing zeros, by bit reversing...  */
 	rbit	tmp1, tmp1