system/core: rename aarch64 target to arm64

Rename aarch64 build targets to arm64.  The gcc toolchain is still
aarch64.

Change-Id: Ia92d8a50824e5329cf00fd6f4f92eae112b7f3a3
diff --git a/libpixelflinger/arch-arm64/col32cb16blend.S b/libpixelflinger/arch-arm64/col32cb16blend.S
new file mode 100644
index 0000000..18a01fd
--- /dev/null
+++ b/libpixelflinger/arch-arm64/col32cb16blend.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+    .text
+    .align
+
+    .global scanline_col32cb16blend_arm64
+
+//
+// This function alpha blends a fixed color into a destination scanline, using
+// the formula:
+//
+//     d = s + (((a + (a >> 7)) * d) >> 8)
+//
+// where d is the destination pixel,
+//       s is the source color,
+//       a is the alpha channel of the source color.
+//
+
+// x0 = destination buffer pointer
+// w1 = color value
+// w2 = count
+
+
+scanline_col32cb16blend_arm64:
+
+    lsr         w5, w1, #24                     // shift down alpha
+    mov         w9, #0xff                       // create mask
+    add         w5, w5, w5, lsr #7              // add in top bit
+    mov         w4, #256                        // create #0x100
+    sub         w5, w4, w5                      // invert alpha
+    and         w10, w1, #0xff                  // extract red
+    and         w12, w9, w1, lsr #8             // extract green
+    and         w4,  w9, w1, lsr #16            // extract blue
+    lsl         w10, w10, #5                    // prescale red
+    lsl         w12, w12, #6                    // prescale green
+    lsl         w4,  w4,  #5                    // prescale blue
+    lsr         w9,  w9,  #2                    // create dest green mask
+
+1:
+    ldrh        w8, [x0]                        // load dest pixel
+    subs        w2, w2, #1                      // decrement loop counter
+    lsr         w6, w8, #11                     // extract dest red
+    and         w7, w9, w8, lsr #5              // extract dest green
+    and         w8, w8, #0x1f                   // extract dest blue
+
+    madd        w6, w6, w5, w10                 // dest red * alpha + src red
+    madd        w7, w7, w5, w12                 // dest green * alpha + src green
+    madd        w8, w8, w5, w4                  // dest blue * alpha + src blue
+
+    lsr         w6, w6, #8                      // shift down red
+    lsr         w7, w7, #8                      // shift down green
+    lsl         w6, w6, #11                     // shift red into 565
+    orr         w6, w6, w7, lsl #5              // shift green into 565
+    orr         w6, w6, w8, lsr #8              // shift blue into 565
+
+    strh        w6, [x0], #2                    // store pixel to dest, update ptr
+    b.ne        1b                              // if count != 0, loop
+
+    ret
+
+
+
diff --git a/libpixelflinger/arch-arm64/t32cb16blend.S b/libpixelflinger/arch-arm64/t32cb16blend.S
new file mode 100644
index 0000000..7da8cf5
--- /dev/null
+++ b/libpixelflinger/arch-arm64/t32cb16blend.S
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+    .text
+    .align
+
+    .global scanline_t32cb16blend_arm64
+
+/*
+ * .macro pixel
+ *
+ *  This macro alpha blends RGB565 original pixel located in either
+ *  top or bottom 16 bits of DREG register with SRC 32 bit pixel value
+ *  and writes the result to FB register
+ *
+ * \DREG is a 32-bit register containing *two* original destination RGB565
+ *       pixels, with the even one in the low-16 bits, and the odd one in the
+ *       high 16 bits.
+ *
+ * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
+ *
+ * \FB is a target register that will contain the blended pixel values.
+ *
+ * \ODD is either 0 or 1 and indicates if we're blending the lower or
+ *      upper 16-bit pixels in DREG into FB
+ *
+ *
+ * clobbered: w6, w7, w16, w17, w18
+ *
+ */
+
+.macro pixel,   DREG, SRC, FB, ODD
+
+    // SRC = 0xAABBGGRR
+    lsr     w7, \SRC, #24               // sA
+    add     w7, w7, w7, lsr #7          // sA + (sA >> 7)
+    mov     w6, #0x100
+    sub     w7, w6, w7                  // sA = 0x100 - (sA+(sA>>7))
+
+1:
+
+.if \ODD //Blending odd pixel present in top 16 bits of DREG register
+
+    // red
+    lsr     w16, \DREG, #(16 + 11)
+    mul     w16, w7, w16
+    lsr     w6, \SRC, #3
+    and     w6, w6, #0x1F
+    add     w16, w6, w16, lsr #8
+    cmp     w16, #0x1F
+    orr     w17, \FB, #(0x1F<<(16 + 11))
+    orr     w18, \FB, w16, lsl #(16 + 11)
+    csel    \FB, w17, w18, hi
+        // green
+        and     w6, \DREG, #(0x3F<<(16 + 5))
+        lsr     w17,w6,#(16+5)
+        mul     w6, w7, w17
+        lsr     w16, \SRC, #(8+2)
+        and     w16, w16, #0x3F
+        add     w6, w16, w6, lsr #8
+        cmp     w6, #0x3F
+        orr     w17, \FB, #(0x3F<<(16 + 5))
+        orr     w18, \FB, w6, lsl #(16 + 5)
+        csel    \FB, w17, w18, hi
+            // blue
+            and     w16, \DREG, #(0x1F << 16)
+            lsr     w17,w16,#16
+            mul     w16, w7, w17
+            lsr     w6, \SRC, #(8+8+3)
+            and     w6, w6, #0x1F
+            add     w16, w6, w16, lsr #8
+            cmp     w16, #0x1F
+            orr     w17, \FB, #(0x1F << 16)
+            orr     w18, \FB, w16, lsl #16
+            csel    \FB, w17, w18, hi
+
+.else //Blending even pixel present in bottom 16 bits of DREG register
+
+    // red
+    lsr     w16, \DREG, #11
+    and     w16, w16, #0x1F
+    mul     w16, w7, w16
+    lsr     w6, \SRC, #3
+    and     w6, w6, #0x1F
+    add     w16, w6, w16, lsr #8
+    cmp     w16, #0x1F
+    mov     w17, #(0x1F<<11)
+    lsl     w18, w16, #11
+    csel    \FB, w17, w18, hi
+
+
+        // green
+        and     w6, \DREG, #(0x3F<<5)
+        mul     w6, w7, w6
+        lsr     w16, \SRC, #(8+2)
+        and     w16, w16, #0x3F
+        add     w6, w16, w6, lsr #(5+8)
+        cmp     w6, #0x3F
+        orr     w17, \FB, #(0x3F<<5)
+        orr     w18, \FB, w6, lsl #5
+        csel    \FB, w17, w18, hi
+
+            // blue
+            and     w16, \DREG, #0x1F
+            mul     w16, w7, w16
+            lsr     w6, \SRC, #(8+8+3)
+            and     w6, w6, #0x1F
+            add     w16, w6, w16, lsr #8
+            cmp     w16, #0x1F
+            orr     w17, \FB, #0x1F
+            orr     w18, \FB, w16
+            csel    \FB, w17, w18, hi
+
+.endif // End of blending even pixel
+
+.endm // End of pixel macro
+
+
+// x0:  dst ptr
+// x1:  src ptr
+// w2:  count
+// w3:  d
+// w4:  s0
+// w5:  s1
+// w6:  pixel
+// w7:  pixel
+// w8:  free
+// w9:  free
+// w10: free
+// w11: free
+// w12: scratch
+// w14: pixel
+
+scanline_t32cb16blend_arm64:
+
+    // align DST to 32 bits
+    tst     x0, #0x3
+    b.eq    aligned
+    subs    w2, w2, #1
+    b.lo    return
+
+last:
+    ldr     w4, [x1], #4
+    ldrh    w3, [x0]
+    pixel   w3, w4, w12, 0
+    strh    w12, [x0], #2
+
+aligned:
+    subs    w2, w2, #2
+    b.lo    9f
+
+    // The main loop is unrolled twice and processes 4 pixels
+8:
+    ldp   w4,w5, [x1], #8
+    add     x0, x0, #4
+    // it's all zero, skip this pixel
+    orr     w3, w4, w5
+    cbz     w3, 7f
+
+    // load the destination
+    ldr     w3, [x0, #-4]
+    // stream the destination
+    pixel   w3, w4, w12, 0
+    pixel   w3, w5, w12, 1
+    str     w12, [x0, #-4]
+
+    // 2nd iteration of the loop, don't stream anything
+    subs    w2, w2, #2
+    csel    w4, w5, w4, lt
+    blt     9f
+    ldp     w4,w5, [x1], #8
+    add     x0, x0, #4
+    orr     w3, w4, w5
+    cbz     w3, 7f
+    ldr     w3, [x0, #-4]
+    pixel   w3, w4, w12, 0
+    pixel   w3, w5, w12, 1
+    str     w12, [x0, #-4]
+
+7:  subs    w2, w2, #2
+    bhs     8b
+    mov     w4, w5
+
+9:  adds    w2, w2, #1
+    b.lo    return
+    b       last
+
+return:
+    ret