/* blob: 3a138bf720ede54f44e29c2573fd26e1b868e79f */
/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include <private/bionic_asm.h>

/* Parameters and result.  */
#define src1		x0	/* First buffer pointer; advanced as bytes are read.  */
#define src2		x1	/* Second buffer pointer; advanced in step with src1.  */
#define limit		x2	/* Byte count on entry; reused as a biased remaining-count.  */
#define result		w0	/* Return value; shares a register with src1.  */

/* Internal variables.  */
#define data1		x3	/* Bytes loaded from src1 (64-bit view).  */
#define data1w		w3	/* Same register, 32-bit view.  */
#define data2		x4	/* Bytes loaded from src2 (64-bit view).  */
#define data2w		w4	/* Same register, 32-bit view.  */
#define tmp1		x5	/* Scratch: low three bits of src1.  */

/* memcmp: compare the first `limit` bytes at src1 and src2.
 *
 * In:   x0 = src1, x1 = src2, x2 = limit (byte count).
 * Out:  w0 = 0 if all compared bytes match; otherwise negative if the first
 *       differing byte of src1 is below that of src2 (as unsigned bytes),
 *       positive if it is above.  The 4- and 8-byte paths return exactly
 *       -1 or 1; the byte loop returns the difference of the two bytes.
 * Uses: x0-x5 and the condition flags.  No stack access, no calls.
 *
 * Inputs shorter than 8 bytes take a separate path (.Lless8), so the main
 * path may always load 8 bytes at a time with unaligned accesses.  If the
 * first 8 bytes match, src1 is rounded down to an 8-byte boundary and src2
 * is moved back by the same amount; each loop iteration then performs at
 * most one unaligned load, and mutually aligned inputs behave as aligned.
 * The final 8 bytes are compared with one more unaligned load pair, which
 * may overlap bytes that were already compared.  */

	.p2align 6
ENTRY(memcmp)
	subs	limit, limit, 8		/* limit = n - 8; borrow => n < 8.  */
	b.lo	.Lless8

	/* Limit >= 8, so check first 8 bytes using unaligned loads.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	and	tmp1, src1, 7		/* tmp1 = src1 misalignment (0..7).  */
	add	limit, limit, tmp1	/* Bytes left once src1 is aligned down.  */
	cmp	data1, data2
	b.ne	.Lreturn

	/* Align src1 and adjust src2 with bytes not yet done.  The tmp1
	   bytes stepped back over are compared a second time.  */
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	subs	limit, limit, 8
	b.ls	.Llast_bytes		/* At most 8 bytes left: tail only.  */

	/* Loop performing 8 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 8 and must be larger than zero.
	   Exit if <= 8 bytes left to do or if the data is not equal.  */
	.p2align 4
.Lloop8:
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	subs	limit, limit, 8
	/* Compare the data only while limit is still > 0 (hi); otherwise
	   force NZCV = 0b0000, which reads as "not equal" and ends the loop.  */
	ccmp	data1, data2, 0, hi
	b.eq	.Lloop8

	/* The loop ends on a mismatch or on the count; tell them apart.  */
	cmp	data1, data2
	b.ne	.Lreturn

	/* Compare the final 8 bytes using unaligned access.  Limit is <= 0
	   here, so src1 + limit is 8 bytes before the end of the buffer.  */
.Llast_bytes:
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]

	/* Compare data bytes and set return value to 0, -1 or 1.  */
.Lreturn:
#ifndef __AARCH64EB__
	/* Not big-endian: byte-reverse so the lowest-addressed byte becomes
	   the most significant and an unsigned compare orders bytewise.  */
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
.Lret_eq:
	cset	result, ne		/* 0 if equal, else 1.  */
	cneg	result, result, lo	/* Negate when data1 < data2 (unsigned).  */
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
.Lless8:
	adds	limit, limit, 4		/* limit = n - 4; carry set => n >= 4.  */
	b.lo	.Lless4
	ldr	data1w, [src1], 4	/* Upper halves of data1/data2 are zero.  */
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	.Lreturn
	sub	limit, limit, 4		/* Undo the bias before .Lless4 re-adds it.  */
.Lless4:
	adds	limit, limit, 4		/* limit = bytes still to compare (0..3).  */
	b.eq	.Lret_eq		/* None left: Z is set, so result = 0.  */
.Lbyte_loop:
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* Compare the bytes only while limit != 0; otherwise force
	   NZCV = 0b0000 so the loop ends after the last byte.  */
	ccmp	data1w, data2w, 0, ne
	b.eq	.Lbyte_loop
	sub	result, data1w, data2w	/* Difference of the last bytes read.  */
	ret
END(memcmp)