blob: bff54aea3dccf5168db8b3f3156303a76f1f6e90 [file] [log] [blame]
/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
Bernhard Rosenkraenzer7e4fa562014-03-05 11:40:57 +010028
/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */
33
#include <private/bionic_asm.h>

/* L(name) expands to .Lname.  The .L prefix keeps branch targets local to
   this file instead of exporting them as symbols. */
#define L(l) .L ## l

/* Parameters and result.
   result (w0) is the 32-bit view of src1 (x0): writing it overwrites the
   first pointer argument, so it is only written on the way out. */
#define src1	x0
#define src2	x1
#define limit	x2
#define result	w0

/* Internal variables.
   data1/data2 hold the bytes loaded from src1/src2; the ...h aliases hold
   the second 8 bytes of a 16-byte pair load, and the ...w aliases are the
   32-bit views of data1/data2 used for 4-byte and 1-byte loads.
   tmp2 is not referenced by the code visible in this file. */
#define data1	x3
#define data1w	w3
#define data1h	x4
#define data2	x5
#define data2w	w5
#define data2h	x6
#define tmp1	x7
#define tmp2	x8
Bernhard Rosenkraenzer7e4fa562014-03-05 11:40:57 +010053
/* memcmp: compare limit bytes at src1 and src2.

   In:      src1 (x0), src2 (x1) = buffers, limit (x2) = byte count.
   Out:     result (w0) = 0 if all bytes are equal; otherwise negative if
            the first differing byte of src1 is the smaller one (bytes
            compared as unsigned) and positive if it is the larger one.
            The word-sized paths return exactly -1 or 1; the byte loop
            returns the difference of the two bytes.
   Scratch: data1, data1h, data2, data2h, tmp1 and the condition flags.

   Small inputs of less than 8 bytes are handled separately. This allows the
   main code to be sped up using unaligned loads since there are now at least
   8 bytes to be compared. If the first 8 bytes are equal, align src1.
   This ensures each iteration does at most one unaligned access even if both
   src1 and src2 are unaligned, and mutually aligned inputs behave as if
   aligned. After the main loop, process the last 16 bytes using unaligned
   accesses. */

ENTRY(memcmp)
	/* NOTE(review): this alignment directive follows ENTRY().  If ENTRY()
	   (from <private/bionic_asm.h>, not visible here) emits the function
	   label itself, the padding lands after the entry point - confirm. */
	.p2align 6
	subs	limit, limit, 8		/* limit = n - 8 */
	b.lo	L(less8)		/* n < 8 */

	/* Limit >= 8, so check first 8 bytes using unaligned loads. */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	subs	limit, limit, 8		/* limit = n - 16 */
	b.gt	L(more16)

	/* 8 <= n <= 16: limit is in [-8..0] and both pointers have advanced by
	   8, so [ptr, limit] addresses the final 8 bytes of each buffer
	   (possibly overlapping the 8 bytes already checked). */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* n > 16: check bytes 8..15. */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings. */
	subs	limit, limit, 16	/* limit = n - 32 */
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.
	   limit holds n - 32 here, so the threshold 96 corresponds to n = 128. */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  Both pointers
	   step back by the same misalignment and limit grows by it, so those
	   bytes are simply compared a second time. */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal. */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* If more than 16 bytes remained before the decrement (hi), compare the
	   low words; otherwise force NZCV = 0 (reads as "ne") to leave the loop. */
	ccmp	data1, data2, 0, hi
	/* Compare the high words only if the low words matched. */
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop ended on a mismatch or on the byte count; find out which by
	   re-checking both halves of the last pair loaded. */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is <= 0 here,
	   so adding it moves both pointers back to 16 bytes before the end. */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	/* Fall through: L(return) performs the final compare itself, so no
	   compare is needed here. */

	/* Compare data bytes and set return value to 0, -1 or 1. */
L(return):
#ifndef __AARCH64EB__
	/* Byte-reverse so the first byte in memory becomes the most significant
	   one and an unsigned compare orders the buffers bytewise. */
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	cset	result, ne		/* 0 if equal, else 1 */
	cneg	result, result, lo	/* negate when data1 < data2 (unsigned) */
	ret

	.p2align 4
	/* Compare up to 8 bytes. Limit is [-8..-1]. */
L(less8):
	adds	limit, limit, 4		/* limit = n - 4; no carry when n < 4 */
	b.lo	L(less4)
	/* 4 <= n <= 7: check the first 4 bytes.  Loads into the w registers
	   clear the upper halves, so the 64-bit compare at L(return) is valid. */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4		/* undo the bias so L(less4) yields n - 4 */
L(less4):
	adds	limit, limit, 4		/* limit = bytes still to compare (0..3) */
	b.eq	L(ret_eq)		/* nothing left: Z is set, so result is 0 */
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* Compare the bytes only while more remain; on the last byte force
	   "ne" so the loop exits with that byte pair still in the registers. */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000. */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w	/* 0 when the final bytes matched */
	ret

END(memcmp)