/*
 * Copyright (C) 2023 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2023 SiFive, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY SIFIVE INC ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL SIFIVE INC BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
55
#if defined(__riscv_v)

#include "sys/asm.h"

/*
 * Register aliases.
 * a0 serves as both the first argument (s1) and the return value, per the
 * standard RISC-V calling convention.
 */
#define iResult a0              /* return: s1[i] - s2[i] at first end/diff */

#define pStr1 a0                /* running pointer into s1 */
#define pStr2 a1                /* running pointer into s2 */

#define iVL a2                  /* elements processed this chunk (vl) */
#define iTemp1 a3               /* index of first NUL in s1, -1 if none */
#define iTemp2 a4               /* index of first differing byte, -1 if none */
#define iLMUL1 a5               /* ramp constant 1 (set when armed) */
#define iLMUL2 a6               /* ramp constant 2 (set when armed) */
#define iLMUL4 a7               /* ramp constant 4 (set when armed) */

#define iLMUL t0                /* current LMUL ramp state */

#define vStr1 v0                /* chunk of s1 */
#define vStr2 v8                /* chunk of s2 */
#define vMask1 v16              /* mask: s1 byte == 0 */
#define vMask2 v17              /* mask: s1 byte != s2 byte */
/*
 * int strcmp_vext(const char *s1, const char *s2)
 *
 * ABI:   RISC-V with the V (vector) extension; standard integer calling
 *        convention (a0/a1 in, a0 out).
 * In:    a0 (pStr1) = s1, a1 (pStr2) = s2 — NUL-terminated strings.
 * Out:   a0 (iResult) = s1[i] - s2[i] (as unsigned bytes) at the first
 *        position i where the strings differ or s1 terminates, matching
 *        libc strcmp sign semantics.
 * Clobb: a2-a7, t0, v0, v8, v16, v17, vl/vtype.
 *
 * Strings are compared in vector chunks using fault-only-first loads
 * (vle8ff.v): a load that would cross into an inaccessible page shrinks
 * vl instead of trapping, so reading past the terminator is safe.
 */
ENTRY(strcmp_vext)

    # Grow LMUL across iterations using the following sequence:
    # 1/2, 1/2, 1, 2, 4, 4, 4, ...
    # so short strings use a small vector configuration and long strings
    # amortize the loop overhead at LMUL=4.

    # First chunk at LMUL=1/2.
    vsetvli iVL, zero, e8, mf2, ta, ma

    vle8ff.v vStr1, (pStr1)         # fault-only-first: may reduce vl
    # vMask1[i] = (vStr1[i] == 0): NUL terminator in s1?
    vmseq.vx vMask1, vStr1, zero

    vle8ff.v vStr2, (pStr2)
    # vMask2[i] = (vStr1[i] != vStr2[i]): bytes differ?
    vmsne.vv vMask2, vStr1, vStr2

    # iTemp1 = index of first set bit in vMask1 (-1 if none)
    vfirst.m iTemp1, vMask1
    # iTemp2 = index of first set bit in vMask2 (-1 if none)
    vfirst.m iTemp2, vMask2

    bgez iTemp1, L(check1)
    bgez iTemp2, L(check2)

    # Neither a terminator nor a difference in this chunk: advance both
    # pointers by the element count actually loaded (vle8ff may have
    # trimmed vl at a page boundary).
    csrr iVL, vl
    add pStr1, pStr1, iVL
    add pStr2, pStr2, iVL

    # Second chunk stays at LMUL=1/2; arm the ramp so the loop promotes
    # to LMUL=1 afterwards (iLMUL == iLMUL1 == 1).
    vsetvli iVL, zero, e8, mf2, ta, ma
    addi iLMUL1, zero, 1
    addi iLMUL, zero, 1
    j L(loop)
L(m1):
    # Promote to LMUL=1; next promotion target is LMUL=2.
    vsetvli iVL, zero, e8, m1, ta, ma
    addi iLMUL2, zero, 2
    addi iLMUL, zero, 2
    j L(loop)
L(m2):
    # Promote to LMUL=2; next promotion target is LMUL=4.
    vsetvli iVL, zero, e8, m2, ta, ma
    addi iLMUL4, zero, 4
    addi iLMUL, zero, 4
    j L(loop)
L(m4):
    # Terminal configuration: remain at LMUL=4 for all further chunks.
    vsetvli iVL, zero, e8, m4, ta, ma

L(loop):
    vle8ff.v vStr1, (pStr1)
    vmseq.vx vMask1, vStr1, zero    # NUL terminator in s1?

    vle8ff.v vStr2, (pStr2)
    vmsne.vv vMask2, vStr1, vStr2   # differing bytes?

    vfirst.m iTemp1, vMask1
    vfirst.m iTemp2, vMask2

    bgez iTemp1, L(check1)
    bgez iTemp2, L(check2)

    # Advance by the vl updated by vle8ff and pick the next LMUL:
    # 1 -> 2 -> 4, then keep iterating at 4.
    csrr iVL, vl
    add pStr1, pStr1, iVL
    add pStr2, pStr2, iVL

    beq iLMUL, iLMUL1, L(m1)
    beq iLMUL, iLMUL2, L(m2)
    beq iLMUL, iLMUL4, L(m4)
    j L(loop)

    // Reached with iTemp1 >= 0: s1 terminates inside this chunk.
L(check1):
    // A difference strictly before the terminator takes precedence:
    // if iTemp2 >= 0 and iTemp2 < iTemp1, resolve at the difference.
    bltz iTemp2, 1f
    blt iTemp2, iTemp1, L(check2)
1:
    // Either no difference in this chunk (iTemp2 < 0), or the first
    // difference is at/after the terminator (iTemp1 <= iTemp2):
    // compare the bytes at the terminator position.
    add pStr1, pStr1, iTemp1
    add pStr2, pStr2, iTemp1
    lbu iTemp1, 0(pStr1)
    lbu iTemp2, 0(pStr2)
    sub iResult, iTemp1, iTemp2     // s1 byte is 0 here: result is 0 or -s2[i]
    ret

    // Reached with iTemp2 >= 0 and either iTemp1 < 0, or (from L(check1))
    // iTemp2 < iTemp1: first event is a differing byte at index iTemp2.
L(check2):
    add pStr1, pStr1, iTemp2
    add pStr2, pStr2, iTemp2
    lbu iTemp1, 0(pStr1)
    lbu iTemp2, 0(pStr2)
    sub iResult, iTemp1, iTemp2
    ret

END(strcmp_vext)

#endif  // defined(__riscv_v)