/*
 * Copyright (c) 2013
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef __ANDROID__
# include <private/bionic_asm.h>
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Check to see if the MIPS architecture we are compiling for supports
   prefetching.  */

#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
# ifndef DISABLE_PREFETCH
#  define USE_PREFETCH
# endif
#endif

#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
# ifndef DISABLE_DOUBLE
#  define USE_DOUBLE
# endif
#endif

#ifndef USE_DOUBLE
# ifndef DISABLE_DOUBLE_ALIGN
#  define DOUBLE_ALIGN
# endif
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU	addiu
# else
#  define PTR_ADDIU	daddiu
# endif
#endif

/* New R6 instructions that may not be in asm.h.  */
#ifndef PTR_LSA
# if _MIPS_SIM == _ABIO32
#  define PTR_LSA	lsa
# else
#  define PTR_LSA	dlsa
# endif
#endif

/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
   or PREFETCH_STORE_STREAMED offers a large performance advantage
   but PREPAREFORSTORE has some special restrictions to consider.

   Prefetch with the 'prepare for store' hint does not copy a memory
   location into the cache, it just allocates a cache line and zeros
   it out.  This means that if you do not write to the entire cache
   line before writing it out to memory some data will get zeroed out
   when the cache line is written back to memory and data will be lost.

   There are ifdef'ed sections of this memset to make sure that it does not
   do prefetches on cache lines that are not going to be completely written.
   This code is only needed and only used when PREFETCH_STORE_HINT is set to
   PREFETCH_HINT_PREPAREFORSTORE.  This code assumes that cache lines are
   less than MAX_PREFETCH_SIZE bytes and if the cache line is larger it will
   not work correctly.  */
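
/* Illustrative sketch of the hazard (assumptions: a 32-byte cache line and a
   C-like model of write-back; not part of the build):

       char line[32];
       zero_line(line);         - prepare-for-store allocates and zeroes the line
       store_bytes(line, 16);   - but only half of the line is then written
       write_back(line);        - eviction sends 16 unintended zero bytes to memory

   zero_line/store_bytes/write_back are stand-ins for hardware behaviour, not
   real functions.  This is why the code below stops issuing PREPAREFORSTORE
   prefetches near the end of the buffer (see PREFETCH_LIMIT).  */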

#ifdef USE_PREFETCH
# define PREFETCH_HINT_STORE		1
# define PREFETCH_HINT_STORE_STREAMED	5
# define PREFETCH_HINT_STORE_RETAINED	7
# define PREFETCH_HINT_PREPAREFORSTORE	30

/* If we have not picked out what hints to use at this point use the
   standard load and store prefetch hints.  */
# ifndef PREFETCH_STORE_HINT
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
# endif

/* We double everything when USE_DOUBLE is true so we do 2 prefetches to
   get 64 bytes in that case.  The assumption is that each individual
   prefetch brings in 32 bytes.  */
# ifdef USE_DOUBLE
#  define PREFETCH_CHUNK 64
#  define PREFETCH_FOR_STORE(chunk, reg) \
 pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
 pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
# else
#  define PREFETCH_CHUNK 32
#  define PREFETCH_FOR_STORE(chunk, reg) \
 pref PREFETCH_STORE_HINT, (chunk)*32(reg)
# endif

/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less
   than PREFETCH_CHUNK, the assumed size of each prefetch.  If the real size
   of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
   hint is used, the code will not work correctly.  If PREPAREFORSTORE is not
   used then MAX_PREFETCH_SIZE does not matter.  */
# define MAX_PREFETCH_SIZE 128
/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
   than 5 on a STORE prefetch and that a single prefetch can never be larger
   than MAX_PREFETCH_SIZE.  We add the extra 32 when USE_DOUBLE is set because
   we actually do two prefetches in that case, one 32 bytes after the other.  */
# ifdef USE_DOUBLE
#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
# else
#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
# endif
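
/* Worked numbers (a sketch using the defaults above; adjust if PREFETCH_CHUNK
   or MAX_PREFETCH_SIZE are overridden):
     USE_DOUBLE:  PREFETCH_CHUNK = 64, PREFETCH_LIMIT = 5*64 + 32 + 128 = 480
     otherwise:   PREFETCH_CHUNK = 32, PREFETCH_LIMIT = 5*32      + 128 = 288
   so the loop stops issuing PREPAREFORSTORE prefetches once the store pointer
   is within PREFETCH_LIMIT bytes of the end of the buffer.  */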

# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
    && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
/* We cannot handle this because the initial prefetches may fetch bytes that
   are before the buffer being copied.  We start copies with an offset
   of 4 to avoid this situation when using PREPAREFORSTORE.  */
#  error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
# endif
#else /* USE_PREFETCH not defined */
# define PREFETCH_FOR_STORE(offset, reg)
#endif

#if __mips_isa_rev > 5
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
#  undef PREFETCH_STORE_HINT
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
# endif
# define R6_CODE
#endif

/* We load/store 64 bits at a time when USE_DOUBLE is true.
   The C_ prefix stands for CHUNK and is used to avoid macro name
   conflicts with system header files.  */

#ifdef USE_DOUBLE
# define C_ST	sd
# if __MIPSEB
#  define C_STHI	sdl	/* high part is left in big-endian	*/
# else
#  define C_STHI	sdr	/* high part is right in little-endian	*/
# endif
#else
# define C_ST	sw
# if __MIPSEB
#  define C_STHI	swl	/* high part is left in big-endian	*/
# else
#  define C_STHI	swr	/* high part is right in little-endian	*/
# endif
#endif

/* Bookkeeping values for 32 vs. 64 bit mode.  */
#ifdef USE_DOUBLE
# define NSIZE 8
# define NSIZEMASK 0x3f
# define NSIZEDMASK 0x7f
#else
# define NSIZE 4
# define NSIZEMASK 0x1f
# define NSIZEDMASK 0x3f
#endif
#define UNIT(unit) ((unit)*NSIZE)
#define UNITM1(unit) (((unit)*NSIZE)-1)
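
/* For reference, a sketch of how these expand (values follow directly from
   NSIZE above):
     USE_DOUBLE:  NSIZE = 8, UNIT(16) = 128, NSIZEDMASK = 0x7f, NSIZEMASK = 0x3f
     otherwise:   NSIZE = 4, UNIT(16) =  64, NSIZEDMASK = 0x3f, NSIZEMASK = 0x1f
   UNITM1(n) simply expands to (n*NSIZE)-1.  */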

#ifdef __ANDROID__
LEAF(__memset_chk,0)
#else
LEAF(__memset_chk)
#endif
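
/* Descriptive note on the check below (not new behaviour): with dst in $a0,
   the fill byte in $a1, the count in $a2 and the known object size in $a3,
   branch to the plain memset when the count does not exceed the object size;
   otherwise tail-call __memset_chk_fail, which reports the buffer overflow.  */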
	.set	noreorder
	sltu	$t2, $a3, $a2
	beq	$t2, $zero, memset
	nop
	.cpsetup $t9, $t8, __memset_chk
	LA	$t9, __memset_chk_fail
	jr	$t9
	nop
	.set	reorder
END(__memset_chk)

#ifdef __ANDROID__
LEAF(memset,0)
#else
LEAF(memset)
#endif

	.set	nomips16
	.set	noreorder
/* If the size is less than 2*NSIZE (8 or 16), go to L(lastb).  Regardless of
   size, copy dst pointer to v0 for the return value.  */
	slti	$t2,$a2,(2 * NSIZE)
	bne	$t2,$zero,L(lastb)
	move	$v0,$a0

/* If memset value is not zero, we copy it to all the bytes in a 32 or 64
   bit word.  */
	beq	$a1,$zero,L(set0)		/* If memset value is zero no smear  */
	PTR_SUBU $a3,$zero,$a0
	nop

/* smear byte into 32 or 64 bit word */
#if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2)
# ifdef USE_DOUBLE
	dins	$a1, $a1, 8, 8        /* Replicate fill byte into half-word.  */
	dins	$a1, $a1, 16, 16      /* Replicate fill byte into word.       */
	dins	$a1, $a1, 32, 32      /* Replicate fill byte into dbl word.   */
# else
	ins	$a1, $a1, 8, 8        /* Replicate fill byte into half-word.  */
	ins	$a1, $a1, 16, 16      /* Replicate fill byte into word.       */
# endif
#else
# ifdef USE_DOUBLE
	and	$a1,0xff
	dsll	$t2,$a1,8
	or	$a1,$t2
	dsll	$t2,$a1,16
	or	$a1,$t2
	dsll	$t2,$a1,32
	or	$a1,$t2
# else
	and	$a1,0xff
	sll	$t2,$a1,8
	or	$a1,$t2
	sll	$t2,$a1,16
	or	$a1,$t2
# endif
#endif
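
/* Roughly equivalent C for the smear above (illustrative only; "fill" stands
   for the value built in $a1):
       fill &= 0xff;            ->  0x00000000000000ab
       fill |= fill << 8;       ->  0x000000000000abab
       fill |= fill << 16;      ->  0x00000000abababab
       fill |= fill << 32;      ->  0xabababababababab   (USE_DOUBLE only)
   The ins/dins path does the same replication with bit-field inserts.  */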

/* If the destination address is not aligned, do a partial store to get it
   aligned.  If it is already aligned just jump to L(aligned).  */
L(set0):
#ifndef R6_CODE
	andi	$t2,$a3,(NSIZE-1)	/* word-unaligned address?          */
	beq	$t2,$zero,L(aligned)	/* t2 is the unalignment count      */
	PTR_SUBU $a2,$a2,$t2
	C_STHI	$a1,0($a0)
	PTR_ADDU $a0,$a0,$t2
#else /* R6_CODE */
	andi	$t2,$a0,(NSIZE-1)
	lapc	$t9,L(atable)
	PTR_LSA	$t9,$t2,$t9,2
	jrc	$t9
L(atable):
	bc	L(aligned)
# ifdef USE_DOUBLE
	bc	L(lb7)
	bc	L(lb6)
	bc	L(lb5)
	bc	L(lb4)
# endif
	bc	L(lb3)
	bc	L(lb2)
	bc	L(lb1)
L(lb7):
	sb	$a1,6($a0)
L(lb6):
	sb	$a1,5($a0)
L(lb5):
	sb	$a1,4($a0)
L(lb4):
	sb	$a1,3($a0)
L(lb3):
	sb	$a1,2($a0)
L(lb2):
	sb	$a1,1($a0)
L(lb1):
	sb	$a1,0($a0)

	li	$t9,NSIZE
	subu	$t2,$t9,$t2
	PTR_SUBU $a2,$a2,$t2
	PTR_ADDU $a0,$a0,$t2
#endif /* R6_CODE */
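
/* How the R6 table works (descriptive sketch): $t2 holds the misalignment of
   $a0 (0..NSIZE-1) and each `bc` is 4 bytes, so PTR_LSA computes
   L(atable) + 4*$t2 and `jrc` dispatches into the table.  Entry 0 is the
   already-aligned case; entry k lands on L(lb<NSIZE-k>), and the fall-through
   `sb` chain then stores the NSIZE-k bytes needed to reach the next aligned
   address before the pointer and count are adjusted just above.  */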

L(aligned):
/* If USE_DOUBLE is not set we may still want to align the data on a 16
   byte boundary instead of an 8 byte boundary to maximize the opportunity
   of proAptiv chips to do memory bonding (combining two sequential 4
   byte stores into one 8 byte store).  We know there are at least 4 bytes
   left to store or we would have jumped to L(lastb) earlier in the code.  */
#ifdef DOUBLE_ALIGN
	andi	$t2,$a3,4
	beq	$t2,$zero,L(double_aligned)
	PTR_SUBU $a2,$a2,$t2
	sw	$a1,0($a0)
	PTR_ADDU $a0,$a0,$t2
L(double_aligned):
#endif
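
/* Sketch of the DOUBLE_ALIGN step (only when USE_DOUBLE is not set): $a3
   still holds the negated original dst from the smear setup, so
   "andi $t2,$a3,4" is nonzero exactly when the word-aligned dst is 4 mod 8;
   in that case one extra sw emits 4 bytes and advances dst onto an 8-byte
   boundary before the bulk loop.  */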

/* Now the destination is word (or double word) aligned.  Set a2 to count how
   many bytes we have to set after all the 64/128 byte chunks are done and a3
   to the dest pointer after all the 64/128 byte chunks have been stored.  We
   will loop, incrementing a0 until it equals a3.  */
	andi	$t8,$a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
	beq	$a2,$t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
	PTR_SUBU $a3,$a2,$t8	 /* subtract from a2 the remainder */
	PTR_ADDU $a3,$a0,$a3	 /* Now a3 is the final dst after loop */
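
/* Worked example (assuming USE_DOUBLE, so NSIZEDMASK = 0x7f): for a2 = 300,
   t8 = 300 & 0x7f = 44 tail bytes, a3 = a0 + 256, and L(loop16w) below runs
   twice, storing 128 bytes per iteration.  */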

/* When in the loop we may prefetch with the 'prepare to store' hint,
   in this case the a0+x should not be past the "t0-32" address.  This
   means: for x=128 the last "safe" a0 address is "t0-160".  Alternatively,
   for x=64 the last "safe" a0 address is "t0-96".  In the current version we
   will use "prefetch hint,128(a0)", so "t0-160" is the limit.  */
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	PTR_ADDU $t0,$a0,$a2		/* t0 is the "past the end" address */
	PTR_SUBU $t9,$t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
#endif
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
	PREFETCH_FOR_STORE (1, $a0)
	PREFETCH_FOR_STORE (2, $a0)
	PREFETCH_FOR_STORE (3, $a0)
#endif

L(loop16w):
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	sltu	$v1,$t9,$a0		/* If a0 > t9 don't use next prefetch */
	bgtz	$v1,L(skip_pref)
	nop
#endif
#ifndef R6_CODE
	PREFETCH_FOR_STORE (4, $a0)
	PREFETCH_FOR_STORE (5, $a0)
#else
	PREFETCH_FOR_STORE (2, $a0)
#endif
L(skip_pref):
	C_ST	$a1,UNIT(0)($a0)
	C_ST	$a1,UNIT(1)($a0)
	C_ST	$a1,UNIT(2)($a0)
	C_ST	$a1,UNIT(3)($a0)
	C_ST	$a1,UNIT(4)($a0)
	C_ST	$a1,UNIT(5)($a0)
	C_ST	$a1,UNIT(6)($a0)
	C_ST	$a1,UNIT(7)($a0)
	C_ST	$a1,UNIT(8)($a0)
	C_ST	$a1,UNIT(9)($a0)
	C_ST	$a1,UNIT(10)($a0)
	C_ST	$a1,UNIT(11)($a0)
	C_ST	$a1,UNIT(12)($a0)
	C_ST	$a1,UNIT(13)($a0)
	C_ST	$a1,UNIT(14)($a0)
	C_ST	$a1,UNIT(15)($a0)
	PTR_ADDIU $a0,$a0,UNIT(16)	/* adding 64/128 to dest */
	bne	$a0,$a3,L(loop16w)
	nop
	move	$a2,$t8
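
/* Roughly equivalent C for the main loop above (illustrative; word_t stands
   for uint32_t or uint64_t depending on USE_DOUBLE):
       while (dst != end) {                   - end is the a3 computed earlier
           for (int i = 0; i < 16; i++)
               ((word_t *)dst)[i] = fill;     - 16 stores of NSIZE bytes each
           dst += 16 * sizeof(word_t);        - 64 or 128 bytes per iteration
       }
   plus the prefetch-ahead handled by PREFETCH_FOR_STORE / L(skip_pref).  */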

/* Here we have dest word-aligned but less than 64-bytes or 128 bytes to go.
   Check for a 32(64) byte chunk and copy it if there is one.  Otherwise
   jump down to L(chk1w) to handle the tail end of the copy.  */
L(chkw):
	andi	$t8,$a2,NSIZEMASK	/* is there a 32-byte/64-byte chunk.  */
				/* t8 is the remainder count past 32 bytes */
	beq	$a2,$t8,L(chk1w)	/* when a2==t8, no 32-byte chunk */
	nop
	C_ST	$a1,UNIT(0)($a0)
	C_ST	$a1,UNIT(1)($a0)
	C_ST	$a1,UNIT(2)($a0)
	C_ST	$a1,UNIT(3)($a0)
	C_ST	$a1,UNIT(4)($a0)
	C_ST	$a1,UNIT(5)($a0)
	C_ST	$a1,UNIT(6)($a0)
	C_ST	$a1,UNIT(7)($a0)
	PTR_ADDIU $a0,$a0,UNIT(8)

/* Here we have less than 32(64) bytes to set.  Set up for a loop to
   copy one word (or double word) at a time.  Set a2 to count how many
   bytes we have to copy after all the word (or double word) chunks are
   copied and a3 to the dest pointer after all the (d)word chunks have
   been copied.  We will loop, incrementing a0 until a0 equals a3.  */
L(chk1w):
	andi	$a2,$t8,(NSIZE-1)	/* a2 is the remainder past one (d)word chunks */
	beq	$a2,$t8,L(lastb)
	PTR_SUBU $a3,$t8,$a2	/* a3 is count of bytes in one (d)word chunks */
	PTR_ADDU $a3,$a0,$a3	/* a3 is the dst address after loop */

/* copying in words (4-byte or 8 byte chunks) */
L(wordCopy_loop):
	PTR_ADDIU $a0,$a0,UNIT(1)
	bne	$a0,$a3,L(wordCopy_loop)
	C_ST	$a1,UNIT(-1)($a0)
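
/* Note on the loop above: under .set noreorder the C_ST sits in the branch
   delay slot, so it executes on every iteration and stores to UNIT(-1)($a0),
   i.e. the word the pointer has just stepped past.  */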

/* Copy the last 8 (or 16) bytes */
L(lastb):
	blez	$a2,L(leave)
	PTR_ADDU $a3,$a0,$a2	/* a3 is the last dst address */
L(lastbloop):
	PTR_ADDIU $a0,$a0,1
	bne	$a0,$a3,L(lastbloop)
	sb	$a1,-1($a0)
L(leave):
	j	$ra
	nop

	.set	at
	.set	reorder
END(memset)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (memset)
libc_hidden_builtin_def (__memset_chk)
# endif
#endif