/*
Copyright (C) 2019 The Android Open Source Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
 * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#ifndef WMEMSET
#define WMEMSET wmemset_avx2
#endif

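/*
 * wchar_t* wmemset(wchar_t* dst, wchar_t wc, size_t n): fills the first n
 * wide characters (4 bytes each on this target) of dst with wc and returns
 * dst. This AVX2 build broadcasts wc into ymm0 and fills with wide unaligned
 * stores, falling back to a 4-byte store loop for counts below 32 and for
 * the tail.
 */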
ENTRY(WMEMSET)
# BB#0:
        pushl %ebp
        pushl %ebx
        pushl %edi
        pushl %esi
        pushl %eax
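        # After the return address, the four saved registers, and the 4-byte
        # spill slot made by "pushl %eax", the arguments sit at fixed
        # offsets: dst at 24(%esp), wc at 28(%esp), n at 32(%esp).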
        movl 32(%esp), %ecx
        movl 24(%esp), %eax
        testl %ecx, %ecx
        je .LBB0_12
# BB#1:
        movl 28(%esp), %edx
        xorl %edi, %edi
        movl %eax, %esi
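        # Fewer than 32 wide chars: skip the vector path and use the scalar
        # 4-byte store loop at .LBB0_11.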
        cmpl $32, %ecx
        jb .LBB0_10
# BB#2:
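        # Vector path: round n down to a multiple of 32 wide chars and
        # broadcast wc across all eight 32-bit lanes of ymm0.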
        movl %ecx, %eax
        andl $-32, %eax
        vmovd %edx, %xmm0
        vpbroadcastd %xmm0, %ymm0
        movl %eax, (%esp) # 4-byte Spill
        leal -32(%eax), %esi
        movl %esi, %eax
        shrl $5, %eax
        leal 1(%eax), %edi
        andl $7, %edi
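        # eax + 1 is the number of 32-wchar (128-byte) blocks; edi is that
        # count mod 8. The 1 KiB main loop below handles eight blocks per
        # iteration and is entered only when at least eight blocks remain.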
        xorl %ebx, %ebx
        cmpl $224, %esi
        jb .LBB0_5
# BB#3:
        movl 24(%esp), %esi
        leal 992(%esi), %ebp
        leal -1(%edi), %esi
        subl %eax, %esi
        xorl %ebx, %ebx
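        # Main loop: 32 unaligned 32-byte stores fill 1 KiB (256 wide chars)
        # per iteration. ebp is biased to dst + 992 so the displacements run
        # -992..0, ebx counts wide chars already stored, and esi holds minus
        # eight times the remaining iteration count, stepping up by 8.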
        .p2align 4, 0x90
.LBB0_4: # =>This Inner Loop Header: Depth=1
        vmovdqu %ymm0, -992(%ebp,%ebx,4)
        vmovdqu %ymm0, -960(%ebp,%ebx,4)
        vmovdqu %ymm0, -928(%ebp,%ebx,4)
        vmovdqu %ymm0, -896(%ebp,%ebx,4)
        vmovdqu %ymm0, -864(%ebp,%ebx,4)
        vmovdqu %ymm0, -832(%ebp,%ebx,4)
        vmovdqu %ymm0, -800(%ebp,%ebx,4)
        vmovdqu %ymm0, -768(%ebp,%ebx,4)
        vmovdqu %ymm0, -736(%ebp,%ebx,4)
        vmovdqu %ymm0, -704(%ebp,%ebx,4)
        vmovdqu %ymm0, -672(%ebp,%ebx,4)
        vmovdqu %ymm0, -640(%ebp,%ebx,4)
        vmovdqu %ymm0, -608(%ebp,%ebx,4)
        vmovdqu %ymm0, -576(%ebp,%ebx,4)
        vmovdqu %ymm0, -544(%ebp,%ebx,4)
        vmovdqu %ymm0, -512(%ebp,%ebx,4)
        vmovdqu %ymm0, -480(%ebp,%ebx,4)
        vmovdqu %ymm0, -448(%ebp,%ebx,4)
        vmovdqu %ymm0, -416(%ebp,%ebx,4)
        vmovdqu %ymm0, -384(%ebp,%ebx,4)
        vmovdqu %ymm0, -352(%ebp,%ebx,4)
        vmovdqu %ymm0, -320(%ebp,%ebx,4)
        vmovdqu %ymm0, -288(%ebp,%ebx,4)
        vmovdqu %ymm0, -256(%ebp,%ebx,4)
        vmovdqu %ymm0, -224(%ebp,%ebx,4)
        vmovdqu %ymm0, -192(%ebp,%ebx,4)
        vmovdqu %ymm0, -160(%ebp,%ebx,4)
        vmovdqu %ymm0, -128(%ebp,%ebx,4)
        vmovdqu %ymm0, -96(%ebp,%ebx,4)
        vmovdqu %ymm0, -64(%ebp,%ebx,4)
        vmovdqu %ymm0, -32(%ebp,%ebx,4)
        vmovdqu %ymm0, (%ebp,%ebx,4)
        addl $256, %ebx # imm = 0x100
        addl $8, %esi
        jne .LBB0_4
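        # Reload dst into eax (the return value) and skip the 128-byte loop
        # when no remainder blocks are left.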
.LBB0_5:
        testl %edi, %edi
        movl 24(%esp), %eax
        je .LBB0_8
# BB#6:
        leal (%eax,%ebx,4), %esi
        addl $96, %esi
        negl %edi
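        # Remainder blocks: edi iterations of four 32-byte stores (128 bytes,
        # i.e. 32 wide chars, each); esi is biased 96 bytes past the block
        # start so the displacements run -96..0.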
        .p2align 4, 0x90
.LBB0_7: # =>This Inner Loop Header: Depth=1
        vmovdqu %ymm0, -96(%esi)
        vmovdqu %ymm0, -64(%esi)
        vmovdqu %ymm0, -32(%esi)
        vmovdqu %ymm0, (%esi)
        subl $-128, %esi
        addl $1, %edi
        jne .LBB0_7
.LBB0_8:
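        # Reload the count handled by the vector loops; finished if it equals
        # n, otherwise point esi at the first unwritten wide char.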
        movl (%esp), %edi # 4-byte Reload
        cmpl %ecx, %edi
        je .LBB0_12
# BB#9:
        leal (%eax,%edi,4), %esi
.LBB0_10:
        subl %edi, %ecx
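        # Scalar tail: store one 4-byte wide char per iteration for the
        # remaining ecx = n - edi elements.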
        .p2align 4, 0x90
.LBB0_11: # =>This Inner Loop Header: Depth=1
        movl %edx, (%esi)
        addl $4, %esi
        addl $-1, %ecx
        jne .LBB0_11
.LBB0_12:
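        # Epilogue: drop the 4-byte spill slot, restore the saved registers,
        # clear the upper ymm state before returning to SSE-era callers, and
        # return dst in eax.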
        addl $4, %esp
        popl %esi
        popl %edi
        popl %ebx
        popl %ebp
        vzeroupper
        retl
END(WMEMSET)