Shalini Salomi Bodapati | 4ed2f47 | 2019-06-13 09:54:08 +0530 | [diff] [blame^] | 1 | /* |
| 2 | Copyright (C) 2019 The Android Open Source Project |
| 3 | All rights reserved. |
| 4 | |
| 5 | Redistribution and use in source and binary forms, with or without |
| 6 | modification, are permitted provided that the following conditions |
| 7 | are met: |
| 8 | * Redistributions of source code must retain the above copyright |
| 9 | notice, this list of conditions and the following disclaimer. |
| 10 | * Redistributions in binary form must reproduce the above copyright |
| 11 | notice, this list of conditions and the following disclaimer in |
| 12 | the documentation and/or other materials provided with the |
| 13 | distribution. |
| 14 | |
| 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 18 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 19 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 20 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 21 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
| 22 | OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| 23 | AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| 25 | OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 26 | SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include <private/bionic_asm.h> |
| 30 | |
| 31 | #ifndef WMEMSET |
| 32 | #define WMEMSET wmemset_avx2 |
| 33 | #endif |
| 34 | |
| 35 | .section .text.avx2,"ax",@progbits |
| 36 | |
/*-----------------------------------------------------------------------
 * wchar_t *wmemset(wchar_t *dst, wchar_t wc, size_t n)   [AVX2 variant]
 * ABI:   SysV AMD64 (Android bionic; wchar_t is 32-bit)
 * In:    rdi = dst, esi = wc (32-bit fill value), rdx = n (element count)
 * Out:   rax = dst
 * Clobb: rcx, r8, r9, r10, ymm0, flags
 *
 * Strategy (compiler-generated code shape):
 *   - n == 0: return immediately.
 *   - n < 32: skip vectorization, scalar 4-byte store loop only.
 *   - else:   r8 = n & ~31 elements are written with 32-byte ymm stores:
 *             an 8x-unrolled loop (256 elements / 1024 bytes per trip),
 *             then a remainder loop (32 elements / 128 bytes per trip),
 *             then a scalar loop for the final n - r8 elements.
 *---------------------------------------------------------------------*/
ENTRY (WMEMSET)
# BB#0:
	testq	%rdx, %rdx		# n == 0?
	je	.LBB0_14		# yes: nothing to store, return dst
# BB#1:
	cmpq	$32, %rdx		# fewer than 32 elements (one ymm trip)?
	jae	.LBB0_3
# BB#2:				# small case: no vector work at all
	xorl	%r8d, %r8d		# r8 = 0 elements handled by vector code
	movq	%rdi, %rax		# rax = current store pointer = dst
	jmp	.LBB0_12		# go straight to the scalar tail loop
.LBB0_3:
	movq	%rdx, %r8
	andq	$-32, %r8		# r8 = n rounded down to multiple of 32
					#      (element count covered by ymm stores)
	vmovd	%esi, %xmm0
	vpbroadcastd	%xmm0, %ymm0	# ymm0 = wc replicated into 8 x 32-bit lanes
	leaq	-32(%r8), %rcx		# rcx = (chunks-1) * 32 elements
	movq	%rcx, %rax
	shrq	$5, %rax		# rax = chunks - 1, where chunks = r8/32
	leal	1(%rax), %r9d
	andl	$7, %r9d		# r9 = chunks % 8 (trips for remainder loop)
	cmpq	$224, %rcx		# chunks >= 8? (224 = 7*32 elements)
	jae	.LBB0_5			# yes: run the 8x-unrolled main loop
# BB#4:				# < 8 chunks: unrolled loop skipped entirely
	xorl	%eax, %eax		# rax = 0 elements already stored
	testq	%r9, %r9
	jne	.LBB0_8			# some 32-element chunks remain
	jmp	.LBB0_10		# r9 == 0 can't happen here w/ r8 >= 32,
					# but compiler keeps the symmetric exit
.LBB0_5:
	leaq	992(%rdi), %rcx		# rcx = dst + 992 bytes: base biased to the
					# LAST of the 32 stores below (31*32 = 992)
	leaq	-1(%r9), %r10
	subq	%rax, %r10		# r10 = (chunks%8) - chunks
					#     = -(number of unrolled trips * 8)
	xorl	%eax, %eax		# rax = element index, advances 256/trip
	.p2align	4, 0x90
.LBB0_6:				# =>This Inner Loop Header: Depth=1
					# 32 x 32-byte stores = 1024 bytes =
					# 256 elements per iteration
	vmovdqu	%ymm0, -992(%rcx,%rax,4)
	vmovdqu	%ymm0, -960(%rcx,%rax,4)
	vmovdqu	%ymm0, -928(%rcx,%rax,4)
	vmovdqu	%ymm0, -896(%rcx,%rax,4)
	vmovdqu	%ymm0, -864(%rcx,%rax,4)
	vmovdqu	%ymm0, -832(%rcx,%rax,4)
	vmovdqu	%ymm0, -800(%rcx,%rax,4)
	vmovdqu	%ymm0, -768(%rcx,%rax,4)
	vmovdqu	%ymm0, -736(%rcx,%rax,4)
	vmovdqu	%ymm0, -704(%rcx,%rax,4)
	vmovdqu	%ymm0, -672(%rcx,%rax,4)
	vmovdqu	%ymm0, -640(%rcx,%rax,4)
	vmovdqu	%ymm0, -608(%rcx,%rax,4)
	vmovdqu	%ymm0, -576(%rcx,%rax,4)
	vmovdqu	%ymm0, -544(%rcx,%rax,4)
	vmovdqu	%ymm0, -512(%rcx,%rax,4)
	vmovdqu	%ymm0, -480(%rcx,%rax,4)
	vmovdqu	%ymm0, -448(%rcx,%rax,4)
	vmovdqu	%ymm0, -416(%rcx,%rax,4)
	vmovdqu	%ymm0, -384(%rcx,%rax,4)
	vmovdqu	%ymm0, -352(%rcx,%rax,4)
	vmovdqu	%ymm0, -320(%rcx,%rax,4)
	vmovdqu	%ymm0, -288(%rcx,%rax,4)
	vmovdqu	%ymm0, -256(%rcx,%rax,4)
	vmovdqu	%ymm0, -224(%rcx,%rax,4)
	vmovdqu	%ymm0, -192(%rcx,%rax,4)
	vmovdqu	%ymm0, -160(%rcx,%rax,4)
	vmovdqu	%ymm0, -128(%rcx,%rax,4)
	vmovdqu	%ymm0, -96(%rcx,%rax,4)
	vmovdqu	%ymm0, -64(%rcx,%rax,4)
	vmovdqu	%ymm0, -32(%rcx,%rax,4)
	vmovdqu	%ymm0, (%rcx,%rax,4)
	addq	$256, %rax              # imm = 0x100  (256 elements written)
	addq	$8, %r10		# negative trip counter toward zero
	jne	.LBB0_6
# BB#7:
	testq	%r9, %r9		# any leftover 32-element chunks?
	je	.LBB0_10
.LBB0_8:				# remainder loop setup: r9 in [1,7] chunks
	leaq	(%rdi,%rax,4), %rax	# rax = dst + elements_done
	addq	$96, %rax		# bias to the LAST of the 4 stores (3*32)
	negq	%r9			# negative trip counter
	.p2align	4, 0x90
.LBB0_9:				# =>This Inner Loop Header: Depth=1
					# 4 x 32-byte stores = 32 elements/trip
	vmovdqu	%ymm0, -96(%rax)
	vmovdqu	%ymm0, -64(%rax)
	vmovdqu	%ymm0, -32(%rax)
	vmovdqu	%ymm0, (%rax)
	subq	$-128, %rax		# advance 128 bytes (shorter encoding
					# than addq $128: -128 fits in imm8)
	addq	$1, %r9
	jne	.LBB0_9
.LBB0_10:
	cmpq	%rdx, %r8		# all n elements covered by vector code?
	je	.LBB0_14
# BB#11:
	leaq	(%rdi,%r8,4), %rax	# rax = first element not yet written
.LBB0_12:
	subq	%r8, %rdx		# rdx = remaining element count (> 0 here)
	.p2align	4, 0x90
.LBB0_13:				# =>This Inner Loop Header: Depth=1
					# scalar tail: one 4-byte store per trip
	movl	%esi, (%rax)
	addq	$4, %rax
	addq	$-1, %rdx
	jne	.LBB0_13
.LBB0_14:
	movq	%rdi, %rax		# return value: original dst
	vzeroupper			# SysV ABI: clear upper ymm state before
					# returning to possibly-SSE caller code
	retq
END(WMEMSET)