/*
Copyright (C) 2019 The Android Open Source Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
 * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

/*
 * WMEMSET is the symbol name under which the routine in this file is
 * emitted.  A definition made before this point takes precedence;
 * otherwise the name defaults to wmemset_avx2.
 */
#if !defined(WMEMSET)
# define WMEMSET wmemset_avx2
#endif

/* Allocatable ("a"), executable ("x") program-bits section for the code. */
        .section .text.avx2,"ax",@progbits

/*
 * WMEMSET (wmemset_avx2 unless overridden)
 *
 * Syntax: GNU as, AT&T operand order (source, destination).
 * Standard C prototype of wmemset, for reference:
 *     wchar_t* wmemset(wchar_t* s, wchar_t c, size_t n);
 * Every element is written as a 32-bit value.
 *
 * In:    %rdi = destination pointer
 *        %esi = 32-bit value to store
 *        %rdx = number of 32-bit elements (n)
 * Out:   %rax = original %rdi
 * Clobb: %rax, %rcx, %rdx, %r8, %r9, %r10, %ymm0, flags
 * Stack: not used; no calls are made.
 *
 * Strategy for n >= 32: the first (n & -32) elements are written in
 * "groups" of 32 elements (four unaligned 32-byte stores each).  Groups
 * are consumed eight at a time by a 32-store unrolled loop, then one at a
 * time for the (group count mod 8) that remain.  The last (n mod 32)
 * elements, or all of them when n < 32, are written one dword at a time.
 *
 * vzeroupper runs on every return path, including the ones that never
 * wrote %ymm0.
 */
ENTRY (WMEMSET)
        testq   %rdx, %rdx
        jz      .Lwms_return                    # n == 0: nothing to store

        cmpq    $32, %rdx
        jae     .Lwms_vector                    # n >= 32: use 256-bit stores

        /* n < 32: the scalar loop writes every element. */
        xorl    %r8d, %r8d                      # r8 = 0 elements written so far
        movq    %rdi, %rax                      # rax = write cursor
        jmp     .Lwms_scalar_setup

.Lwms_vector:
        movq    %rdx, %r8
        andq    $-32, %r8                       # r8 = n rounded down to a multiple of 32
        vmovd   %esi, %xmm0
        vpbroadcastd %xmm0, %ymm0               # ymm0 = value replicated into all 8 dwords
        leaq    -32(%r8), %rcx                  # rcx = r8 - 32
        movq    %rcx, %rax
        shrq    $5, %rax                        # rax = group count - 1
        leal    1(%rax), %r9d
        andl    $7, %r9d                        # r9 = group count mod 8 (upper half zeroed)
        cmpq    $224, %rcx
        jae     .Lwms_big_setup                 # r8 >= 256: at least 8 groups

        /*
         * 1..7 groups: only the per-group loop is needed.  r9 equals the
         * group count here and so is non-zero; the fall-through jump is
         * kept to mirror the original instruction sequence.
         */
        xorl    %eax, %eax                      # rax = element index 0
        testq   %r9, %r9
        jnz     .Lwms_group_setup
        jmp     .Lwms_vector_done

.Lwms_big_setup:
        leaq    992(%rdi), %rcx                 # biased base: displacements run -992..0
        leaq    -1(%r9), %r10
        subq    %rax, %r10                      # r10 = -(group count - r9), a multiple of 8
        xorl    %eax, %eax                      # rax = element index 0

        /* Invariant: rax = elements written so far, r10 = -(groups left here). */
        .p2align 4, 0x90
.Lwms_big_loop:
        .irp    disp, -992, -960, -928, -896, -864, -832, -800, -768
        vmovdqu %ymm0, \disp(%rcx,%rax,4)
        .endr
        .irp    disp, -736, -704, -672, -640, -608, -576, -544, -512
        vmovdqu %ymm0, \disp(%rcx,%rax,4)
        .endr
        .irp    disp, -480, -448, -416, -384, -352, -320, -288, -256
        vmovdqu %ymm0, \disp(%rcx,%rax,4)
        .endr
        .irp    disp, -224, -192, -160, -128, -96, -64, -32
        vmovdqu %ymm0, \disp(%rcx,%rax,4)
        .endr
        vmovdqu %ymm0, (%rcx,%rax,4)
        addq    $256, %rax                      # 32 stores x 8 dwords = 256 elements
        addq    $8, %r10                        # eight groups consumed
        jnz     .Lwms_big_loop

        testq   %r9, %r9
        jz      .Lwms_vector_done               # group count was a multiple of 8

.Lwms_group_setup:
        leaq    (%rdi,%rax,4), %rax
        addq    $96, %rax                       # cursor biased by 96: displacements run -96..0
        negq    %r9                             # count upward to zero

        /* One group (32 elements, 128 bytes) per pass. */
        .p2align 4, 0x90
.Lwms_group_loop:
        vmovdqu %ymm0, -96(%rax)
        vmovdqu %ymm0, -64(%rax)
        vmovdqu %ymm0, -32(%rax)
        vmovdqu %ymm0, (%rax)
        subq    $-128, %rax                     # rax += 128 (-128 fits an 8-bit immediate)
        addq    $1, %r9
        jnz     .Lwms_group_loop

.Lwms_vector_done:
        cmpq    %rdx, %r8
        je      .Lwms_return                    # n was a multiple of 32: no tail
        leaq    (%rdi,%r8,4), %rax              # rax = first element not yet written

.Lwms_scalar_setup:
        subq    %r8, %rdx                       # rdx = elements still to write (non-zero)

        .p2align 4, 0x90
.Lwms_scalar_loop:
        movl    %esi, (%rax)
        addq    $4, %rax
        addq    $-1, %rdx
        jnz     .Lwms_scalar_loop

.Lwms_return:
        movq    %rdi, %rax                      # return the destination pointer
        vzeroupper
        retq
END(WMEMSET)