/*
Copyright (C) 2019 The Android Open Source Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
 * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

/* Name given to the routine below; defined here only when WMEMSET is not already defined. */
#ifndef WMEMSET
 #define WMEMSET wmemset_avx2
#endif

/*
 * WMEMSET(dst, value, count) -- 32-bit x86, AVX2, AT&T syntax.
 *
 * Writes the 32-bit `value` to `count` consecutive 4-byte elements starting
 * at `dst` and returns `dst` in %eax.  All three arguments are read from the
 * stack.  Presumably this backs
 *     wchar_t* wmemset(wchar_t* dst, wchar_t value, size_t count)
 * but the C prototype is not visible in this file -- confirm against callers.
 *
 * Stack layout after the prologue (five 4-byte pushes; assumes ENTRY itself
 * does not adjust %esp):
 *    0(%esp)  scratch slot, holds (count & -32) on the vector path
 *    4(%esp)  saved %esi
 *    8(%esp)  saved %edi
 *   12(%esp)  saved %ebx
 *   16(%esp)  saved %ebp
 *   20(%esp)  return address
 *   24(%esp)  dst
 *   28(%esp)  value
 *   32(%esp)  count (in elements, not bytes)
 *
 * Strategy:
 *   count == 0   nothing is stored.
 *   count <  32  scalar loop only, one element per iteration.
 *   count >= 32  the first (count & -32) elements are written with unaligned
 *                32-byte stores, organised in groups of 4 stores
 *                (128 bytes, 32 elements):
 *                  - main loop: 8 groups (1024 bytes) per iteration, entered
 *                    only when there are at least 8 groups;
 *                  - group loop: the remaining (groups & 7) groups;
 *                the last (count & 31) elements then go through the scalar
 *                loop.
 *
 * Registers: %ecx = count, %edx = value, %ymm0 = value broadcast to 8 lanes,
 * %ebx = element index in the main loop, %esi = loop counter (main loop) or
 * write pointer (group and scalar loops), %edi = leftover group count, later
 * the number of elements already written.
 *
 * NOTE(review): the pushes and pops carry no CFI annotations in this block;
 * whether ENTRY/END provide unwind information is not visible here -- confirm.
 */
ENTRY(WMEMSET)
/* BB#0: prologue, load count and dst. */
    pushl   %ebp
    pushl   %ebx
    pushl   %edi
    pushl   %esi
    pushl   %eax                        /* reserves the 4-byte scratch slot at (%esp) */
    movl    32(%esp), %ecx              /* ecx = count */
    movl    24(%esp), %eax              /* eax = dst, which is also the return value */
    testl   %ecx, %ecx
    je      .LBB0_12                    /* count == 0: return without storing */
/* BB#1: choose between the scalar-only and the vector path. */
    movl    28(%esp), %edx              /* edx = value */
    xorl    %edi, %edi                  /* edi = elements already written = 0 */
    movl    %eax, %esi                  /* esi = write pointer for the scalar loop */
    cmpl    $32, %ecx
    jb      .LBB0_10                    /* fewer than 32 elements: scalar loop only */
/* BB#2: vector path setup. */
    movl    %ecx, %eax
    andl    $-32, %eax                  /* eax = count rounded down to a multiple of 32 */
    vmovd   %edx, %xmm0
    vpbroadcastd %xmm0, %ymm0           /* ymm0 = value in all 8 dword lanes */
    movl    %eax, (%esp)                /* 4-byte Spill: number of vector-written elements */
    leal    -32(%eax), %esi
    movl    %esi, %eax
    shrl    $5, %eax                    /* eax = groups - 1 */
    leal    1(%eax), %edi
    andl    $7, %edi                    /* edi = groups & 7, left for the group loop */
    xorl    %ebx, %ebx                  /* ebx = element index = 0 */
    cmpl    $224, %esi
    jb      .LBB0_5                     /* fewer than 8 groups: skip the main loop */
/* BB#3: main loop setup. */
    movl    24(%esp), %esi
    leal    992(%esi), %ebp             /* biased base: the 32 stores use displacements -992..0 */
    leal    -1(%edi), %esi
    subl    %eax, %esi                  /* esi = (groups & 7) - groups, a negative multiple of 8 */
    xorl    %ebx, %ebx
    .p2align 4, 0x90
.LBB0_4:                                /* =>This Inner Loop Header: Depth=1 */
    vmovdqu %ymm0, -992(%ebp,%ebx,4)
    vmovdqu %ymm0, -960(%ebp,%ebx,4)
    vmovdqu %ymm0, -928(%ebp,%ebx,4)
    vmovdqu %ymm0, -896(%ebp,%ebx,4)
    vmovdqu %ymm0, -864(%ebp,%ebx,4)
    vmovdqu %ymm0, -832(%ebp,%ebx,4)
    vmovdqu %ymm0, -800(%ebp,%ebx,4)
    vmovdqu %ymm0, -768(%ebp,%ebx,4)
    vmovdqu %ymm0, -736(%ebp,%ebx,4)
    vmovdqu %ymm0, -704(%ebp,%ebx,4)
    vmovdqu %ymm0, -672(%ebp,%ebx,4)
    vmovdqu %ymm0, -640(%ebp,%ebx,4)
    vmovdqu %ymm0, -608(%ebp,%ebx,4)
    vmovdqu %ymm0, -576(%ebp,%ebx,4)
    vmovdqu %ymm0, -544(%ebp,%ebx,4)
    vmovdqu %ymm0, -512(%ebp,%ebx,4)
    vmovdqu %ymm0, -480(%ebp,%ebx,4)
    vmovdqu %ymm0, -448(%ebp,%ebx,4)
    vmovdqu %ymm0, -416(%ebp,%ebx,4)
    vmovdqu %ymm0, -384(%ebp,%ebx,4)
    vmovdqu %ymm0, -352(%ebp,%ebx,4)
    vmovdqu %ymm0, -320(%ebp,%ebx,4)
    vmovdqu %ymm0, -288(%ebp,%ebx,4)
    vmovdqu %ymm0, -256(%ebp,%ebx,4)
    vmovdqu %ymm0, -224(%ebp,%ebx,4)
    vmovdqu %ymm0, -192(%ebp,%ebx,4)
    vmovdqu %ymm0, -160(%ebp,%ebx,4)
    vmovdqu %ymm0, -128(%ebp,%ebx,4)
    vmovdqu %ymm0, -96(%ebp,%ebx,4)
    vmovdqu %ymm0, -64(%ebp,%ebx,4)
    vmovdqu %ymm0, -32(%ebp,%ebx,4)
    vmovdqu %ymm0, (%ebp,%ebx,4)
    addl    $256, %ebx                  /* imm = 0x100: 256 elements written per iteration */
    addl    $8, %esi                    /* 8 groups done; loop ends when the counter reaches 0 */
    jne     .LBB0_4
.LBB0_5:
    testl   %edi, %edi
    movl    24(%esp), %eax              /* eax = dst again; movl leaves the flags from testl intact */
    je      .LBB0_8                     /* no leftover groups */
/* BB#6: group loop setup. */
    leal    (%eax,%ebx,4), %esi
    addl    $96, %esi                   /* esi = next unwritten address + 96 (stores use -96..0) */
    negl    %edi                        /* count up from -(groups & 7) to 0 */
    .p2align 4, 0x90
.LBB0_7:                                /* =>This Inner Loop Header: Depth=1 */
    vmovdqu %ymm0, -96(%esi)
    vmovdqu %ymm0, -64(%esi)
    vmovdqu %ymm0, -32(%esi)
    vmovdqu %ymm0, (%esi)
    subl    $-128, %esi                 /* esi += 128; -128 fits an 8-bit immediate, +128 does not */
    addl    $1, %edi
    jne     .LBB0_7
.LBB0_8:
    movl    (%esp), %edi                /* 4-byte Reload: edi = count & -32 = elements written so far */
    cmpl    %ecx, %edi
    je      .LBB0_12                    /* count was a multiple of 32: nothing left */
/* BB#9: point the scalar loop at the first element not yet written. */
    leal    (%eax,%edi,4), %esi
.LBB0_10:
    subl    %edi, %ecx                  /* ecx = elements still to write; nonzero on both entry paths */
    .p2align 4, 0x90
.LBB0_11:                               /* =>This Inner Loop Header: Depth=1 */
    movl    %edx, (%esi)
    addl    $4, %esi
    addl    $-1, %ecx
    jne     .LBB0_11
.LBB0_12:
    addl    $4, %esp                    /* drop the scratch slot */
    popl    %esi
    popl    %edi
    popl    %ebx
    popl    %ebp
    vzeroupper                          /* runs on every path, including those that never touched ymm0 */
    retl
END(WMEMSET)