/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <private/bionic_asm.h>

#include "cache.h"
/* L(label): expand to a .L-prefixed name so branch targets stay local to
 * this file and are not emitted into the symbol table. */
#ifndef L
# define L(label) .L##label
#endif

/* ALIGN(n): align the next location to a 2^n-byte boundary. */
#ifndef ALIGN
# define ALIGN(n) .p2align n
#endif
42
Varvara Rainchika020a242014-04-29 17:44:56 +040043
/*
 * void* __memset_chk(void* dst, int byte, size_t n, size_t dst_len)
 *
 * _FORTIFY_SOURCE entry point: tail-calls memset when the fill fits in the
 * destination object, and diverts to __memset_chk_fail (which aborts)
 * otherwise.
 * ABI: SysV AMD64.  In: rdi = dst, rsi = byte, rdx = n, rcx = dst_len.
 */
ENTRY(__memset_chk)
        # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
        cmp     %rdx, %rcx
        # Sizes are unsigned: the branch must be jae (unsigned dst_len >= n),
        # not the signed jl.  jl also branched the wrong way relative to the
        # fall-through into __memset_chk_fail: it sent the overflow case
        # (dst_len < n) to memset and valid calls to the abort path, and a
        # dst_len of -1 ("size unknown") would have compared as negative.
        jae     memset

        # TODO: include __memset_chk_fail in the backtrace?
        call    PIC_PLT(__memset_chk_fail)
END(__memset_chk)
Varvara Rainchika020a242014-04-29 17:44:56 +040052
Varvara Rainchika020a242014-04-29 17:44:56 +040053
/*
 * void* memset(void* dst, int byte, size_t n)   -- SSE2 implementation.
 *
 * ABI: SysV AMD64.  In: rdi = dst, rsi = byte, rdx = n.  Out: rax = dst.
 * Clobbers: rcx, rdx, rsi, r8, xmm0, flags.
 *
 * Strategy:
 *   n < 16         : branch on the bits of n, store with 1/2/4/8-byte movs
 *                    from both ends (stores may overlap, never overrun).
 *   16 <= n <= 128 : unaligned 16-byte SSE stores from both ends.
 *   n > 128        : head/tail already written; fill the 64-byte-aligned
 *                    middle in a loop, switching to non-temporal stores when
 *                    n exceeds the shared cache size.
 */
	.section .text.sse2,"ax",@progbits
ENTRY(memset)
	movq	%rdi, %rax		# return value is always dst
	and	$0xff, %rsi		# keep only the low byte of the fill value
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx		# rcx = fill byte replicated into all 8 bytes
	cmpq	$16, %rdx
	jae	L(16bytesormore)

	# n < 16: dispatch on the bits of n.
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)		# n == 0
	movb	%cl, (%rdi)		# n == 1
L(return):
	ret

L(8_15bytes):
	movq	%rcx, (%rdi)		# first 8 bytes
	movq	%rcx, -8(%rdi, %rdx)	# last 8 bytes (may overlap the first)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
	movd	%rcx, %xmm0		# rcx (64-bit) -> xmm0 low qword
	pshufd	$0, %xmm0, %xmm0	# broadcast: all 16 bytes = fill byte
	movdqu	%xmm0, (%rdi)		# first 16 bytes
	movdqu	%xmm0, -16(%rdi, %rdx)	# last 16 bytes
	cmpq	$32, %rdx
	jbe	L(32bytesless)		# n <= 32 fully covered
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)		# n <= 64 fully covered
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(128bytesmore)
L(32bytesless):
L(64bytesless):
	ret				# 16 <= n <= 128 handled by the stores above

	ALIGN (4)
L(128bytesmore):
	# The first and last 64 bytes are already written.  Fill the 64-byte-
	# aligned middle: rcx = first 64-aligned address past dst,
	# rdx = last 64-aligned address within the buffer.
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx
	movq	%rdx, %r8		# save n for the cache-size test below
	addq	%rdi, %rdx
	andq	$-64, %rdx
	cmpq	%rcx, %rdx
	je	L(return)		# aligned middle is empty

	# Very large fills bypass the cache with non-temporal stores.
#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(128bytesmore_nt)

	ALIGN (4)
L(128bytesmore_normal):
	# rcx is 64-byte aligned here, so aligned 16-byte stores are safe.
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 0x10(%rcx)
	movaps	%xmm0, 0x20(%rcx)
	movaps	%xmm0, 0x30(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_normal)
	ret

	ALIGN (4)
L(128bytesmore_nt):
	# Non-temporal stores: avoid polluting the cache with a huge fill.
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 0x10(%rcx)
	movntdq	%xmm0, 0x20(%rcx)
	movntdq	%xmm0, 0x30(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_nt)
	sfence				# make NT stores visible before returning
	ret

END(memset)