xref: /llvm-project/llvm/test/CodeGen/X86/vec_fcopysign.ll (revision 5f91335a55cd65dda8351f85b93eeaa7493e06c4)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX1
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX2
5; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VL
6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512FP16
7; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VLDQ
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX1
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX2
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VL
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512FP16
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VLDQ
14
15;
16; 128-bit Vectors
17;
18
; NOTE(review): v2f64 copysign — magnitude from %a0, sign from %a1. Expected
; lowering per the autogenerated checks: and/or sign-bit masking on SSE/AVX,
; a single vpternlogq with a broadcast ({1to2}) constant on AVX512.
19define <2 x double> @fcopysign_v2f64(<2 x double> %a0, <2 x double> %a1) nounwind {
20; X86-SSE-LABEL: fcopysign_v2f64:
21; X86-SSE:       # %bb.0:
22; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
23; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
24; X86-SSE-NEXT:    orps %xmm1, %xmm0
25; X86-SSE-NEXT:    retl
26;
27; X86-AVX1OR2-LABEL: fcopysign_v2f64:
28; X86-AVX1OR2:       # %bb.0:
29; X86-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
30; X86-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
31; X86-AVX1OR2-NEXT:    vorps %xmm1, %xmm0, %xmm0
32; X86-AVX1OR2-NEXT:    retl
33;
34; X86-AVX512-LABEL: fcopysign_v2f64:
35; X86-AVX512:       # %bb.0:
36; X86-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm1, %xmm0
37; X86-AVX512-NEXT:    retl
38;
39; X64-SSE-LABEL: fcopysign_v2f64:
40; X64-SSE:       # %bb.0:
41; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
42; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
43; X64-SSE-NEXT:    orps %xmm1, %xmm0
44; X64-SSE-NEXT:    retq
45;
46; X64-AVX1OR2-LABEL: fcopysign_v2f64:
47; X64-AVX1OR2:       # %bb.0:
48; X64-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
49; X64-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
50; X64-AVX1OR2-NEXT:    vorps %xmm1, %xmm0, %xmm0
51; X64-AVX1OR2-NEXT:    retq
52;
53; X64-AVX512-LABEL: fcopysign_v2f64:
54; X64-AVX512:       # %bb.0:
55; X64-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
56; X64-AVX512-NEXT:    retq
57  %t = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a0, <2 x double> %a1)
58  ret <2 x double> %t
59}
60declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
61
; NOTE(review): v4f32 copysign. AVX2 materializes the sign (-0.0) and
; magnitude (NaN, i.e. all-bits-but-sign) masks via vbroadcastss; AVX512
; folds the whole select into vpternlogd with a {1to4} broadcast operand.
62define <4 x float> @fcopysign_v4f32(<4 x float> %a0, <4 x float> %a1) nounwind {
63; X86-SSE-LABEL: fcopysign_v4f32:
64; X86-SSE:       # %bb.0:
65; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
66; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
67; X86-SSE-NEXT:    orps %xmm1, %xmm0
68; X86-SSE-NEXT:    retl
69;
70; X86-AVX1-LABEL: fcopysign_v4f32:
71; X86-AVX1:       # %bb.0:
72; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
73; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
74; X86-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
75; X86-AVX1-NEXT:    retl
76;
77; X86-AVX2-LABEL: fcopysign_v4f32:
78; X86-AVX2:       # %bb.0:
79; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
80; X86-AVX2-NEXT:    vandps %xmm2, %xmm1, %xmm1
81; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
82; X86-AVX2-NEXT:    vandps %xmm2, %xmm0, %xmm0
83; X86-AVX2-NEXT:    vorps %xmm1, %xmm0, %xmm0
84; X86-AVX2-NEXT:    retl
85;
86; X86-AVX512-LABEL: fcopysign_v4f32:
87; X86-AVX512:       # %bb.0:
88; X86-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm1, %xmm0
89; X86-AVX512-NEXT:    retl
90;
91; X64-SSE-LABEL: fcopysign_v4f32:
92; X64-SSE:       # %bb.0:
93; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
94; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
95; X64-SSE-NEXT:    orps %xmm1, %xmm0
96; X64-SSE-NEXT:    retq
97;
98; X64-AVX1-LABEL: fcopysign_v4f32:
99; X64-AVX1:       # %bb.0:
100; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
101; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
102; X64-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
103; X64-AVX1-NEXT:    retq
104;
105; X64-AVX2-LABEL: fcopysign_v4f32:
106; X64-AVX2:       # %bb.0:
107; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
108; X64-AVX2-NEXT:    vandps %xmm2, %xmm1, %xmm1
109; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
110; X64-AVX2-NEXT:    vandps %xmm2, %xmm0, %xmm0
111; X64-AVX2-NEXT:    vorps %xmm1, %xmm0, %xmm0
112; X64-AVX2-NEXT:    retq
113;
114; X64-AVX512-LABEL: fcopysign_v4f32:
115; X64-AVX512:       # %bb.0:
116; X64-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
117; X64-AVX512-NEXT:    retq
118  %t = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a0, <4 x float> %a1)
119  ret <4 x float> %t
120}
121declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
122
; NOTE(review): v8f16 copysign with operands loaded from pointers (the half
; vectors are passed via memory). AVX2 uses vpbroadcastw word masks; AVX512
; uses a vpbroadcastd constant (0x7FFF7FFF pairs) feeding vpternlogd $202.
123define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
124; X86-SSE-LABEL: fcopysign_v8f16:
125; X86-SSE:       # %bb.0:
126; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
127; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
128; X86-SSE-NEXT:    movaps (%ecx), %xmm0
129; X86-SSE-NEXT:    movaps (%eax), %xmm1
130; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
131; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
132; X86-SSE-NEXT:    orps %xmm1, %xmm0
133; X86-SSE-NEXT:    retl
134;
135; X86-AVX1-LABEL: fcopysign_v8f16:
136; X86-AVX1:       # %bb.0:
137; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
138; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
139; X86-AVX1-NEXT:    vmovaps (%ecx), %xmm0
140; X86-AVX1-NEXT:    vmovaps (%eax), %xmm1
141; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
142; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
143; X86-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
144; X86-AVX1-NEXT:    retl
145;
146; X86-AVX2-LABEL: fcopysign_v8f16:
147; X86-AVX2:       # %bb.0:
148; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
149; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
150; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
151; X86-AVX2-NEXT:    vpand (%ecx), %xmm0, %xmm0
152; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
153; X86-AVX2-NEXT:    vpand (%eax), %xmm1, %xmm1
154; X86-AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
155; X86-AVX2-NEXT:    retl
156;
157; X86-AVX512-LABEL: fcopysign_v8f16:
158; X86-AVX512:       # %bb.0:
159; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
160; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
161; X86-AVX512-NEXT:    vmovdqa (%ecx), %xmm1
162; X86-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879]
163; X86-AVX512-NEXT:    vpternlogd $202, (%eax), %xmm1, %xmm0
164; X86-AVX512-NEXT:    retl
165;
166; X64-SSE-LABEL: fcopysign_v8f16:
167; X64-SSE:       # %bb.0:
168; X64-SSE-NEXT:    movaps (%rdi), %xmm0
169; X64-SSE-NEXT:    movaps (%rsi), %xmm1
170; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
171; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172; X64-SSE-NEXT:    orps %xmm1, %xmm0
173; X64-SSE-NEXT:    retq
174;
175; X64-AVX1-LABEL: fcopysign_v8f16:
176; X64-AVX1:       # %bb.0:
177; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0
178; X64-AVX1-NEXT:    vmovaps (%rsi), %xmm1
179; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
180; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
181; X64-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
182; X64-AVX1-NEXT:    retq
183;
184; X64-AVX2-LABEL: fcopysign_v8f16:
185; X64-AVX2:       # %bb.0:
186; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
187; X64-AVX2-NEXT:    vpand (%rsi), %xmm0, %xmm0
188; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
189; X64-AVX2-NEXT:    vpand (%rdi), %xmm1, %xmm1
190; X64-AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
191; X64-AVX2-NEXT:    retq
192;
193; X64-AVX512-LABEL: fcopysign_v8f16:
194; X64-AVX512:       # %bb.0:
195; X64-AVX512-NEXT:    vmovdqa (%rdi), %xmm1
196; X64-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879]
197; X64-AVX512-NEXT:    vpternlogd $202, (%rsi), %xmm1, %xmm0
198; X64-AVX512-NEXT:    retq
199  %a0 = load <8 x half>, ptr %p0, align 16
200  %a1 = load <8 x half>, ptr %p1, align 16
201  %t = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a0, <8 x half> %a1)
202  ret <8 x half> %t
203}
204declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
205
206;
207; 256-bit Vectors
208;
209
; NOTE(review): 256-bit v4f64 copysign. On X86-SSE the second source vector
; is passed on the stack (read via 8(%ebp) after 16-byte realignment); SSE
; reuses one NaN mask with andps/andnps, AVX keeps it in a single ymm op.
210define <4 x double> @fcopysign_v4f64(<4 x double> %a0, <4 x double> %a1) nounwind {
211; X86-SSE-LABEL: fcopysign_v4f64:
212; X86-SSE:       # %bb.0:
213; X86-SSE-NEXT:    pushl %ebp
214; X86-SSE-NEXT:    movl %esp, %ebp
215; X86-SSE-NEXT:    andl $-16, %esp
216; X86-SSE-NEXT:    subl $16, %esp
217; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN]
218; X86-SSE-NEXT:    movaps %xmm3, %xmm4
219; X86-SSE-NEXT:    andnps %xmm2, %xmm4
220; X86-SSE-NEXT:    andps %xmm3, %xmm0
221; X86-SSE-NEXT:    orps %xmm4, %xmm0
222; X86-SSE-NEXT:    andps %xmm3, %xmm1
223; X86-SSE-NEXT:    andnps 8(%ebp), %xmm3
224; X86-SSE-NEXT:    orps %xmm3, %xmm1
225; X86-SSE-NEXT:    movl %ebp, %esp
226; X86-SSE-NEXT:    popl %ebp
227; X86-SSE-NEXT:    retl
228;
229; X86-AVX1-LABEL: fcopysign_v4f64:
230; X86-AVX1:       # %bb.0:
231; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
232; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
233; X86-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
234; X86-AVX1-NEXT:    retl
235;
236; X86-AVX2-LABEL: fcopysign_v4f64:
237; X86-AVX2:       # %bb.0:
238; X86-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
239; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
240; X86-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
241; X86-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
242; X86-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
243; X86-AVX2-NEXT:    retl
244;
245; X86-AVX512-LABEL: fcopysign_v4f64:
246; X86-AVX512:       # %bb.0:
247; X86-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm1, %ymm0
248; X86-AVX512-NEXT:    retl
249;
250; X64-SSE-LABEL: fcopysign_v4f64:
251; X64-SSE:       # %bb.0:
252; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN]
253; X64-SSE-NEXT:    movaps %xmm4, %xmm5
254; X64-SSE-NEXT:    andnps %xmm2, %xmm5
255; X64-SSE-NEXT:    andps %xmm4, %xmm0
256; X64-SSE-NEXT:    orps %xmm5, %xmm0
257; X64-SSE-NEXT:    andps %xmm4, %xmm1
258; X64-SSE-NEXT:    andnps %xmm3, %xmm4
259; X64-SSE-NEXT:    orps %xmm4, %xmm1
260; X64-SSE-NEXT:    retq
261;
262; X64-AVX1-LABEL: fcopysign_v4f64:
263; X64-AVX1:       # %bb.0:
264; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
265; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
266; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
267; X64-AVX1-NEXT:    retq
268;
269; X64-AVX2-LABEL: fcopysign_v4f64:
270; X64-AVX2:       # %bb.0:
271; X64-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
272; X64-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
273; X64-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
274; X64-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
275; X64-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
276; X64-AVX2-NEXT:    retq
277;
278; X64-AVX512-LABEL: fcopysign_v4f64:
279; X64-AVX512:       # %bb.0:
280; X64-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
281; X64-AVX512-NEXT:    retq
282  %t = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a0, <4 x double> %a1)
283  ret <4 x double> %t
284}
285declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
286
; NOTE(review): 256-bit v8f32 copysign — same shape as the v4f64 test above
; but with dword element masks (vpternlogd / {1to8} broadcast on AVX512).
287define <8 x float> @fcopysign_v8f32(<8 x float> %a0, <8 x float> %a1) nounwind {
288; X86-SSE-LABEL: fcopysign_v8f32:
289; X86-SSE:       # %bb.0:
290; X86-SSE-NEXT:    pushl %ebp
291; X86-SSE-NEXT:    movl %esp, %ebp
292; X86-SSE-NEXT:    andl $-16, %esp
293; X86-SSE-NEXT:    subl $16, %esp
294; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
295; X86-SSE-NEXT:    movaps %xmm3, %xmm4
296; X86-SSE-NEXT:    andnps %xmm2, %xmm4
297; X86-SSE-NEXT:    andps %xmm3, %xmm0
298; X86-SSE-NEXT:    orps %xmm4, %xmm0
299; X86-SSE-NEXT:    andps %xmm3, %xmm1
300; X86-SSE-NEXT:    andnps 8(%ebp), %xmm3
301; X86-SSE-NEXT:    orps %xmm3, %xmm1
302; X86-SSE-NEXT:    movl %ebp, %esp
303; X86-SSE-NEXT:    popl %ebp
304; X86-SSE-NEXT:    retl
305;
306; X86-AVX1-LABEL: fcopysign_v8f32:
307; X86-AVX1:       # %bb.0:
308; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
309; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
310; X86-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
311; X86-AVX1-NEXT:    retl
312;
313; X86-AVX2-LABEL: fcopysign_v8f32:
314; X86-AVX2:       # %bb.0:
315; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
316; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
317; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
318; X86-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
319; X86-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
320; X86-AVX2-NEXT:    retl
321;
322; X86-AVX512-LABEL: fcopysign_v8f32:
323; X86-AVX512:       # %bb.0:
324; X86-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm1, %ymm0
325; X86-AVX512-NEXT:    retl
326;
327; X64-SSE-LABEL: fcopysign_v8f32:
328; X64-SSE:       # %bb.0:
329; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
330; X64-SSE-NEXT:    movaps %xmm4, %xmm5
331; X64-SSE-NEXT:    andnps %xmm2, %xmm5
332; X64-SSE-NEXT:    andps %xmm4, %xmm0
333; X64-SSE-NEXT:    orps %xmm5, %xmm0
334; X64-SSE-NEXT:    andps %xmm4, %xmm1
335; X64-SSE-NEXT:    andnps %xmm3, %xmm4
336; X64-SSE-NEXT:    orps %xmm4, %xmm1
337; X64-SSE-NEXT:    retq
338;
339; X64-AVX1-LABEL: fcopysign_v8f32:
340; X64-AVX1:       # %bb.0:
341; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
342; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
343; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
344; X64-AVX1-NEXT:    retq
345;
346; X64-AVX2-LABEL: fcopysign_v8f32:
347; X64-AVX2:       # %bb.0:
348; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
349; X64-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
350; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
351; X64-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
352; X64-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
353; X64-AVX2-NEXT:    retq
354;
355; X64-AVX512-LABEL: fcopysign_v8f32:
356; X64-AVX512:       # %bb.0:
357; X64-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
358; X64-AVX512-NEXT:    retq
359  %t = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a0, <8 x float> %a1)
360  ret <8 x float> %t
361}
362declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
363
; NOTE(review): 256-bit v16f16 copysign with in-memory operands (loaded with
; align 16, hence unaligned ymm loads vmovups/vmovdqu in the AVX checks).
; SSE splits the work into two 128-bit halves at offsets 0 and 16.
364define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
365; X86-SSE-LABEL: fcopysign_v16f16:
366; X86-SSE:       # %bb.0:
367; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
368; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
369; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
370; X86-SSE-NEXT:    movaps %xmm1, %xmm2
371; X86-SSE-NEXT:    andnps (%ecx), %xmm2
372; X86-SSE-NEXT:    movaps (%eax), %xmm0
373; X86-SSE-NEXT:    andps %xmm1, %xmm0
374; X86-SSE-NEXT:    orps %xmm2, %xmm0
375; X86-SSE-NEXT:    movaps %xmm1, %xmm2
376; X86-SSE-NEXT:    andnps 16(%ecx), %xmm2
377; X86-SSE-NEXT:    andps 16(%eax), %xmm1
378; X86-SSE-NEXT:    orps %xmm2, %xmm1
379; X86-SSE-NEXT:    retl
380;
381; X86-AVX1-LABEL: fcopysign_v16f16:
382; X86-AVX1:       # %bb.0:
383; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
384; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
385; X86-AVX1-NEXT:    vmovups (%ecx), %ymm0
386; X86-AVX1-NEXT:    vmovups (%eax), %ymm1
387; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
388; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
389; X86-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
390; X86-AVX1-NEXT:    retl
391;
392; X86-AVX2-LABEL: fcopysign_v16f16:
393; X86-AVX2:       # %bb.0:
394; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
395; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
396; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
397; X86-AVX2-NEXT:    vpand (%ecx), %ymm0, %ymm0
398; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
399; X86-AVX2-NEXT:    vpand (%eax), %ymm1, %ymm1
400; X86-AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
401; X86-AVX2-NEXT:    retl
402;
403; X86-AVX512-LABEL: fcopysign_v16f16:
404; X86-AVX512:       # %bb.0:
405; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
406; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
407; X86-AVX512-NEXT:    vmovdqu (%ecx), %ymm1
408; X86-AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
409; X86-AVX512-NEXT:    vpternlogd $202, (%eax), %ymm1, %ymm0
410; X86-AVX512-NEXT:    retl
411;
412; X64-SSE-LABEL: fcopysign_v16f16:
413; X64-SSE:       # %bb.0:
414; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
415; X64-SSE-NEXT:    movaps %xmm1, %xmm2
416; X64-SSE-NEXT:    andnps (%rsi), %xmm2
417; X64-SSE-NEXT:    movaps (%rdi), %xmm0
418; X64-SSE-NEXT:    andps %xmm1, %xmm0
419; X64-SSE-NEXT:    orps %xmm2, %xmm0
420; X64-SSE-NEXT:    movaps %xmm1, %xmm2
421; X64-SSE-NEXT:    andnps 16(%rsi), %xmm2
422; X64-SSE-NEXT:    andps 16(%rdi), %xmm1
423; X64-SSE-NEXT:    orps %xmm2, %xmm1
424; X64-SSE-NEXT:    retq
425;
426; X64-AVX1-LABEL: fcopysign_v16f16:
427; X64-AVX1:       # %bb.0:
428; X64-AVX1-NEXT:    vmovups (%rdi), %ymm0
429; X64-AVX1-NEXT:    vmovups (%rsi), %ymm1
430; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
431; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
432; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
433; X64-AVX1-NEXT:    retq
434;
435; X64-AVX2-LABEL: fcopysign_v16f16:
436; X64-AVX2:       # %bb.0:
437; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
438; X64-AVX2-NEXT:    vpand (%rsi), %ymm0, %ymm0
439; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
440; X64-AVX2-NEXT:    vpand (%rdi), %ymm1, %ymm1
441; X64-AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
442; X64-AVX2-NEXT:    retq
443;
444; X64-AVX512-LABEL: fcopysign_v16f16:
445; X64-AVX512:       # %bb.0:
446; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm1
447; X64-AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
448; X64-AVX512-NEXT:    vpternlogd $202, (%rsi), %ymm1, %ymm0
449; X64-AVX512-NEXT:    retq
450  %a0 = load <16 x half>, ptr %p0, align 16
451  %a1 = load <16 x half>, ptr %p1, align 16
452  %t = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a0, <16 x half> %a1)
453  ret <16 x half> %t
454}
455declare <16 x half> @llvm.copysign.v16f16(<16 x half>, <16 x half>)
456
457;
458; 512-bit Vectors
459;
460
; NOTE(review): 512-bit v8f64 copysign. X86-SSE reads three of the four
; 128-bit source chunks from the realigned stack (24/40/56/72(%ebp));
; AVX1/2 splits into two ymm halves; AVX512 is one zmm vpternlogq.
461define <8 x double> @fcopysign_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
462; X86-SSE-LABEL: fcopysign_v8f64:
463; X86-SSE:       # %bb.0:
464; X86-SSE-NEXT:    pushl %ebp
465; X86-SSE-NEXT:    movl %esp, %ebp
466; X86-SSE-NEXT:    andl $-16, %esp
467; X86-SSE-NEXT:    subl $16, %esp
468; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN]
469; X86-SSE-NEXT:    andps %xmm3, %xmm0
470; X86-SSE-NEXT:    movaps %xmm3, %xmm4
471; X86-SSE-NEXT:    andnps 24(%ebp), %xmm4
472; X86-SSE-NEXT:    orps %xmm4, %xmm0
473; X86-SSE-NEXT:    andps %xmm3, %xmm1
474; X86-SSE-NEXT:    movaps %xmm3, %xmm4
475; X86-SSE-NEXT:    andnps 40(%ebp), %xmm4
476; X86-SSE-NEXT:    orps %xmm4, %xmm1
477; X86-SSE-NEXT:    andps %xmm3, %xmm2
478; X86-SSE-NEXT:    movaps %xmm3, %xmm4
479; X86-SSE-NEXT:    andnps 56(%ebp), %xmm4
480; X86-SSE-NEXT:    orps %xmm4, %xmm2
481; X86-SSE-NEXT:    movaps %xmm3, %xmm4
482; X86-SSE-NEXT:    andnps 72(%ebp), %xmm4
483; X86-SSE-NEXT:    andps 8(%ebp), %xmm3
484; X86-SSE-NEXT:    orps %xmm4, %xmm3
485; X86-SSE-NEXT:    movl %ebp, %esp
486; X86-SSE-NEXT:    popl %ebp
487; X86-SSE-NEXT:    retl
488;
489; X86-AVX1OR2-LABEL: fcopysign_v8f64:
490; X86-AVX1OR2:       # %bb.0:
491; X86-AVX1OR2-NEXT:    pushl %ebp
492; X86-AVX1OR2-NEXT:    movl %esp, %ebp
493; X86-AVX1OR2-NEXT:    andl $-32, %esp
494; X86-AVX1OR2-NEXT:    subl $32, %esp
495; X86-AVX1OR2-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [NaN,NaN,NaN,NaN]
496; X86-AVX1OR2-NEXT:    vandnps %ymm2, %ymm3, %ymm2
497; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm0, %ymm0
498; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
499; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm1, %ymm1
500; X86-AVX1OR2-NEXT:    vandnps 8(%ebp), %ymm3, %ymm2
501; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
502; X86-AVX1OR2-NEXT:    movl %ebp, %esp
503; X86-AVX1OR2-NEXT:    popl %ebp
504; X86-AVX1OR2-NEXT:    retl
505;
506; X86-AVX512-LABEL: fcopysign_v8f64:
507; X86-AVX512:       # %bb.0:
508; X86-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm1, %zmm0
509; X86-AVX512-NEXT:    retl
510;
511; X64-SSE-LABEL: fcopysign_v8f64:
512; X64-SSE:       # %bb.0:
513; X64-SSE-NEXT:    movaps {{.*#+}} xmm8 = [NaN,NaN]
514; X64-SSE-NEXT:    movaps %xmm8, %xmm9
515; X64-SSE-NEXT:    andnps %xmm4, %xmm9
516; X64-SSE-NEXT:    andps %xmm8, %xmm0
517; X64-SSE-NEXT:    orps %xmm9, %xmm0
518; X64-SSE-NEXT:    movaps %xmm8, %xmm4
519; X64-SSE-NEXT:    andnps %xmm5, %xmm4
520; X64-SSE-NEXT:    andps %xmm8, %xmm1
521; X64-SSE-NEXT:    orps %xmm4, %xmm1
522; X64-SSE-NEXT:    movaps %xmm8, %xmm4
523; X64-SSE-NEXT:    andnps %xmm6, %xmm4
524; X64-SSE-NEXT:    andps %xmm8, %xmm2
525; X64-SSE-NEXT:    orps %xmm4, %xmm2
526; X64-SSE-NEXT:    andps %xmm8, %xmm3
527; X64-SSE-NEXT:    andnps %xmm7, %xmm8
528; X64-SSE-NEXT:    orps %xmm8, %xmm3
529; X64-SSE-NEXT:    retq
530;
531; X64-AVX1OR2-LABEL: fcopysign_v8f64:
532; X64-AVX1OR2:       # %bb.0:
533; X64-AVX1OR2-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN]
534; X64-AVX1OR2-NEXT:    vandnps %ymm2, %ymm4, %ymm2
535; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm0, %ymm0
536; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
537; X64-AVX1OR2-NEXT:    vandnps %ymm3, %ymm4, %ymm2
538; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm1, %ymm1
539; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
540; X64-AVX1OR2-NEXT:    retq
541;
542; X64-AVX512-LABEL: fcopysign_v8f64:
543; X64-AVX512:       # %bb.0:
544; X64-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
545; X64-AVX512-NEXT:    retq
546  %t = call <8 x double> @llvm.copysign.v8f64(<8 x double> %a0, <8 x double> %a1)
547  ret <8 x double> %t
548}
549declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
550
; NOTE(review): 512-bit v16f32 copysign — mirrors the v8f64 test with dword
; masks (vpternlogd / {1to16} broadcast on AVX512).
551define <16 x float> @fcopysign_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind {
552; X86-SSE-LABEL: fcopysign_v16f32:
553; X86-SSE:       # %bb.0:
554; X86-SSE-NEXT:    pushl %ebp
555; X86-SSE-NEXT:    movl %esp, %ebp
556; X86-SSE-NEXT:    andl $-16, %esp
557; X86-SSE-NEXT:    subl $16, %esp
558; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
559; X86-SSE-NEXT:    andps %xmm3, %xmm0
560; X86-SSE-NEXT:    movaps %xmm3, %xmm4
561; X86-SSE-NEXT:    andnps 24(%ebp), %xmm4
562; X86-SSE-NEXT:    orps %xmm4, %xmm0
563; X86-SSE-NEXT:    andps %xmm3, %xmm1
564; X86-SSE-NEXT:    movaps %xmm3, %xmm4
565; X86-SSE-NEXT:    andnps 40(%ebp), %xmm4
566; X86-SSE-NEXT:    orps %xmm4, %xmm1
567; X86-SSE-NEXT:    andps %xmm3, %xmm2
568; X86-SSE-NEXT:    movaps %xmm3, %xmm4
569; X86-SSE-NEXT:    andnps 56(%ebp), %xmm4
570; X86-SSE-NEXT:    orps %xmm4, %xmm2
571; X86-SSE-NEXT:    movaps %xmm3, %xmm4
572; X86-SSE-NEXT:    andnps 72(%ebp), %xmm4
573; X86-SSE-NEXT:    andps 8(%ebp), %xmm3
574; X86-SSE-NEXT:    orps %xmm4, %xmm3
575; X86-SSE-NEXT:    movl %ebp, %esp
576; X86-SSE-NEXT:    popl %ebp
577; X86-SSE-NEXT:    retl
578;
579; X86-AVX1OR2-LABEL: fcopysign_v16f32:
580; X86-AVX1OR2:       # %bb.0:
581; X86-AVX1OR2-NEXT:    pushl %ebp
582; X86-AVX1OR2-NEXT:    movl %esp, %ebp
583; X86-AVX1OR2-NEXT:    andl $-32, %esp
584; X86-AVX1OR2-NEXT:    subl $32, %esp
585; X86-AVX1OR2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
586; X86-AVX1OR2-NEXT:    vandnps %ymm2, %ymm3, %ymm2
587; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm0, %ymm0
588; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
589; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm1, %ymm1
590; X86-AVX1OR2-NEXT:    vandnps 8(%ebp), %ymm3, %ymm2
591; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
592; X86-AVX1OR2-NEXT:    movl %ebp, %esp
593; X86-AVX1OR2-NEXT:    popl %ebp
594; X86-AVX1OR2-NEXT:    retl
595;
596; X86-AVX512-LABEL: fcopysign_v16f32:
597; X86-AVX512:       # %bb.0:
598; X86-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm1, %zmm0
599; X86-AVX512-NEXT:    retl
600;
601; X64-SSE-LABEL: fcopysign_v16f32:
602; X64-SSE:       # %bb.0:
603; X64-SSE-NEXT:    movaps {{.*#+}} xmm8 = [NaN,NaN,NaN,NaN]
604; X64-SSE-NEXT:    movaps %xmm8, %xmm9
605; X64-SSE-NEXT:    andnps %xmm4, %xmm9
606; X64-SSE-NEXT:    andps %xmm8, %xmm0
607; X64-SSE-NEXT:    orps %xmm9, %xmm0
608; X64-SSE-NEXT:    movaps %xmm8, %xmm4
609; X64-SSE-NEXT:    andnps %xmm5, %xmm4
610; X64-SSE-NEXT:    andps %xmm8, %xmm1
611; X64-SSE-NEXT:    orps %xmm4, %xmm1
612; X64-SSE-NEXT:    movaps %xmm8, %xmm4
613; X64-SSE-NEXT:    andnps %xmm6, %xmm4
614; X64-SSE-NEXT:    andps %xmm8, %xmm2
615; X64-SSE-NEXT:    orps %xmm4, %xmm2
616; X64-SSE-NEXT:    andps %xmm8, %xmm3
617; X64-SSE-NEXT:    andnps %xmm7, %xmm8
618; X64-SSE-NEXT:    orps %xmm8, %xmm3
619; X64-SSE-NEXT:    retq
620;
621; X64-AVX1OR2-LABEL: fcopysign_v16f32:
622; X64-AVX1OR2:       # %bb.0:
623; X64-AVX1OR2-NEXT:    vbroadcastss {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
624; X64-AVX1OR2-NEXT:    vandnps %ymm2, %ymm4, %ymm2
625; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm0, %ymm0
626; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
627; X64-AVX1OR2-NEXT:    vandnps %ymm3, %ymm4, %ymm2
628; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm1, %ymm1
629; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
630; X64-AVX1OR2-NEXT:    retq
631;
632; X64-AVX512-LABEL: fcopysign_v16f32:
633; X64-AVX512:       # %bb.0:
634; X64-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
635; X64-AVX512-NEXT:    retq
636  %t = call <16 x float> @llvm.copysign.v16f32(<16 x float> %a0, <16 x float> %a1)
637  ret <16 x float> %t
638}
639declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
640
; NOTE(review): 512-bit v32f16 copysign with in-memory operands. SSE works
; in four 16-byte chunks (offsets 0/16/32/48), AVX1/2 in two 32-byte
; chunks, AVX512 in one zmm vpternlogd with a dword-broadcast mask.
641define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
642; X86-SSE-LABEL: fcopysign_v32f16:
643; X86-SSE:       # %bb.0:
644; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
645; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
646; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
647; X86-SSE-NEXT:    movaps %xmm3, %xmm1
648; X86-SSE-NEXT:    andnps (%ecx), %xmm1
649; X86-SSE-NEXT:    movaps (%eax), %xmm0
650; X86-SSE-NEXT:    andps %xmm3, %xmm0
651; X86-SSE-NEXT:    orps %xmm1, %xmm0
652; X86-SSE-NEXT:    movaps %xmm3, %xmm2
653; X86-SSE-NEXT:    andnps 16(%ecx), %xmm2
654; X86-SSE-NEXT:    movaps 16(%eax), %xmm1
655; X86-SSE-NEXT:    andps %xmm3, %xmm1
656; X86-SSE-NEXT:    orps %xmm2, %xmm1
657; X86-SSE-NEXT:    movaps %xmm3, %xmm4
658; X86-SSE-NEXT:    andnps 32(%ecx), %xmm4
659; X86-SSE-NEXT:    movaps 32(%eax), %xmm2
660; X86-SSE-NEXT:    andps %xmm3, %xmm2
661; X86-SSE-NEXT:    orps %xmm4, %xmm2
662; X86-SSE-NEXT:    movaps %xmm3, %xmm4
663; X86-SSE-NEXT:    andnps 48(%ecx), %xmm4
664; X86-SSE-NEXT:    andps 48(%eax), %xmm3
665; X86-SSE-NEXT:    orps %xmm4, %xmm3
666; X86-SSE-NEXT:    retl
667;
668; X86-AVX1-LABEL: fcopysign_v32f16:
669; X86-AVX1:       # %bb.0:
670; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
671; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
672; X86-AVX1-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
673; X86-AVX1-NEXT:    vandnps (%ecx), %ymm1, %ymm0
674; X86-AVX1-NEXT:    vandps (%eax), %ymm1, %ymm2
675; X86-AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
676; X86-AVX1-NEXT:    vandnps 32(%ecx), %ymm1, %ymm2
677; X86-AVX1-NEXT:    vandps 32(%eax), %ymm1, %ymm1
678; X86-AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
679; X86-AVX1-NEXT:    retl
680;
681; X86-AVX2-LABEL: fcopysign_v32f16:
682; X86-AVX2:       # %bb.0:
683; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
684; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
685; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
686; X86-AVX2-NEXT:    vpandn (%ecx), %ymm1, %ymm0
687; X86-AVX2-NEXT:    vpand (%eax), %ymm1, %ymm2
688; X86-AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
689; X86-AVX2-NEXT:    vpandn 32(%ecx), %ymm1, %ymm2
690; X86-AVX2-NEXT:    vpand 32(%eax), %ymm1, %ymm1
691; X86-AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
692; X86-AVX2-NEXT:    retl
693;
694; X86-AVX512-LABEL: fcopysign_v32f16:
695; X86-AVX512:       # %bb.0:
696; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
697; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
698; X86-AVX512-NEXT:    vmovdqu64 (%ecx), %zmm1
699; X86-AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
700; X86-AVX512-NEXT:    vpternlogd $202, (%eax), %zmm1, %zmm0
701; X86-AVX512-NEXT:    retl
702;
703; X64-SSE-LABEL: fcopysign_v32f16:
704; X64-SSE:       # %bb.0:
705; X64-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
706; X64-SSE-NEXT:    movaps %xmm3, %xmm1
707; X64-SSE-NEXT:    andnps (%rsi), %xmm1
708; X64-SSE-NEXT:    movaps (%rdi), %xmm0
709; X64-SSE-NEXT:    andps %xmm3, %xmm0
710; X64-SSE-NEXT:    orps %xmm1, %xmm0
711; X64-SSE-NEXT:    movaps %xmm3, %xmm2
712; X64-SSE-NEXT:    andnps 16(%rsi), %xmm2
713; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
714; X64-SSE-NEXT:    andps %xmm3, %xmm1
715; X64-SSE-NEXT:    orps %xmm2, %xmm1
716; X64-SSE-NEXT:    movaps %xmm3, %xmm4
717; X64-SSE-NEXT:    andnps 32(%rsi), %xmm4
718; X64-SSE-NEXT:    movaps 32(%rdi), %xmm2
719; X64-SSE-NEXT:    andps %xmm3, %xmm2
720; X64-SSE-NEXT:    orps %xmm4, %xmm2
721; X64-SSE-NEXT:    movaps %xmm3, %xmm4
722; X64-SSE-NEXT:    andnps 48(%rsi), %xmm4
723; X64-SSE-NEXT:    andps 48(%rdi), %xmm3
724; X64-SSE-NEXT:    orps %xmm4, %xmm3
725; X64-SSE-NEXT:    retq
726;
727; X64-AVX1-LABEL: fcopysign_v32f16:
728; X64-AVX1:       # %bb.0:
729; X64-AVX1-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
730; X64-AVX1-NEXT:    vandnps (%rsi), %ymm1, %ymm0
731; X64-AVX1-NEXT:    vandps (%rdi), %ymm1, %ymm2
732; X64-AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
733; X64-AVX1-NEXT:    vandnps 32(%rsi), %ymm1, %ymm2
734; X64-AVX1-NEXT:    vandps 32(%rdi), %ymm1, %ymm1
735; X64-AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
736; X64-AVX1-NEXT:    retq
737;
738; X64-AVX2-LABEL: fcopysign_v32f16:
739; X64-AVX2:       # %bb.0:
740; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
741; X64-AVX2-NEXT:    vpandn (%rsi), %ymm1, %ymm0
742; X64-AVX2-NEXT:    vpand (%rdi), %ymm1, %ymm2
743; X64-AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
744; X64-AVX2-NEXT:    vpandn 32(%rsi), %ymm1, %ymm2
745; X64-AVX2-NEXT:    vpand 32(%rdi), %ymm1, %ymm1
746; X64-AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
747; X64-AVX2-NEXT:    retq
748;
749; X64-AVX512-LABEL: fcopysign_v32f16:
750; X64-AVX512:       # %bb.0:
751; X64-AVX512-NEXT:    vmovdqu64 (%rdi), %zmm1
752; X64-AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
753; X64-AVX512-NEXT:    vpternlogd $202, (%rsi), %zmm1, %zmm0
754; X64-AVX512-NEXT:    retq
755  %a0 = load <32 x half>, ptr %p0, align 16
756  %a1 = load <32 x half>, ptr %p1, align 16
757  %t = call <32 x half> @llvm.copysign.v32f16(<32 x half> %a0, <32 x half> %a1)
758  ret <32 x half> %t
759}
760declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)
761;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
762; X64: {{.*}}
763; X64-AVX: {{.*}}
764; X64-AVX512FP16: {{.*}}
765; X64-AVX512VL: {{.*}}
766; X64-AVX512VLDQ: {{.*}}
767; X86: {{.*}}
768; X86-AVX: {{.*}}
769; X86-AVX512FP16: {{.*}}
770; X86-AVX512VL: {{.*}}
771; X86-AVX512VLDQ: {{.*}}
772