; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512FP16
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512FP16
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VLDQ

;
; 128-bit Vectors
;

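; In all configurations, fabs lowers to a bitwise AND that clears each
; element's sign bit. The mask is all-ones apart from the sign bits
; (0x7FF...F per element), which the constant-pool comments print as NaN.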
define <2 x double> @fabs_v2f64(<2 x double> %p) nounwind {
; X86-SSE-LABEL: fabs_v2f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1OR2-LABEL: fabs_v2f64:
; X86-AVX1OR2:       # %bb.0:
; X86-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1OR2-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v2f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512FP16-LABEL: fabs_v2f64:
; X86-AVX512FP16:       # %bb.0:
; X86-AVX512FP16-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512FP16-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v2f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1OR2-LABEL: fabs_v2f64:
; X64-AVX1OR2:       # %bb.0:
; X64-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v2f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512FP16-LABEL: fabs_v2f64:
; X64-AVX512FP16:       # %bb.0:
; X64-AVX512FP16-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512FP16-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v2f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

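; With AVX2 the f32 sign mask is materialized by broadcasting a single
; 4-byte constant (vbroadcastss) instead of loading a full 16-byte vector
; from the constant pool.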
define <4 x float> @fabs_v4f32(<4 x float> %p) nounwind {
; X86-SSE-LABEL: fabs_v4f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fabs_v4f32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fabs_v4f32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v4f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512FP16-LABEL: fabs_v4f32:
; X86-AVX512FP16:       # %bb.0:
; X86-AVX512FP16-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512FP16-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fabs_v4f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fabs_v4f32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v4f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512FP16-LABEL: fabs_v4f32:
; X64-AVX512FP16:       # %bb.0:
; X64-AVX512FP16-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512FP16-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

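; Half vectors use the same mask at 16-bit granularity: SSE/AVX1 AND against
; a constant-pool vector, while AVX2/AVX512 broadcast the 0x7FFF pattern
; (printed as NaN) and apply an integer vpand.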
define <8 x half> @fabs_v8f16(ptr %p) nounwind {
; X86-SSE-LABEL: fabs_v8f16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movaps (%eax), %xmm0
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fabs_v8f16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fabs_v8f16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vpand (%eax), %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fabs_v8f16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512-NEXT:    vpand (%eax), %xmm0, %xmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v8f16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fabs_v8f16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fabs_v8f16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vpand (%rdi), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fabs_v8f16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512-NEXT:    vpand (%rdi), %xmm0, %xmm0
; X64-AVX512-NEXT:    retq
  %v = load <8 x half>, ptr %p, align 16
  %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
  ret <8 x half> %nnv
}
declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)

;
; 256-bit Vectors
;

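; With only SSE, 256-bit fabs is split into two 128-bit ANDs that share one
; mask register; AVX and AVX512 handle the full ymm width in a single
; instruction.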
define <4 x double> @fabs_v4f64(<4 x double> %p) nounwind {
; X86-SSE-LABEL: fabs_v4f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [NaN,NaN]
; X86-SSE-NEXT:    andps %xmm2, %xmm0
; X86-SSE-NEXT:    andps %xmm2, %xmm1
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fabs_v4f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fabs_v4f64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v4f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512FP16-LABEL: fabs_v4f64:
; X86-AVX512FP16:       # %bb.0:
; X86-AVX512FP16-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512FP16-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v4f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm2 = [NaN,NaN]
; X64-SSE-NEXT:    andps %xmm2, %xmm0
; X64-SSE-NEXT:    andps %xmm2, %xmm1
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fabs_v4f64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fabs_v4f64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v4f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512FP16-LABEL: fabs_v4f64:
; X64-AVX512FP16:       # %bb.0:
; X64-AVX512FP16-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512FP16-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

define <8 x float> @fabs_v8f32(<8 x float> %p) nounwind {
; X86-SSE-LABEL: fabs_v8f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    andps %xmm2, %xmm0
; X86-SSE-NEXT:    andps %xmm2, %xmm1
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fabs_v8f32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fabs_v8f32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v8f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512FP16-LABEL: fabs_v8f32:
; X86-AVX512FP16:       # %bb.0:
; X86-AVX512FP16-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512FP16-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v8f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    andps %xmm2, %xmm0
; X64-SSE-NEXT:    andps %xmm2, %xmm1
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fabs_v8f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fabs_v8f32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v8f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512FP16-LABEL: fabs_v8f32:
; X64-AVX512FP16:       # %bb.0:
; X64-AVX512FP16-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512FP16-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

define <16 x half> @fabs_v16f16(ptr %p) nounwind {
; X86-SSE-LABEL: fabs_v16f16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    movaps (%eax), %xmm0
; X86-SSE-NEXT:    andps %xmm1, %xmm0
; X86-SSE-NEXT:    andps 16(%eax), %xmm1
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fabs_v16f16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT:    vmovaps (%eax), %ymm0
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fabs_v16f16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vpand (%eax), %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fabs_v16f16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512-NEXT:    vpand (%eax), %ymm0, %ymm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v16f16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    movaps (%rdi), %xmm0
; X64-SSE-NEXT:    andps %xmm1, %xmm0
; X64-SSE-NEXT:    andps 16(%rdi), %xmm1
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fabs_v16f16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %ymm0
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fabs_v16f16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vpand (%rdi), %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fabs_v16f16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512-NEXT:    vpand (%rdi), %ymm0, %ymm0
; X64-AVX512-NEXT:    retq
  %v = load <16 x half>, ptr %p, align 32
  %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
  ret <16 x half> %nnv
}
declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p)

;
; 512-bit Vectors
;

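; At 512 bits, pre-AVX512 targets split the operation: two ymm ANDs for AVX,
; four xmm ANDs for SSE. In the i686 SSE case the checks show the first three
; 128-bit pieces arriving in %xmm0-%xmm2 and the fourth being ANDed straight
; from its aligned stack slot at 8(%ebp), hence the frame realignment.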
define <8 x double> @fabs_v8f64(<8 x double> %p) nounwind {
; X86-SSE-LABEL: fabs_v8f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-16, %esp
; X86-SSE-NEXT:    subl $16, %esp
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN]
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    andps %xmm3, %xmm2
; X86-SSE-NEXT:    andps 8(%ebp), %xmm3
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X86-AVX1OR2-LABEL: fabs_v8f64:
; X86-AVX1OR2:       # %bb.0:
; X86-AVX1OR2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX1OR2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX1OR2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX1OR2-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v8f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512FP16-LABEL: fabs_v8f64:
; X86-AVX512FP16:       # %bb.0:
; X86-AVX512FP16-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512FP16-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v8f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN]
; X64-SSE-NEXT:    andps %xmm4, %xmm0
; X64-SSE-NEXT:    andps %xmm4, %xmm1
; X64-SSE-NEXT:    andps %xmm4, %xmm2
; X64-SSE-NEXT:    andps %xmm4, %xmm3
; X64-SSE-NEXT:    retq
;
; X64-AVX1OR2-LABEL: fabs_v8f64:
; X64-AVX1OR2:       # %bb.0:
; X64-AVX1OR2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX1OR2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX1OR2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX1OR2-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v8f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512FP16-LABEL: fabs_v8f64:
; X64-AVX512FP16:       # %bb.0:
; X64-AVX512FP16-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512FP16-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p) nounwind {
; X86-SSE-LABEL: fabs_v16f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-16, %esp
; X86-SSE-NEXT:    subl $16, %esp
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    andps %xmm3, %xmm2
; X86-SSE-NEXT:    andps 8(%ebp), %xmm3
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X86-AVX1OR2-LABEL: fabs_v16f32:
; X86-AVX1OR2:       # %bb.0:
; X86-AVX1OR2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX1OR2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX1OR2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX1OR2-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v16f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512FP16-LABEL: fabs_v16f32:
; X86-AVX512FP16:       # %bb.0:
; X86-AVX512FP16-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512FP16-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v16f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v16f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    andps %xmm4, %xmm0
; X64-SSE-NEXT:    andps %xmm4, %xmm1
; X64-SSE-NEXT:    andps %xmm4, %xmm2
; X64-SSE-NEXT:    andps %xmm4, %xmm3
; X64-SSE-NEXT:    retq
;
; X64-AVX1OR2-LABEL: fabs_v16f32:
; X64-AVX1OR2:       # %bb.0:
; X64-AVX1OR2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX1OR2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX1OR2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX1OR2-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v16f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512FP16-LABEL: fabs_v16f32:
; X64-AVX512FP16:       # %bb.0:
; X64-AVX512FP16-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512FP16-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v16f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

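; For v32f16, AVX512VL/VLDQ build the zmm mask by broadcasting into a ymm and
; widening with vinserti64x4, presumably because a 16-bit broadcast to zmm
; requires AVX512BW; AVX512FP16 implies BW and broadcasts directly to zmm.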
define <32 x half> @fabs_v32f16(ptr %p) nounwind {
; X86-SSE-LABEL: fabs_v32f16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    movaps (%eax), %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    movaps 16(%eax), %xmm1
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    movaps 32(%eax), %xmm2
; X86-SSE-NEXT:    andps %xmm3, %xmm2
; X86-SSE-NEXT:    andps 48(%eax), %xmm3
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fabs_v32f16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX1-NEXT:    vandps (%eax), %ymm1, %ymm0
; X86-AVX1-NEXT:    vandps 32(%eax), %ymm1, %ymm1
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fabs_v32f16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vpand (%eax), %ymm1, %ymm0
; X86-AVX2-NEXT:    vpand 32(%eax), %ymm1, %ymm1
; X86-AVX2-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v32f16:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512VL-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512VL-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VL-NEXT:    vpandq (%eax), %zmm0, %zmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512FP16-LABEL: fabs_v32f16:
; X86-AVX512FP16:       # %bb.0:
; X86-AVX512FP16-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512FP16-NEXT:    vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512FP16-NEXT:    vpandq (%eax), %zmm0, %zmm0
; X86-AVX512FP16-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v32f16:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512VLDQ-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX512VLDQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    vpandq (%eax), %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-SSE-LABEL: fabs_v32f16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    movaps (%rdi), %xmm0
; X64-SSE-NEXT:    andps %xmm3, %xmm0
; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
; X64-SSE-NEXT:    andps %xmm3, %xmm1
; X64-SSE-NEXT:    movaps 32(%rdi), %xmm2
; X64-SSE-NEXT:    andps %xmm3, %xmm2
; X64-SSE-NEXT:    andps 48(%rdi), %xmm3
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fabs_v32f16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX1-NEXT:    vandps (%rdi), %ymm1, %ymm0
; X64-AVX1-NEXT:    vandps 32(%rdi), %ymm1, %ymm1
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fabs_v32f16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vpand (%rdi), %ymm1, %ymm0
; X64-AVX2-NEXT:    vpand 32(%rdi), %ymm1, %ymm1
; X64-AVX2-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v32f16:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512VL-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512VL-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512FP16-LABEL: fabs_v32f16:
; X64-AVX512FP16:       # %bb.0:
; X64-AVX512FP16-NEXT:    vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512FP16-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; X64-AVX512FP16-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v32f16:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX512VLDQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    retq
  %v = load <32 x half>, ptr %p, align 64
  %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
  ret <32 x half> %nnv
}
declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
;    movabsq (constant pool load of mask for sign bits)
;    vmovq   (move from integer register to vector/fp register)
;    vandps  (mask off sign bits)
;    vmovq   (move vector/fp register back to integer return register)
;
; We should generate:
;    mov     (put constant value in return register)

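; Worked example for fabs_v2f32_1: 0xFFFFFFFF00000000 viewed as <2 x float>
; is <0x00000000, 0xFFFFFFFF>; clearing each element's sign bit gives
; <0x00000000, 0x7FFFFFFF>, i.e. the folded constant 0x7FFFFFFF00000000 below.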
define i64 @fabs_v2f32_1() nounwind {
; X86-LABEL: fabs_v2f32_1:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_1:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT:    retq
 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

define i64 @fabs_v2f32_2() nounwind {
; X86-LABEL: fabs_v2f32_2:
; X86:       # %bb.0:
; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_2:
; X64:       # %bb.0:
; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)

; PR70947 - remove duplicate xmm/ymm constant loads
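; The AVX codegen loads the sign mask once: the broadcast ymm0 feeds the
; 256-bit AND directly and its low xmm half feeds the trailing 128-bit AND,
; avoiding a second constant-pool load.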
define void @PR70947(ptr %src, ptr %dst) nounwind {
; X86-SSE-LABEL: PR70947:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movups (%ecx), %xmm0
; X86-SSE-NEXT:    movups 32(%ecx), %xmm1
; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [NaN,NaN]
; X86-SSE-NEXT:    andps %xmm2, %xmm0
; X86-SSE-NEXT:    andps %xmm2, %xmm1
; X86-SSE-NEXT:    movups %xmm0, (%eax)
; X86-SSE-NEXT:    movups %xmm1, 16(%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: PR70947:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
; X86-AVX-NEXT:    vandps (%ecx), %ymm0, %ymm1
; X86-AVX-NEXT:    vandps 32(%ecx), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovups %ymm1, (%eax)
; X86-AVX-NEXT:    vmovups %xmm0, 16(%eax)
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: PR70947:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rdi), %xmm0
; X64-SSE-NEXT:    movups 32(%rdi), %xmm1
; X64-SSE-NEXT:    movaps {{.*#+}} xmm2 = [NaN,NaN]
; X64-SSE-NEXT:    andps %xmm2, %xmm0
; X64-SSE-NEXT:    andps %xmm2, %xmm1
; X64-SSE-NEXT:    movups %xmm0, (%rsi)
; X64-SSE-NEXT:    movups %xmm1, 16(%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: PR70947:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
; X64-AVX-NEXT:    vandps (%rdi), %ymm0, %ymm1
; X64-AVX-NEXT:    vandps 32(%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovups %ymm1, (%rsi)
; X64-AVX-NEXT:    vmovups %xmm0, 16(%rsi)
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %src4 = getelementptr inbounds double, ptr %src, i64 4
  %dst4 = getelementptr inbounds i32, ptr %dst, i64 4
  %ld0 = load <4 x double>, ptr %src, align 8
  %ld4 = load <2 x double>, ptr %src4, align 8
  %fabs0 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %ld0)
  %fabs4 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %ld4)
  store <4 x double> %fabs0, ptr %dst, align 4
  store <2 x double> %fabs4, ptr %dst4, align 4
  ret void
}
