xref: /llvm-project/llvm/test/CodeGen/X86/avx512vl-arith.ll (revision 24194090e17b599522a080d502ab0f68125d53dd)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s
4
5; 256-bit
6
; Register-register add of two <4 x i64> vectors.
; Expected output: a single vpaddq on ymm registers, compressed from EVEX to VEX.
7define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
8; CHECK-LABEL: vpaddq256_test:
9; CHECK:       ## %bb.0:
10; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
11; CHECK-NEXT:    retq ## encoding: [0xc3]
12  %x = add <4 x i64> %i, %j
13  ret <4 x i64> %x
14}
15
; Add where the second operand is a full <4 x i64> vector loaded from %j (align 4).
; Expected output: the load is folded into vpaddq as a (%rdi) memory operand and the
; instruction is compressed from EVEX to VEX.
16define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, ptr %j) nounwind {
17; CHECK-LABEL: vpaddq256_fold_test:
18; CHECK:       ## %bb.0:
19; CHECK-NEXT:    vpaddq (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0x07]
20; CHECK-NEXT:    retq ## encoding: [0xc3]
21  %tmp = load <4 x i64>, ptr %j, align 4
22  %x = add <4 x i64> %i, %tmp
23  ret <4 x i64> %x
24}
25
; Add of a splat constant (i64 2 in every lane).
; Expected output: the constant is read from a constant-pool label through a
; RIP-relative {1to4} embedded broadcast; no EVEX-to-VEX compression is expected here.
26define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
27; CHECK-LABEL: vpaddq256_broadcast_test:
28; CHECK:       ## %bb.0:
29; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x05,A,A,A,A]
30; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
31; CHECK-NEXT:    retq ## encoding: [0xc3]
32  %x = add <4 x i64> %i, <i64 2, i64 2, i64 2, i64 2>
33  ret <4 x i64> %x
34}
35
; Add of a scalar i64 loaded from %j.ptr and splatted to all four lanes with
; insertelement + shufflevector (zeroinitializer mask).
; Expected output: the scalar load is folded as a (%rdi){1to4} embedded broadcast.
36define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, ptr %j.ptr) nounwind {
37; CHECK-LABEL: vpaddq256_broadcast2_test:
38; CHECK:       ## %bb.0:
39; CHECK-NEXT:    vpaddq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x07]
40; CHECK-NEXT:    retq ## encoding: [0xc3]
41  %j = load i64, ptr %j.ptr
42  %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
43  %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
44  %x = add <4 x i64> %i, %j.v
45  ret <4 x i64> %x
46}
47
; Register-register add of two <8 x i32> vectors.
; Expected output: a single vpaddd on ymm registers, compressed from EVEX to VEX.
48define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
49; CHECK-LABEL: vpaddd256_test:
50; CHECK:       ## %bb.0:
51; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
52; CHECK-NEXT:    retq ## encoding: [0xc3]
53  %x = add <8 x i32> %i, %j
54  ret <8 x i32> %x
55}
56
; Add where the second operand is a full <8 x i32> vector loaded from %j (align 4).
; Expected output: the load is folded into vpaddd as a (%rdi) memory operand and the
; instruction is compressed from EVEX to VEX.
57define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, ptr %j) nounwind {
58; CHECK-LABEL: vpaddd256_fold_test:
59; CHECK:       ## %bb.0:
60; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
61; CHECK-NEXT:    retq ## encoding: [0xc3]
62  %tmp = load <8 x i32>, ptr %j, align 4
63  %x = add <8 x i32> %i, %tmp
64  ret <8 x i32> %x
65}
66
; Add of a splat constant (i32 3 in every lane).
; Expected output: the constant is read from a constant-pool label through a
; RIP-relative {1to8} embedded broadcast; no EVEX-to-VEX compression is expected here.
67define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
68; CHECK-LABEL: vpaddd256_broadcast_test:
69; CHECK:       ## %bb.0:
70; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
71; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
72; CHECK-NEXT:    retq ## encoding: [0xc3]
73  %x = add <8 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
74  ret <8 x i32> %x
75}
76
; Merge-masked add: lanes where %mask1 != 0 take %i + %j, the other lanes keep %i.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd writes ymm0
; under {%k1}.
77define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
78; CHECK-LABEL: vpaddd256_mask_test:
79; CHECK:       ## %bb.0:
80; CHECK-NEXT:    vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
81; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xc1]
82; CHECK-NEXT:    retq ## encoding: [0xc3]
83  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
84  %x = add <8 x i32> %i, %j
85  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
86  ret <8 x i32> %r
87}
88
; Zero-masked add: lanes where %mask1 != 0 take %i + %j, the other lanes are zero.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd uses {%k1} {z}.
89define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
90; CHECK-LABEL: vpaddd256_maskz_test:
91; CHECK:       ## %bb.0:
92; CHECK-NEXT:    vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
93; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
94; CHECK-NEXT:    retq ## encoding: [0xc3]
95  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
96  %x = add <8 x i32> %i, %j
97  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
98  ret <8 x i32> %r
99}
100
; Merge-masked add whose second operand is a <8 x i32> vector loaded from %j.ptr;
; lanes where %mask1 == 0 keep %i.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd takes a folded
; (%rdi) memory operand under {%k1}.
; The function loads through %j.ptr, so it must not be marked readnone.
101define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, ptr %j.ptr, <8 x i32> %mask1) nounwind {
102; CHECK-LABEL: vpaddd256_mask_fold_test:
103; CHECK:       ## %bb.0:
104; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
105; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x07]
106; CHECK-NEXT:    retq ## encoding: [0xc3]
107  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
108  %j = load <8 x i32>, ptr %j.ptr
109  %x = add <8 x i32> %i, %j
110  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
111  ret <8 x i32> %r
112}
113
; Merge-masked add of a splat constant (i32 4 in every lane); lanes where
; %mask1 == 0 keep %i.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd uses a
; RIP-relative {1to8} embedded broadcast from the constant pool under {%k1}.
114define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
115; CHECK-LABEL: vpaddd256_mask_broadcast_test:
116; CHECK:       ## %bb.0:
117; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
118; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x05,A,A,A,A]
119; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
120; CHECK-NEXT:    retq ## encoding: [0xc3]
121  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
122  %x = add <8 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
123  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
124  ret <8 x i32> %r
125}
126
; Zero-masked add whose second operand is a <8 x i32> vector loaded from %j.ptr;
; lanes where %mask1 == 0 are zero.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd takes a folded
; (%rdi) memory operand with {%k1} {z}.
; The function loads through %j.ptr, so it must not be marked readnone.
127define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, ptr %j.ptr, <8 x i32> %mask1) nounwind {
128; CHECK-LABEL: vpaddd256_maskz_fold_test:
129; CHECK:       ## %bb.0:
130; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
131; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
132; CHECK-NEXT:    retq ## encoding: [0xc3]
133  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
134  %j = load <8 x i32>, ptr %j.ptr
135  %x = add <8 x i32> %i, %j
136  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
137  ret <8 x i32> %r
138}
139
; Zero-masked add of a splat constant (i32 5 in every lane); lanes where
; %mask1 == 0 are zero.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd uses a
; RIP-relative {1to8} embedded broadcast from the constant pool with {%k1} {z}.
140define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
141; CHECK-LABEL: vpaddd256_maskz_broadcast_test:
142; CHECK:       ## %bb.0:
143; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
144; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x05,A,A,A,A]
145; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
146; CHECK-NEXT:    retq ## encoding: [0xc3]
147  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
148  %x = add <8 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
149  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
150  ret <8 x i32> %r
151}
152
; Register-register subtract (%i - %j) of two <4 x i64> vectors.
; Expected output: a single vpsubq on ymm registers, compressed from EVEX to VEX.
153define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
154; CHECK-LABEL: vpsubq256_test:
155; CHECK:       ## %bb.0:
156; CHECK-NEXT:    vpsubq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfb,0xc1]
157; CHECK-NEXT:    retq ## encoding: [0xc3]
158  %x = sub <4 x i64> %i, %j
159  ret <4 x i64> %x
160}
161
; Register-register subtract (%i - %j) of two <8 x i32> vectors.
; Expected output: a single vpsubd on ymm registers, compressed from EVEX to VEX.
162define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
163; CHECK-LABEL: vpsubd256_test:
164; CHECK:       ## %bb.0:
165; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
166; CHECK-NEXT:    retq ## encoding: [0xc3]
167  %x = sub <8 x i32> %i, %j
168  ret <8 x i32> %x
169}
170
; Register-register multiply of two <8 x i32> vectors.
; Expected output: a single vpmulld on ymm registers, compressed from EVEX to VEX.
171define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
172; CHECK-LABEL: vpmulld256_test:
173; CHECK:       ## %bb.0:
174; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x40,0xc1]
175; CHECK-NEXT:    retq ## encoding: [0xc3]
176  %x = mul <8 x i32> %i, %j
177  ret <8 x i32> %x
178}
179
; Register-register fadd of two <4 x double> vectors (the IR computes %x + %y).
; Expected output: a single vaddpd on ymm registers, compressed from EVEX to VEX.
180define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
181; CHECK-LABEL: test_vaddpd_256:
182; CHECK:       ## %bb.0: ## %entry
183; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
184; CHECK-NEXT:    retq ## encoding: [0xc3]
185entry:
186  %add.i = fadd <4 x double> %x, %y
187  ret <4 x double> %add.i
188}
189
; fadd of %y with a non-splat <4 x double> constant.
; Expected output: the whole constant vector is folded as a RIP-relative
; constant-pool memory operand (no broadcast), and the vaddpd is compressed
; from EVEX to VEX.
190define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
191; CHECK-LABEL: test_fold_vaddpd_256:
192; CHECK:       ## %bb.0: ## %entry
193; CHECK-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0x05,A,A,A,A]
194; CHECK-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
195; CHECK-NEXT:    retq ## encoding: [0xc3]
196entry:
197  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
198  ret <4 x double> %add.i
199}
200
; fadd of %a with a splat float constant (0x3FB99999A0000000 in every lane).
; Expected output: vaddps with a RIP-relative {1to8} embedded broadcast from the
; constant pool.
; NOTE(review): the function name says vaddpd, but the IR type is <8 x float> and
; the expected instruction is vaddps.
201define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
202; CHECK-LABEL: test_broadcast_vaddpd_256:
203; CHECK:       ## %bb.0:
204; CHECK-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0x05,A,A,A,A]
205; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
206; CHECK-NEXT:    retq ## encoding: [0xc3]
207  %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
208  ret <8 x float> %b
209}
210
; Merge-masked fadd: lanes where %mask1 != 0 take %i + %j, the other lanes keep %dst.
; Expected output: vptestmd builds the lane mask in %k1, then vaddps writes ymm0
; under {%k1}.
211define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
212; CHECK-LABEL: test_mask_vaddps_256:
213; CHECK:       ## %bb.0:
214; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
215; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x58,0xc2]
216; CHECK-NEXT:    retq ## encoding: [0xc3]
217  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
218  %x = fadd <8 x float> %i, %j
219  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
220  ret <8 x float> %r
221}
222
; Merge-masked fmul: lanes where %mask1 != 0 take %i * %j, the other lanes keep %dst.
; Expected output: vptestmd builds the lane mask in %k1, then vmulps writes ymm0
; under {%k1}.
223define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
224; CHECK-LABEL: test_mask_vmulps_256:
225; CHECK:       ## %bb.0:
226; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
227; CHECK-NEXT:    vmulps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x59,0xc2]
228; CHECK-NEXT:    retq ## encoding: [0xc3]
229  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
230  %x = fmul <8 x float> %i, %j
231  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
232  ret <8 x float> %r
233}
234
; Merge-masked minimum written as fcmp olt + select, with %dst kept in lanes where
; %mask1 == 0.
; Expected output: vptestmd builds the lane mask in %k1, and the compare/select
; pair becomes a single vminps under {%k1}.
235define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1)nounwind readnone {
236; CHECK-LABEL: test_mask_vminps_256:
237; CHECK:       ## %bb.0:
238; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
239; CHECK-NEXT:    vminps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5d,0xc2]
240; CHECK-NEXT:    retq ## encoding: [0xc3]
241  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
242  %cmp_res = fcmp olt <8 x float> %i, %j
243  %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
244  %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
245  ret <8 x float> %r
246}
247
; Merge-masked maximum written as fcmp ogt + select, with %dst kept in lanes where
; %mask1 == 0.
; Expected output: vptestmd builds the lane mask in %k1, and the compare/select
; pair becomes a single vmaxps under {%k1}.
248define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
249; CHECK-LABEL: test_mask_vmaxps_256:
250; CHECK:       ## %bb.0:
251; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
252; CHECK-NEXT:    vmaxps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5f,0xc2]
253; CHECK-NEXT:    retq ## encoding: [0xc3]
254  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
255  %cmp_res = fcmp ogt <8 x float> %i, %j
256  %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
257  %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
258  ret <8 x float> %r
259}
260
; Merge-masked fsub: lanes where %mask1 != 0 take %i - %j, the other lanes keep %dst.
; Expected output: vptestmd builds the lane mask in %k1, then vsubps writes ymm0
; under {%k1}.
261define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
262; CHECK-LABEL: test_mask_vsubps_256:
263; CHECK:       ## %bb.0:
264; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
265; CHECK-NEXT:    vsubps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5c,0xc2]
266; CHECK-NEXT:    retq ## encoding: [0xc3]
267  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
268  %x = fsub <8 x float> %i, %j
269  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
270  ret <8 x float> %r
271}
272
; Merge-masked fdiv: lanes where %mask1 != 0 take %i / %j, the other lanes keep %dst.
; Expected output: vptestmd builds the lane mask in %k1, then vdivps writes ymm0
; under {%k1}.
273define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
274; CHECK-LABEL: test_mask_vdivps_256:
275; CHECK:       ## %bb.0:
276; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
277; CHECK-NEXT:    vdivps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5e,0xc2]
278; CHECK-NEXT:    retq ## encoding: [0xc3]
279  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
280  %x = fdiv <8 x float> %i, %j
281  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
282  ret <8 x float> %r
283}
284
; Merge-masked fmul on <4 x double>: lanes where %mask1 != 0 take %i * %j, the
; other lanes keep %dst.
; Expected output: vptestmq builds the lane mask in %k1, then vmulpd writes ymm0
; under {%k1}.
285define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
286; CHECK-LABEL: test_mask_vmulpd_256:
287; CHECK:       ## %bb.0:
288; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
289; CHECK-NEXT:    vmulpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x59,0xc2]
290; CHECK-NEXT:    retq ## encoding: [0xc3]
291  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
292  %x = fmul <4 x double> %i, %j
293  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
294  ret <4 x double> %r
295}
296
; Merge-masked minimum on <4 x double>, written as fcmp olt + select, with %dst
; kept in lanes where %mask1 == 0.
; Expected output: vptestmq builds the lane mask in %k1, and the compare/select
; pair becomes a single vminpd under {%k1}.
297define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
298; CHECK-LABEL: test_mask_vminpd_256:
299; CHECK:       ## %bb.0:
300; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
301; CHECK-NEXT:    vminpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5d,0xc2]
302; CHECK-NEXT:    retq ## encoding: [0xc3]
303  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
304  %cmp_res = fcmp olt <4 x double> %i, %j
305  %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
306  %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
307  ret <4 x double> %r
308}
309
; Merge-masked maximum on <4 x double>, written as fcmp ogt + select, with %dst
; kept in lanes where %mask1 == 0.
; Expected output: vptestmq builds the lane mask in %k1, and the compare/select
; pair becomes a single vmaxpd under {%k1}.
310define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
311; CHECK-LABEL: test_mask_vmaxpd_256:
312; CHECK:       ## %bb.0:
313; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
314; CHECK-NEXT:    vmaxpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5f,0xc2]
315; CHECK-NEXT:    retq ## encoding: [0xc3]
316  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
317  %cmp_res = fcmp ogt <4 x double> %i, %j
318  %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
319  %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
320  ret <4 x double> %r
321}
322
; Merge-masked fsub on <4 x double>: lanes where %mask1 != 0 take %i - %j, the
; other lanes keep %dst.
; Expected output: vptestmq builds the lane mask in %k1, then vsubpd writes ymm0
; under {%k1}.
323define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
324; CHECK-LABEL: test_mask_vsubpd_256:
325; CHECK:       ## %bb.0:
326; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
327; CHECK-NEXT:    vsubpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5c,0xc2]
328; CHECK-NEXT:    retq ## encoding: [0xc3]
329  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
330  %x = fsub <4 x double> %i, %j
331  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
332  ret <4 x double> %r
333}
334
; Merge-masked fdiv on <4 x double>: lanes where %mask1 != 0 take %i / %j, the
; other lanes keep %dst.
; Expected output: vptestmq builds the lane mask in %k1, then vdivpd writes ymm0
; under {%k1}.
335define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
336; CHECK-LABEL: test_mask_vdivpd_256:
337; CHECK:       ## %bb.0:
338; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
339; CHECK-NEXT:    vdivpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5e,0xc2]
340; CHECK-NEXT:    retq ## encoding: [0xc3]
341  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
342  %x = fdiv <4 x double> %i, %j
343  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
344  ret <4 x double> %r
345}
346
; Merge-masked fadd on <4 x double>: lanes where %mask1 != 0 take %i + %j, the
; other lanes keep %dst.
; Expected output: vptestmq builds the lane mask in %k1, then vaddpd writes ymm0
; under {%k1}.
347define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
348; CHECK-LABEL: test_mask_vaddpd_256:
349; CHECK:       ## %bb.0:
350; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
351; CHECK-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0xc2]
352; CHECK-NEXT:    retq ## encoding: [0xc3]
353  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
354  %x = fadd <4 x double> %i, %j
355  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
356  ret <4 x double> %r
357}
358
; Zero-masked fadd on <4 x double>: lanes where %mask1 != 0 take %i + %j, the
; other lanes are zero.
; Expected output: vptestmq builds the lane mask in %k1, then vaddpd uses {%k1} {z}.
359define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
360; CHECK-LABEL: test_maskz_vaddpd_256:
361; CHECK:       ## %bb.0:
362; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
363; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0xc1]
364; CHECK-NEXT:    retq ## encoding: [0xc3]
365  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
366  %x = fadd <4 x double> %i, %j
367  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
368  ret <4 x double> %r
369}
370
; Merge-masked fadd whose second operand is a <4 x double> vector loaded from %j;
; lanes where %mask1 == 0 keep %dst.
; Expected output: vptestmq builds the lane mask in %k1, then vaddpd takes a folded
; (%rdi) memory operand and writes ymm0 under {%k1}.
371define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i, ptr %j,  <4 x i64> %mask1) nounwind {
372; CHECK-LABEL: test_mask_fold_vaddpd_256:
373; CHECK:       ## %bb.0:
374; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
375; CHECK-NEXT:    vaddpd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0x07]
376; CHECK-NEXT:    retq ## encoding: [0xc3]
377  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
378  %tmp = load <4 x double>, ptr %j
379  %x = fadd <4 x double> %i, %tmp
380  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
381  ret <4 x double> %r
382}
383
; Zero-masked fadd whose second operand is a <4 x double> vector loaded from %j;
; lanes where %mask1 == 0 are zero.
; Expected output: vptestmq builds the lane mask in %k1, then vaddpd takes a folded
; (%rdi) memory operand with {%k1} {z}.
384define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, ptr %j, <4 x i64> %mask1) nounwind {
385; CHECK-LABEL: test_maskz_fold_vaddpd_256:
386; CHECK:       ## %bb.0:
387; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
388; CHECK-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0x07]
389; CHECK-NEXT:    retq ## encoding: [0xc3]
390  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
391  %tmp = load <4 x double>, ptr %j
392  %x = fadd <4 x double> %i, %tmp
393  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
394  ret <4 x double> %r
395}
396
; fadd of %i with a scalar double loaded from %j and splatted to all four lanes
; with insertelement + shufflevector (zeroinitializer mask).
; Expected output: the scalar load is folded as a (%rdi){1to4} embedded broadcast.
397define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, ptr %j) nounwind {
398; CHECK-LABEL: test_broadcast2_vaddpd_256:
399; CHECK:       ## %bb.0:
400; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0x58,0x07]
401; CHECK-NEXT:    retq ## encoding: [0xc3]
402  %tmp = load double, ptr %j
403  %b = insertelement <4 x double> undef, double %tmp, i32 0
404  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
405  %x = fadd <4 x double> %c, %i
406  ret <4 x double> %x
407}
408
; Merge-masked fadd of %i with a scalar double loaded from %j and splatted to all
; four lanes; lanes where %mask1 == 0 keep %i.
; Expected output: ymm1 (%i) is first copied into ymm0 with vmovapd, vptestmq builds
; the lane mask in %k1, then vaddpd uses a (%rdi){1to4} embedded broadcast under {%k1}.
; NOTE(review): %dst is never used; the select passes %i through instead, which is
; why the extra vmovapd copy is expected. Confirm whether %dst was intended as the
; passthrough before changing this (the assertions would need regenerating).
409define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, ptr %j, <4 x i64> %mask1) nounwind {
410; CHECK-LABEL: test_mask_broadcast_vaddpd_256:
411; CHECK:       ## %bb.0:
412; CHECK-NEXT:    vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
413; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
414; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x07]
415; CHECK-NEXT:    retq ## encoding: [0xc3]
416  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
417  %tmp = load double, ptr %j
418  %b = insertelement <4 x double> undef, double %tmp, i32 0
419  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
420  %x = fadd <4 x double> %c, %i
421  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
422  ret <4 x double> %r
423}
424
; Zero-masked fadd of %i with a scalar double loaded from %j and splatted to all
; four lanes; lanes where %mask1 == 0 are zero.
; Expected output: vptestmq builds the lane mask in %k1, then vaddpd uses a
; (%rdi){1to4} embedded broadcast with {%k1} {z}.
425define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, ptr %j, <4 x i64> %mask1) nounwind {
426; CHECK-LABEL: test_maskz_broadcast_vaddpd_256:
427; CHECK:       ## %bb.0:
428; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
429; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x58,0x07]
430; CHECK-NEXT:    retq ## encoding: [0xc3]
431  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
432  %tmp = load double, ptr %j
433  %b = insertelement <4 x double> undef, double %tmp, i32 0
434  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
435  %x = fadd <4 x double> %c, %i
436  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
437  ret <4 x double> %r
438}
439
440; 128-bit
441
; Register-register add of two <2 x i64> vectors.
; Expected output: a single vpaddq on xmm registers, compressed from EVEX to VEX.
442define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
443; CHECK-LABEL: vpaddq128_test:
444; CHECK:       ## %bb.0:
445; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
446; CHECK-NEXT:    retq ## encoding: [0xc3]
447  %x = add <2 x i64> %i, %j
448  ret <2 x i64> %x
449}
450
; Add where the second operand is a full <2 x i64> vector loaded from %j (align 4).
; Expected output: the load is folded into vpaddq as a (%rdi) memory operand and the
; instruction is compressed from EVEX to VEX.
451define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, ptr %j) nounwind {
452; CHECK-LABEL: vpaddq128_fold_test:
453; CHECK:       ## %bb.0:
454; CHECK-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0x07]
455; CHECK-NEXT:    retq ## encoding: [0xc3]
456  %tmp = load <2 x i64>, ptr %j, align 4
457  %x = add <2 x i64> %i, %tmp
458  ret <2 x i64> %x
459}
460
; Add of a scalar i64 loaded from %j and placed in both lanes with two
; insertelement instructions.
; Expected output: the scalar load is folded as a (%rdi){1to2} embedded broadcast.
461define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, ptr %j) nounwind {
462; CHECK-LABEL: vpaddq128_broadcast2_test:
463; CHECK:       ## %bb.0:
464; CHECK-NEXT:    vpaddq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xd4,0x07]
465; CHECK-NEXT:    retq ## encoding: [0xc3]
466  %tmp = load i64, ptr %j
467  %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
468  %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
469  %x = add <2 x i64> %i, %j.1
470  ret <2 x i64> %x
471}
472
; Register-register add of two <4 x i32> vectors.
; Expected output: a single vpaddd on xmm registers, compressed from EVEX to VEX.
473define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
474; CHECK-LABEL: vpaddd128_test:
475; CHECK:       ## %bb.0:
476; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
477; CHECK-NEXT:    retq ## encoding: [0xc3]
478  %x = add <4 x i32> %i, %j
479  ret <4 x i32> %x
480}
481
; Add where the second operand is a full <4 x i32> vector loaded from %j (align 4).
; Expected output: the load is folded into vpaddd as a (%rdi) memory operand and the
; instruction is compressed from EVEX to VEX.
482define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, ptr %j) nounwind {
483; CHECK-LABEL: vpaddd128_fold_test:
484; CHECK:       ## %bb.0:
485; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
486; CHECK-NEXT:    retq ## encoding: [0xc3]
487  %tmp = load <4 x i32>, ptr %j, align 4
488  %x = add <4 x i32> %i, %tmp
489  ret <4 x i32> %x
490}
491
; Add of a splat constant (i32 6 in every lane).
; Expected output: the constant is read from a constant-pool label through a
; RIP-relative {1to4} embedded broadcast; no EVEX-to-VEX compression is expected here.
492define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
493; CHECK-LABEL: vpaddd128_broadcast_test:
494; CHECK:       ## %bb.0:
495; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
496; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
497; CHECK-NEXT:    retq ## encoding: [0xc3]
498  %x = add <4 x i32> %i, <i32 6, i32 6, i32 6, i32 6>
499  ret <4 x i32> %x
500}
501
; Merge-masked add: lanes where %mask1 != 0 take %i + %j, the other lanes keep %i.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd writes xmm0
; under {%k1}.
502define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
503; CHECK-LABEL: vpaddd128_mask_test:
504; CHECK:       ## %bb.0:
505; CHECK-NEXT:    vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
506; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xc1]
507; CHECK-NEXT:    retq ## encoding: [0xc3]
508  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
509  %x = add <4 x i32> %i, %j
510  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
511  ret <4 x i32> %r
512}
513
; Zero-masked add: lanes where %mask1 != 0 take %i + %j, the other lanes are zero.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd uses {%k1} {z}.
514define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
515; CHECK-LABEL: vpaddd128_maskz_test:
516; CHECK:       ## %bb.0:
517; CHECK-NEXT:    vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
518; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
519; CHECK-NEXT:    retq ## encoding: [0xc3]
520  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
521  %x = add <4 x i32> %i, %j
522  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
523  ret <4 x i32> %r
524}
525
; Merge-masked add whose second operand is a <4 x i32> vector loaded from %j.ptr;
; lanes where %mask1 == 0 keep %i.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd takes a folded
; (%rdi) memory operand under {%k1}.
; The function loads through %j.ptr, so it must not be marked readnone.
526define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, ptr %j.ptr, <4 x i32> %mask1) nounwind {
527; CHECK-LABEL: vpaddd128_mask_fold_test:
528; CHECK:       ## %bb.0:
529; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
530; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x07]
531; CHECK-NEXT:    retq ## encoding: [0xc3]
532  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
533  %j = load <4 x i32>, ptr %j.ptr
534  %x = add <4 x i32> %i, %j
535  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
536  ret <4 x i32> %r
537}
538
; Merge-masked add of a splat constant (i32 7 in every lane); lanes where
; %mask1 == 0 keep %i.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd uses a
; RIP-relative {1to4} embedded broadcast from the constant pool under {%k1}.
539define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
540; CHECK-LABEL: vpaddd128_mask_broadcast_test:
541; CHECK:       ## %bb.0:
542; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
543; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x05,A,A,A,A]
544; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
545; CHECK-NEXT:    retq ## encoding: [0xc3]
546  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
547  %x = add <4 x i32> %i, <i32 7, i32 7, i32 7, i32 7>
548  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
549  ret <4 x i32> %r
550}
551
; Zero-masked add whose second operand is a <4 x i32> vector loaded from %j.ptr;
; lanes where %mask1 == 0 are zero.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd takes a folded
; (%rdi) memory operand with {%k1} {z}.
; The function loads through %j.ptr, so it must not be marked readnone.
552define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, ptr %j.ptr, <4 x i32> %mask1) nounwind {
553; CHECK-LABEL: vpaddd128_maskz_fold_test:
554; CHECK:       ## %bb.0:
555; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
556; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
557; CHECK-NEXT:    retq ## encoding: [0xc3]
558  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
559  %j = load <4 x i32>, ptr %j.ptr
560  %x = add <4 x i32> %i, %j
561  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
562  ret <4 x i32> %r
563}
564
; Zero-masked add of a splat constant (i32 8 in every lane); lanes where
; %mask1 == 0 are zero.
; Expected output: vptestmd builds the lane mask in %k1, then vpaddd uses a
; RIP-relative {1to4} embedded broadcast from the constant pool with {%k1} {z}.
565define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
566; CHECK-LABEL: vpaddd128_maskz_broadcast_test:
567; CHECK:       ## %bb.0:
568; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
569; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x05,A,A,A,A]
570; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
571; CHECK-NEXT:    retq ## encoding: [0xc3]
572  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
573  %x = add <4 x i32> %i, <i32 8, i32 8, i32 8, i32 8>
574  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
575  ret <4 x i32> %r
576}
577
; Register-register subtract (%i - %j) of two <2 x i64> vectors.
; Expected output: a single vpsubq on xmm registers, compressed from EVEX to VEX.
578define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
579; CHECK-LABEL: vpsubq128_test:
580; CHECK:       ## %bb.0:
581; CHECK-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
582; CHECK-NEXT:    retq ## encoding: [0xc3]
583  %x = sub <2 x i64> %i, %j
584  ret <2 x i64> %x
585}
586
; Register-register subtract (%i - %j) of two <4 x i32> vectors.
; Expected output: a single vpsubd on xmm registers, compressed from EVEX to VEX.
587define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
588; CHECK-LABEL: vpsubd128_test:
589; CHECK:       ## %bb.0:
590; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
591; CHECK-NEXT:    retq ## encoding: [0xc3]
592  %x = sub <4 x i32> %i, %j
593  ret <4 x i32> %x
594}
595
; Register-register multiply of two <4 x i32> vectors.
; Expected output: a single vpmulld on xmm registers, compressed from EVEX to VEX.
596define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
597; CHECK-LABEL: vpmulld128_test:
598; CHECK:       ## %bb.0:
599; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x40,0xc1]
600; CHECK-NEXT:    retq ## encoding: [0xc3]
601  %x = mul <4 x i32> %i, %j
602  ret <4 x i32> %x
603}
604
; Register-register fadd of two <2 x double> vectors (the IR computes %x + %y).
; Expected output: a single vaddpd on xmm registers, compressed from EVEX to VEX.
605define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
606; CHECK-LABEL: test_vaddpd_128:
607; CHECK:       ## %bb.0: ## %entry
608; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
609; CHECK-NEXT:    retq ## encoding: [0xc3]
610entry:
611  %add.i = fadd <2 x double> %x, %y
612  ret <2 x double> %add.i
613}
614
; fadd of %y with a non-splat <2 x double> constant.
; Expected output: the whole constant vector is folded as a RIP-relative
; constant-pool memory operand (no broadcast), and the vaddpd is compressed
; from EVEX to VEX.
615define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
616; CHECK-LABEL: test_fold_vaddpd_128:
617; CHECK:       ## %bb.0: ## %entry
618; CHECK-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0x05,A,A,A,A]
619; CHECK-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
620; CHECK-NEXT:    retq ## encoding: [0xc3]
621entry:
622  %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
623  ret <2 x double> %add.i
624}
625
; fadd of %a with a splat float constant (0x3FB99999A0000000 in every lane).
; Expected output: vaddps with a RIP-relative {1to4} embedded broadcast from the
; constant pool.
; NOTE(review): the function name says vaddpd, but the IR type is <4 x float> and
; the expected instruction is vaddps.
626define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
627; CHECK-LABEL: test_broadcast_vaddpd_128:
628; CHECK:       ## %bb.0:
629; CHECK-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0x05,A,A,A,A]
630; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
631; CHECK-NEXT:    retq ## encoding: [0xc3]
632  %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
633  ret <4 x float> %b
634}
635
; Merge-masked fadd: lanes where %mask1 != 0 take %i + %j, the other lanes keep %dst.
; Expected output: vptestmd builds the lane mask in %k1, then vaddps writes xmm0
; under {%k1}.
636define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
637; CHECK-LABEL: test_mask_vaddps_128:
638; CHECK:       ## %bb.0:
639; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
640; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x58,0xc2]
641; CHECK-NEXT:    retq ## encoding: [0xc3]
642  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
643  %x = fadd <4 x float> %i, %j
644  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
645  ret <4 x float> %r
646}
647
; Merge-masked 128-bit single-precision multiply.
; Lanes where %mask1 is nonzero receive %i * %j; all other lanes keep %dst.
; The assertions expect vptestmd into %k1 followed by one vmulps writing
; %xmm0 under {%k1}.
648define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
649; CHECK-LABEL: test_mask_vmulps_128:
650; CHECK:       ## %bb.0:
651; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
652; CHECK-NEXT:    vmulps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x59,0xc2]
653; CHECK-NEXT:    retq ## encoding: [0xc3]
654  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
655  %x = fmul <4 x float> %i, %j
656  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
657  ret <4 x float> %r
658}
659
; Merge-masked 128-bit single-precision minimum.
; The minimum is written as select(%i olt %j, %i, %j); lanes where %mask1 is
; nonzero receive that value and all other lanes keep %dst.
; The assertions expect the compare + select idiom to become one vminps
; under {%k1}, with the mask produced by vptestmd.
660define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
661; CHECK-LABEL: test_mask_vminps_128:
662; CHECK:       ## %bb.0:
663; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
664; CHECK-NEXT:    vminps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5d,0xc2]
665; CHECK-NEXT:    retq ## encoding: [0xc3]
666  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
667  %cmp_res = fcmp olt <4 x float> %i, %j
668  %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
669  %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
670  ret <4 x float> %r
671}
672
; Merge-masked 128-bit single-precision maximum.
; The maximum is written as select(%i ogt %j, %i, %j); lanes where %mask1 is
; nonzero receive that value and all other lanes keep %dst.
; The assertions expect the compare + select idiom to become one vmaxps
; under {%k1}, with the mask produced by vptestmd.
673define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
674; CHECK-LABEL: test_mask_vmaxps_128:
675; CHECK:       ## %bb.0:
676; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
677; CHECK-NEXT:    vmaxps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5f,0xc2]
678; CHECK-NEXT:    retq ## encoding: [0xc3]
679  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
680  %cmp_res = fcmp ogt <4 x float> %i, %j
681  %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
682  %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
683  ret <4 x float> %r
684}
685
; Merge-masked 128-bit single-precision subtract.
; Lanes where %mask1 is nonzero receive %i - %j; all other lanes keep %dst.
; The assertions expect vptestmd into %k1 followed by one vsubps writing
; %xmm0 under {%k1}.
686define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
687; CHECK-LABEL: test_mask_vsubps_128:
688; CHECK:       ## %bb.0:
689; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
690; CHECK-NEXT:    vsubps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5c,0xc2]
691; CHECK-NEXT:    retq ## encoding: [0xc3]
692  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
693  %x = fsub <4 x float> %i, %j
694  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
695  ret <4 x float> %r
696}
697
698
; Merge-masked 128-bit single-precision divide.
; Lanes where %mask1 is nonzero receive %i / %j; all other lanes keep %dst.
; The assertions expect vptestmd into %k1 followed by one vdivps writing
; %xmm0 under {%k1}.
699define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
700; CHECK-LABEL: test_mask_vdivps_128:
701; CHECK:       ## %bb.0:
702; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
703; CHECK-NEXT:    vdivps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5e,0xc2]
704; CHECK-NEXT:    retq ## encoding: [0xc3]
705  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
706  %x = fdiv <4 x float> %i, %j
707  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
708  ret <4 x float> %r
709}
710
; Merge-masked 128-bit double-precision multiply.
; Lanes where the i64 element of %mask1 is nonzero receive %i * %j; all other
; lanes keep %dst. The assertions expect vptestmq into %k1 followed by one
; vmulpd writing %xmm0 under {%k1}.
711define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
712; CHECK-LABEL: test_mask_vmulpd_128:
713; CHECK:       ## %bb.0:
714; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
715; CHECK-NEXT:    vmulpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x59,0xc2]
716; CHECK-NEXT:    retq ## encoding: [0xc3]
717  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
718  %x = fmul <2 x double> %i, %j
719  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
720  ret <2 x double> %r
721}
722
; Merge-masked 128-bit double-precision minimum.
; The minimum is written as select(%i olt %j, %i, %j); lanes where %mask1 is
; nonzero receive that value and all other lanes keep %dst.
; The assertions expect the compare + select idiom to become one vminpd
; under {%k1}, with the mask produced by vptestmq.
723define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
724; CHECK-LABEL: test_mask_vminpd_128:
725; CHECK:       ## %bb.0:
726; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
727; CHECK-NEXT:    vminpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5d,0xc2]
728; CHECK-NEXT:    retq ## encoding: [0xc3]
729  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
730  %cmp_res = fcmp olt <2 x double> %i, %j
731  %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
732  %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
733  ret <2 x double> %r
734}
735
; Merge-masked 128-bit double-precision maximum.
; The maximum is written as select(%i ogt %j, %i, %j); lanes where %mask1 is
; nonzero receive that value and all other lanes keep %dst.
; The assertions expect the compare + select idiom to become one vmaxpd
; under {%k1}, with the mask produced by vptestmq.
736define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
737; CHECK-LABEL: test_mask_vmaxpd_128:
738; CHECK:       ## %bb.0:
739; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
740; CHECK-NEXT:    vmaxpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5f,0xc2]
741; CHECK-NEXT:    retq ## encoding: [0xc3]
742  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
743  %cmp_res = fcmp ogt <2 x double> %i, %j
744  %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
745  %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
746  ret <2 x double> %r
747}
748
; Merge-masked 128-bit double-precision subtract.
; Lanes where %mask1 is nonzero receive %i - %j; all other lanes keep %dst.
; The assertions expect vptestmq into %k1 followed by one vsubpd writing
; %xmm0 under {%k1}.
749define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
750; CHECK-LABEL: test_mask_vsubpd_128:
751; CHECK:       ## %bb.0:
752; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
753; CHECK-NEXT:    vsubpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5c,0xc2]
754; CHECK-NEXT:    retq ## encoding: [0xc3]
755  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
756  %x = fsub <2 x double> %i, %j
757  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
758  ret <2 x double> %r
759}
760
; Merge-masked 128-bit double-precision divide.
; Lanes where %mask1 is nonzero receive %i / %j; all other lanes keep %dst.
; The assertions expect vptestmq into %k1 followed by one vdivpd writing
; %xmm0 under {%k1}.
761define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
762; CHECK-LABEL: test_mask_vdivpd_128:
763; CHECK:       ## %bb.0:
764; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
765; CHECK-NEXT:    vdivpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5e,0xc2]
766; CHECK-NEXT:    retq ## encoding: [0xc3]
767  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
768  %x = fdiv <2 x double> %i, %j
769  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
770  ret <2 x double> %r
771}
772
; Merge-masked 128-bit double-precision add.
; Lanes where %mask1 is nonzero receive %i + %j; all other lanes keep %dst.
; The assertions expect vptestmq into %k1 followed by one vaddpd writing
; %xmm0 under {%k1}.
773define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
774; CHECK-LABEL: test_mask_vaddpd_128:
775; CHECK:       ## %bb.0:
776; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
777; CHECK-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0xc2]
778; CHECK-NEXT:    retq ## encoding: [0xc3]
779  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
780  %x = fadd <2 x double> %i, %j
781  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
782  ret <2 x double> %r
783}
784
; Zero-masked 128-bit double-precision add.
; Lanes where %mask1 is nonzero receive %i + %j; all other lanes are zero
; (the select's false operand is zeroinitializer). The assertions expect
; vptestmq into %k1 followed by one vaddpd with {%k1} {z}.
; The parameter list continues on the line after the autogenerated assertion
; comments; those comment lines sit inside the signature.
785define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
786; CHECK-LABEL: test_maskz_vaddpd_128:
787; CHECK:       ## %bb.0:
788; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
789; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0xc1]
790; CHECK-NEXT:    retq ## encoding: [0xc3]
791                                          <2 x i64> %mask1) nounwind readnone {
792  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
793  %x = fadd <2 x double> %i, %j
794  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
795  ret <2 x double> %r
796}
797
; Merge-masked 128-bit double-precision add with one operand loaded from
; memory. Lanes where %mask1 is nonzero receive %i + load(%j); all other
; lanes keep %dst. The assertions expect the vector load to be folded into
; the masked vaddpd as a (%rdi) memory operand.
798define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i, ptr %j,  <2 x i64> %mask1) nounwind {
799; CHECK-LABEL: test_mask_fold_vaddpd_128:
800; CHECK:       ## %bb.0:
801; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
802; CHECK-NEXT:    vaddpd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0x07]
803; CHECK-NEXT:    retq ## encoding: [0xc3]
804  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
805  %tmp = load <2 x double>, ptr %j
806  %x = fadd <2 x double> %i, %tmp
807  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
808  ret <2 x double> %r
809}
810
; Zero-masked 128-bit double-precision add with one operand loaded from
; memory. Lanes where %mask1 is nonzero receive %i + load(%j); all other
; lanes are zero. The assertions expect the vector load to be folded into
; vaddpd as a (%rdi) memory operand with {%k1} {z}.
811define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, ptr %j, <2 x i64> %mask1) nounwind {
812; CHECK-LABEL: test_maskz_fold_vaddpd_128:
813; CHECK:       ## %bb.0:
814; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
815; CHECK-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0x07]
816; CHECK-NEXT:    retq ## encoding: [0xc3]
817  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
818  %tmp = load <2 x double>, ptr %j
819  %x = fadd <2 x double> %i, %tmp
820  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
821  ret <2 x double> %r
822}
823
; Unmasked 128-bit double-precision add with a scalar loaded from memory and
; splatted into both lanes via two insertelement instructions. The assertions
; expect the load + splat to fold into a (%rdi){1to2} broadcast memory operand
; of vaddpd.
824define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, ptr %j) nounwind {
825; CHECK-LABEL: test_broadcast2_vaddpd_128:
826; CHECK:       ## %bb.0:
827; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0x58,0x07]
828; CHECK-NEXT:    retq ## encoding: [0xc3]
829  %tmp = load double, ptr %j
830  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
831  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
832  %x = fadd <2 x double> %j.1, %i
833  ret <2 x double> %x
834}
835
; Merge-masked 128-bit double-precision add with a scalar loaded from memory
; and splatted into both lanes. Lanes where %mask1 is nonzero receive
; splat(load(%j)) + %i; all other lanes keep %i.
; The select's passthrough here is %i, not %dst, so the %dst parameter is
; unused in this function. The assertions accordingly expect %xmm1 to be
; copied into %xmm0 with vmovapd before the masked vaddpd with a
; (%rdi){1to2} broadcast operand.
836define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, ptr %j, <2 x i64> %mask1) nounwind {
837; CHECK-LABEL: test_mask_broadcast_vaddpd_128:
838; CHECK:       ## %bb.0:
839; CHECK-NEXT:    vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
840; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
841; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x07]
842; CHECK-NEXT:    retq ## encoding: [0xc3]
843  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
844  %tmp = load double, ptr %j
845  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
846  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
847  %x = fadd <2 x double> %j.1, %i
848  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
849  ret <2 x double> %r
850}
851
; Zero-masked 128-bit double-precision add with a scalar loaded from memory
; and splatted into both lanes. Lanes where %mask1 is nonzero receive
; splat(load(%j)) + %i; all other lanes are zero. The assertions expect one
; vaddpd with a (%rdi){1to2} broadcast operand and {%k1} {z}.
852define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, ptr %j, <2 x i64> %mask1) nounwind {
853; CHECK-LABEL: test_maskz_broadcast_vaddpd_128:
854; CHECK:       ## %bb.0:
855; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
856; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x58,0x07]
857; CHECK-NEXT:    retq ## encoding: [0xc3]
858  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
859  %tmp = load double, ptr %j
860  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
861  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
862  %x = fadd <2 x double> %j.1, %i
863  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
864  ret <2 x double> %r
865}
866