; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s

; Unaligned (align 1) <8 x i32> load selects vmovups.
define <8 x i32> @test_256_1(ptr %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x i32>, ptr %addr, align 1
  ret <8 x i32>%res
}

; 32-byte-aligned <8 x i32> load selects vmovaps.
define <8 x i32> @test_256_2(ptr %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x i32>, ptr %addr, align 32
  ret <8 x i32>%res
}

; 32-byte-aligned <4 x i64> store selects vmovaps.
define void @test_256_3(ptr %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i64>%data, ptr %addr, align 32
  ret void
}

; Unaligned <8 x i32> store selects vmovups.
define void @test_256_4(ptr %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x i32>%data, ptr %addr, align 1
  ret void
}

; 32-byte-aligned <8 x i32> store selects vmovaps.
define void @test_256_5(ptr %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x i32>%data, ptr %addr, align 32
  ret void
}

; 32-byte-aligned <4 x i64> load selects vmovaps.
define  <4 x i64> @test_256_6(ptr %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i64>, ptr %addr, align 32
  ret <4 x i64>%res
}

; Unaligned <4 x i64> store selects vmovups.
define void @test_256_7(ptr %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i64>%data, ptr %addr, align 1
  ret void
}

; Unaligned <4 x i64> load selects vmovups.
define <4 x i64> @test_256_8(ptr %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i64>, ptr %addr, align 1
  ret <4 x i64>%res
}

; 32-byte-aligned <4 x double> store selects vmovaps.
define void @test_256_9(ptr %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x double>%data, ptr %addr, align 32
  ret void
}

; 32-byte-aligned <4 x double> load selects vmovaps.
define <4 x double> @test_256_10(ptr %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x double>, ptr %addr, align 32
  ret <4 x double>%res
}

; 32-byte-aligned <8 x float> store selects vmovaps.
define void @test_256_11(ptr %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x float>%data, ptr %addr, align 32
  ret void
}

; 32-byte-aligned <8 x float> load selects vmovaps.
define <8 x float> @test_256_12(ptr %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x float>, ptr %addr, align 32
  ret <8 x float>%res
}

; Unaligned <4 x double> store selects vmovups.
define void @test_256_13(ptr %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x double>%data, ptr %addr, align 1
  ret void
}

; Unaligned <4 x double> load selects vmovups.
define <4 x double> @test_256_14(ptr %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x double>, ptr %addr, align 1
  ret <4 x double>%res
}

; Unaligned <8 x float> store selects vmovups.
define void @test_256_15(ptr %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x float>%data, ptr %addr, align 1
  ret void
}

; Unaligned <8 x float> load selects vmovups.
define <8 x float> @test_256_16(ptr %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x float>, ptr %addr, align 1
  ret <8 x float>%res
}

; Aligned load + select(icmp-ne mask, load, %old) folds to masked vmovdqa32 {%k1}.
define <8 x i32> @test_256_17(ptr %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

; Unaligned load + select(mask, load, %old) folds to masked vmovdqu32 {%k1}.
define <8 x i32> @test_256_18(ptr %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

; Aligned load + select(mask, load, zero) folds to zero-masked vmovdqa32 {%k1} {z}.
define <8 x i32> @test_256_19(ptr %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}

; Unaligned load + select(mask, load, zero) folds to zero-masked vmovdqu32 {%k1} {z}.
define <8 x i32> @test_256_20(ptr %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}

; Aligned <4 x i64> masked load merging into %old: vmovdqa64 {%k1}.
define <4 x i64> @test_256_21(ptr %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

; Unaligned <4 x i64> masked load merging into %old: vmovdqu64 {%k1}.
define <4 x i64> @test_256_22(ptr %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

; Aligned <4 x i64> masked load with zero fallback: vmovdqa64 {%k1} {z}.
define <4 x i64> @test_256_23(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}

; Unaligned <4 x i64> masked load with zero fallback: vmovdqu64 {%k1} {z}.
define <4 x i64> @test_256_24(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}

; fcmp-one mask (vcmpneq_oqps) + aligned float load merges via vmovaps {%k1}.
define <8 x float> @test_256_25(ptr %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

; fcmp-one mask + unaligned float load merges via vmovups {%k1}.
define <8 x float> @test_256_26(ptr %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

; fcmp-one mask + aligned float load with zero fallback: vmovaps {%k1} {z}.
define <8 x float> @test_256_27(ptr %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

; fcmp-one mask + unaligned float load with zero fallback: vmovups {%k1} {z}.
define <8 x float> @test_256_28(ptr %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

; Aligned <4 x double> masked load merging into %old: vmovapd {%k1}.
define <4 x double> @test_256_29(ptr %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

; Unaligned <4 x double> masked load merging into %old: vmovupd {%k1}.
define <4 x double> @test_256_30(ptr %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

; Aligned <4 x double> masked load with zero fallback: vmovapd {%k1} {z}.
define <4 x double> @test_256_31(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}

; Unaligned <4 x double> masked load with zero fallback: vmovupd {%k1} {z}.
define <4 x double> @test_256_32(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}

; Unaligned <4 x i32> load selects vmovups (xmm).
define <4 x i32> @test_128_1(ptr %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i32>, ptr %addr, align 1
  ret <4 x i32>%res
}

; 16-byte-aligned <4 x i32> load selects vmovaps (xmm).
define <4 x i32> @test_128_2(ptr %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i32>, ptr %addr, align 16
  ret <4 x i32>%res
}

; 16-byte-aligned <2 x i64> store selects vmovaps (xmm).
define void @test_128_3(ptr %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x i64>%data, ptr %addr, align 16
  ret void
}

; Unaligned <4 x i32> store selects vmovups (xmm).
define void @test_128_4(ptr %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i32>%data, ptr %addr, align 1
  ret void
}

; 16-byte-aligned <4 x i32> store selects vmovaps (xmm).
define void @test_128_5(ptr %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i32>%data, ptr %addr, align 16
  ret void
}

; 16-byte-aligned <2 x i64> load selects vmovaps (xmm).
define  <2 x i64> @test_128_6(ptr %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x i64>, ptr %addr, align 16
  ret <2 x i64>%res
}

; Unaligned <2 x i64> store selects vmovups (xmm).
define void @test_128_7(ptr %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x i64>%data, ptr %addr, align 1
  ret void
}

; Unaligned <2 x i64> load selects vmovups (xmm).
define <2 x i64> @test_128_8(ptr %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x i64>, ptr %addr, align 1
  ret <2 x i64>%res
}

; 16-byte-aligned <2 x double> store selects vmovaps (xmm).
define void @test_128_9(ptr %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x double>%data, ptr %addr, align 16
  ret void
}

; 16-byte-aligned <2 x double> load selects vmovaps (xmm).
define <2 x double> @test_128_10(ptr %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x double>, ptr %addr, align 16
  ret <2 x double>%res
}

; 16-byte-aligned <4 x float> store selects vmovaps (xmm).
define void @test_128_11(ptr %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x float>%data, ptr %addr, align 16
  ret void
}

; 16-byte-aligned <4 x float> load selects vmovaps (xmm).
define <4 x float> @test_128_12(ptr %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x float>, ptr %addr, align 16
  ret <4 x float>%res
}

; Unaligned <2 x double> store selects vmovups (xmm).
define void @test_128_13(ptr %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x double>%data, ptr %addr, align 1
  ret void
}

; Unaligned <2 x double> load selects vmovups (xmm).
define <2 x double> @test_128_14(ptr %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x double>, ptr %addr, align 1
  ret <2 x double>%res
}

; Unaligned <4 x float> store selects vmovups (xmm).
define void @test_128_15(ptr %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x float>%data, ptr %addr, align 1
  ret void
}

; Unaligned <4 x float> load selects vmovups (xmm).
define <4 x float> @test_128_16(ptr %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x float>, ptr %addr, align 1
  ret <4 x float>%res
}

; Aligned <4 x i32> masked load merging into %old: vmovdqa32 {%k1} (xmm).
define <4 x i32> @test_128_17(ptr %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

; Unaligned <4 x i32> masked load merging into %old: vmovdqu32 {%k1} (xmm).
define <4 x i32> @test_128_18(ptr %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

; Aligned <4 x i32> masked load with zero fallback: vmovdqa32 {%k1} {z} (xmm).
define <4 x i32> @test_128_19(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}

; Unaligned <4 x i32> masked load with zero fallback: vmovdqu32 {%k1} {z} (xmm).
define <4 x i32> @test_128_20(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}

; Aligned <2 x i64> masked load merging into %old: vmovdqa64 {%k1} (xmm).
define <2 x i64> @test_128_21(ptr %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

; Unaligned <2 x i64> masked load merging into %old: vmovdqu64 {%k1} (xmm).
define <2 x i64> @test_128_22(ptr %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

; Aligned <2 x i64> masked load with zero fallback: vmovdqa64 {%k1} {z} (xmm).
define <2 x i64> @test_128_23(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}

; Unaligned <2 x i64> masked load with zero fallback: vmovdqu64 {%k1} {z} (xmm).
define <2 x i64> @test_128_24(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}

; icmp-ne mask + aligned <4 x float> load merges via vmovaps {%k1} (xmm).
define <4 x float> @test_128_25(ptr %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

; icmp-ne mask + unaligned <4 x float> load merges via vmovups {%k1} (xmm).
define <4 x float> @test_128_26(ptr %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

; icmp-ne mask + aligned <4 x float> load with zero fallback: vmovaps {%k1} {z} (xmm).
define <4 x float> @test_128_27(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

; icmp-ne mask + unaligned <4 x float> load with zero fallback: vmovups {%k1} {z} (xmm).
define <4 x float> @test_128_28(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

; Aligned <2 x double> masked load merging into %old: vmovapd {%k1} (xmm).
define <2 x double> @test_128_29(ptr %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

; Unaligned <2 x double> masked load merging into %old: vmovupd {%k1} (xmm).
define <2 x double> @test_128_30(ptr %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

; Aligned <2 x double> masked load with zero fallback: vmovapd {%k1} {z} (xmm).
define <2 x double> @test_128_31(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}

; Unaligned <2 x double> masked load with zero fallback: vmovupd {%k1} {z} (xmm).
define <2 x double> @test_128_32(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}

