xref: /llvm-project/llvm/test/CodeGen/X86/perm.avx2-false-deps.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mcpu=alderlake -mattr=+false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE,ENABLE-ADL
3; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=+false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=ENABLE,ENABLE-SPR
4; RUN: llc -verify-machineinstrs -mcpu=alderlake -mattr=-false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE,DISABLE-ADL
5; RUN: llc -verify-machineinstrs -mcpu=sapphirerapids -mattr=-false-deps-perm -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefixes=DISABLE,DISABLE-SPR
6
7define <8 x i32> @permd(<8 x i32> %a0, <8 x i32> %a1) {
8; ENABLE-ADL-LABEL: permd:
9; ENABLE-ADL:       # %bb.0:
10; ENABLE-ADL-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
11; ENABLE-ADL-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
12; ENABLE-ADL-NEXT:    #APP
13; ENABLE-ADL-NEXT:    nop
14; ENABLE-ADL-NEXT:    #NO_APP
15; ENABLE-ADL-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
16; ENABLE-ADL-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
17; ENABLE-ADL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
18; ENABLE-ADL-NEXT:    vpermd %ymm2, %ymm1, %ymm0
19; ENABLE-ADL-NEXT:    vpaddd %ymm1, %ymm2, %ymm1
20; ENABLE-ADL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
21; ENABLE-ADL-NEXT:    retq
22;
23; ENABLE-SPR-LABEL: permd:
24; ENABLE-SPR:       # %bb.0:
25; ENABLE-SPR-NEXT:    vmovdqa64 %ymm1, %ymm16
26; ENABLE-SPR-NEXT:    vmovdqa64 %ymm0, %ymm17
27; ENABLE-SPR-NEXT:    #APP
28; ENABLE-SPR-NEXT:    nop
29; ENABLE-SPR-NEXT:    #NO_APP
30; ENABLE-SPR-NEXT:    vxorps %xmm0, %xmm0, %xmm0
31; ENABLE-SPR-NEXT:    vpermd %ymm17, %ymm16, %ymm0
32; ENABLE-SPR-NEXT:    vpaddd %ymm16, %ymm17, %ymm1
33; ENABLE-SPR-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
34; ENABLE-SPR-NEXT:    retq
35;
36; DISABLE-ADL-LABEL: permd:
37; DISABLE-ADL:       # %bb.0:
38; DISABLE-ADL-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
39; DISABLE-ADL-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
40; DISABLE-ADL-NEXT:    #APP
41; DISABLE-ADL-NEXT:    nop
42; DISABLE-ADL-NEXT:    #NO_APP
43; DISABLE-ADL-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
44; DISABLE-ADL-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
45; DISABLE-ADL-NEXT:    vpermd %ymm2, %ymm1, %ymm0
46; DISABLE-ADL-NEXT:    vpaddd %ymm1, %ymm2, %ymm1
47; DISABLE-ADL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
48; DISABLE-ADL-NEXT:    retq
49;
50; DISABLE-SPR-LABEL: permd:
51; DISABLE-SPR:       # %bb.0:
52; DISABLE-SPR-NEXT:    vmovdqa64 %ymm1, %ymm16
53; DISABLE-SPR-NEXT:    vmovdqa64 %ymm0, %ymm17
54; DISABLE-SPR-NEXT:    #APP
55; DISABLE-SPR-NEXT:    nop
56; DISABLE-SPR-NEXT:    #NO_APP
57; DISABLE-SPR-NEXT:    vpermd %ymm17, %ymm16, %ymm0
58; DISABLE-SPR-NEXT:    vpaddd %ymm16, %ymm17, %ymm1
59; DISABLE-SPR-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
60; DISABLE-SPR-NEXT:    retq
61  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
62  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1)
63  %3 = add <8 x i32> %a0, %a1
64  %res = add <8 x i32> %2, %3
65  ret <8 x i32> %res
66}
67
68define <8 x i32> @permd_mem(ptr %p0, <8 x i32> %a1) {
69; ENABLE-ADL-LABEL: permd_mem:
70; ENABLE-ADL:       # %bb.0:
71; ENABLE-ADL-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
72; ENABLE-ADL-NEXT:    #APP
73; ENABLE-ADL-NEXT:    nop
74; ENABLE-ADL-NEXT:    #NO_APP
75; ENABLE-ADL-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
76; ENABLE-ADL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
77; ENABLE-ADL-NEXT:    vpermd (%rdi), %ymm1, %ymm0
78; ENABLE-ADL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
79; ENABLE-ADL-NEXT:    retq
80;
81; ENABLE-SPR-LABEL: permd_mem:
82; ENABLE-SPR:       # %bb.0:
83; ENABLE-SPR-NEXT:    vmovdqa64 %ymm0, %ymm16
84; ENABLE-SPR-NEXT:    #APP
85; ENABLE-SPR-NEXT:    nop
86; ENABLE-SPR-NEXT:    #NO_APP
87; ENABLE-SPR-NEXT:    vxorps %xmm0, %xmm0, %xmm0
88; ENABLE-SPR-NEXT:    vpermd (%rdi), %ymm16, %ymm0
89; ENABLE-SPR-NEXT:    vpaddd %ymm16, %ymm0, %ymm0
90; ENABLE-SPR-NEXT:    retq
91;
92; DISABLE-ADL-LABEL: permd_mem:
93; DISABLE-ADL:       # %bb.0:
94; DISABLE-ADL-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
95; DISABLE-ADL-NEXT:    #APP
96; DISABLE-ADL-NEXT:    nop
97; DISABLE-ADL-NEXT:    #NO_APP
98; DISABLE-ADL-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
99; DISABLE-ADL-NEXT:    vpermd (%rdi), %ymm1, %ymm0
100; DISABLE-ADL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
101; DISABLE-ADL-NEXT:    retq
102;
103; DISABLE-SPR-LABEL: permd_mem:
104; DISABLE-SPR:       # %bb.0:
105; DISABLE-SPR-NEXT:    vmovdqa64 %ymm0, %ymm16
106; DISABLE-SPR-NEXT:    #APP
107; DISABLE-SPR-NEXT:    nop
108; DISABLE-SPR-NEXT:    #NO_APP
109; DISABLE-SPR-NEXT:    vpermd (%rdi), %ymm16, %ymm0
110; DISABLE-SPR-NEXT:    vpaddd %ymm16, %ymm0, %ymm0
111; DISABLE-SPR-NEXT:    retq
112  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
113  %a0 = load <8 x i32>, ptr %p0, align 64
114  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1)
115  %res = add <8 x i32> %2, %a1
116  ret <8 x i32> %res
117}
118
119declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
120
121define <4 x i64> @permq(<4 x i64> %a0) {
122; ENABLE-LABEL: permq:
123; ENABLE:       # %bb.0:
124; ENABLE-NEXT:    #APP
125; ENABLE-NEXT:    nop
126; ENABLE-NEXT:    #NO_APP
127; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
128; ENABLE-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,2,1,0]
129; ENABLE-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
130; ENABLE-NEXT:    retq
131;
132; DISABLE-LABEL: permq:
133; DISABLE:       # %bb.0:
134; DISABLE-NEXT:    #APP
135; DISABLE-NEXT:    nop
136; DISABLE-NEXT:    #NO_APP
137; DISABLE-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,2,1,0]
138; DISABLE-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
139; DISABLE-NEXT:    retq
140  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
141  %2 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
142  %res = add <4 x i64> %2, %a0
143  ret <4 x i64> %res
144}
145
146define <4 x i64> @permq_mem(ptr %p0) {
147; ENABLE-LABEL: permq_mem:
148; ENABLE:       # %bb.0:
149; ENABLE-NEXT:    #APP
150; ENABLE-NEXT:    nop
151; ENABLE-NEXT:    #NO_APP
152; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
153; ENABLE-NEXT:    vpermpd {{.*#+}} ymm0 = mem[1,2,1,0]
154; ENABLE-NEXT:    retq
155;
156; DISABLE-LABEL: permq_mem:
157; DISABLE:       # %bb.0:
158; DISABLE-NEXT:    #APP
159; DISABLE-NEXT:    nop
160; DISABLE-NEXT:    #NO_APP
161; DISABLE-NEXT:    vpermpd {{.*#+}} ymm0 = mem[1,2,1,0]
162; DISABLE-NEXT:    retq
163  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
164  %a0 = load <4 x i64>, ptr %p0, align 64
165  %2 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
166  ret <4 x i64> %2
167}
168
169define <8 x float> @permps(<8 x float> %a0, <8 x i32> %a1) {
170; ENABLE-ADL-LABEL: permps:
171; ENABLE-ADL:       # %bb.0:
172; ENABLE-ADL-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
173; ENABLE-ADL-NEXT:    #APP
174; ENABLE-ADL-NEXT:    nop
175; ENABLE-ADL-NEXT:    #NO_APP
176; ENABLE-ADL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
177; ENABLE-ADL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
178; ENABLE-ADL-NEXT:    vpermps %ymm2, %ymm0, %ymm1
179; ENABLE-ADL-NEXT:    vcvtdq2ps %ymm0, %ymm0
180; ENABLE-ADL-NEXT:    vaddps %ymm2, %ymm0, %ymm0
181; ENABLE-ADL-NEXT:    vaddps %ymm0, %ymm1, %ymm0
182; ENABLE-ADL-NEXT:    retq
183;
184; ENABLE-SPR-LABEL: permps:
185; ENABLE-SPR:       # %bb.0:
186; ENABLE-SPR-NEXT:    vmovaps %ymm0, %ymm16
187; ENABLE-SPR-NEXT:    #APP
188; ENABLE-SPR-NEXT:    nop
189; ENABLE-SPR-NEXT:    #NO_APP
190; ENABLE-SPR-NEXT:    vxorps %xmm1, %xmm1, %xmm1
191; ENABLE-SPR-NEXT:    vpermps %ymm16, %ymm0, %ymm1
192; ENABLE-SPR-NEXT:    vcvtdq2ps %ymm0, %ymm0
193; ENABLE-SPR-NEXT:    vaddps %ymm16, %ymm0, %ymm0
194; ENABLE-SPR-NEXT:    vaddps %ymm0, %ymm1, %ymm0
195; ENABLE-SPR-NEXT:    retq
196;
197; DISABLE-ADL-LABEL: permps:
198; DISABLE-ADL:       # %bb.0:
199; DISABLE-ADL-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
200; DISABLE-ADL-NEXT:    #APP
201; DISABLE-ADL-NEXT:    nop
202; DISABLE-ADL-NEXT:    #NO_APP
203; DISABLE-ADL-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
204; DISABLE-ADL-NEXT:    vpermps %ymm2, %ymm0, %ymm1
205; DISABLE-ADL-NEXT:    vcvtdq2ps %ymm0, %ymm0
206; DISABLE-ADL-NEXT:    vaddps %ymm2, %ymm0, %ymm0
207; DISABLE-ADL-NEXT:    vaddps %ymm0, %ymm1, %ymm0
208; DISABLE-ADL-NEXT:    retq
209;
210; DISABLE-SPR-LABEL: permps:
211; DISABLE-SPR:       # %bb.0:
212; DISABLE-SPR-NEXT:    vmovaps %ymm0, %ymm16
213; DISABLE-SPR-NEXT:    #APP
214; DISABLE-SPR-NEXT:    nop
215; DISABLE-SPR-NEXT:    #NO_APP
216; DISABLE-SPR-NEXT:    vpermps %ymm16, %ymm0, %ymm1
217; DISABLE-SPR-NEXT:    vcvtdq2ps %ymm0, %ymm0
218; DISABLE-SPR-NEXT:    vaddps %ymm16, %ymm0, %ymm0
219; DISABLE-SPR-NEXT:    vaddps %ymm0, %ymm1, %ymm0
220; DISABLE-SPR-NEXT:    retq
221  %1 = tail call <8 x i32> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
222  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1)
223  %t = sitofp <8 x i32> %1 to <8 x float>
224  %3 = fadd <8 x float> %t, %a0
225  %res = fadd <8 x float> %2, %3
226  ret <8 x float> %res
227}
228
229define <8 x float> @permps_mem(ptr %p0, <8 x i32> %a1) {
230; ENABLE-LABEL: permps_mem:
231; ENABLE:       # %bb.0:
232; ENABLE-NEXT:    #APP
233; ENABLE-NEXT:    nop
234; ENABLE-NEXT:    #NO_APP
235; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
236; ENABLE-NEXT:    vpermps (%rdi), %ymm0, %ymm1
237; ENABLE-NEXT:    vcvtdq2ps %ymm0, %ymm0
238; ENABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
239; ENABLE-NEXT:    retq
240;
241; DISABLE-LABEL: permps_mem:
242; DISABLE:       # %bb.0:
243; DISABLE-NEXT:    #APP
244; DISABLE-NEXT:    nop
245; DISABLE-NEXT:    #NO_APP
246; DISABLE-NEXT:    vpermps (%rdi), %ymm0, %ymm1
247; DISABLE-NEXT:    vcvtdq2ps %ymm0, %ymm0
248; DISABLE-NEXT:    vaddps %ymm0, %ymm1, %ymm0
249; DISABLE-NEXT:    retq
250  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
251  %a0 = load <8 x float>, ptr %p0, align 64
252  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1)
253  %t = sitofp <8 x i32> %a1 to <8 x float>
254  %res = fadd <8 x float> %2, %t
255  ret <8 x float> %res
256}
257
258declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
259
260define <4 x double> @permpd(<4 x double> %a0) {
261; ENABLE-LABEL: permpd:
262; ENABLE:       # %bb.0:
263; ENABLE-NEXT:    #APP
264; ENABLE-NEXT:    nop
265; ENABLE-NEXT:    #NO_APP
266; ENABLE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
267; ENABLE-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,2,1,0]
268; ENABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
269; ENABLE-NEXT:    retq
270;
271; DISABLE-LABEL: permpd:
272; DISABLE:       # %bb.0:
273; DISABLE-NEXT:    #APP
274; DISABLE-NEXT:    nop
275; DISABLE-NEXT:    #NO_APP
276; DISABLE-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,2,1,0]
277; DISABLE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
278; DISABLE-NEXT:    retq
279  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
280  %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
281  %res = fadd <4 x double> %2, %a0
282  ret <4 x double> %res
283}
284
285define <4 x double> @permpd_mem(ptr %p0) {
286; ENABLE-LABEL: permpd_mem:
287; ENABLE:       # %bb.0:
288; ENABLE-NEXT:    #APP
289; ENABLE-NEXT:    nop
290; ENABLE-NEXT:    #NO_APP
291; ENABLE-NEXT:    vxorps %xmm0, %xmm0, %xmm0
292; ENABLE-NEXT:    vpermpd {{.*#+}} ymm0 = mem[1,2,1,0]
293; ENABLE-NEXT:    retq
294;
295; DISABLE-LABEL: permpd_mem:
296; DISABLE:       # %bb.0:
297; DISABLE-NEXT:    #APP
298; DISABLE-NEXT:    nop
299; DISABLE-NEXT:    #NO_APP
300; DISABLE-NEXT:    vpermpd {{.*#+}} ymm0 = mem[1,2,1,0]
301; DISABLE-NEXT:    retq
302  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
303  %a0 = load <4 x double>, ptr %p0, align 64
304  %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
305  ret <4 x double> %2
306}
307