xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll (revision 27a8afa3fcf7e0378dff65cf3374f7a4e4e2b9a6)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s
4
5; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
6; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
7
8; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.permlane16.swap
9; ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32), %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.permlane16.swap)
10
11
12declare { i32, i32 } @llvm.amdgcn.permlane16.swap(i32, i32, i1 immarg, i1 immarg)
13
; Baseline case: both i32 operands are plain (non-inreg) arguments and both
; i1 immediates are false. The checks expect the e32 form applied directly to
; v0, v1 with no preceding copy.
define { i32, i32 } @v_permlane16_swap_b32_vv(i32 %vdst_old, i32 %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_permlane16_swap_b32_e32 v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
  ret { i32, i32 } %swapped
}
23
; The second operand is the constant 1. The checks expect it to be written
; into v1 with v_mov_b32, followed by `s_nop 1`, before the swap executes.
define { i32, i32 } @v_permlane16_swap_b32_vi(i32 %vdst_old) {
; GCN-LABEL: v_permlane16_swap_b32_vi:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, 1
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    v_permlane16_swap_b32_e32 v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 1, i1 false, i1 false)
  ret { i32, i32 } %swapped
}
35
; The second operand is the constant 49617, which the checks print as 0xc1d1.
; It is expected to be moved into v1, followed by `s_nop 1`, before the swap.
define { i32, i32 } @v_permlane16_swap_b32_vl(i32 %vdst_old) {
; GCN-LABEL: v_permlane16_swap_b32_vl:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, 0xc1d1
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    v_permlane16_swap_b32_e32 v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 49617, i1 false, i1 false)
  ret { i32, i32 } %swapped
}
47
; The first operand is the constant 1 and the second is the only function
; argument. The checks expect the incoming v0 to be copied to v1, then 1 to be
; written into v0, then `s_nop 1`, then the swap on v0, v1.
define { i32, i32 } @v_permlane16_swap_b32_iv(i32 %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_iv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    v_mov_b32_e32 v0, 1
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    v_permlane16_swap_b32_e32 v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 1, i32 %src0_old, i1 false, i1 false)
  ret { i32, i32 } %swapped
}
60
; Both operands are `inreg` arguments. The checks expect s0 and s1 to be
; copied into v0 and v1, followed by `s_nop 1`, before the swap.
define { i32, i32 } @v_permlane16_swap_b32_ss(i32 inreg %vdst_old, i32 inreg %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_ss:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    v_mov_b32_e32 v1, s1
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    v_permlane16_swap_b32_e32 v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
  ret { i32, i32 } %swapped
}
73
; Only the first operand is `inreg`. The checks expect the incoming v0 (the
; second operand) to be moved to v1, s0 to be copied into v0, then `s_nop 1`,
; then the swap.
define { i32, i32 } @v_permlane16_swap_b32_sv(i32 inreg %vdst_old, i32 %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_sv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    v_permlane16_swap_b32_e32 v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
  ret { i32, i32 } %swapped
}
86
; Only the second operand is `inreg`. The checks expect s0 to be copied into
; v1, followed by `s_nop 1`, before the swap; v0 is used as it arrives.
define { i32, i32 } @v_permlane16_swap_b32_vs(i32 %vdst_old, i32 inreg %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_vs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    v_permlane16_swap_b32_e32 v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
  ret { i32, i32 } %swapped
}
98
; Same operands as the plain vv case, but the third intrinsic operand is true.
; The checks expect the e64 form carrying the `fi:1` modifier.
define { i32, i32 } @v_permlane16_swap_b32_vv_fi(i32 %vdst_old, i32 %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_vv_fi:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_permlane16_swap_b32_e64 v0, v1 fi:1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 false)
  ret { i32, i32 } %swapped
}
108
; Same operands as the plain vv case, but the fourth intrinsic operand is true.
; The checks expect the e64 form carrying the `bound_ctrl:1` modifier.
define { i32, i32 } @v_permlane16_swap_b32_vv_bc(i32 %vdst_old, i32 %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_vv_bc:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 true)
  ret { i32, i32 } %swapped
}
118
; Both i1 immediates are true. The checks expect the e64 form with both
; modifiers present, printed as `bound_ctrl:1 fi:1`.
define { i32, i32 } @v_permlane16_swap_b32_vv_fi_bc(i32 %vdst_old, i32 %src0_old) {
; GCN-LABEL: v_permlane16_swap_b32_vv_fi_bc:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %swapped = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 true)
  ret { i32, i32 } %swapped
}
128