xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s
4
5; End-to-end tests for scalar vs. vector boolean legalization strategies.
6
; Select between the i32 values %b and %c using bit 0 of the `inreg` i32 %a
; as the condition, and return the chosen value reinterpreted as float.
; Expected code, per the checks below: the condition is read from s0, masked
; with 1 (s_and_b32), compared against 0 with v_cmp_ne_u32 into vcc on the
; gfx900 run and into vcc_lo on the gfx1031 run, and that result drives a
; single v_cndmask_b32 on v0/v1.
7define amdgpu_ps float @select_vgpr_sgpr_trunc_cond(i32 inreg %a, i32 %b, i32 %c) {
8; WAVE64-LABEL: select_vgpr_sgpr_trunc_cond:
9; WAVE64:       ; %bb.0:
10; WAVE64-NEXT:    s_and_b32 s0, 1, s0
11; WAVE64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
12; WAVE64-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
13; WAVE64-NEXT:    ; return to shader part epilog
14;
15; WAVE32-LABEL: select_vgpr_sgpr_trunc_cond:
16; WAVE32:       ; %bb.0:
17; WAVE32-NEXT:    s_and_b32 s0, 1, s0
18; WAVE32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
19; WAVE32-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
20; WAVE32-NEXT:    ; return to shader part epilog
; Only bit 0 of %a survives the trunc; that i1 is the select condition.
21  %cc = trunc i32 %a to i1
22  %r = select i1 %cc, i32 %b, i32 %c
; The function returns float, so the selected i32 is bitcast before returning.
23  %r.f = bitcast i32 %r to float
24  ret float %r.f
25}
26
; Same shape as a single-condition select, but the i1 condition is the AND of
; bit 0 of two `inreg` i32 arguments (%a.0, %a.1). The selected value of
; %b / %c is returned reinterpreted as float.
; Expected code, per the checks below: the two inputs are combined with one
; s_and_b32 on s0/s1 before the result is masked with 1, then compared against
; 0 with v_cmp_ne_u32 (vcc on the gfx900 run, vcc_lo on the gfx1031 run) and
; consumed by v_cndmask_b32.
27define amdgpu_ps float @select_vgpr_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 %b, i32 %c) {
28; WAVE64-LABEL: select_vgpr_sgpr_trunc_and_cond:
29; WAVE64:       ; %bb.0:
30; WAVE64-NEXT:    s_and_b32 s0, s0, s1
31; WAVE64-NEXT:    s_and_b32 s0, 1, s0
32; WAVE64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
33; WAVE64-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
34; WAVE64-NEXT:    ; return to shader part epilog
35;
36; WAVE32-LABEL: select_vgpr_sgpr_trunc_and_cond:
37; WAVE32:       ; %bb.0:
38; WAVE32-NEXT:    s_and_b32 s0, s0, s1
39; WAVE32-NEXT:    s_and_b32 s0, 1, s0
40; WAVE32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
41; WAVE32-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
42; WAVE32-NEXT:    ; return to shader part epilog
; Each input is truncated to i1 separately and the two i1 values are ANDed.
43  %cc.0 = trunc i32 %a.0 to i1
44  %cc.1 = trunc i32 %a.1 to i1
45  %and = and i1 %cc.0, %cc.1
46  %r = select i1 %and, i32 %b, i32 %c
; The function returns float, so the selected i32 is bitcast before returning.
47  %r.f = bitcast i32 %r to float
48  ret float %r.f
49}
50
; Select where every argument is `inreg` and the result is returned as i32.
; The condition is the AND of bit 0 of %a.0 and %a.1.
; Both RUN lines share one check prefix for this function, so the expected
; code is the same on the gfx900 and gfx1031 runs: s_and_b32 of s0/s1, a mask
; with 1, s_cmp_lg_u32 against 0, and s_cselect_b32 picking s2 or s3. No
; v_* instruction appears in the checks.
51define amdgpu_ps i32 @select_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 inreg %b, i32 inreg %c) {
52; GCN-LABEL: select_sgpr_trunc_and_cond:
53; GCN:       ; %bb.0:
54; GCN-NEXT:    s_and_b32 s0, s0, s1
55; GCN-NEXT:    s_and_b32 s0, s0, 1
56; GCN-NEXT:    s_cmp_lg_u32 s0, 0
57; GCN-NEXT:    s_cselect_b32 s0, s2, s3
58; GCN-NEXT:    ; return to shader part epilog
; Each input is truncated to i1 separately and the two i1 values are ANDed.
59  %cc.0 = trunc i32 %a.0 to i1
60  %cc.1 = trunc i32 %a.1 to i1
61  %and = and i1 %cc.0, %cc.1
62  %r = select i1 %and, i32 %b, i32 %c
63  ret i32 %r
64}
65
; Conditional branch on bit 0 of a kernel argument.
; %cond is truncated to i1 and branches to %bb0 when set, %bb1 otherwise.
; Each successor performs one volatile i32 store (0 in %bb0, 1 in %bb1) and
; ends in `unreachable`.
; Expected code, per the checks below: the argument is loaded with
; s_load_dword from s[4:5] at offset 0x24, bit 0 is inverted (s_xor_b32 with 1)
; and masked, s_cmp_lg_u32 compares it against 0, and s_cbranch_scc1 jumps to
; the %bb1 label. The two runs differ only in the wait after each store:
; s_waitcnt vmcnt(0) on the gfx900 run, s_waitcnt_vscnt null, 0x0 on the
; gfx1031 run. The checks list no instruction between the end of %bb0 and the
; %bb1 label.
66define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) {
67; WAVE64-LABEL: sgpr_trunc_brcond:
68; WAVE64:       ; %bb.0: ; %entry
69; WAVE64-NEXT:    s_load_dword s0, s[4:5], 0x24
70; WAVE64-NEXT:    s_waitcnt lgkmcnt(0)
71; WAVE64-NEXT:    s_xor_b32 s0, s0, 1
72; WAVE64-NEXT:    s_and_b32 s0, s0, 1
73; WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
74; WAVE64-NEXT:    s_cbranch_scc1 .LBB3_2
75; WAVE64-NEXT:  ; %bb.1: ; %bb0
76; WAVE64-NEXT:    v_mov_b32_e32 v0, 0
77; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
78; WAVE64-NEXT:    s_waitcnt vmcnt(0)
79; WAVE64-NEXT:  .LBB3_2: ; %bb1
80; WAVE64-NEXT:    v_mov_b32_e32 v0, 1
81; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
82; WAVE64-NEXT:    s_waitcnt vmcnt(0)
83;
84; WAVE32-LABEL: sgpr_trunc_brcond:
85; WAVE32:       ; %bb.0: ; %entry
86; WAVE32-NEXT:    s_load_dword s0, s[4:5], 0x24
87; WAVE32-NEXT:    s_waitcnt lgkmcnt(0)
88; WAVE32-NEXT:    s_xor_b32 s0, s0, 1
89; WAVE32-NEXT:    s_and_b32 s0, s0, 1
90; WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
91; WAVE32-NEXT:    s_cbranch_scc1 .LBB3_2
92; WAVE32-NEXT:  ; %bb.1: ; %bb0
93; WAVE32-NEXT:    v_mov_b32_e32 v0, 0
94; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
95; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
96; WAVE32-NEXT:  .LBB3_2: ; %bb1
97; WAVE32-NEXT:    v_mov_b32_e32 v0, 1
98; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
99; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
100entry:
; Only bit 0 of %cond decides the branch.
101  %trunc = trunc i32 %cond to i1
102  br i1 %trunc, label %bb0, label %bb1
103
104bb0:
; The stored constant (0 here, 1 in %bb1) is what tells the two blocks apart
; in the checked output; the address operand is undef.
105  store volatile i32 0, ptr addrspace(1) undef
106  unreachable
107
108bb1:
109  store volatile i32 1, ptr addrspace(1) undef
110  unreachable
111}
112
; Conditional branch on the AND of bit 0 of two kernel arguments.
; %cond0 and %cond1 are each truncated to i1, ANDed, and the result branches
; to %bb0 when set, %bb1 otherwise. Each successor performs one volatile i32
; store (0 in %bb0, 1 in %bb1) and ends in `unreachable`.
; Expected code, per the checks below: both arguments are loaded together with
; s_load_dwordx2 from s[4:5] at offset 0x24 and combined with s_and_b32 on
; s0/s1; bit 0 is then inverted (s_xor_b32 with 1) and masked, s_cmp_lg_u32
; compares it against 0, and s_cbranch_scc1 jumps to the %bb1 label. The two
; runs differ only in the wait after each store: s_waitcnt vmcnt(0) on the
; gfx900 run, s_waitcnt_vscnt null, 0x0 on the gfx1031 run.
113define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
114; WAVE64-LABEL: brcond_sgpr_trunc_and:
115; WAVE64:       ; %bb.0: ; %entry
116; WAVE64-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
117; WAVE64-NEXT:    s_waitcnt lgkmcnt(0)
118; WAVE64-NEXT:    s_and_b32 s0, s0, s1
119; WAVE64-NEXT:    s_xor_b32 s0, s0, 1
120; WAVE64-NEXT:    s_and_b32 s0, s0, 1
121; WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
122; WAVE64-NEXT:    s_cbranch_scc1 .LBB4_2
123; WAVE64-NEXT:  ; %bb.1: ; %bb0
124; WAVE64-NEXT:    v_mov_b32_e32 v0, 0
125; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
126; WAVE64-NEXT:    s_waitcnt vmcnt(0)
127; WAVE64-NEXT:  .LBB4_2: ; %bb1
128; WAVE64-NEXT:    v_mov_b32_e32 v0, 1
129; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
130; WAVE64-NEXT:    s_waitcnt vmcnt(0)
131;
132; WAVE32-LABEL: brcond_sgpr_trunc_and:
133; WAVE32:       ; %bb.0: ; %entry
134; WAVE32-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
135; WAVE32-NEXT:    s_waitcnt lgkmcnt(0)
136; WAVE32-NEXT:    s_and_b32 s0, s0, s1
137; WAVE32-NEXT:    s_xor_b32 s0, s0, 1
138; WAVE32-NEXT:    s_and_b32 s0, s0, 1
139; WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
140; WAVE32-NEXT:    s_cbranch_scc1 .LBB4_2
141; WAVE32-NEXT:  ; %bb.1: ; %bb0
142; WAVE32-NEXT:    v_mov_b32_e32 v0, 0
143; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
144; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
145; WAVE32-NEXT:  .LBB4_2: ; %bb1
146; WAVE32-NEXT:    v_mov_b32_e32 v0, 1
147; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
148; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
149entry:
; Each input is truncated to i1 separately; the branch condition is their AND.
150  %trunc0 = trunc i32 %cond0 to i1
151  %trunc1 = trunc i32 %cond1 to i1
152  %and = and i1 %trunc0, %trunc1
153  br i1 %and, label %bb0, label %bb1
154
155bb0:
; The stored constant (0 here, 1 in %bb1) is what tells the two blocks apart
; in the checked output; the address operand is undef.
156  store volatile i32 0, ptr addrspace(1) undef
157  unreachable
158
159bb1:
160  store volatile i32 1, ptr addrspace(1) undef
161  unreachable
162}
163