xref: /llvm-project/llvm/test/CodeGen/AMDGPU/select-opt.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3; Make sure to test with f32 and i32 compares. If we have to use float
4; compares, we always have multiple condition registers. If we can do
5; scalar compares, we don't want to use multiple condition registers.
6
; i32 select whose condition is the AND of two i32 'icmp ne' results.
; The checks below expect both compares to be scalar (s_cmp_lg_u32), each
; turned into a 64-bit mask by s_cselect_b64 (-1 / 0), the two masks combined
; with s_and_b64, that result ANDed with exec, and the value picked by a
; single s_cselect_b32 before it is moved to a v register for the store.
7; GCN-LABEL: {{^}}opt_select_i32_and_cmp_i32:
8; GCN-DAG: s_cmp_lg_u32
9; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
10; GCN-DAG: s_cmp_lg_u32
11; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
12; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
13; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec
14; GCN: s_cselect_b32 [[RESULT:s[0-9]+]]
15; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
16; GCN: buffer_store_dword [[VRESULT]]
17define amdgpu_kernel void @opt_select_i32_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
18  %icmp0 = icmp ne i32 %a, %b
19  %icmp1 = icmp ne i32 %a, %c
20  %and = and i1 %icmp0, %icmp1
21  %select = select i1 %and, i32 %x, i32 %y
22  store i32 %select, ptr addrspace(1) %out
23  ret void
24}
25
; i32 select whose condition is the AND of two f32 'fcmp one' results.
; The checks below expect two v_cmp_lg_f32 compares, one writing vcc (e32
; form) and one writing an s register pair (e64 form), combined by s_and_b64.
; Note that the s_and_b64 check reuses [[CMP1]] as its destination, so it
; requires the AND to overwrite the second compare's register pair.
26; GCN-LABEL: {{^}}opt_select_i32_and_cmp_f32:
27; GCN-DAG: v_cmp_lg_f32_e32 vcc
28; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
29; GCN: s_and_b64 [[CMP1]], vcc, [[CMP1]]
30; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP1]], exec
31; GCN: s_cselect_b32 [[RESULT:s[0-9]+]]
32; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
33; GCN: buffer_store_dword [[VRESULT]]
34define amdgpu_kernel void @opt_select_i32_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
35  %fcmp0 = fcmp one float %a, %b
36  %fcmp1 = fcmp one float %a, %c
37  %and = and i1 %fcmp0, %fcmp1
38  %select = select i1 %and, i32 %x, i32 %y
39  store i32 %select, ptr addrspace(1) %out
40  ret void
41}
42
; i64 select whose condition is the AND of two i32 'icmp ne' results.
; Same condition sequence as the i32 variant (s_cmp_lg_u32, s_cselect_b64,
; s_and_b64, s_and_b64 with exec), but the checks expect two s_cselect_b32
; instructions and a buffer_store_dwordx2 of a v register pair.
; The capture names are crossed on purpose or by accident: VRESULT1 is the
; v register that receives RESULT0, and VRESULT0 the one receiving RESULT1.
43; GCN-LABEL: {{^}}opt_select_i64_and_cmp_i32:
44; GCN-DAG: s_cmp_lg_u32
45; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
46; GCN-DAG: s_cmp_lg_u32
47; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
48; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
49; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec
50; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
51; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
52; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
53; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
54; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
55define amdgpu_kernel void @opt_select_i64_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
56  %icmp0 = icmp ne i32 %a, %b
57  %icmp1 = icmp ne i32 %a, %c
58  %and = and i1 %icmp0, %icmp1
59  %select = select i1 %and, i64 %x, i64 %y
60  store i64 %select, ptr addrspace(1) %out
61  ret void
62}
63
; i64 select whose condition is the AND of two f32 'fcmp one' results.
; The checks below expect two v_cmp_lg_f32 compares (one into vcc, one into
; an s register pair), an s_and_b64 of the two into a separately captured
; register pair, an s_and_b64 with exec, then two s_cselect_b32 instructions
; feeding a buffer_store_dwordx2.
; VRESULT1 captures the v register receiving RESULT0, and VRESULT0 the one
; receiving RESULT1.
64; GCN-LABEL: {{^}}opt_select_i64_and_cmp_f32:
65; GCN-DAG: v_cmp_lg_f32_e32 vcc,
66; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
67; GCN: s_and_b64 [[AND1:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
68; GCN: s_and_b64 [[AND2:s\[[0-9]+:[0-9]+\]]], [[AND1]], exec
69; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
70; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
71; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
72; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
73; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
74define amdgpu_kernel void @opt_select_i64_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
75  %fcmp0 = fcmp one float %a, %b
76  %fcmp1 = fcmp one float %a, %c
77  %and = and i1 %fcmp0, %fcmp1
78  %select = select i1 %and, i64 %x, i64 %y
79  store i64 %select, ptr addrspace(1) %out
80  ret void
81}
82
; i32 select whose condition is the OR of two i32 'icmp ne' results.
; The checks below expect two scalar compares (s_cmp_lg_u32), each turned
; into a 64-bit mask by s_cselect_b64, the masks combined with s_or_b64, the
; result ANDed with exec, and the value picked by s_cselect_b32.
; This is the only test in the file that also checks for s_endpgm after the
; store.
83; GCN-LABEL: {{^}}opt_select_i32_or_cmp_i32:
84; GCN-DAG: s_cmp_lg_u32
85; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
86; GCN-DAG: s_cmp_lg_u32
87; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
88; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
89; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
90; GCN-DAG: s_cselect_b32 [[RESULT:s[0-9]+]]
91; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
92; GCN: buffer_store_dword [[VRESULT]]
93; GCN: s_endpgm
94define amdgpu_kernel void @opt_select_i32_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
95  %icmp0 = icmp ne i32 %a, %b
96  %icmp1 = icmp ne i32 %a, %c
97  %or = or i1 %icmp0, %icmp1
98  %select = select i1 %or, i32 %x, i32 %y
99  store i32 %select, ptr addrspace(1) %out
100  ret void
101}
102
; i32 select whose condition is the OR of two f32 'fcmp one' results.
; The checks below expect two v_cmp_lg_f32 compares (one into vcc, one into
; an s register pair), combined with s_or_b64, ANDed with exec, and the value
; picked by s_cselect_b32 before being moved to a v register for the store.
103; GCN-LABEL: {{^}}opt_select_i32_or_cmp_f32:
104; GCN-DAG: v_cmp_lg_f32_e32 vcc
105; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
106; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
107; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
108; GCN-DAG: s_cselect_b32 [[RESULT:s[0-9]+]]
109; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
110; GCN: buffer_store_dword [[VRESULT]]
111define amdgpu_kernel void @opt_select_i32_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
112  %fcmp0 = fcmp one float %a, %b
113  %fcmp1 = fcmp one float %a, %c
114  %or = or i1 %fcmp0, %fcmp1
115  %select = select i1 %or, i32 %x, i32 %y
116  store i32 %select, ptr addrspace(1) %out
117  ret void
118}
119
; i64 select whose condition is the OR of two i32 'icmp ne' results.
; The checks below expect two scalar compares (s_cmp_lg_u32) turned into
; masks by s_cselect_b64, combined with s_or_b64 and ANDed with exec, then
; two s_cselect_b32 instructions feeding a buffer_store_dwordx2.
; VRESULT1 captures the v register receiving RESULT0, and VRESULT0 the one
; receiving RESULT1.
120; GCN-LABEL: {{^}}opt_select_i64_or_cmp_i32:
121; GCN-DAG: s_cmp_lg_u32
122; GCN: s_cselect_b64 [[CMP1:s\[[0-9]+:[0-9]+\]]], -1, 0
123; GCN-DAG: s_cmp_lg_u32
124; GCN: s_cselect_b64 [[CMP2:s\[[0-9]+:[0-9]+\]]], -1, 0
125; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], [[CMP2]]
126; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
127; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
128; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
129; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
130; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
131; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
132define amdgpu_kernel void @opt_select_i64_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
133  %icmp0 = icmp ne i32 %a, %b
134  %icmp1 = icmp ne i32 %a, %c
135  %or = or i1 %icmp0, %icmp1
136  %select = select i1 %or, i64 %x, i64 %y
137  store i64 %select, ptr addrspace(1) %out
138  ret void
139}
140
; i64 select whose condition is the OR of two f32 'fcmp one' results.
; The checks below expect two v_cmp_lg_f32 compares (one into vcc, one into
; an s register pair), combined with s_or_b64 and ANDed with exec, then two
; s_cselect_b32 instructions feeding a buffer_store_dwordx2.
; VRESULT1 captures the v register receiving RESULT0, and VRESULT0 the one
; receiving RESULT1.
141; GCN-LABEL: {{^}}opt_select_i64_or_cmp_f32:
142; GCN-DAG: v_cmp_lg_f32_e32 vcc,
143; GCN-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
144; GCN: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
145; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[OR]], exec
146; GCN-DAG: s_cselect_b32 [[RESULT0:s[0-9]+]]
147; GCN-DAG: s_cselect_b32 [[RESULT1:s[0-9]+]]
148; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
149; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
150; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
151define amdgpu_kernel void @opt_select_i64_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
152  %fcmp0 = fcmp one float %a, %b
153  %fcmp1 = fcmp one float %a, %c
154  %or = or i1 %fcmp0, %fcmp1
155  %select = select i1 %or, i64 %x, i64 %y
156  store i64 %select, ptr addrspace(1) %out
157  ret void
158}
159
; Control-flow case: an i1 phi with three incoming values drives a float
; select between 4.0 and 0.0.
;   entry -> endif : phi value is true  (taken when %cmp0 is false)
;   if0   -> endif : phi value is false (taken when %cmp1 is false)
;   if1   -> endif : phi value is %cmp2, the inversion of %cmp1
; The only check is that a v_cmp_neq_f32_e64 against 1.0 with an s register
; source is emitted; the IR itself uses 'fcmp oeq', so the expected compare
; is the inverted form.
; NOTE(review): presumably this guards a past miscompile or crash; the
; original bug is not identified in this file -- confirm from history.
160; GCN-LABEL: {{^}}regression:
161; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
162
163define amdgpu_kernel void @regression(ptr addrspace(1) %out, float %c0, float %c1) #0 {
164entry:
165  %cmp0 = fcmp oeq float %c0, 1.0
166  br i1 %cmp0, label %if0, label %endif
167
168if0:
169  %cmp1 = fcmp oeq float %c1, 0.0
170  br i1 %cmp1, label %if1, label %endif
171
172if1:
173  %cmp2 = xor i1 %cmp1, true
174  br label %endif
175
176endif:
177  %tmp0 = phi i1 [ true, %entry ], [ %cmp2, %if1 ], [ false, %if0 ]
178  %tmp2 = select i1 %tmp0, float 4.0, float 0.0
179  store float %tmp2, ptr addrspace(1) %out
180  ret void
181}
182
; Attribute group referenced as #0 by every kernel defined in this file.
183attributes #0 = { nounwind }
184