xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll (revision b60c118f53e6f7e5328e54dc26b4d6787030c02b)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=NEW_RBS %s

; If an instruction is uniform and a SALU form of it is available, new RBS
; selects the SALU instruction.
; Both arguments are inreg, so %a and %b arrive in s0 and s1. The fptoui is
; emitted as VALU v_cvt_u32_f32 in both modes. Old RBS then keeps the add on
; VALU as well (v_add_nc_u32); new RBS moves the converted value back to an
; SGPR with v_readfirstlane_b32 and performs the add as s_add_i32.
define amdgpu_ps void @uniform_in_vgpr(float inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: uniform_in_vgpr:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_cvt_u32_f32_e32 v2, s0
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v2, s1, v2
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: uniform_in_vgpr:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_cvt_u32_f32_e32 v2, s0
; NEW_RBS-NEXT:    v_readfirstlane_b32 s0, v2
; NEW_RBS-NEXT:    s_add_i32 s0, s0, s1
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %a.i32 = fptoui float %a to i32
  %res = add i32 %a.i32, %b
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; Combine from rb-legalize: a copy from sgpr to vgpr followed by a
; readfirstlane from vgpr back to sgpr is folded away.
; Two uniform instructions that are emitted on VALU follow each other here
; (fadd, then fptoui). In the new RBS output the v_cvt consumes the v_add_f32
; result directly from v2; the only v_readfirstlane_b32 is the one after the
; v_cvt that feeds the scalar s_add_i32.
define amdgpu_ps void @back_to_back_uniform_in_vgpr(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: back_to_back_uniform_in_vgpr:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_add_f32_e64 v2, s0, s1
; OLD_RBS-NEXT:    v_cvt_u32_f32_e32 v2, v2
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v2, s2, v2
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: back_to_back_uniform_in_vgpr:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_add_f32_e64 v2, s0, s1
; NEW_RBS-NEXT:    v_cvt_u32_f32_e32 v2, v2
; NEW_RBS-NEXT:    v_readfirstlane_b32 s0, v2
; NEW_RBS-NEXT:    s_add_i32 s0, s0, s2
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %add = fadd float %a, %b
  %add.i32 = fptoui float %add to i32
  %res = add i32 %add.i32, %c
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; Fast rules for vector instructions.
; %rsrc and %voffset are both inreg. The uniform offset in s4 is copied to v2
; for the offen buffer load, and the loaded vector lands in v[2:5]. Old RBS
; adds 1 to element 1 on VALU; new RBS reads element 1 (v3) back with
; v_readfirstlane_b32 and does the add as s_add_i32.
define amdgpu_cs void @buffer_load_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: buffer_load_uniform:
; OLD_RBS:       ; %bb.0: ; %.entry
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s4
; OLD_RBS-NEXT:    buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
; OLD_RBS-NEXT:    s_waitcnt vmcnt(0)
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v2, 1, v3
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: buffer_load_uniform:
; NEW_RBS:       ; %bb.0: ; %.entry
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s4
; NEW_RBS-NEXT:    buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
; NEW_RBS-NEXT:    s_waitcnt vmcnt(0)
; NEW_RBS-NEXT:    v_readfirstlane_b32 s0, v3
; NEW_RBS-NEXT:    s_add_i32 s0, s0, 1
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
.entry:
  ; NOTE(review): the intrinsic suffix is .v4f32 although the call returns
  ; <4 x i32>; presumably this relies on LLVM remangling the name - confirm
  ; against the declaration before renaming.
  %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
  %el1 = extractelement <4 x i32> %vec, i64 1
  %res = add i32 %el1, 1
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; Buffer load with a divergent offset: %voffset is not inreg and arrives in
; v0. The add on the loaded element stays on VALU (v_add_nc_u32), and the
; expected output is the same for old and new RBS.
define amdgpu_cs void @buffer_load_divergent(<4 x i32> inreg %rsrc, i32 %voffset, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: buffer_load_divergent:
; OLD_RBS:       ; %bb.0: ; %.entry
; OLD_RBS-NEXT:    buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
; OLD_RBS-NEXT:    s_waitcnt vmcnt(0)
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v0, 1, v4
; OLD_RBS-NEXT:    global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: buffer_load_divergent:
; NEW_RBS:       ; %bb.0: ; %.entry
; NEW_RBS-NEXT:    buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
; NEW_RBS-NEXT:    s_waitcnt vmcnt(0)
; NEW_RBS-NEXT:    v_add_nc_u32_e32 v0, 1, v4
; NEW_RBS-NEXT:    global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT:    s_endpgm
.entry:
  ; NOTE(review): the intrinsic suffix is .v4f32 although the call returns
  ; <4 x i32>; presumably this relies on LLVM remangling the name - confirm
  ; against the declaration before renaming.
  %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
  %el1 = extractelement <4 x i32> %vec, i64 1
  %res = add i32 %el1, 1
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; Lowering in rb-legalize: an S64 'and' is legal on sgpr, but on vgpr it has
; to be split into S32 pieces.
; Both operands are VGPR arguments here, so the 64-bit 'and' is emitted as two
; v_and_b32 instructions (low and high halves). The expected output is the
; same for old and new RBS.
define amdgpu_ps void @vgpr_and_i64(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vgpr_and_i64:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_and_b32_e32 v0, v0, v2
; OLD_RBS-NEXT:    v_and_b32_e32 v1, v1, v3
; OLD_RBS-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: vgpr_and_i64:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_and_b32_e32 v0, v0, v2
; NEW_RBS-NEXT:    v_and_b32_e32 v1, v1, v3
; NEW_RBS-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off
; NEW_RBS-NEXT:    s_endpgm
  %res = and i64 %a, %b
  store i64 %res, ptr addrspace(1) %ptr
  ret void
}

; It is up to the user instruction to deal with potentially truncated bits in
; a register. Here G_ABS needs to sign-extend the S16 value in the register to
; S32 and then do an S32 G_ABS.
; In the expected output that is s_sext_i32_i16 followed by s_abs_i32; the
; 16-bit result is written with global_store_short. The output is the same
; for old and new RBS.
define amdgpu_ps void @abs_sgpr_i16(i16 inreg %arg, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: abs_sgpr_i16:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_sext_i32_i16 s0, s0
; OLD_RBS-NEXT:    s_abs_i32 s0, s0
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_short v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: abs_sgpr_i16:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_sext_i32_i16 s0, s0
; NEW_RBS-NEXT:    s_abs_i32 s0, s0
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_short v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
  store i16 %res, ptr addrspace(1) %ptr
  ret void
}

; Uniform i1 phi: both incoming values are compares of the inreg %tid, and the
; branch condition is a compare of the inreg %cond.
; In both modes the phi value is kept in s2 as 0 or 1, and the select between
; 1 and 2 is computed as sext(phi) + 2. Old RBS sign-extends bit 0 with
; s_bfe_i32 ... 0x10000; new RBS produces -1 or 0 with s_cmp_lg_u32 followed
; by s_cselect_b32.
define amdgpu_ps void @uniform_i1_phi(ptr addrspace(1) %out, i32 inreg %tid, i32 inreg %cond) {
; OLD_RBS-LABEL: uniform_i1_phi:
; OLD_RBS:       ; %bb.0: ; %A
; OLD_RBS-NEXT:    s_cmp_ge_u32 s0, 6
; OLD_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT:    s_cmp_lg_u32 s1, 0
; OLD_RBS-NEXT:    s_cbranch_scc1 .LBB6_2
; OLD_RBS-NEXT:  ; %bb.1: ; %B
; OLD_RBS-NEXT:    s_cmp_lt_u32 s0, 1
; OLD_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT:  .LBB6_2: ; %exit
; OLD_RBS-NEXT:    s_bfe_i32 s0, s2, 0x10000
; OLD_RBS-NEXT:    s_add_i32 s0, s0, 2
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: uniform_i1_phi:
; NEW_RBS:       ; %bb.0: ; %A
; NEW_RBS-NEXT:    s_cmp_ge_u32 s0, 6
; NEW_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT:    s_cmp_lg_u32 s1, 0
; NEW_RBS-NEXT:    s_cbranch_scc1 .LBB6_2
; NEW_RBS-NEXT:  ; %bb.1: ; %B
; NEW_RBS-NEXT:    s_cmp_lt_u32 s0, 1
; NEW_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT:  .LBB6_2: ; %exit
; NEW_RBS-NEXT:    s_cmp_lg_u32 s2, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, -1, 0
; NEW_RBS-NEXT:    s_add_i32 s0, s0, 2
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
A:
  %val_A = icmp uge i32 %tid, 6
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %B, label %exit

B:
  %val_B = icmp ult i32 %tid, 1
  br label %exit

exit:
  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
  %sel = select i1 %phi, i32 1, i32 2
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; This is a kind of i1 readfirstlane: a uniform i1 result produced by an
; instruction that is only available on VALU.
; The fcmp on the inreg %a is emitted as v_cmp_eq_f32 writing s0 in both
; modes. Old RBS then selects on VALU with v_cndmask_b32. New RBS turns the
; compare result into a scalar 0/1 value (s_cmp_lg_u32, s_cselect_b32 1/0,
; s_and_b32 with 1) and selects between s1 and s2 with s_cselect_b32.
define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vcc_to_scc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s2
; OLD_RBS-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
; OLD_RBS-NEXT:    v_cndmask_b32_e64 v2, v2, s1, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: vcc_to_scc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; NEW_RBS-NEXT:    s_and_b32 s0, s0, 1
; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, s1, s2
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %vcc_to_scc = fcmp oeq float %a, 0.0
  %select = select i1 %vcc_to_scc, i32 %b, i32 %c
  store i32 %select, ptr addrspace(1) %ptr
  ret void
}

; The combiner in rb-legalize recognizes the sgpr S1 to vcc copy.
; A uniform compare (inreg %a) feeds a select between the VGPR values %b and
; %c. Old RBS materializes the compare as 0/1 (s_cselect_b32, s_and_b32) and
; rebuilds vcc_lo with v_cmp_ne_u32. New RBS writes vcc_lo directly with
; s_cselect_b32 vcc_lo, exec_lo, 0.
define amdgpu_ps void @scc_to_vcc(i32 inreg %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: scc_to_vcc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_eq_u32 s0, 0
; OLD_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT:    s_and_b32 s0, 1, s0
; OLD_RBS-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
; OLD_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; OLD_RBS-NEXT:    global_store_dword v[2:3], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: scc_to_vcc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_eq_u32 s0, 0
; NEW_RBS-NEXT:    s_cselect_b32 vcc_lo, exec_lo, 0
; NEW_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; NEW_RBS-NEXT:    global_store_dword v[2:3], v0, off
; NEW_RBS-NEXT:    s_endpgm
  %scc_to_vcc = icmp eq i32 %a, 0
  %select = select i1 %scc_to_vcc, i32 %b, i32 %c
  store i32 %select, ptr addrspace(1) %ptr
  ret void
}

; This is the only G_TRUNC that is not a no-op in GlobalISel for AMDGPU.
; The trunc of the VGPR %a to i1 feeds a select, so it has to become a vcc
; value: bit 0 is isolated with v_and_b32 and compared against 0 with
; v_cmp_ne_u32. The expected output is the same for old and new RBS.
define amdgpu_ps void @vgpr_to_vcc_trunc(i32 %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vgpr_to_vcc_trunc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_and_b32_e32 v0, 1, v0
; OLD_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; OLD_RBS-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; OLD_RBS-NEXT:    global_store_dword v[3:4], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: vgpr_to_vcc_trunc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_and_b32_e32 v0, 1, v0
; NEW_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; NEW_RBS-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; NEW_RBS-NEXT:    global_store_dword v[3:4], v0, off
; NEW_RBS-NEXT:    s_endpgm
  %vcc = trunc i32 %a to i1
  %select = select i1 %vcc, i32 %b, i32 %c
  store i32 %select, ptr addrspace(1) %ptr
  ret void
}

; An i1 input to zext or sext is something that survived the legalizer (it is
; not a trunc); it is lowered to a select.
; For this uniform zext the expected output is s_cmp_eq_u32 followed by
; s_cselect_b32 with 1 and 0, the same for old and new RBS.
define amdgpu_ps void @zext(i32 inreg %a, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: zext:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; OLD_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: zext:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; NEW_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %bool = icmp eq i32 %a, 10
  %zext = zext i1 %bool to i32
  store i32 %zext, ptr addrspace(1) %ptr
  ret void
}

; Uniform sext of an i1 compare result.
; Old RBS materializes 0/1 with s_cselect_b32 and then sign-extends bit 0 with
; s_bfe_i32 ... 0x10000. New RBS produces the result with a single
; s_cselect_b32 choosing between -1 and 0.
define amdgpu_ps void @sext(i32 inreg %a, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: sext:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; OLD_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT:    s_bfe_i32 s0, s0, 0x10000
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: sext:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; NEW_RBS-NEXT:    s_cselect_b32 s0, -1, 0
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %bool = icmp eq i32 %a, 10
  %sext = sext i1 %bool to i32
  store i32 %sext, ptr addrspace(1) %ptr
  ret void
}

; Divergent i1 bitwise operation; the i1 values are vcc.
; The instruction is selected into s_and_b32 on wave32 or s_and_b64 on wave64.
; Both compares are on VGPR arguments and are emitted as v_cmp_le_u32 (one
; writing vcc_lo, the other s0); their results are combined with s_and_b32.
; The expected output is the same for old and new RBS.
define amdgpu_ps void @and_i1_vcc(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: and_i1_vcc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_cmp_le_u32_e32 vcc_lo, 10, v0
; OLD_RBS-NEXT:    v_cmp_le_u32_e64 s0, 20, v1
; OLD_RBS-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
; OLD_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; OLD_RBS-NEXT:    global_store_dword v[2:3], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: and_i1_vcc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_cmp_le_u32_e32 vcc_lo, 10, v0
; NEW_RBS-NEXT:    v_cmp_le_u32_e64 s0, 20, v1
; NEW_RBS-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
; NEW_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; NEW_RBS-NEXT:    global_store_dword v[2:3], v0, off
; NEW_RBS-NEXT:    s_endpgm
  %cmp_a = icmp uge i32 %a, 10
  %cmp_b = icmp uge i32 %b, 20
  %cc = and i1 %cmp_a, %cmp_b
  %res = select i1 %cc, i32 %a, i32 %b
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; Uniform i1 bitwise operation; the i1 values are held as i32 in sgprs.
; The instruction is selected into s_and_b32.
; Each compare on the inreg arguments is materialized as 0/1 with
; s_cselect_b32 and the two values are combined with s_and_b32. Old RBS emits
; an additional s_and_b32 with 1 before the final s_cmp_lg_u32; new RBS does
; not.
define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: and_i1_scc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_ge_u32 s0, 10
; OLD_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT:    s_cmp_ge_u32 s1, 20
; OLD_RBS-NEXT:    s_cselect_b32 s3, 1, 0
; OLD_RBS-NEXT:    s_and_b32 s2, s2, s3
; OLD_RBS-NEXT:    s_and_b32 s2, s2, 1
; OLD_RBS-NEXT:    s_cmp_lg_u32 s2, 0
; OLD_RBS-NEXT:    s_cselect_b32 s0, s0, s1
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: and_i1_scc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_ge_u32 s0, 10
; NEW_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT:    s_cmp_ge_u32 s1, 20
; NEW_RBS-NEXT:    s_cselect_b32 s3, 1, 0
; NEW_RBS-NEXT:    s_and_b32 s2, s2, s3
; NEW_RBS-NEXT:    s_cmp_lg_u32 s2, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, s0, s1
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %cmp_a = icmp uge i32 %a, 10
  %cmp_b = icmp uge i32 %b, 20
  %cc = and i1 %cmp_a, %cmp_b
  %res = select i1 %cc, i32 %a, i32 %b
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; Old RBS selects an sgpr phi because the phi had sgpr inputs.
; The phi merges the constants 0 and 1 across a branch on the VGPR %a. In the
; old RBS output the merged value lives in s0 and is copied to v0 only after
; the join. In the new RBS output each incoming constant is copied into v0
; inside its own predecessor block, so the merged value lives in a VGPR.
define amdgpu_ps void @divergent_phi_with_uniform_inputs(i32 %a, ptr addrspace(1) %out) {
; OLD_RBS-LABEL: divergent_phi_with_uniform_inputs:
; OLD_RBS:       ; %bb.0: ; %A
; OLD_RBS-NEXT:    s_mov_b32 s0, 0
; OLD_RBS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; OLD_RBS-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; OLD_RBS-NEXT:  ; %bb.1: ; %B
; OLD_RBS-NEXT:    s_mov_b32 s0, 1
; OLD_RBS-NEXT:  ; %bb.2: ; %exit
; OLD_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; OLD_RBS-NEXT:    v_mov_b32_e32 v0, s0
; OLD_RBS-NEXT:    global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: divergent_phi_with_uniform_inputs:
; NEW_RBS:       ; %bb.0: ; %A
; NEW_RBS-NEXT:    s_mov_b32 s0, 0
; NEW_RBS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; NEW_RBS-NEXT:    v_mov_b32_e32 v0, s0
; NEW_RBS-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; NEW_RBS-NEXT:  ; %bb.1: ; %B
; NEW_RBS-NEXT:    s_mov_b32 s1, 1
; NEW_RBS-NEXT:    v_mov_b32_e32 v0, s1
; NEW_RBS-NEXT:  ; %bb.2: ; %exit
; NEW_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; NEW_RBS-NEXT:    global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT:    s_endpgm
A:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %B, label %exit

B:
  br label %exit

exit:
  %phi = phi i32 [ 0, %A ], [ 1, %B ]
  store i32 %phi, ptr addrspace(1) %out
  ret void
}

; Old RBS assigned vgpr to the uniform phi (because one input had an
; undetermined bank) and that propagated to the mul, which was not wrong.
; New RBS assigns vgpr to the destination of the mul even though both of its
; inputs are sgpr.
; In the expected output, old RBS keeps the counter in v3 for the whole
; function. New RBS keeps the counter in s0 inside the loop (s_add_i32), then
; copies it to v0 after the loop and multiplies with v_mul_lo_u32.
; The loop exit condition compares against the VGPR %val, and %counter is used
; by the mul in %exit, outside the loop.
; TODO: implement temporal divergence lowering
define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, ptr addrspace(1) %addr) {
; OLD_RBS-LABEL: divergent_because_of_temporal_divergent_use:
; OLD_RBS:       ; %bb.0: ; %entry
; OLD_RBS-NEXT:    s_mov_b32 s0, -1
; OLD_RBS-NEXT:    v_mov_b32_e32 v3, s0
; OLD_RBS-NEXT:    s_mov_b32 s0, 0
; OLD_RBS-NEXT:  .LBB15_1: ; %loop
; OLD_RBS-NEXT:    ; =>This Inner Loop Header: Depth=1
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v3, 1, v3
; OLD_RBS-NEXT:    v_cvt_f32_u32_e32 v4, v3
; OLD_RBS-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v4, v0
; OLD_RBS-NEXT:    s_or_b32 s0, vcc_lo, s0
; OLD_RBS-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; OLD_RBS-NEXT:    s_cbranch_execnz .LBB15_1
; OLD_RBS-NEXT:  ; %bb.2: ; %exit
; OLD_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; OLD_RBS-NEXT:    v_mul_lo_u32 v0, v3, 10
; OLD_RBS-NEXT:    global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: divergent_because_of_temporal_divergent_use:
; NEW_RBS:       ; %bb.0: ; %entry
; NEW_RBS-NEXT:    s_mov_b32 s0, -1
; NEW_RBS-NEXT:    s_mov_b32 s1, 0
; NEW_RBS-NEXT:  .LBB15_1: ; %loop
; NEW_RBS-NEXT:    ; =>This Inner Loop Header: Depth=1
; NEW_RBS-NEXT:    s_add_i32 s0, s0, 1
; NEW_RBS-NEXT:    v_cvt_f32_u32_e32 v3, s0
; NEW_RBS-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v0
; NEW_RBS-NEXT:    s_or_b32 s1, vcc_lo, s1
; NEW_RBS-NEXT:    s_andn2_b32 exec_lo, exec_lo, s1
; NEW_RBS-NEXT:    s_cbranch_execnz .LBB15_1
; NEW_RBS-NEXT:  ; %bb.2: ; %exit
; NEW_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; NEW_RBS-NEXT:    v_mov_b32_e32 v0, s0
; NEW_RBS-NEXT:    v_mul_lo_u32 v0, v0, 10
; NEW_RBS-NEXT:    global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT:    s_endpgm
entry:
  br label %loop

loop:
  %counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
  %f.counter = uitofp i32 %counter to float
  %cond = fcmp ogt float %f.counter, %val
  %counter.plus.1 = add i32 %counter, 1
  br i1 %cond, label %exit, label %loop

exit:
  %ceilx10 = mul i32 %counter, 10
  store i32 %ceilx10, ptr addrspace(1) %addr
  ret void
}

; Variables that handle the counter can be allocated to sgprs.
; Test shape: one loop (%A -> %B -> %loop.body -> %A) with three ways out to
; %exit: a[counter] == 0 in %A, b[counter] == 0 in %B, and the latch compare
; in %loop.body. The only loop-carried value is the i32 %counter.
; In the generated checks below, the OLD_RBS output keeps the counter in a
; VGPR (v6, updated with v_add_nc_u32 / tested with v_cmp_gt_u32), while the
; NEW_RBS output keeps it in an SGPR (s0, updated with s_add_i32 / tested with
; s_cmpk_lt_u32) and copies the scaled index into VGPRs only for the address
; adds.
486fef54d03SPetar Avramovicdefine amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) {
487fef54d03SPetar Avramovic; OLD_RBS-LABEL: loop_with_2breaks:
488fef54d03SPetar Avramovic; OLD_RBS:       ; %bb.0: ; %entry
489fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_mov_b32 s0, 0
490fef54d03SPetar Avramovic; OLD_RBS-NEXT:    ; implicit-def: $sgpr1
491fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_mov_b32_e32 v6, s0
492fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_branch .LBB16_3
493fef54d03SPetar Avramovic; OLD_RBS-NEXT:  .LBB16_1: ; %Flow3
494fef54d03SPetar Avramovic; OLD_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
495fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_waitcnt_depctr 0xffe3
496fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s3
497fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_andn2_b32 s1, s1, exec_lo
498fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_and_b32 s3, exec_lo, s4
499fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_or_b32 s1, s1, s3
500fef54d03SPetar Avramovic; OLD_RBS-NEXT:  .LBB16_2: ; %Flow
501fef54d03SPetar Avramovic; OLD_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
502fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s2
503fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_and_b32 s2, exec_lo, s1
504fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_or_b32 s0, s2, s0
505fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
506fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_cbranch_execz .LBB16_6
507fef54d03SPetar Avramovic; OLD_RBS-NEXT:  .LBB16_3: ; %A
508fef54d03SPetar Avramovic; OLD_RBS-NEXT:    ; =>This Inner Loop Header: Depth=1
509fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
510fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_andn2_b32 s1, s1, exec_lo
511fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_and_b32 s2, exec_lo, -1
512fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_or_b32 s1, s1, s2
513fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_lshlrev_b64 v[7:8], 2, v[6:7]
514fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v7
515fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
516fef54d03SPetar Avramovic; OLD_RBS-NEXT:    global_load_dword v9, v[9:10], off
517fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_waitcnt vmcnt(0)
518fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
519fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_and_saveexec_b32 s2, vcc_lo
520fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_cbranch_execz .LBB16_2
521fef54d03SPetar Avramovic; OLD_RBS-NEXT:  ; %bb.4: ; %B
522fef54d03SPetar Avramovic; OLD_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
523fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_co_u32 v9, vcc_lo, v4, v7
524fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo
525fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_mov_b32 s4, -1
526fef54d03SPetar Avramovic; OLD_RBS-NEXT:    global_load_dword v9, v[9:10], off
527fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_waitcnt vmcnt(0)
528fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
529fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_and_saveexec_b32 s3, vcc_lo
530fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_cbranch_execz .LBB16_1
531fef54d03SPetar Avramovic; OLD_RBS-NEXT:  ; %bb.5: ; %loop.body
532fef54d03SPetar Avramovic; OLD_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
533fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
534fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
535fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_nc_u32_e32 v10, 1, v6
536fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
537fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_andn2_b32 s4, -1, exec_lo
538fef54d03SPetar Avramovic; OLD_RBS-NEXT:    global_load_dword v9, v[7:8], off
539fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_mov_b32_e32 v6, v10
540fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_and_b32 s5, exec_lo, vcc_lo
541fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_or_b32 s4, s4, s5
542fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_waitcnt vmcnt(0)
543fef54d03SPetar Avramovic; OLD_RBS-NEXT:    v_add_nc_u32_e32 v9, 1, v9
544fef54d03SPetar Avramovic; OLD_RBS-NEXT:    global_store_dword v[7:8], v9, off
545fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_branch .LBB16_1
546fef54d03SPetar Avramovic; OLD_RBS-NEXT:  .LBB16_6: ; %exit
547fef54d03SPetar Avramovic; OLD_RBS-NEXT:    s_endpgm
548fef54d03SPetar Avramovic;
549fef54d03SPetar Avramovic; NEW_RBS-LABEL: loop_with_2breaks:
550fef54d03SPetar Avramovic; NEW_RBS:       ; %bb.0: ; %entry
551*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_mov_b32 s4, 0
552fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_mov_b32 s0, 0
553*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    ; implicit-def: $sgpr5
554fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_branch .LBB16_3
555fef54d03SPetar Avramovic; NEW_RBS-NEXT:  .LBB16_1: ; %Flow3
556fef54d03SPetar Avramovic; NEW_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
557fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_waitcnt_depctr 0xffe3
558*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s7
559*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_andn2_b32 s2, s5, exec_lo
560*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_and_b32 s3, exec_lo, s6
561*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_or_b32 s5, s2, s3
562fef54d03SPetar Avramovic; NEW_RBS-NEXT:  .LBB16_2: ; %Flow
563fef54d03SPetar Avramovic; NEW_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
564*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s1
565*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_and_b32 s1, exec_lo, s5
566*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_or_b32 s4, s1, s4
567*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_andn2_b32 exec_lo, exec_lo, s4
568fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_cbranch_execz .LBB16_6
569fef54d03SPetar Avramovic; NEW_RBS-NEXT:  .LBB16_3: ; %A
570fef54d03SPetar Avramovic; NEW_RBS-NEXT:    ; =>This Inner Loop Header: Depth=1
571*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_ashr_i32 s1, s0, 31
572*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_lshl_b64 s[2:3], s[0:1], 2
573*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_andn2_b32 s1, s5, exec_lo
574*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_mov_b32_e32 v7, s3
575*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_mov_b32_e32 v6, s2
576*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_and_b32 s5, exec_lo, exec_lo
577*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_or_b32 s5, s1, s5
578*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_add_co_u32 v6, vcc_lo, v2, v6
579*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo
580*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    global_load_dword v6, v[6:7], off
581fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_waitcnt vmcnt(0)
582*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
583*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_and_saveexec_b32 s1, vcc_lo
584fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_cbranch_execz .LBB16_2
585fef54d03SPetar Avramovic; NEW_RBS-NEXT:  ; %bb.4: ; %B
586fef54d03SPetar Avramovic; NEW_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
587*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_mov_b32_e32 v7, s3
588*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_mov_b32_e32 v6, s2
589*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_mov_b32 s6, exec_lo
590*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_add_co_u32 v6, vcc_lo, v4, v6
591*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, v5, v7, vcc_lo
592*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    global_load_dword v6, v[6:7], off
593fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_waitcnt vmcnt(0)
594*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
595*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_and_saveexec_b32 s7, vcc_lo
596fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_cbranch_execz .LBB16_1
597fef54d03SPetar Avramovic; NEW_RBS-NEXT:  ; %bb.5: ; %loop.body
598fef54d03SPetar Avramovic; NEW_RBS-NEXT:    ; in Loop: Header=BB16_3 Depth=1
599*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_mov_b32_e32 v7, s3
600*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_mov_b32_e32 v6, s2
601*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_add_i32 s2, s0, 1
602*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_cmpk_lt_u32 s0, 0x64
603*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_cselect_b32 s0, exec_lo, 0
604*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_add_co_u32 v6, vcc_lo, v0, v6
605*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo
606*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_andn2_b32 s3, s6, exec_lo
607*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_and_b32 s0, exec_lo, s0
608*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_or_b32 s6, s3, s0
609*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    global_load_dword v8, v[6:7], off
610*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    s_mov_b32 s0, s2
611fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_waitcnt vmcnt(0)
612*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    v_add_nc_u32_e32 v8, 1, v8
613*b60c118fSPetar Avramovic; NEW_RBS-NEXT:    global_store_dword v[6:7], v8, off
614fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_branch .LBB16_1
615fef54d03SPetar Avramovic; NEW_RBS-NEXT:  .LBB16_6: ; %exit
616fef54d03SPetar Avramovic; NEW_RBS-NEXT:    s_endpgm
617fef54d03SPetar Avramovicentry:
618fef54d03SPetar Avramovic  br label %A
619fef54d03SPetar Avramovic
; Loop header. %counter is 0 on entry and is otherwise fed only from
; %loop.body. Loads a[%counter]; a zero value leaves the loop (first break).
620fef54d03SPetar AvramovicA:
621fef54d03SPetar Avramovic  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
622fef54d03SPetar Avramovic  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
623fef54d03SPetar Avramovic  %a.val = load i32, ptr addrspace(1) %a.plus.counter
624fef54d03SPetar Avramovic  %a.cond = icmp eq i32 %a.val, 0
625fef54d03SPetar Avramovic  br i1 %a.cond, label %exit, label %B
626fef54d03SPetar Avramovic
; Reached only when a[%counter] was non-zero. Loads b[%counter]; a zero value
; leaves the loop (second break).
627fef54d03SPetar AvramovicB:
628fef54d03SPetar Avramovic  %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
629fef54d03SPetar Avramovic  %b.val = load i32, ptr addrspace(1) %b.plus.counter
630fef54d03SPetar Avramovic  %b.cond = icmp eq i32 %b.val, 0
631fef54d03SPetar Avramovic  br i1 %b.cond, label %exit, label %loop.body
632fef54d03SPetar Avramovic
; Loop latch. Increments x[%counter] in memory and computes the next counter
; value. The branch tests the pre-increment %counter: unsigned-less-than 100
; goes to %exit, otherwise control returns to %A.
633fef54d03SPetar Avramovicloop.body:
634fef54d03SPetar Avramovic  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
635fef54d03SPetar Avramovic  %x.val = load i32, ptr addrspace(1) %x.plus.counter
636fef54d03SPetar Avramovic  %x.val.plus.1 = add i32 %x.val, 1
637fef54d03SPetar Avramovic  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
638fef54d03SPetar Avramovic  %counter.plus.1 = add i32 %counter, 1
639fef54d03SPetar Avramovic  %x.cond = icmp ult i32 %counter, 100
640fef54d03SPetar Avramovic  br i1 %x.cond, label %exit, label %A
641fef54d03SPetar Avramovic
; Common exit for all three loop exits; no value is returned.
642fef54d03SPetar Avramovicexit:
643fef54d03SPetar Avramovic  ret void
644fef54d03SPetar Avramovic}
645fef54d03SPetar Avramovic
646fef54d03SPetar Avramovicdeclare i16 @llvm.abs.i16(i16, i1)
647fef54d03SPetar Avramovicdeclare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg)
648