; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=NEW_RBS %s

; if instruction is uniform and there is available instruction, select SALU instruction
; The fptoui is checked as a VALU convert in both runs; with new RBS its result
; is read back with v_readfirstlane_b32 so that the add is checked as s_add_i32.
define amdgpu_ps void @uniform_in_vgpr(float inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: uniform_in_vgpr:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_cvt_u32_f32_e32 v2, s0
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v2, s1, v2
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: uniform_in_vgpr:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_cvt_u32_f32_e32 v2, s0
; NEW_RBS-NEXT:    v_readfirstlane_b32 s0, v2
; NEW_RBS-NEXT:    s_add_i32 s0, s0, s1
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %a.i32 = fptoui float %a to i32
  %res = add i32 %a.i32, %b
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; copy sgpr to vgpr + readfirstlane vgpr to sgpr combine from rb-legalize
; The fadd result feeds the fptoui directly in a VGPR; the new RBS checks show
; a single v_readfirstlane_b32, placed after the convert.
define amdgpu_ps void @back_to_back_uniform_in_vgpr(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: back_to_back_uniform_in_vgpr:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_add_f32_e64 v2, s0, s1
; OLD_RBS-NEXT:    v_cvt_u32_f32_e32 v2, v2
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v2, s2, v2
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: back_to_back_uniform_in_vgpr:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_add_f32_e64 v2, s0, s1
; NEW_RBS-NEXT:    v_cvt_u32_f32_e32 v2, v2
; NEW_RBS-NEXT:    v_readfirstlane_b32 s0, v2
; NEW_RBS-NEXT:    s_add_i32 s0, s0, s2
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %add = fadd float %a, %b
  %add.i32 = fptoui float %add to i32
  %res = add i32 %add.i32, %c
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; fast rules for vector instructions
; Uniform (inreg) voffset: with new RBS the extracted element is read back with
; v_readfirstlane_b32 and the add is checked as s_add_i32.
; NOTE(review): the intrinsic is spelled with a .v4f32 suffix although it
; returns <4 x i32>; presumably the IR parser remangles the name - confirm
; before renaming.
define amdgpu_cs void @buffer_load_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: buffer_load_uniform:
; OLD_RBS:       ; %bb.0: ; %.entry
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s4
; OLD_RBS-NEXT:    buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
; OLD_RBS-NEXT:    s_waitcnt vmcnt(0)
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v2, 1, v3
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: buffer_load_uniform:
; NEW_RBS:       ; %bb.0: ; %.entry
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s4
; NEW_RBS-NEXT:    buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
; NEW_RBS-NEXT:    s_waitcnt vmcnt(0)
; NEW_RBS-NEXT:    v_readfirstlane_b32 s0, v3
; NEW_RBS-NEXT:    s_add_i32 s0, s0, 1
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
.entry:
  %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
  %el1 = extractelement <4 x i32> %vec, i64 1
  %res = add i32 %el1, 1
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; Same as @buffer_load_uniform but %voffset is not inreg; the old and new RBS
; checks are identical and the add stays a VALU add.
define amdgpu_cs void @buffer_load_divergent(<4 x i32> inreg %rsrc, i32 %voffset, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: buffer_load_divergent:
; OLD_RBS:       ; %bb.0: ; %.entry
; OLD_RBS-NEXT:    buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
; OLD_RBS-NEXT:    s_waitcnt vmcnt(0)
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v0, 1, v4
; OLD_RBS-NEXT:    global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: buffer_load_divergent:
; NEW_RBS:       ; %bb.0: ; %.entry
; NEW_RBS-NEXT:    buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
; NEW_RBS-NEXT:    s_waitcnt vmcnt(0)
; NEW_RBS-NEXT:    v_add_nc_u32_e32 v0, 1, v4
; NEW_RBS-NEXT:    global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT:    s_endpgm
.entry:
  %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
  %el1 = extractelement <4 x i32> %vec, i64 1
  %res = add i32 %el1, 1
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; lowering in rb-legalize (sgpr S64 is legal, vgpr has to be split to S32)
; Both runs check the 64-bit and as two 32-bit v_and_b32 instructions.
define amdgpu_ps void @vgpr_and_i64(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vgpr_and_i64:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_and_b32_e32 v0, v0, v2
; OLD_RBS-NEXT:    v_and_b32_e32 v1, v1, v3
; OLD_RBS-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: vgpr_and_i64:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_and_b32_e32 v0, v0, v2
; NEW_RBS-NEXT:    v_and_b32_e32 v1, v1, v3
; NEW_RBS-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off
; NEW_RBS-NEXT:    s_endpgm
  %res = and i64 %a, %b
  store i64 %res, ptr addrspace(1) %ptr
  ret void
}

; It is up to user instruction to deal with potential truncated bits in reg.
; Here G_ABS needs to sign extend S16 in reg to S32 and then do S32 G_ABS.
define amdgpu_ps void @abs_sgpr_i16(i16 inreg %arg, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: abs_sgpr_i16:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_sext_i32_i16 s0, s0
; OLD_RBS-NEXT:    s_abs_i32 s0, s0
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_short v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: abs_sgpr_i16:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_sext_i32_i16 s0, s0
; NEW_RBS-NEXT:    s_abs_i32 s0, s0
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_short v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
  store i16 %res, ptr addrspace(1) %ptr
  ret void
}

; i1 phi whose incoming compares only use inreg operands. The select of 1/2 is
; checked as (sign-extended bool) + 2: old RBS sign-extends with s_bfe_i32,
; new RBS with s_cmp_lg_u32 + s_cselect_b32 -1, 0.
define amdgpu_ps void @uniform_i1_phi(ptr addrspace(1) %out, i32 inreg %tid, i32 inreg %cond) {
; OLD_RBS-LABEL: uniform_i1_phi:
; OLD_RBS:       ; %bb.0: ; %A
; OLD_RBS-NEXT:    s_cmp_ge_u32 s0, 6
; OLD_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT:    s_cmp_lg_u32 s1, 0
; OLD_RBS-NEXT:    s_cbranch_scc1 .LBB6_2
; OLD_RBS-NEXT:  ; %bb.1: ; %B
; OLD_RBS-NEXT:    s_cmp_lt_u32 s0, 1
; OLD_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT:  .LBB6_2: ; %exit
; OLD_RBS-NEXT:    s_bfe_i32 s0, s2, 0x10000
; OLD_RBS-NEXT:    s_add_i32 s0, s0, 2
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: uniform_i1_phi:
; NEW_RBS:       ; %bb.0: ; %A
; NEW_RBS-NEXT:    s_cmp_ge_u32 s0, 6
; NEW_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT:    s_cmp_lg_u32 s1, 0
; NEW_RBS-NEXT:    s_cbranch_scc1 .LBB6_2
; NEW_RBS-NEXT:  ; %bb.1: ; %B
; NEW_RBS-NEXT:    s_cmp_lt_u32 s0, 1
; NEW_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT:  .LBB6_2: ; %exit
; NEW_RBS-NEXT:    s_cmp_lg_u32 s2, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, -1, 0
; NEW_RBS-NEXT:    s_add_i32 s0, s0, 2
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
A:
  %val_A = icmp uge i32 %tid, 6
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %B, label %exit

B:
  %val_B = icmp ult i32 %tid, 1
  br label %exit

exit:
  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
  %sel = select i1 %phi, i32 1, i32 2
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; this is kind of i1 readfirstlane
; uniform i1 result on instruction that is only available on VALU
define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vcc_to_scc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s2
; OLD_RBS-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
; OLD_RBS-NEXT:    v_cndmask_b32_e64 v2, v2, s1, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: vcc_to_scc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0
; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; NEW_RBS-NEXT:    s_and_b32 s0, s0, 1
; NEW_RBS-NEXT:    s_cmp_lg_u32 s0, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, s1, s2
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %vcc_to_scc = fcmp oeq float %a, 0.0
  %select = select i1 %vcc_to_scc, i32 %b, i32 %c
  store i32 %select, ptr addrspace(1) %ptr
  ret void
}

; combiner in rb-legalize recognizes sgpr S1 to vcc copy
; New RBS checks a single s_cselect_b32 vcc_lo, exec_lo, 0 where old RBS checks
; an s_cselect_b32 / s_and_b32 / v_cmp_ne_u32 sequence.
define amdgpu_ps void @scc_to_vcc(i32 inreg %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: scc_to_vcc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_eq_u32 s0, 0
; OLD_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT:    s_and_b32 s0, 1, s0
; OLD_RBS-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
; OLD_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; OLD_RBS-NEXT:    global_store_dword v[2:3], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: scc_to_vcc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_eq_u32 s0, 0
; NEW_RBS-NEXT:    s_cselect_b32 vcc_lo, exec_lo, 0
; NEW_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; NEW_RBS-NEXT:    global_store_dword v[2:3], v0, off
; NEW_RBS-NEXT:    s_endpgm
  %scc_to_vcc = icmp eq i32 %a, 0
  %select = select i1 %scc_to_vcc, i32 %b, i32 %c
  store i32 %select, ptr addrspace(1) %ptr
  ret void
}

; this is only G_TRUNC that is not no-op in global-isel for AMDGPU
define amdgpu_ps void @vgpr_to_vcc_trunc(i32 %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vgpr_to_vcc_trunc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_and_b32_e32 v0, 1, v0
; OLD_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; OLD_RBS-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; OLD_RBS-NEXT:    global_store_dword v[3:4], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: vgpr_to_vcc_trunc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_and_b32_e32 v0, 1, v0
; NEW_RBS-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; NEW_RBS-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; NEW_RBS-NEXT:    global_store_dword v[3:4], v0, off
; NEW_RBS-NEXT:    s_endpgm
  %vcc = trunc i32 %a to i1
  %select = select i1 %vcc, i32 %b, i32 %c
  store i32 %select, ptr addrspace(1) %ptr
  ret void
}

; i1 input to zext and sext is something that survived legalizer (not trunc)
; lower to select
define amdgpu_ps void @zext(i32 inreg %a, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: zext:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; OLD_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: zext:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; NEW_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %bool = icmp eq i32 %a, 10
  %zext = zext i1 %bool to i32
  store i32 %zext, ptr addrspace(1) %ptr
  ret void
}

; sext counterpart of @zext: new RBS checks s_cselect_b32 s0, -1, 0 directly,
; old RBS checks s_cselect_b32 s0, 1, 0 followed by s_bfe_i32.
define amdgpu_ps void @sext(i32 inreg %a, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: sext:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; OLD_RBS-NEXT:    s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT:    s_bfe_i32 s0, s0, 0x10000
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: sext:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_eq_u32 s0, 10
; NEW_RBS-NEXT:    s_cselect_b32 s0, -1, 0
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %bool = icmp eq i32 %a, 10
  %sext = sext i1 %bool to i32
  store i32 %sext, ptr addrspace(1) %ptr
  ret void
}

; divergent i1 bitwise, i1 vcc.
; inst selected into s_and_b32 on wave32 or s_and_b64 on wave64.
; Both compares use VGPR arguments; the checks expect the two compare results
; to be combined with s_and_b32 into vcc_lo and consumed by v_cndmask_b32.
; Old and new RBS checks are identical.
define amdgpu_ps void @and_i1_vcc(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: and_i1_vcc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    v_cmp_le_u32_e32 vcc_lo, 10, v0
; OLD_RBS-NEXT:    v_cmp_le_u32_e64 s0, 20, v1
; OLD_RBS-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
; OLD_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; OLD_RBS-NEXT:    global_store_dword v[2:3], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: and_i1_vcc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    v_cmp_le_u32_e32 vcc_lo, 10, v0
; NEW_RBS-NEXT:    v_cmp_le_u32_e64 s0, 20, v1
; NEW_RBS-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
; NEW_RBS-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; NEW_RBS-NEXT:    global_store_dword v[2:3], v0, off
; NEW_RBS-NEXT:    s_endpgm
  %cmp_a = icmp uge i32 %a, 10
  %cmp_b = icmp uge i32 %b, 20
  %cc = and i1 %cmp_a, %cmp_b
  %res = select i1 %cc, i32 %a, i32 %b
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; uniform i1 bitwise, i32 sgpr. inst selected into s_and_b32.
; Both compares use inreg arguments. The new RBS checks omit the
; "s_and_b32 s2, s2, 1" masking step that the old RBS checks contain.
define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: and_i1_scc:
; OLD_RBS:       ; %bb.0:
; OLD_RBS-NEXT:    s_cmp_ge_u32 s0, 10
; OLD_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT:    s_cmp_ge_u32 s1, 20
; OLD_RBS-NEXT:    s_cselect_b32 s3, 1, 0
; OLD_RBS-NEXT:    s_and_b32 s2, s2, s3
; OLD_RBS-NEXT:    s_and_b32 s2, s2, 1
; OLD_RBS-NEXT:    s_cmp_lg_u32 s2, 0
; OLD_RBS-NEXT:    s_cselect_b32 s0, s0, s1
; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT:    global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: and_i1_scc:
; NEW_RBS:       ; %bb.0:
; NEW_RBS-NEXT:    s_cmp_ge_u32 s0, 10
; NEW_RBS-NEXT:    s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT:    s_cmp_ge_u32 s1, 20
; NEW_RBS-NEXT:    s_cselect_b32 s3, 1, 0
; NEW_RBS-NEXT:    s_and_b32 s2, s2, s3
; NEW_RBS-NEXT:    s_cmp_lg_u32 s2, 0
; NEW_RBS-NEXT:    s_cselect_b32 s0, s0, s1
; NEW_RBS-NEXT:    v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT:    global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT:    s_endpgm
  %cmp_a = icmp uge i32 %a, 10
  %cmp_b = icmp uge i32 %b, 20
  %cc = and i1 %cmp_a, %cmp_b
  %res = select i1 %cc, i32 %a, i32 %b
  store i32 %res, ptr addrspace(1) %ptr
  ret void
}

; old RBS selects sgpr phi because it had sgpr inputs.
; The new RBS checks copy each incoming constant into v0 with v_mov_b32, so the
; stored phi value is already in a VGPR at the join block.
define amdgpu_ps void @divergent_phi_with_uniform_inputs(i32 %a, ptr addrspace(1) %out) {
; OLD_RBS-LABEL: divergent_phi_with_uniform_inputs:
; OLD_RBS:       ; %bb.0: ; %A
; OLD_RBS-NEXT:    s_mov_b32 s0, 0
; OLD_RBS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; OLD_RBS-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; OLD_RBS-NEXT:  ; %bb.1: ; %B
; OLD_RBS-NEXT:    s_mov_b32 s0, 1
; OLD_RBS-NEXT:  ; %bb.2: ; %exit
; OLD_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; OLD_RBS-NEXT:    v_mov_b32_e32 v0, s0
; OLD_RBS-NEXT:    global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: divergent_phi_with_uniform_inputs:
; NEW_RBS:       ; %bb.0: ; %A
; NEW_RBS-NEXT:    s_mov_b32 s0, 0
; NEW_RBS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; NEW_RBS-NEXT:    v_mov_b32_e32 v0, s0
; NEW_RBS-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; NEW_RBS-NEXT:  ; %bb.1: ; %B
; NEW_RBS-NEXT:    s_mov_b32 s1, 1
; NEW_RBS-NEXT:    v_mov_b32_e32 v0, s1
; NEW_RBS-NEXT:  ; %bb.2: ; %exit
; NEW_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; NEW_RBS-NEXT:    global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT:    s_endpgm
A:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %B, label %exit

B:
  br label %exit

exit:
  %phi = phi i32 [ 0, %A ], [ 1, %B ]
  store i32 %phi, ptr addrspace(1) %out
  ret void
}

; old RBS assigned vgpr to uniform phi (because one input had undetermined bank)
; and it propagated to mul, which was not wrong.
; new RBS assigns vgpr to destination of mul even though both inputs are sgpr.
; TODO: implement temporal divergence lowering
define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, ptr addrspace(1) %addr) {
; OLD_RBS-LABEL: divergent_because_of_temporal_divergent_use:
; OLD_RBS:       ; %bb.0: ; %entry
; OLD_RBS-NEXT:    s_mov_b32 s0, -1
; OLD_RBS-NEXT:    v_mov_b32_e32 v3, s0
; OLD_RBS-NEXT:    s_mov_b32 s0, 0
; OLD_RBS-NEXT:  .LBB15_1: ; %loop
; OLD_RBS-NEXT:    ; =>This Inner Loop Header: Depth=1
; OLD_RBS-NEXT:    v_add_nc_u32_e32 v3, 1, v3
; OLD_RBS-NEXT:    v_cvt_f32_u32_e32 v4, v3
; OLD_RBS-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v4, v0
; OLD_RBS-NEXT:    s_or_b32 s0, vcc_lo, s0
; OLD_RBS-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; OLD_RBS-NEXT:    s_cbranch_execnz .LBB15_1
; OLD_RBS-NEXT:  ; %bb.2: ; %exit
; OLD_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; OLD_RBS-NEXT:    v_mul_lo_u32 v0, v3, 10
; OLD_RBS-NEXT:    global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT:    s_endpgm
;
; NEW_RBS-LABEL: divergent_because_of_temporal_divergent_use:
; NEW_RBS:       ; %bb.0: ; %entry
; NEW_RBS-NEXT:    s_mov_b32 s0, -1
; NEW_RBS-NEXT:    s_mov_b32 s1, 0
; NEW_RBS-NEXT:  .LBB15_1: ; %loop
; NEW_RBS-NEXT:    ; =>This Inner Loop Header: Depth=1
; NEW_RBS-NEXT:    s_add_i32 s0, s0, 1
; NEW_RBS-NEXT:    v_cvt_f32_u32_e32 v3, s0
; NEW_RBS-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v3, v0
; NEW_RBS-NEXT:    s_or_b32 s1, vcc_lo, s1
; NEW_RBS-NEXT:    s_andn2_b32 exec_lo, exec_lo, s1
; NEW_RBS-NEXT:    s_cbranch_execnz .LBB15_1
; NEW_RBS-NEXT:  ; %bb.2: ; %exit
; NEW_RBS-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; NEW_RBS-NEXT:    v_mov_b32_e32 v0, s0
; NEW_RBS-NEXT:    v_mul_lo_u32 v0, v0, 10
; NEW_RBS-NEXT:    global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT:    s_endpgm
entry:
  br label %loop

loop:
  %counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
  %f.counter = uitofp i32 %counter to float
  %cond = fcmp ogt float %f.counter, %val
  %counter.plus.1 = add i32 %counter, 1
  br i1 %cond, label %exit, label %loop

exit:
  %ceilx10 = mul i32 %counter, 10
  store i32 %ceilx10, ptr addrspace(1) %addr
  ret void
}

485fef54d03SPetar Avramovic; Variables that hande counter can be allocated to sgprs. 486fef54d03SPetar Avramovicdefine amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) { 487fef54d03SPetar Avramovic; OLD_RBS-LABEL: loop_with_2breaks: 488fef54d03SPetar Avramovic; OLD_RBS: ; %bb.0: ; %entry 489fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_mov_b32 s0, 0 490fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; implicit-def: $sgpr1 491fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_mov_b32_e32 v6, s0 492fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_branch .LBB16_3 493fef54d03SPetar Avramovic; OLD_RBS-NEXT: .LBB16_1: ; %Flow3 494fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 495fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_waitcnt_depctr 0xffe3 496fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s3 497fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo 498fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_and_b32 s3, exec_lo, s4 499fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_or_b32 s1, s1, s3 500fef54d03SPetar Avramovic; OLD_RBS-NEXT: .LBB16_2: ; %Flow 501fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 502fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s2 503fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, s1 504fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_or_b32 s0, s2, s0 505fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 506fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_cbranch_execz .LBB16_6 507fef54d03SPetar Avramovic; OLD_RBS-NEXT: .LBB16_3: ; %A 508fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 509fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_ashrrev_i32_e32 v7, 31,
v6 510fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo 511fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, -1 512fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_or_b32 s1, s1, s2 513fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] 514fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 515fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo 516fef54d03SPetar Avramovic; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off 517fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_waitcnt vmcnt(0) 518fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 519fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_and_saveexec_b32 s2, vcc_lo 520fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_cbranch_execz .LBB16_2 521fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; %bb.4: ; %B 522fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 523fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 524fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo 525fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_mov_b32 s4, -1 526fef54d03SPetar Avramovic; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off 527fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_waitcnt vmcnt(0) 528fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 529fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_and_saveexec_b32 s3, vcc_lo 530fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_cbranch_execz .LBB16_1 531fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; %bb.5: ; %loop.body 532fef54d03SPetar Avramovic; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 533fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 534fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo 535fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_nc_u32_e32 v10, 1, v6 536fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 537fef54d03SPetar Avramovic; 
OLD_RBS-NEXT: s_andn2_b32 s4, -1, exec_lo 538fef54d03SPetar Avramovic; OLD_RBS-NEXT: global_load_dword v9, v[7:8], off 539fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_mov_b32_e32 v6, v10 540fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_and_b32 s5, exec_lo, vcc_lo 541fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_or_b32 s4, s4, s5 542fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_waitcnt vmcnt(0) 543fef54d03SPetar Avramovic; OLD_RBS-NEXT: v_add_nc_u32_e32 v9, 1, v9 544fef54d03SPetar Avramovic; OLD_RBS-NEXT: global_store_dword v[7:8], v9, off 545fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_branch .LBB16_1 546fef54d03SPetar Avramovic; OLD_RBS-NEXT: .LBB16_6: ; %exit 547fef54d03SPetar Avramovic; OLD_RBS-NEXT: s_endpgm 548fef54d03SPetar Avramovic; 549fef54d03SPetar Avramovic; NEW_RBS-LABEL: loop_with_2breaks: 550fef54d03SPetar Avramovic; NEW_RBS: ; %bb.0: ; %entry 551*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_mov_b32 s4, 0 552fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_mov_b32 s0, 0 553*b60c118fSPetar Avramovic; NEW_RBS-NEXT: ; implicit-def: $sgpr5 554fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_branch .LBB16_3 555fef54d03SPetar Avramovic; NEW_RBS-NEXT: .LBB16_1: ; %Flow3 556fef54d03SPetar Avramovic; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 557fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_waitcnt_depctr 0xffe3 558*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s7 559*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_andn2_b32 s2, s5, exec_lo 560*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_and_b32 s3, exec_lo, s6 561*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_or_b32 s5, s2, s3 562fef54d03SPetar Avramovic; NEW_RBS-NEXT: .LBB16_2: ; %Flow 563fef54d03SPetar Avramovic; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 564*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1 565*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_and_b32 s1, exec_lo, s5 566*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_or_b32 s4, s1, s4 567*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_andn2_b32 exec_lo, 
exec_lo, s4 568fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_cbranch_execz .LBB16_6 569fef54d03SPetar Avramovic; NEW_RBS-NEXT: .LBB16_3: ; %A 570fef54d03SPetar Avramovic; NEW_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 571*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_ashr_i32 s1, s0, 31 572*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_lshl_b64 s[2:3], s[0:1], 2 573*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_andn2_b32 s1, s5, exec_lo 574*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_mov_b32_e32 v7, s3 575*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_mov_b32_e32 v6, s2 576*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_and_b32 s5, exec_lo, exec_lo 577*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_or_b32 s5, s1, s5 578*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_add_co_u32 v6, vcc_lo, v2, v6 579*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo 580*b60c118fSPetar Avramovic; NEW_RBS-NEXT: global_load_dword v6, v[6:7], off 581fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_waitcnt vmcnt(0) 582*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 583*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo 584fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_cbranch_execz .LBB16_2 585fef54d03SPetar Avramovic; NEW_RBS-NEXT: ; %bb.4: ; %B 586fef54d03SPetar Avramovic; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 587*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_mov_b32_e32 v7, s3 588*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_mov_b32_e32 v6, s2 589*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_mov_b32 s6, exec_lo 590*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_add_co_u32 v6, vcc_lo, v4, v6 591*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v5, v7, vcc_lo 592*b60c118fSPetar Avramovic; NEW_RBS-NEXT: global_load_dword v6, v[6:7], off 593fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_waitcnt vmcnt(0) 594*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 595*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_and_saveexec_b32 s7, 
vcc_lo 596fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_cbranch_execz .LBB16_1 597fef54d03SPetar Avramovic; NEW_RBS-NEXT: ; %bb.5: ; %loop.body 598fef54d03SPetar Avramovic; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 599*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_mov_b32_e32 v7, s3 600*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_mov_b32_e32 v6, s2 601*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_add_i32 s2, s0, 1 602*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_cmpk_lt_u32 s0, 0x64 603*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_cselect_b32 s0, exec_lo, 0 604*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6 605*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo 606*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_andn2_b32 s3, s6, exec_lo 607*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_and_b32 s0, exec_lo, s0 608*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_or_b32 s6, s3, s0 609*b60c118fSPetar Avramovic; NEW_RBS-NEXT: global_load_dword v8, v[6:7], off 610*b60c118fSPetar Avramovic; NEW_RBS-NEXT: s_mov_b32 s0, s2 611fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_waitcnt vmcnt(0) 612*b60c118fSPetar Avramovic; NEW_RBS-NEXT: v_add_nc_u32_e32 v8, 1, v8 613*b60c118fSPetar Avramovic; NEW_RBS-NEXT: global_store_dword v[6:7], v8, off 614fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_branch .LBB16_1 615fef54d03SPetar Avramovic; NEW_RBS-NEXT: .LBB16_6: ; %exit 616fef54d03SPetar Avramovic; NEW_RBS-NEXT: s_endpgm 617fef54d03SPetar Avramovicentry: 618fef54d03SPetar Avramovic br label %A 619fef54d03SPetar Avramovic 620fef54d03SPetar AvramovicA: 621fef54d03SPetar Avramovic %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ] 622fef54d03SPetar Avramovic %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter 623fef54d03SPetar Avramovic %a.val = load i32, ptr addrspace(1) %a.plus.counter 624fef54d03SPetar Avramovic %a.cond = icmp eq i32 %a.val, 0 625fef54d03SPetar Avramovic br i1 %a.cond, label %exit, label %B 
626fef54d03SPetar Avramovic 627fef54d03SPetar AvramovicB: 628fef54d03SPetar Avramovic %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter 629fef54d03SPetar Avramovic %b.val = load i32, ptr addrspace(1) %b.plus.counter 630fef54d03SPetar Avramovic %b.cond = icmp eq i32 %b.val, 0 631fef54d03SPetar Avramovic br i1 %b.cond, label %exit, label %loop.body 632fef54d03SPetar Avramovic 633fef54d03SPetar Avramovicloop.body: 634fef54d03SPetar Avramovic %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter 635fef54d03SPetar Avramovic %x.val = load i32, ptr addrspace(1) %x.plus.counter 636fef54d03SPetar Avramovic %x.val.plus.1 = add i32 %x.val, 1 637fef54d03SPetar Avramovic store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter 638fef54d03SPetar Avramovic %counter.plus.1 = add i32 %counter, 1 639fef54d03SPetar Avramovic %x.cond = icmp ult i32 %counter, 100 640fef54d03SPetar Avramovic br i1 %x.cond, label %exit, label %A 641fef54d03SPetar Avramovic 642fef54d03SPetar Avramovicexit: 643fef54d03SPetar Avramovic ret void 644fef54d03SPetar Avramovic} 645fef54d03SPetar Avramovic 646fef54d03SPetar Avramovicdeclare i16 @llvm.abs.i16(i16, i1) 647fef54d03SPetar Avramovicdeclare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) 648