; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Simplest case, if - then, that requires lane mask merging,
; %phi lane mask will hold %val_A at %A. Lanes that are active in %B
; will overwrite its own lane bit in lane mask with val_B
define amdgpu_ps void @divergent_i1_phi_if_then(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
; GFX10-LABEL: divergent_i1_phi_if_then:
; GFX10:       ; %bb.0: ; %A
; GFX10-NEXT:    v_cmp_le_u32_e64 s0, 6, v2
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:  ; %bb.1: ; %B
; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 1, v2
; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
; GFX10-NEXT:    s_or_b32 s0, s0, s2
; GFX10-NEXT:  ; %bb.2: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s0
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:    s_endpgm
A:
  %val_A = icmp uge i32 %tid, 6
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %B, label %exit

B:
  %val_B = icmp ult i32 %tid, 1
  br label %exit

exit:
  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
  %sel = select i1 %phi, i32 1, i32 2
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; if - else
define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
; GFX10-LABEL: divergent_i1_phi_if_else:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_and_b32 s0, 1, s0
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v3
; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
; GFX10-NEXT:    s_xor_b32 s1, exec_lo, s1
; GFX10-NEXT:  ; %bb.1: ; %B
; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v2
; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-NEXT:    ; implicit-def: $vgpr2
; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
; GFX10-NEXT:    s_or_b32 s0, s0, s2
; GFX10-NEXT:  ; %bb.2: ; %Flow
; GFX10-NEXT:    s_andn2_saveexec_b32 s1, s1
; GFX10-NEXT:  ; %bb.3: ; %A
; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 1, v2
; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
; GFX10-NEXT:    s_or_b32 s0, s0, s2
; GFX10-NEXT:  ; %bb.4: ; %exit
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s0
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 2, v2
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %A, label %B

A:
  %val_A = icmp uge i32 %tid, 1
  br label %exit

B:
  %val_B = icmp ult i32 %tid, 2
  br label %exit

exit:
  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
  %sel = select i1 %phi, i32 1, i32 2
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; if - break;

; counter = 0;
; do {
;   if (a[counter] == 0)
;     break;
;   if (b[counter] == 0)
;     break;
;   if (c[counter] == 0)
;     break;
;   x[counter++]+=1;
; } while (counter<100);

; Tests with multiple break conditions. Divergent phis will be used to track
; if any of the break conditions was reached. We only need to do simple lane
; mask merging (for current loop iteration only). There is an intrinsic,
; if_break, that will merge lane masks across all iterations of the loop.

; NOTE(review): the IR in the loop tests below does not match the pseudo-code's
; loop condition: %loop.body branches to %exit when '%counter < 100'
; (icmp ult) and back to %A otherwise. The autogenerated assertions were
; produced from the IR as written, so the IR is intentionally left unchanged.

; One break condition: a[counter] == 0.
define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a) {
; GFX10-LABEL: loop_with_1break:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    ; implicit-def: $sgpr1
; GFX10-NEXT:    v_mov_b32_e32 v4, s0
; GFX10-NEXT:    s_branch .LBB2_2
; GFX10-NEXT:  .LBB2_1: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s2
; GFX10-NEXT:    s_and_b32 s2, exec_lo, s1
; GFX10-NEXT:    s_or_b32 s0, s2, s0
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB2_4
; GFX10-NEXT:  .LBB2_2: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT:    s_and_b32 s2, exec_lo, -1
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    v_lshlrev_b64 v[5:6], 2, v[4:5]
; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v2, v5
; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v3, v6, vcc_lo
; GFX10-NEXT:    global_load_dword v7, v[7:8], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v7
; GFX10-NEXT:    s_and_saveexec_b32 s2, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB2_1
; GFX10-NEXT:  ; %bb.3: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GFX10-NEXT:    v_add_co_u32 v5, vcc_lo, v0, v5
; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, v1, v6, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v8, 1, v4
; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v4
; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT:    global_load_dword v7, v[5:6], off
; GFX10-NEXT:    v_mov_b32_e32 v4, v8
; GFX10-NEXT:    s_and_b32 s3, exec_lo, vcc_lo
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v7, 1, v7
; GFX10-NEXT:    global_store_dword v[5:6], v7, off
; GFX10-NEXT:    s_branch .LBB2_1
; GFX10-NEXT:  .LBB2_4: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %exit, label %loop.body

loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

; Two break conditions: a[counter] == 0, then b[counter] == 0.
define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) {
; GFX10-LABEL: loop_with_2breaks:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    ; implicit-def: $sgpr1
; GFX10-NEXT:    v_mov_b32_e32 v6, s0
; GFX10-NEXT:    s_branch .LBB3_3
; GFX10-NEXT:  .LBB3_1: ; %Flow3
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s3
; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT:    s_and_b32 s3, exec_lo, s4
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:  .LBB3_2: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s2
; GFX10-NEXT:    s_and_b32 s2, exec_lo, s1
; GFX10-NEXT:    s_or_b32 s0, s2, s0
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB3_6
; GFX10-NEXT:  .LBB3_3: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT:    s_and_b32 s2, exec_lo, -1
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    v_lshlrev_b64 v[7:8], 2, v[6:7]
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
; GFX10-NEXT:    global_load_dword v9, v[9:10], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT:    s_and_saveexec_b32 s2, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB3_2
; GFX10-NEXT:  ; %bb.4: ; %B
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v4, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo
; GFX10-NEXT:    s_mov_b32 s4, -1
; GFX10-NEXT:    global_load_dword v9, v[9:10], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT:    s_and_saveexec_b32 s3, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB3_1
; GFX10-NEXT:  ; %bb.5: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v6
; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
; GFX10-NEXT:    s_andn2_b32 s4, -1, exec_lo
; GFX10-NEXT:    global_load_dword v9, v[7:8], off
; GFX10-NEXT:    v_mov_b32_e32 v6, v10
; GFX10-NEXT:    s_and_b32 s5, exec_lo, vcc_lo
; GFX10-NEXT:    s_or_b32 s4, s4, s5
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v9
; GFX10-NEXT:    global_store_dword v[7:8], v9, off
; GFX10-NEXT:    s_branch .LBB3_1
; GFX10-NEXT:  .LBB3_6: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %exit, label %B

B:
  %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
  %b.val = load i32, ptr addrspace(1) %b.plus.counter
  %b.cond = icmp eq i32 %b.val, 0
  br i1 %b.cond, label %exit, label %loop.body

loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

; Three break conditions: a[counter] == 0, b[counter] == 0, c[counter] == 0.
define amdgpu_cs void @loop_with_3breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c) {
; GFX10-LABEL: loop_with_3breaks:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    ; implicit-def: $sgpr1
; GFX10-NEXT:    v_mov_b32_e32 v8, s0
; GFX10-NEXT:    s_branch .LBB4_4
; GFX10-NEXT:  .LBB4_1: ; %Flow5
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    s_andn2_b32 s4, -1, exec_lo
; GFX10-NEXT:    s_and_b32 s5, exec_lo, s5
; GFX10-NEXT:    s_or_b32 s4, s4, s5
; GFX10-NEXT:  .LBB4_2: ; %Flow4
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s3
; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT:    s_and_b32 s3, exec_lo, s4
; GFX10-NEXT:    s_or_b32 s1, s1, s3
; GFX10-NEXT:  .LBB4_3: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s2
; GFX10-NEXT:    s_and_b32 s2, exec_lo, s1
; GFX10-NEXT:    s_or_b32 s0, s2, s0
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB4_8
; GFX10-NEXT:  .LBB4_4: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT:    s_and_b32 s2, exec_lo, -1
; GFX10-NEXT:    s_or_b32 s1, s1, s2
; GFX10-NEXT:    v_lshlrev_b64 v[9:10], 2, v[8:9]
; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v2, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v3, v10, vcc_lo
; GFX10-NEXT:    global_load_dword v11, v[11:12], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
; GFX10-NEXT:    s_and_saveexec_b32 s2, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB4_3
; GFX10-NEXT:  ; %bb.5: ; %B
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v4, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v5, v10, vcc_lo
; GFX10-NEXT:    s_mov_b32 s4, -1
; GFX10-NEXT:    global_load_dword v11, v[11:12], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
; GFX10-NEXT:    s_and_saveexec_b32 s3, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB4_2
; GFX10-NEXT:  ; %bb.6: ; %C
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v6, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v7, v10, vcc_lo
; GFX10-NEXT:    s_mov_b32 s5, -1
; GFX10-NEXT:    global_load_dword v11, v[11:12], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB4_1
; GFX10-NEXT:  ; %bb.7: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v0, v9
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v1, v10, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v12, 1, v8
; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v8
; GFX10-NEXT:    s_andn2_b32 s5, -1, exec_lo
; GFX10-NEXT:    global_load_dword v11, v[9:10], off
; GFX10-NEXT:    v_mov_b32_e32 v8, v12
; GFX10-NEXT:    s_and_b32 s6, exec_lo, vcc_lo
; GFX10-NEXT:    s_or_b32 s5, s5, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v11
; GFX10-NEXT:    global_store_dword v[9:10], v11, off
; GFX10-NEXT:    s_branch .LBB4_1
; GFX10-NEXT:  .LBB4_8: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %exit, label %B

B:
  %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
  %b.val = load i32, ptr addrspace(1) %b.plus.counter
  %b.cond = icmp eq i32 %b.val, 0
  br i1 %b.cond, label %exit, label %C

C:
  %c.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %c, i32 %counter
  %c.val = load i32, ptr addrspace(1) %c.plus.counter
  %c.cond = icmp eq i32 %c.val, 0
  br i1 %c.cond, label %exit, label %loop.body

loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

; Divergent condition if with body, ending with break. This is loop with two
; exits but structurizer will create phi that will track exit from break
; and move break.body after the loop. Loop will then have one exit and phi
; used outside of the loop by condition used to enter the break.body.
define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %a.break) {
; GFX10-LABEL: loop_with_div_break_with_body:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    ; implicit-def: $sgpr1
; GFX10-NEXT:    ; implicit-def: $sgpr2
; GFX10-NEXT:    ; implicit-def: $sgpr3
; GFX10-NEXT:    v_mov_b32_e32 v6, s0
; GFX10-NEXT:    s_branch .LBB5_2
; GFX10-NEXT:  .LBB5_1: ; %Flow
; GFX10-NEXT:    ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    s_and_b32 s4, exec_lo, s2
; GFX10-NEXT:    s_or_b32 s0, s4, s0
; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
; GFX10-NEXT:    s_and_b32 s4, exec_lo, s3
; GFX10-NEXT:    s_or_b32 s1, s1, s4
; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB5_4
; GFX10-NEXT:  .LBB5_2: ; %A
; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
; GFX10-NEXT:    s_andn2_b32 s3, s3, exec_lo
; GFX10-NEXT:    s_and_b32 s4, exec_lo, -1
; GFX10-NEXT:    s_andn2_b32 s2, s2, exec_lo
; GFX10-NEXT:    s_or_b32 s3, s3, s4
; GFX10-NEXT:    v_lshlrev_b64 v[7:8], 2, v[6:7]
; GFX10-NEXT:    s_or_b32 s2, s2, s4
; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
; GFX10-NEXT:    global_load_dword v9, v[9:10], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB5_1
; GFX10-NEXT:  ; %bb.3: ; %loop.body
; GFX10-NEXT:    ; in Loop: Header=BB5_2 Depth=1
; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v6
; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
; GFX10-NEXT:    s_andn2_b32 s3, s3, exec_lo
; GFX10-NEXT:    global_load_dword v9, v[7:8], off
; GFX10-NEXT:    s_and_b32 s5, exec_lo, 0
; GFX10-NEXT:    v_mov_b32_e32 v6, v10
; GFX10-NEXT:    s_andn2_b32 s2, s2, exec_lo
; GFX10-NEXT:    s_and_b32 s6, exec_lo, vcc_lo
; GFX10-NEXT:    s_or_b32 s3, s3, s5
; GFX10-NEXT:    s_or_b32 s2, s2, s6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v9
; GFX10-NEXT:    global_store_dword v[7:8], v9, off
; GFX10-NEXT:    s_branch .LBB5_1
; GFX10-NEXT:  .LBB5_4: ; %loop.exit.guard
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT:    s_and_saveexec_b32 s0, s1
; GFX10-NEXT:    s_xor_b32 s0, exec_lo, s0
; GFX10-NEXT:    s_cbranch_execz .LBB5_6
; GFX10-NEXT:  ; %bb.5: ; %break.body
; GFX10-NEXT:    v_mov_b32_e32 v0, 10
; GFX10-NEXT:    global_store_dword v[4:5], v0, off
; GFX10-NEXT:  .LBB5_6: ; %exit
; GFX10-NEXT:    s_endpgm
entry:
  br label %A

A:
  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
  %a.val = load i32, ptr addrspace(1) %a.plus.counter
  %a.cond = icmp eq i32 %a.val, 0
  br i1 %a.cond, label %break.body, label %loop.body

break.body:
  store i32 10, ptr addrspace(1) %a.break
  br label %exit


loop.body:
  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
  %x.val = load i32, ptr addrspace(1) %x.plus.counter
  %x.val.plus.1 = add i32 %x.val, 1
  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
  %counter.plus.1 = add i32 %counter, 1
  %x.cond = icmp ult i32 %counter, 100
  br i1 %x.cond, label %exit, label %A

exit:
  ret void
}

; Snippet from test generated by the GraphicsFuzz tool, frontend generates ir
; with irreducible control flow graph. FixIrreducible converts it into natural
; loop and in the process creates i1 phi with three incoming values.

; int loop(int x, int y, int a0, int a1, int a2, int a3, int a4) {
;   do {
;     if (y < a2) {
;       do {
;       } while (x < a2);
;     }
;     if (x < a3) {
;       return a1;
;     }
;   } while (y < a2);
;   return a0;
; }

; This test is also interesting because it has phi with three incomings
;define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
;.entry:
;  %.y_lt_a2 = icmp sgt i32 %a2, %y
;  %.x_lt_a2 = icmp sgt i32 %a2, %x
;  %.x_lt_a3 = icmp sgt i32 %a3, %x
;  br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)'
;
;.preheader: ; if (y < a2),
;  br label %.inner_loop
;
;.inner_loop: ; do while x < a2
;  br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit
;
;.loopexit: ; if x < a3
;  %not.inner_loop = xor i1 %.y_lt_a2, true
;  %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)'
;  %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select return value a1 'x < a3' or a0 'loop ends'
;  br i1 %brmerge, label %.exit, label %.preheader
;
;.exit:
;  ret i32 %.ret
;}
