xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll (revision 6c2eec5ceadf26ce8d732d718a8906d075a7d6c7)
16892c175SPetar Avramovic; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2*6c2eec5cSPetar Avramovic; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
36892c175SPetar Avramovic
46892c175SPetar Avramovic; Simplest case, if - then, that requires lane mask merging:
56892c175SPetar Avramovic; %phi lane mask will hold %val_A at %A. Lanes that are active in %B
66892c175SPetar Avramovic; will overwrite their own lane bits in the lane mask with %val_B.
76892c175SPetar Avramovicdefine amdgpu_ps void @divergent_i1_phi_if_then(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
86892c175SPetar Avramovic; GFX10-LABEL: divergent_i1_phi_if_then:
96892c175SPetar Avramovic; GFX10:       ; %bb.0: ; %A
106892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_le_u32_e64 s0, 6, v2
116892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
126892c175SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
136892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.1: ; %B
14433f8e74SPetar Avramovic; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 1, v2
15433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
16433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
17433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s0, s0, s2
186892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.2: ; %exit
196892c175SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
204b919495SThorsten Schütt; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s0
214b919495SThorsten Schütt; GFX10-NEXT:    v_add_nc_u32_e32 v2, 2, v2
226892c175SPetar Avramovic; GFX10-NEXT:    global_store_dword v[0:1], v2, off
236892c175SPetar Avramovic; GFX10-NEXT:    s_endpgm
; IR under test. The GFX10 check lines above are autogenerated (see the NOTE
; at the top of the file); regenerate them instead of editing them by hand.
;
; %A: computes %val_A = (%tid u>= 6) and enters %B only when %cond == 0;
; otherwise control goes straight to %exit.
246892c175SPetar AvramovicA:
256892c175SPetar Avramovic  %val_A = icmp uge i32 %tid, 6
266892c175SPetar Avramovic  %cmp = icmp eq i32 %cond, 0
276892c175SPetar Avramovic  br i1 %cmp, label %B, label %exit
286892c175SPetar Avramovic
; %B: computes %val_B = (%tid u< 1) and then joins %exit.
296892c175SPetar AvramovicB:
306892c175SPetar Avramovic  %val_B = icmp ult i32 %tid, 1
316892c175SPetar Avramovic  br label %exit
326892c175SPetar Avramovic
; %exit: %phi takes %val_A when arriving from %A and %val_B when arriving
; from %B. The i1 is mapped to 1 (true) or 2 (false) and stored to %out.
336892c175SPetar Avramovicexit:
346892c175SPetar Avramovic  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
356892c175SPetar Avramovic  %sel = select i1 %phi, i32 1, i32 2
366892c175SPetar Avramovic  store i32 %sel, ptr addrspace(1) %out
376892c175SPetar Avramovic  ret void
386892c175SPetar Avramovic}
396892c175SPetar Avramovic
406892c175SPetar Avramovic; if - else
416892c175SPetar Avramovicdefine amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %cond) {
426892c175SPetar Avramovic; GFX10-LABEL: divergent_i1_phi_if_else:
436892c175SPetar Avramovic; GFX10:       ; %bb.0: ; %entry
446892c175SPetar Avramovic; GFX10-NEXT:    s_and_b32 s0, 1, s0
456892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v3
46*6c2eec5cSPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
476892c175SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s1, vcc_lo
486892c175SPetar Avramovic; GFX10-NEXT:    s_xor_b32 s1, exec_lo, s1
496892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.1: ; %B
50433f8e74SPetar Avramovic; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 2, v2
51*6c2eec5cSPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
526892c175SPetar Avramovic; GFX10-NEXT:    ; implicit-def: $vgpr2
53433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
54433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s0, s0, s2
556892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.2: ; %Flow
566892c175SPetar Avramovic; GFX10-NEXT:    s_andn2_saveexec_b32 s1, s1
576892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.3: ; %A
58433f8e74SPetar Avramovic; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 1, v2
59433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s0, s0, exec_lo
60433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, vcc_lo
61433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s0, s0, s2
626892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.4: ; %exit
636892c175SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s1
644b919495SThorsten Schütt; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s0
654b919495SThorsten Schütt; GFX10-NEXT:    v_add_nc_u32_e32 v2, 2, v2
666892c175SPetar Avramovic; GFX10-NEXT:    global_store_dword v[0:1], v2, off
676892c175SPetar Avramovic; GFX10-NEXT:    s_endpgm
; IR under test. The GFX10 check lines above are autogenerated (see the NOTE
; at the top of the file); regenerate them instead of editing them by hand.
;
; %entry: two-way branch on %cond; %A is taken when %cond == 0, %B otherwise.
; Unlike the if-then test, both i1 values feeding the phi are defined in the
; conditionally executed blocks.
686892c175SPetar Avramovicentry:
696892c175SPetar Avramovic  %cmp = icmp eq i32 %cond, 0
706892c175SPetar Avramovic  br i1 %cmp, label %A, label %B
716892c175SPetar Avramovic
; %A: computes %val_A = (%tid u>= 1).
726892c175SPetar AvramovicA:
736892c175SPetar Avramovic  %val_A = icmp uge i32 %tid, 1
746892c175SPetar Avramovic  br label %exit
756892c175SPetar Avramovic
; %B: computes %val_B = (%tid u< 2).
766892c175SPetar AvramovicB:
776892c175SPetar Avramovic  %val_B = icmp ult i32 %tid, 2
786892c175SPetar Avramovic  br label %exit
796892c175SPetar Avramovic
; %exit: %phi takes %val_A when arriving from %A and %val_B when arriving
; from %B. The i1 is mapped to 1 (true) or 2 (false) and stored to %out.
806892c175SPetar Avramovicexit:
816892c175SPetar Avramovic  %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
826892c175SPetar Avramovic  %sel = select i1 %phi, i32 1, i32 2
836892c175SPetar Avramovic  store i32 %sel, ptr addrspace(1) %out
846892c175SPetar Avramovic  ret void
856892c175SPetar Avramovic}
866892c175SPetar Avramovic
876892c175SPetar Avramovic; if - break;
886892c175SPetar Avramovic
896892c175SPetar Avramovic;  counter = 0;
906892c175SPetar Avramovic;  do {
916892c175SPetar Avramovic;    if (a[counter] == 0)
926892c175SPetar Avramovic;      break;
936892c175SPetar Avramovic;    if (b[counter] == 0)
946892c175SPetar Avramovic;      break;
956892c175SPetar Avramovic;    if (c[counter] == 0)
966892c175SPetar Avramovic;      break;
976892c175SPetar Avramovic;    x[counter++]+=1;
986892c175SPetar Avramovic;  } while (counter<100);
996892c175SPetar Avramovic
1006892c175SPetar Avramovic; Tests with multiple break conditions. Divergent phis will be used to track
1016892c175SPetar Avramovic; if any of the break conditions was reached. We only need to do simple lane
1026892c175SPetar Avramovic; mask merging (for current loop iteration only). There is an intrinsic,
1036892c175SPetar Avramovic; if_break, that will merge lane masks across all iterations of the loop.
1046892c175SPetar Avramovic
1056892c175SPetar Avramovicdefine amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a) {
1066892c175SPetar Avramovic; GFX10-LABEL: loop_with_1break:
1076892c175SPetar Avramovic; GFX10:       ; %bb.0: ; %entry
1086892c175SPetar Avramovic; GFX10-NEXT:    s_mov_b32 s0, 0
109433f8e74SPetar Avramovic; GFX10-NEXT:    ; implicit-def: $sgpr1
1106892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v4, s0
1116892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB2_2
1126892c175SPetar Avramovic; GFX10-NEXT:  .LBB2_1: ; %Flow
1136892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB2_2 Depth=1
1146892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
115433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s2
116433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, s1
117433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s0, s2, s0
1186892c175SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
1196892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB2_4
1206892c175SPetar Avramovic; GFX10-NEXT:  .LBB2_2: ; %A
1216892c175SPetar Avramovic; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
1226892c175SPetar Avramovic; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
123433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
124433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, -1
125433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s1, s1, s2
1266892c175SPetar Avramovic; GFX10-NEXT:    v_lshlrev_b64 v[5:6], 2, v[4:5]
1276892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v2, v5
1286892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v3, v6, vcc_lo
1296892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v7, v[7:8], off
1306892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
1316892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v7
132433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s2, vcc_lo
1336892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB2_1
1346892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.3: ; %loop.body
1356892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB2_2 Depth=1
1366892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v5, vcc_lo, v0, v5
1376892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, v1, v6, vcc_lo
1386892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v8, 1, v4
139433f8e74SPetar Avramovic; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v4
140433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
1416892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v7, v[5:6], off
1426892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v4, v8
143433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s3, exec_lo, vcc_lo
144433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s1, s1, s3
1456892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
1466892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v7, 1, v7
1476892c175SPetar Avramovic; GFX10-NEXT:    global_store_dword v[5:6], v7, off
1486892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB2_1
1496892c175SPetar Avramovic; GFX10-NEXT:  .LBB2_4: ; %exit
1506892c175SPetar Avramovic; GFX10-NEXT:    s_endpgm
; IR under test: a loop with one early-exit ("break") condition in addition
; to the exit at the end of the loop body. The GFX10 check lines above are
; autogenerated (see the NOTE at the top of the file); regenerate them
; instead of editing them by hand.
1516892c175SPetar Avramovicentry:
1526892c175SPetar Avramovic  br label %A
1536892c175SPetar Avramovic
; %A (loop header): %counter starts at 0 and takes %counter.plus.1 on the
; back edge from %loop.body. Loads a[counter]; a zero value leaves the loop
; (the break), any other value continues to %loop.body.
1546892c175SPetar AvramovicA:
1556892c175SPetar Avramovic  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
1566892c175SPetar Avramovic  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
1576892c175SPetar Avramovic  %a.val = load i32, ptr addrspace(1) %a.plus.counter
1586892c175SPetar Avramovic  %a.cond = icmp eq i32 %a.val, 0
1596892c175SPetar Avramovic  br i1 %a.cond, label %exit, label %loop.body
1606892c175SPetar Avramovic
; %loop.body: x[counter] += 1 (load, add, store) and
; %counter.plus.1 = counter + 1. %x.cond tests the pre-increment %counter
; (u-less-than 100); when it is true the loop exits, otherwise the back edge
; to %A is taken.
; NOTE(review): this exit condition has the opposite sense of the do/while
; pseudo-code in the comment preceding the loop tests -- confirm whether that
; is intentional. Changing it requires regenerating the check lines.
1616892c175SPetar Avramovicloop.body:
1626892c175SPetar Avramovic  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
1636892c175SPetar Avramovic  %x.val = load i32, ptr addrspace(1) %x.plus.counter
1646892c175SPetar Avramovic  %x.val.plus.1 = add i32 %x.val, 1
1656892c175SPetar Avramovic  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
1666892c175SPetar Avramovic  %counter.plus.1 = add i32 %counter, 1
1676892c175SPetar Avramovic  %x.cond = icmp ult i32 %counter, 100
1686892c175SPetar Avramovic  br i1 %x.cond, label %exit, label %A
1696892c175SPetar Avramovic
; %exit: common target of the break in %A and the exit in %loop.body.
1706892c175SPetar Avramovicexit:
1716892c175SPetar Avramovic  ret void
1726892c175SPetar Avramovic}
1736892c175SPetar Avramovic
1746892c175SPetar Avramovicdefine amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) {
1756892c175SPetar Avramovic; GFX10-LABEL: loop_with_2breaks:
1766892c175SPetar Avramovic; GFX10:       ; %bb.0: ; %entry
1776892c175SPetar Avramovic; GFX10-NEXT:    s_mov_b32 s0, 0
178433f8e74SPetar Avramovic; GFX10-NEXT:    ; implicit-def: $sgpr1
1796892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v6, s0
1806892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB3_3
1816892c175SPetar Avramovic; GFX10-NEXT:  .LBB3_1: ; %Flow3
1826892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
1836892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
1846892c175SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s3
185433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
186433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s3, exec_lo, s4
187433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s1, s1, s3
1886892c175SPetar Avramovic; GFX10-NEXT:  .LBB3_2: ; %Flow
1896892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
190433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s2
191433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, s1
192433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s0, s2, s0
1936892c175SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
1946892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB3_6
1956892c175SPetar Avramovic; GFX10-NEXT:  .LBB3_3: ; %A
1966892c175SPetar Avramovic; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
1976892c175SPetar Avramovic; GFX10-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
198433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
199433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, -1
200433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s1, s1, s2
2016892c175SPetar Avramovic; GFX10-NEXT:    v_lshlrev_b64 v[7:8], 2, v[6:7]
2026892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v7
2036892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
2046892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v9, v[9:10], off
2056892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
2066892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
207433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s2, vcc_lo
2086892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB3_2
2096892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.4: ; %B
2106892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
2116892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v4, v7
2126892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo
213*6c2eec5cSPetar Avramovic; GFX10-NEXT:    s_mov_b32 s4, -1
2146892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v9, v[9:10], off
2156892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
2166892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
2176892c175SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s3, vcc_lo
2186892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB3_1
2196892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.5: ; %loop.body
2206892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB3_3 Depth=1
2216892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
2226892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
2236892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v6
224433f8e74SPetar Avramovic; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
225433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s4, -1, exec_lo
2266892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v9, v[7:8], off
2276892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v6, v10
228433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s5, exec_lo, vcc_lo
229433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s4, s4, s5
2306892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
2316892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v9
2326892c175SPetar Avramovic; GFX10-NEXT:    global_store_dword v[7:8], v9, off
2336892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB3_1
2346892c175SPetar Avramovic; GFX10-NEXT:  .LBB3_6: ; %exit
2356892c175SPetar Avramovic; GFX10-NEXT:    s_endpgm
; IR under test: a loop with two early-exit ("break") conditions in addition
; to the exit at the end of the loop body. The GFX10 check lines above are
; autogenerated (see the NOTE at the top of the file); regenerate them
; instead of editing them by hand.
2366892c175SPetar Avramovicentry:
2376892c175SPetar Avramovic  br label %A
2386892c175SPetar Avramovic
; %A (loop header): %counter starts at 0 and takes %counter.plus.1 on the
; back edge from %loop.body. First break: leave the loop when a[counter] == 0.
2396892c175SPetar AvramovicA:
2406892c175SPetar Avramovic  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
2416892c175SPetar Avramovic  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
2426892c175SPetar Avramovic  %a.val = load i32, ptr addrspace(1) %a.plus.counter
2436892c175SPetar Avramovic  %a.cond = icmp eq i32 %a.val, 0
2446892c175SPetar Avramovic  br i1 %a.cond, label %exit, label %B
2456892c175SPetar Avramovic
; %B: second break: leave the loop when b[counter] == 0.
2466892c175SPetar AvramovicB:
2476892c175SPetar Avramovic  %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
2486892c175SPetar Avramovic  %b.val = load i32, ptr addrspace(1) %b.plus.counter
2496892c175SPetar Avramovic  %b.cond = icmp eq i32 %b.val, 0
2506892c175SPetar Avramovic  br i1 %b.cond, label %exit, label %loop.body
2516892c175SPetar Avramovic
; %loop.body: x[counter] += 1 (load, add, store) and
; %counter.plus.1 = counter + 1. %x.cond tests the pre-increment %counter
; (u-less-than 100); when it is true the loop exits, otherwise the back edge
; to %A is taken.
; NOTE(review): this exit condition has the opposite sense of the do/while
; pseudo-code in the comment preceding the loop tests -- confirm whether that
; is intentional. Changing it requires regenerating the check lines.
2526892c175SPetar Avramovicloop.body:
2536892c175SPetar Avramovic  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
2546892c175SPetar Avramovic  %x.val = load i32, ptr addrspace(1) %x.plus.counter
2556892c175SPetar Avramovic  %x.val.plus.1 = add i32 %x.val, 1
2566892c175SPetar Avramovic  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
2576892c175SPetar Avramovic  %counter.plus.1 = add i32 %counter, 1
2586892c175SPetar Avramovic  %x.cond = icmp ult i32 %counter, 100
2596892c175SPetar Avramovic  br i1 %x.cond, label %exit, label %A
2606892c175SPetar Avramovic
; %exit: common target of both breaks and of the exit in %loop.body.
2616892c175SPetar Avramovicexit:
2626892c175SPetar Avramovic  ret void
2636892c175SPetar Avramovic}
2646892c175SPetar Avramovic
2656892c175SPetar Avramovicdefine amdgpu_cs void @loop_with_3breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c) {
2666892c175SPetar Avramovic; GFX10-LABEL: loop_with_3breaks:
2676892c175SPetar Avramovic; GFX10:       ; %bb.0: ; %entry
2686892c175SPetar Avramovic; GFX10-NEXT:    s_mov_b32 s0, 0
269433f8e74SPetar Avramovic; GFX10-NEXT:    ; implicit-def: $sgpr1
2706892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v8, s0
2716892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB4_4
2726892c175SPetar Avramovic; GFX10-NEXT:  .LBB4_1: ; %Flow5
2736892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
2746892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
2756892c175SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
276433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s4, -1, exec_lo
277433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s5, exec_lo, s5
278433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s4, s4, s5
2796892c175SPetar Avramovic; GFX10-NEXT:  .LBB4_2: ; %Flow4
2806892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
2816892c175SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s3
282433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
283433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s3, exec_lo, s4
284433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s1, s1, s3
2856892c175SPetar Avramovic; GFX10-NEXT:  .LBB4_3: ; %Flow
2866892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
287433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s2
288433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, s1
289433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s0, s2, s0
2906892c175SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
2916892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB4_8
2926892c175SPetar Avramovic; GFX10-NEXT:  .LBB4_4: ; %A
2936892c175SPetar Avramovic; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
2946892c175SPetar Avramovic; GFX10-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
295433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
296433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s2, exec_lo, -1
297433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s1, s1, s2
2986892c175SPetar Avramovic; GFX10-NEXT:    v_lshlrev_b64 v[9:10], 2, v[8:9]
2996892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v2, v9
3006892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v3, v10, vcc_lo
3016892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v11, v[11:12], off
3026892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
3036892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
304433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s2, vcc_lo
3056892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB4_3
3066892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.5: ; %B
3076892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
3086892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v4, v9
3096892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v5, v10, vcc_lo
310*6c2eec5cSPetar Avramovic; GFX10-NEXT:    s_mov_b32 s4, -1
3116892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v11, v[11:12], off
3126892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
3136892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
3146892c175SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s3, vcc_lo
3156892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB4_2
3166892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.6: ; %C
3176892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
3186892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v11, vcc_lo, v6, v9
3196892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, v7, v10, vcc_lo
320*6c2eec5cSPetar Avramovic; GFX10-NEXT:    s_mov_b32 s5, -1
3216892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v11, v[11:12], off
3226892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
3236892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
3246892c175SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
3256892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB4_1
3266892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.7: ; %loop.body
3276892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB4_4 Depth=1
3286892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v0, v9
3296892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v1, v10, vcc_lo
3306892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v12, 1, v8
331433f8e74SPetar Avramovic; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v8
332433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s5, -1, exec_lo
3336892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v11, v[9:10], off
3346892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v8, v12
335433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s6, exec_lo, vcc_lo
336433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s5, s5, s6
3376892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
3386892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v11
3396892c175SPetar Avramovic; GFX10-NEXT:    global_store_dword v[9:10], v11, off
3406892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB4_1
3416892c175SPetar Avramovic; GFX10-NEXT:  .LBB4_8: ; %exit
3426892c175SPetar Avramovic; GFX10-NEXT:    s_endpgm
; IR under test: a loop with three early-exit ("break") conditions in
; addition to the exit at the end of the loop body. The GFX10 check lines
; above are autogenerated (see the NOTE at the top of the file); regenerate
; them instead of editing them by hand.
3436892c175SPetar Avramovicentry:
3446892c175SPetar Avramovic  br label %A
3456892c175SPetar Avramovic
; %A (loop header): %counter starts at 0 and takes %counter.plus.1 on the
; back edge from %loop.body. First break: leave the loop when a[counter] == 0.
3466892c175SPetar AvramovicA:
3476892c175SPetar Avramovic  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
3486892c175SPetar Avramovic  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
3496892c175SPetar Avramovic  %a.val = load i32, ptr addrspace(1) %a.plus.counter
3506892c175SPetar Avramovic  %a.cond = icmp eq i32 %a.val, 0
3516892c175SPetar Avramovic  br i1 %a.cond, label %exit, label %B
3526892c175SPetar Avramovic
; %B: second break: leave the loop when b[counter] == 0.
3536892c175SPetar AvramovicB:
3546892c175SPetar Avramovic  %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
3556892c175SPetar Avramovic  %b.val = load i32, ptr addrspace(1) %b.plus.counter
3566892c175SPetar Avramovic  %b.cond = icmp eq i32 %b.val, 0
3576892c175SPetar Avramovic  br i1 %b.cond, label %exit, label %C
3586892c175SPetar Avramovic
; %C: third break: leave the loop when c[counter] == 0.
3596892c175SPetar AvramovicC:
3606892c175SPetar Avramovic  %c.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %c, i32 %counter
3616892c175SPetar Avramovic  %c.val = load i32, ptr addrspace(1) %c.plus.counter
3626892c175SPetar Avramovic  %c.cond = icmp eq i32 %c.val, 0
3636892c175SPetar Avramovic  br i1 %c.cond, label %exit, label %loop.body
3646892c175SPetar Avramovic
; %loop.body: x[counter] += 1 (load, add, store) and
; %counter.plus.1 = counter + 1. %x.cond tests the pre-increment %counter
; (u-less-than 100); when it is true the loop exits, otherwise the back edge
; to %A is taken.
; NOTE(review): this exit condition has the opposite sense of the do/while
; pseudo-code in the comment preceding the loop tests -- confirm whether that
; is intentional. Changing it requires regenerating the check lines.
3656892c175SPetar Avramovicloop.body:
3666892c175SPetar Avramovic  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
3676892c175SPetar Avramovic  %x.val = load i32, ptr addrspace(1) %x.plus.counter
3686892c175SPetar Avramovic  %x.val.plus.1 = add i32 %x.val, 1
3696892c175SPetar Avramovic  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
3706892c175SPetar Avramovic  %counter.plus.1 = add i32 %counter, 1
3716892c175SPetar Avramovic  %x.cond = icmp ult i32 %counter, 100
3726892c175SPetar Avramovic  br i1 %x.cond, label %exit, label %A
3736892c175SPetar Avramovic
; %exit: common target of all three breaks and of the exit in %loop.body.
3746892c175SPetar Avramovicexit:
3756892c175SPetar Avramovic  ret void
3766892c175SPetar Avramovic}
3776892c175SPetar Avramovic
3786892c175SPetar Avramovic; Divergent 'if' condition with a body, ending with a break. This is a loop with
3796892c175SPetar Avramovic; two exits, but the structurizer will create a phi that tracks the exit from the
3806892c175SPetar Avramovic; break and move break.body after the loop. The loop will then have one exit, and
3816892c175SPetar Avramovic; the phi is used outside of the loop by the condition used to enter break.body.
3826892c175SPetar Avramovicdefine amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %a.break) {
3836892c175SPetar Avramovic; GFX10-LABEL: loop_with_div_break_with_body:
3846892c175SPetar Avramovic; GFX10:       ; %bb.0: ; %entry
3856892c175SPetar Avramovic; GFX10-NEXT:    s_mov_b32 s0, 0
386433f8e74SPetar Avramovic; GFX10-NEXT:    ; implicit-def: $sgpr1
387433f8e74SPetar Avramovic; GFX10-NEXT:    ; implicit-def: $sgpr2
388433f8e74SPetar Avramovic; GFX10-NEXT:    ; implicit-def: $sgpr3
3896892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v6, s0
3906892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB5_2
3916892c175SPetar Avramovic; GFX10-NEXT:  .LBB5_1: ; %Flow
3926892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB5_2 Depth=1
3936892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
394433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
395433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s4, exec_lo, s2
396433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s0, s4, s0
397433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s1, s1, exec_lo
398433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s4, exec_lo, s3
399433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s1, s1, s4
4006892c175SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
4016892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB5_4
4026892c175SPetar Avramovic; GFX10-NEXT:  .LBB5_2: ; %A
4036892c175SPetar Avramovic; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
4046892c175SPetar Avramovic; GFX10-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
405433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s3, s3, exec_lo
406433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s4, exec_lo, -1
407433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s2, s2, exec_lo
408433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s3, s3, s4
4096892c175SPetar Avramovic; GFX10-NEXT:    v_lshlrev_b64 v[7:8], 2, v[6:7]
410433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s2, s2, s4
4116892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v7
4126892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
4136892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v9, v[9:10], off
4146892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
4156892c175SPetar Avramovic; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v9
416433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
4176892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB5_1
4186892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.3: ; %loop.body
4196892c175SPetar Avramovic; GFX10-NEXT:    ; in Loop: Header=BB5_2 Depth=1
4206892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v0, v7
4216892c175SPetar Avramovic; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
4226892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v6
423433f8e74SPetar Avramovic; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
424433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s3, s3, exec_lo
4256892c175SPetar Avramovic; GFX10-NEXT:    global_load_dword v9, v[7:8], off
426433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s5, exec_lo, 0
4276892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v6, v10
428433f8e74SPetar Avramovic; GFX10-NEXT:    s_andn2_b32 s2, s2, exec_lo
429433f8e74SPetar Avramovic; GFX10-NEXT:    s_and_b32 s6, exec_lo, vcc_lo
430433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s3, s3, s5
431433f8e74SPetar Avramovic; GFX10-NEXT:    s_or_b32 s2, s2, s6
4326892c175SPetar Avramovic; GFX10-NEXT:    s_waitcnt vmcnt(0)
4336892c175SPetar Avramovic; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v9
4346892c175SPetar Avramovic; GFX10-NEXT:    global_store_dword v[7:8], v9, off
4356892c175SPetar Avramovic; GFX10-NEXT:    s_branch .LBB5_1
4366892c175SPetar Avramovic; GFX10-NEXT:  .LBB5_4: ; %loop.exit.guard
4376892c175SPetar Avramovic; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s0
4386892c175SPetar Avramovic; GFX10-NEXT:    s_and_saveexec_b32 s0, s1
4396892c175SPetar Avramovic; GFX10-NEXT:    s_xor_b32 s0, exec_lo, s0
4406892c175SPetar Avramovic; GFX10-NEXT:    s_cbranch_execz .LBB5_6
4416892c175SPetar Avramovic; GFX10-NEXT:  ; %bb.5: ; %break.body
4426892c175SPetar Avramovic; GFX10-NEXT:    v_mov_b32_e32 v0, 10
4436892c175SPetar Avramovic; GFX10-NEXT:    global_store_dword v[4:5], v0, off
4446892c175SPetar Avramovic; GFX10-NEXT:  .LBB5_6: ; %exit
4456892c175SPetar Avramovic; GFX10-NEXT:    s_endpgm
; IR under test: a loop whose early exit runs extra code (%break.body) before
; reaching %exit, while the exit from %loop.body goes to %exit directly. The
; GFX10 check lines above are autogenerated (see the NOTE at the top of the
; file); regenerate them instead of editing them by hand.
4466892c175SPetar Avramovicentry:
4476892c175SPetar Avramovic  br label %A
4486892c175SPetar Avramovic
; %A (loop header): %counter starts at 0 and takes %counter.plus.1 on the
; back edge from %loop.body. When a[counter] == 0 the loop is left through
; %break.body; otherwise execution continues in %loop.body.
4496892c175SPetar AvramovicA:
4506892c175SPetar Avramovic  %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
4516892c175SPetar Avramovic  %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
4526892c175SPetar Avramovic  %a.val = load i32, ptr addrspace(1) %a.plus.counter
4536892c175SPetar Avramovic  %a.cond = icmp eq i32 %a.val, 0
4546892c175SPetar Avramovic  br i1 %a.cond, label %break.body, label %loop.body
4556892c175SPetar Avramovic
; %break.body: reached only from the break in %A; stores 10 to %a.break.
4566892c175SPetar Avramovicbreak.body:
4576892c175SPetar Avramovic  store i32 10, ptr addrspace(1) %a.break
4586892c175SPetar Avramovic  br label %exit
4596892c175SPetar Avramovic
4606892c175SPetar Avramovic
; %loop.body: x[counter] += 1 (load, add, store) and
; %counter.plus.1 = counter + 1. %x.cond tests the pre-increment %counter
; (u-less-than 100); when it is true control goes to %exit (skipping
; %break.body), otherwise the back edge to %A is taken.
4616892c175SPetar Avramovicloop.body:
4626892c175SPetar Avramovic  %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
4636892c175SPetar Avramovic  %x.val = load i32, ptr addrspace(1) %x.plus.counter
4646892c175SPetar Avramovic  %x.val.plus.1 = add i32 %x.val, 1
4656892c175SPetar Avramovic  store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
4666892c175SPetar Avramovic  %counter.plus.1 = add i32 %counter, 1
4676892c175SPetar Avramovic  %x.cond = icmp ult i32 %counter, 100
4686892c175SPetar Avramovic  br i1 %x.cond, label %exit, label %A
4696892c175SPetar Avramovic
; %exit: reached from %break.body and from %loop.body.
4706892c175SPetar Avramovicexit:
4716892c175SPetar Avramovic  ret void
4726892c175SPetar Avramovic}
4736892c175SPetar Avramovic
4746892c175SPetar Avramovic; Snippet from test generated by the GraphicsFuzz tool, frontend generates ir
4756892c175SPetar Avramovic; with irreducible control flow graph. FixIrreducible converts it into natural
4766892c175SPetar Avramovic; loop and in the process creates i1 phi with three incoming values.
4776892c175SPetar Avramovic
4786892c175SPetar Avramovic; int loop(int x, int y, int a0, int a1, int a2, int a3, int a4) {
4796892c175SPetar Avramovic;   do {
4806892c175SPetar Avramovic;     if (y < a2) {
4816892c175SPetar Avramovic;       do {
4826892c175SPetar Avramovic;       } while (x < a2);
4836892c175SPetar Avramovic;     }
4846892c175SPetar Avramovic;     if (x < a3) {
4856892c175SPetar Avramovic;       return a1;
4866892c175SPetar Avramovic;     }
4876892c175SPetar Avramovic;   } while (y < a2);
4886892c175SPetar Avramovic;   return a0;
4896892c175SPetar Avramovic; }
4906892c175SPetar Avramovic
4916892c175SPetar Avramovic; This test is also interesting because it has phi with three incomings
4926892c175SPetar Avramovic;define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
4936892c175SPetar Avramovic;.entry:
4946892c175SPetar Avramovic; %.y_lt_a2 = icmp sgt i32 %a2, %y
4956892c175SPetar Avramovic; %.x_lt_a2 = icmp sgt i32 %a2, %x
4966892c175SPetar Avramovic; %.x_lt_a3 = icmp sgt i32 %a3, %x
4976892c175SPetar Avramovic; br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)'
4986892c175SPetar Avramovic;
4996892c175SPetar Avramovic;.preheader: ; if (y < a2),
5006892c175SPetar Avramovic; br label %.inner_loop
5016892c175SPetar Avramovic;
5026892c175SPetar Avramovic;.inner_loop: ; do while x < a2
5036892c175SPetar Avramovic; br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit
5046892c175SPetar Avramovic;
5056892c175SPetar Avramovic;.loopexit: ; if x < a3
5066892c175SPetar Avramovic; %not.inner_loop = xor i1 %.y_lt_a2, true
5076892c175SPetar Avramovic; %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)'
5086892c175SPetar Avramovic; %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0               ; select return value a1 'x < a3' or a0 'loop ends'
5096892c175SPetar Avramovic; br i1 %brmerge, label %.exit, label %.preheader
5106892c175SPetar Avramovic;
5116892c175SPetar Avramovic;.exit:
5126892c175SPetar Avramovic; ret i32 %.ret
5136892c175SPetar Avramovic;}
5146892c175SPetar Avramovic
515