; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs | FileCheck %s

declare void @llvm.trap()
declare i32 @llvm.amdgcn.workitem.id.x()

define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
; This used to bypass the structurization process because structurizer is unable to
; handle multiple-exits CFG. This should be correctly structurized.
;
; The kernel below has three exits: two blocks that call llvm.trap and end in
; unreachable (cond.false, cond.false.i8) and one block that returns (if.end6).
; CHECK-LABEL: kernel:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x10
; CHECK-NEXT:    s_load_dword s10, s[8:9], 0x0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_cmpk_lg_i32 s0, 0x100
; CHECK-NEXT:    s_cbranch_scc0 .LBB0_6
; CHECK-NEXT:  ; %bb.1: ; %if.else
; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 10, v0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    s_mov_b64 s[2:3], 0
; CHECK-NEXT:    s_mov_b64 s[0:1], 0
; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; CHECK-NEXT:    s_cbranch_execz .LBB0_5
; CHECK-NEXT:  ; %bb.2: ; %if.then3
; CHECK-NEXT:    s_cmp_lg_u32 s10, 0
; CHECK-NEXT:    s_cbranch_scc1 .LBB0_14
; CHECK-NEXT:  ; %bb.3:
; CHECK-NEXT:    s_mov_b64 s[0:1], -1
; CHECK-NEXT:  .LBB0_4: ; %Flow3
; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_and_b64 s[2:3], s[2:3], exec
; CHECK-NEXT:  .LBB0_5: ; %Flow2
; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT:    s_and_b64 vcc, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_vccz .LBB0_8
; CHECK-NEXT:    s_branch .LBB0_7
; CHECK-NEXT:  .LBB0_6:
; CHECK-NEXT:    s_mov_b64 s[2:3], 0
; CHECK-NEXT:    s_mov_b64 s[0:1], 0
; CHECK-NEXT:    s_cbranch_execz .LBB0_8
; CHECK-NEXT:  .LBB0_7: ; %if.then
; CHECK-NEXT:    s_cmp_lg_u32 s10, 0
; CHECK-NEXT:    s_mov_b64 s[0:1], -1
; CHECK-NEXT:    s_cbranch_scc1 .LBB0_13
; CHECK-NEXT:  .LBB0_8: ; %Flow4
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
; CHECK-NEXT:  .LBB0_9: ; %UnifiedUnreachableBlock
; CHECK-NEXT:    ; divergent unreachable
; CHECK-NEXT:  .LBB0_10: ; %Flow6
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], s[0:1]
; CHECK-NEXT:    s_cbranch_execz .LBB0_12
; CHECK-NEXT:  ; %bb.11: ; %if.end6.sink.split
; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT:    v_mov_b32_e32 v1, s10
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
; CHECK-NEXT:  .LBB0_12: ; %UnifiedReturnBlock
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB0_13: ; %cond.false
; CHECK-NEXT:    s_mov_b64 s[0:1], 0
; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
; CHECK-NEXT:    s_trap 2
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
; CHECK-NEXT:    s_cbranch_execnz .LBB0_9
; CHECK-NEXT:    s_branch .LBB0_10
; CHECK-NEXT:  .LBB0_14: ; %cond.false.i8
; CHECK-NEXT:    s_mov_b64 s[2:3], -1
; CHECK-NEXT:    s_trap 2
; CHECK-NEXT:    s_branch .LBB0_4


entry:
  ; First branch depends only on the kernel argument %n.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cmp = icmp eq i32 %n, 256
  br i1 %cmp, label %if.then, label %if.else

if.then:
  %cmp1 = icmp eq i32 %a, 0
  br i1 %cmp1, label %if.end6.sink.split, label %cond.false

cond.false:
  ; First trapping exit: no return, the block ends in unreachable.
  call void @llvm.trap()
  unreachable

if.else:
  ; This branch depends on the workitem id: only ids below 10 reach if.then3.
  %cmp2 = icmp ult i32 %tid, 10
  br i1 %cmp2, label %if.then3, label %if.end6

if.then3:
  %cmp1.i7 = icmp eq i32 %a, 0
  br i1 %cmp1.i7, label %if.end6.sink.split, label %cond.false.i8

cond.false.i8:
  ; Second trapping exit, reached from the workitem-id-dependent path.
  call void @llvm.trap()
  unreachable

if.end6.sink.split:
  ; Both non-trapping paths (%a == 0) land here and store %a to %x[%tid].
  %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid
  store i32 %a, ptr addrspace(1) %x1, align 4
  br label %if.end6

if.end6:
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; UNIFY: {{.*}}