; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s

; A test with a divergent unreachable block and a uniform return block. The
; compiler needs to create a region that includes them so that
; StructurizeCFG correctly transforms the CFG, and then SI Annotate Control
; Flow does not fail during annotation.

define void @my_func(i32 %0) {
; IR-LABEL: @my_func(
; IR-NEXT:  entry:
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) null, align 8
; IR-NEXT:    br label [[NODEBLOCK:%.*]]
; IR:       NodeBlock:
; IR-NEXT:    [[PIVOT:%.*]] = icmp sge i32 [[TMP1]], 1
; IR-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]]
; IR:       LeafBlock1:
; IR-NEXT:    [[SWITCHLEAF2:%.*]] = icmp ne i32 [[TMP1]], 1
; IR-NEXT:    br label [[FLOW]]
; IR:       Flow:
; IR-NEXT:    [[TMP2:%.*]] = phi i1 [ [[SWITCHLEAF2]], [[LEAFBLOCK1]] ], [ false, [[NODEBLOCK]] ]
; IR-NEXT:    [[TMP3:%.*]] = phi i1 [ false, [[LEAFBLOCK1]] ], [ true, [[NODEBLOCK]] ]
; IR-NEXT:    br i1 [[TMP3]], label [[LEAFBLOCK:%.*]], label [[FLOW11:%.*]]
; IR:       LeafBlock:
; IR-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[TMP1]], 0
; IR-NEXT:    br i1 [[SWITCHLEAF]], label [[SW_BB2:%.*]], label [[FLOW12:%.*]]
; IR:       Flow11:
; IR-NEXT:    [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW12]] ], [ false, [[FLOW]] ]
; IR-NEXT:    [[TMP5:%.*]] = phi i1 [ [[TMP10:%.*]], [[FLOW12]] ], [ [[TMP2]], [[FLOW]] ]
; IR-NEXT:    [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP5]])
; IR-NEXT:    [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0
; IR-NEXT:    [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1
; IR-NEXT:    br i1 [[TMP7]], label [[DO_BODY:%.*]], label [[FLOW17:%.*]]
; IR:       sw.bb2:
; IR-NEXT:    br label [[NODEBLOCK7:%.*]]
; IR:       Flow12:
; IR-NEXT:    [[TMP9]] = phi i1 [ [[TMP24:%.*]], [[FLOW15:%.*]] ], [ false, [[LEAFBLOCK]] ]
; IR-NEXT:    [[TMP10]] = phi i1 [ [[TMP25:%.*]], [[FLOW15]] ], [ true, [[LEAFBLOCK]] ]
; IR-NEXT:    br label [[FLOW11]]
; IR:       NodeBlock7:
; IR-NEXT:    [[PIVOT8:%.*]] = icmp sge i32 [[TMP0:%.*]], 2
; IR-NEXT:    [[TMP11:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[PIVOT8]])
; IR-NEXT:    [[TMP12:%.*]] = extractvalue { i1, i64 } [[TMP11]], 0
; IR-NEXT:    [[TMP13:%.*]] = extractvalue { i1, i64 } [[TMP11]], 1
; IR-NEXT:    br i1 [[TMP12]], label [[LEAFBLOCK5:%.*]], label [[FLOW13:%.*]]
; IR:       LeafBlock5:
; IR-NEXT:    [[SWITCHLEAF6:%.*]] = icmp eq i32 [[TMP0]], 2
; IR-NEXT:    br label [[FLOW13]]
; IR:       Flow13:
; IR-NEXT:    [[TMP14:%.*]] = phi i1 [ true, [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ]
; IR-NEXT:    [[TMP15:%.*]] = phi i1 [ [[SWITCHLEAF6]], [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ]
; IR-NEXT:    [[TMP16:%.*]] = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 [[TMP13]])
; IR-NEXT:    [[TMP17:%.*]] = extractvalue { i1, i64 } [[TMP16]], 0
; IR-NEXT:    [[TMP18:%.*]] = extractvalue { i1, i64 } [[TMP16]], 1
; IR-NEXT:    br i1 [[TMP17]], label [[LEAFBLOCK3:%.*]], label [[FLOW14:%.*]]
; IR:       LeafBlock3:
; IR-NEXT:    [[SWITCHLEAF4:%.*]] = icmp eq i32 [[TMP0]], 0
; IR-NEXT:    [[SWITCHLEAF4_INV:%.*]] = xor i1 [[SWITCHLEAF4]], true
; IR-NEXT:    br label [[FLOW14]]
; IR:       Flow14:
; IR-NEXT:    [[TMP19:%.*]] = phi i1 [ [[SWITCHLEAF4_INV]], [[LEAFBLOCK3]] ], [ [[TMP14]], [[FLOW13]] ]
; IR-NEXT:    [[TMP20:%.*]] = phi i1 [ [[SWITCHLEAF4]], [[LEAFBLOCK3]] ], [ [[TMP15]], [[FLOW13]] ]
; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP18]])
; IR-NEXT:    [[TMP21:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP20]])
; IR-NEXT:    [[TMP22:%.*]] = extractvalue { i1, i64 } [[TMP21]], 0
; IR-NEXT:    [[TMP23:%.*]] = extractvalue { i1, i64 } [[TMP21]], 1
; IR-NEXT:    br i1 [[TMP22]], label [[LAND_LHS_TRUE_I:%.*]], label [[FLOW15]]
; IR:       land.lhs.true.i:
; IR-NEXT:    br label [[LEAFBLOCK9:%.*]]
; IR:       Flow15:
; IR-NEXT:    [[TMP24]] = phi i1 [ [[TMP29:%.*]], [[FLOW16:%.*]] ], [ false, [[FLOW14]] ]
; IR-NEXT:    [[TMP25]] = phi i1 [ [[TMP30:%.*]], [[FLOW16]] ], [ [[TMP19]], [[FLOW14]] ]
; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP23]])
; IR-NEXT:    br label [[FLOW12]]
; IR:       LeafBlock9:
; IR-NEXT:    [[SWITCHLEAF10:%.*]] = icmp sgt i32 [[TMP0]], 1
; IR-NEXT:    [[TMP26:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[SWITCHLEAF10]])
; IR-NEXT:    [[TMP27:%.*]] = extractvalue { i1, i64 } [[TMP26]], 0
; IR-NEXT:    [[TMP28:%.*]] = extractvalue { i1, i64 } [[TMP26]], 1
; IR-NEXT:    br i1 [[TMP27]], label [[DO_BODY_I_I_I_I:%.*]], label [[FLOW16]]
; IR:       do.body.i.i.i.i:
; IR-NEXT:    tail call fastcc void null()
; IR-NEXT:    br label [[FLOW16]]
; IR:       Flow16:
; IR-NEXT:    [[TMP29]] = phi i1 [ true, [[DO_BODY_I_I_I_I]] ], [ false, [[LEAFBLOCK9]] ]
; IR-NEXT:    [[TMP30]] = phi i1 [ false, [[DO_BODY_I_I_I_I]] ], [ true, [[LEAFBLOCK9]] ]
; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP28]])
; IR-NEXT:    br label [[FLOW15]]
; IR:       do.body:
; IR-NEXT:    tail call fastcc void null()
; IR-NEXT:    br label [[FLOW17]]
; IR:       Flow17:
; IR-NEXT:    [[TMP31:%.*]] = phi i1 [ true, [[DO_BODY]] ], [ [[TMP4]], [[FLOW11]] ]
; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; IR-NEXT:    [[TMP32:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP31]])
; IR-NEXT:    [[TMP33:%.*]] = extractvalue { i1, i64 } [[TMP32]], 0
; IR-NEXT:    [[TMP34:%.*]] = extractvalue { i1, i64 } [[TMP32]], 1
; IR-NEXT:    br i1 [[TMP33]], label [[UNIFIEDUNREACHABLEBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
; IR:       UnifiedUnreachableBlock:
; IR-NEXT:    call void @llvm.amdgcn.unreachable()
; IR-NEXT:    br label [[UNIFIEDRETURNBLOCK]]
; IR:       UnifiedReturnBlock:
; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP34]])
; IR-NEXT:    ret void
;
; GCN-LABEL: my_func:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b64 s[4:5], 0
; GCN-NEXT:    s_load_dword s10, s[4:5], 0x0
; GCN-NEXT:    s_mov_b64 s[8:9], -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lt_i32 s10, 1
; GCN-NEXT:    s_mov_b64 s[6:7], 0
; GCN-NEXT:    s_cbranch_scc1 .LBB0_7
; GCN-NEXT:  ; %bb.1: ; %LeafBlock1
; GCN-NEXT:    s_cmp_lg_u32 s10, 1
; GCN-NEXT:    s_cselect_b64 s[6:7], -1, 0
; GCN-NEXT:    s_mov_b64 vcc, exec
; GCN-NEXT:    s_cbranch_execz .LBB0_8
; GCN-NEXT:  .LBB0_2: ; %Flow11
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB0_4
; GCN-NEXT:  .LBB0_3: ; %do.body
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:  .LBB0_4: ; %Flow17
; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB0_6
; GCN-NEXT:  ; %bb.5: ; %UnifiedUnreachableBlock
; GCN-NEXT:    ; divergent unreachable
; GCN-NEXT:  .LBB0_6: ; %UnifiedReturnBlock
; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
; GCN-NEXT:  .LBB0_7: ; %Flow
; GCN-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
; GCN-NEXT:    s_cbranch_vccnz .LBB0_2
; GCN-NEXT:  .LBB0_8: ; %LeafBlock
; GCN-NEXT:    s_cmp_eq_u32 s10, 0
; GCN-NEXT:    s_cbranch_scc1 .LBB0_10
; GCN-NEXT:  ; %bb.9:
; GCN-NEXT:    s_mov_b64 s[6:7], -1
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT:    s_cbranch_execnz .LBB0_3
; GCN-NEXT:    s_branch .LBB0_4
; GCN-NEXT:  .LBB0_10: ; %NodeBlock7
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT:    s_mov_b64 s[8:9], 0
; GCN-NEXT:    s_mov_b64 s[6:7], 0
; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB0_12
; GCN-NEXT:  ; %bb.11: ; %LeafBlock5
; GCN-NEXT:    s_mov_b64 s[6:7], exec
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:  .LBB0_12: ; %Flow13
; GCN-NEXT:    s_andn2_saveexec_b64 s[10:11], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB0_14
; GCN-NEXT:  ; %bb.13: ; %LeafBlock3
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_andn2_b64 s[8:9], s[8:9], exec
; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[12:13], vcc, exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[4:5]
; GCN-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
; GCN-NEXT:  .LBB0_14: ; %Flow14
; GCN-NEXT:    s_or_b64 exec, exec, s[10:11]
; GCN-NEXT:    s_mov_b64 s[4:5], 0
; GCN-NEXT:    s_and_saveexec_b64 s[10:11], s[8:9]
; GCN-NEXT:    s_cbranch_execz .LBB0_18
; GCN-NEXT:  ; %bb.15: ; %LeafBlock9
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT:    s_mov_b64 s[8:9], -1
; GCN-NEXT:    s_and_saveexec_b64 s[12:13], vcc
; GCN-NEXT:    s_cbranch_execz .LBB0_17
; GCN-NEXT:  ; %bb.16: ; %do.body.i.i.i.i
; GCN-NEXT:    s_mov_b64 s[4:5], exec
; GCN-NEXT:    s_xor_b64 s[8:9], exec, -1
; GCN-NEXT:  .LBB0_17: ; %Flow16
; GCN-NEXT:    s_or_b64 exec, exec, s[12:13]
; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[8:9], exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:  .LBB0_18: ; %Flow15
; GCN-NEXT:    s_or_b64 exec, exec, s[10:11]
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
; GCN-NEXT:    s_cbranch_execnz .LBB0_3
; GCN-NEXT:    s_branch .LBB0_4
entry:
  ; The selector is loaded from the constant address space (addrspace(4));
  ; the llc output above handles it with scalar loads and compares, so this
  ; switch (and the edge to the returning block %sw.bb) is uniform.
  %1 = load i32, ptr addrspace(4) null, align 8
  switch i32 %1, label %do.body [
    i32 1, label %sw.bb
    i32 0, label %sw.bb2
  ]

sw.bb:
  ret void

sw.bb2:
  ; This switch and the next one select on the function argument %0, which
  ; the llc output compares with vector (v_cmp) instructions, i.e. the
  ; branches towards the unreachable-terminated blocks are divergent.
  switch i32 %0, label %do.body [
    i32 0, label %land.lhs.true.i
    i32 2, label %land.lhs.true.i
  ]

land.lhs.true.i:
  switch i32 %0, label %do.body.i.i.i.i [
    i32 0, label %do.body
    i32 1, label %do.body
  ]

do.body.i.i.i.i:
  tail call fastcc void null()
  unreachable

do.body:
  tail call fastcc void null()
  unreachable

}