; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

; Loop with two exit edges to %bb9 (from %bb1 when %cmp0 is false, and from
; %bb4 when %cmp1 is false); structurization must funnel both exits through a
; single Flow block and lower the break with llvm.amdgcn.if.break/loop.
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP0:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP0]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc0 .LBB0_3
; GCN-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:    s_branch .LBB0_4
; GCN-NEXT:  .LBB0_3: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB0_4: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB0_1
; GCN-NEXT:  ; %bb.5: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; Same loop shape, but the break-condition phi (%my.tmp3) carries an undef
; incoming value on the %bb1 edge of the pre-structurized Flow block.
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; Break-condition phi gets a constant-expression icmp (%cmp2, comparing an
; inttoptr constant against @lds) on the %bb1 edge of the Flow block.
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    [[CMP2:%.*]] = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ [[CMP2]], [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  %cmp2 = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ %cmp2, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; Break-condition phi is the constant true on the %bb1 edge, so falling out
; of the inner condition always breaks.
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; Break-condition phi is the constant false on the %bb1 edge, so only %bb4's
; compare can break out of the loop.
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.

; Because the true successor is the backedge, annotation must invert the
; break condition (the OPT checks expect an xor of %my.tmp3 with true).
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3_INV:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3_INV]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }