; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; The first command line above runs opt with -structurizecfg and
; -si-annotate-control-flow and verifies the resulting textual IR with the
; checks under the IR prefix. The second one runs llc for -mcpu=hawaii and
; verifies the emitted assembly with the checks under the GCN prefix.
; All check lines are tool-generated (see the note on the first line), so
; regenerate them with that script instead of editing them by hand.

; After structurizing, there are 3 levels of loops. The i1 phi
; conditions mutually depend on each other, so it isn't safe to delete
; the condition that appears to have no uses until the loop is
; completely processed.
;
; In the input below the mutually dependent i1 phis are %my.tmp14 (in %bb13)
; and %my.tmp22 (in %bb20): each one lists the other as an incoming value.
; %my.tmp12 (in %bb10) additionally consumes %my.tmp22 as the loop exit
; condition.

define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) captures(none) %arg) #0 {
; GCN-LABEL: reduced_nested_loop_conditions:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; GCN-NEXT: ds_read_b64 v[0:1], v0
; GCN-NEXT: s_and_b64 vcc, exec, 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: ; implicit-def: $sgpr2
; GCN-NEXT: s_mov_b64 vcc, vcc
; GCN-NEXT: s_cbranch_vccz .LBB0_4
; GCN-NEXT: .LBB0_2: ; %bb5
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_cmp_lg_u32 s2, 1
; GCN-NEXT: s_mov_b64 s[0:1], -1
; GCN-NEXT: s_cbranch_scc0 .LBB0_1
; GCN-NEXT: ; %bb.3: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_branch .LBB0_1
; GCN-NEXT: .LBB0_4: ; %loop.exit.guard
; GCN-NEXT: s_and_b64 vcc, exec, s[0:1]
; GCN-NEXT: s_cbranch_vccz .LBB0_7
; GCN-NEXT: ; %bb.5: ; %bb8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_and_b64 vcc, exec, 0
; GCN-NEXT: .LBB0_6: ; %bb9
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_mov_b64 vcc, vcc
; GCN-NEXT: s_cbranch_vccz .LBB0_6
; GCN-NEXT: .LBB0_7: ; %DummyReturnBlock
; GCN-NEXT: s_endpgm
; IR-LABEL: define amdgpu_kernel void @reduced_nested_loop_conditions(
; IR-SAME: ptr addrspace(3) captures(none) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-NEXT: [[BB:.*]]:
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]]
; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) [[ARG]], i32 [[MY_TMP]]
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, ptr addrspace(3) [[MY_TMP1]], align 8
; IR-NEXT: br label %[[BB5:.*]]
; IR: [[BB3:.*]]:
; IR-NEXT: br i1 true, label %[[BB4:.*]], label %[[BB13:.*]]
; IR: [[BB4]]:
; IR-NEXT: br label %[[FLOW:.*]]
; IR: [[BB5]]:
; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], %[[BB10:.*]] ], [ 0, %[[BB]] ]
; IR-NEXT: [[MY_TMP6:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB10]] ]
; IR-NEXT: [[MY_TMP7:%.*]] = icmp eq i32 [[MY_TMP6]], 1
; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP7]])
; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
; IR-NEXT: br i1 [[TMP1]], label %[[BB8:.*]], label %[[FLOW]]
; IR: [[BB8]]:
; IR-NEXT: br label %[[BB13]]
; IR: [[BB9:.*]]:
; IR-NEXT: br i1 false, label %[[BB3]], label %[[BB9]]
; IR: [[BB10]]:
; IR-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
; IR-NEXT: br i1 [[TMP3]], label %[[BB23:.*]], label %[[BB5]]
; IR: [[FLOW]]:
; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[MY_TMP22:%.*]], %[[BB4]] ], [ true, %[[BB5]] ]
; IR-NEXT: [[TMP5]] = phi i32 [ [[MY_TMP21:%.*]], %[[BB4]] ], [ undef, %[[BB5]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; IR-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN]])
; IR-NEXT: br label %[[BB10]]
; IR: [[BB13]]:
; IR-NEXT: [[MY_TMP14:%.*]] = phi i1 [ [[MY_TMP22]], %[[BB3]] ], [ true, %[[BB8]] ]
; IR-NEXT: [[MY_TMP15:%.*]] = bitcast i64 [[MY_TMP2]] to <2 x i32>
; IR-NEXT: br i1 [[MY_TMP14]], label %[[BB16:.*]], label %[[BB20:.*]]
; IR: [[BB16]]:
; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1
; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) undef, i32 [[MY_TMP17]]
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[MY_TMP18]], align 4
; IR-NEXT: br label %[[BB20]]
; IR: [[BB20]]:
; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], %[[BB16]] ], [ 0, %[[BB13]] ]
; IR-NEXT: [[MY_TMP22]] = phi i1 [ false, %[[BB16]] ], [ [[MY_TMP14]], %[[BB13]] ]
; IR-NEXT: br label %[[BB9]]
; IR: [[BB23]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; IR-NEXT: ret void
bb:
  ; Volatile i64 load from %arg indexed by the workitem id; the loaded value
  ; is only consumed by the bitcast in %bb13.
  %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %my.tmp1 = getelementptr inbounds i64, ptr addrspace(3) %arg, i32 %my.tmp
  %my.tmp2 = load volatile i64, ptr addrspace(3) %my.tmp1
  br label %bb5

bb3:                                              ; preds = %bb9
  ; Constant-true condition: the %bb13 successor is listed but never selected,
  ; yet it still contributes an incoming value to the %my.tmp14 phi.
  br i1 true, label %bb4, label %bb13

bb4:                                              ; preds = %bb3
  br label %bb10

bb5:                                              ; preds = %bb10, %bb
  ; Loop header: %my.tmp6 is 0 on entry and %my.tmp11 on the edge from %bb10.
  %my.tmp6 = phi i32 [ 0, %bb ], [ %my.tmp11, %bb10 ]
  %my.tmp7 = icmp eq i32 %my.tmp6, 1
  br i1 %my.tmp7, label %bb8, label %bb10

bb8:                                              ; preds = %bb5
  br label %bb13

bb9:                                              ; preds = %bb20, %bb9
  ; Constant-false condition: the selected successor is always %bb9 itself.
  br i1 false, label %bb3, label %bb9

bb10:                                             ; preds = %bb5, %bb4
  ; %my.tmp12 is the exit condition: true when coming from %bb5, otherwise the
  ; value of %my.tmp22 carried in from %bb4.
  %my.tmp11 = phi i32 [ %my.tmp21, %bb4 ], [ undef, %bb5 ]
  %my.tmp12 = phi i1 [ %my.tmp22, %bb4 ], [ true, %bb5 ]
  br i1 %my.tmp12, label %bb23, label %bb5

bb13:                                             ; preds = %bb8, %bb3
  ; %my.tmp14 reads %my.tmp22 (defined in %bb20) on the %bb3 edge, and
  ; %my.tmp22 reads %my.tmp14 on the %bb13 edge.
  %my.tmp14 = phi i1 [ %my.tmp22, %bb3 ], [ true, %bb8 ]
  %my.tmp15 = bitcast i64 %my.tmp2 to <2 x i32>
  br i1 %my.tmp14, label %bb16, label %bb20

bb16:                                             ; preds = %bb13
  ; Volatile load through a GEP whose base pointer is undef.
  %my.tmp17 = extractelement <2 x i32> %my.tmp15, i64 1
  %my.tmp18 = getelementptr inbounds i32, ptr addrspace(3) undef, i32 %my.tmp17
  %my.tmp19 = load volatile i32, ptr addrspace(3) %my.tmp18
  br label %bb20

bb20:                                             ; preds = %bb16, %bb13
  %my.tmp21 = phi i32 [ %my.tmp19, %bb16 ], [ 0, %bb13 ]
  %my.tmp22 = phi i1 [ false, %bb16 ], [ %my.tmp14, %bb13 ]
  br label %bb9

bb23:                                             ; preds = %bb10
  ret void
}

; Earlier version of above, before a run of the structurizer.
;
; The input has a guard in %bb, an outer loop headed by %bb14 and an inner
; self-loop in %bb18. The loop is left either through %bb31.loopexit (from
; %bb14) or through %bb4.bb13_crit_edge (from %bb21); both paths end in %bb31.

define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) captures(none) %arg) #0 {
; GCN-LABEL: nested_loop_conditions:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
; GCN-NEXT: s_cbranch_vccnz .LBB1_6
; GCN-NEXT: ; %bb.1: ; %bb14.lr.ph
; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
; GCN-NEXT: s_branch .LBB1_3
; GCN-NEXT: .LBB1_2: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: s_and_b64 vcc, exec, s[0:1]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 vcc, vcc
; GCN-NEXT: s_cbranch_vccnz .LBB1_6
; GCN-NEXT: .LBB1_3: ; %bb14
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB1_4 Depth 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, 1
; GCN-NEXT: s_mov_b64 s[0:1], -1
; GCN-NEXT: ; implicit-def: $sgpr4
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
; GCN-NEXT: .LBB1_4: ; %bb18
; GCN-NEXT: ; Parent Loop BB1_3 Depth=1
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 8, v0
; GCN-NEXT: s_cbranch_vccnz .LBB1_4
; GCN-NEXT: ; %bb.5: ; %bb21
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i32_e64 s[0:1], 8, v0
; GCN-NEXT: s_branch .LBB1_2
; GCN-NEXT: .LBB1_6: ; %bb31
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
; IR-LABEL: define amdgpu_kernel void @nested_loop_conditions(
; IR-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) #[[ATTR0]] {
; IR-NEXT: [[BB:.*]]:
; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9
; IR-NEXT: br i1 [[MY_TMP1235]], label %[[BB14_LR_PH:.*]], label %[[FLOW:.*]]
; IR: [[BB14_LR_PH]]:
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]]
; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64
; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[ARG]], i64 [[MY_TMP1]]
; IR-NEXT: [[MY_TMP3:%.*]] = load i64, ptr addrspace(1) [[MY_TMP2]], align 16
; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, ptr addrspace(1) undef, align 16
; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0
; IR-NEXT: br label %[[BB14:.*]]
; IR: [[FLOW3:.*]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP20:%.*]])
; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]])
; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
; IR-NEXT: br i1 [[TMP1]], label %[[BB4_BB13_CRIT_EDGE:.*]], label %[[FLOW4:.*]]
; IR: [[BB4_BB13_CRIT_EDGE]]:
; IR-NEXT: br label %[[FLOW4]]
; IR: [[FLOW4]]:
; IR-NEXT: [[TMP3:%.*]] = phi i1 [ true, %[[BB4_BB13_CRIT_EDGE]] ], [ false, %[[FLOW3]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; IR-NEXT: br label %[[FLOW]]
; IR: [[BB13:.*]]:
; IR-NEXT: br label %[[BB31:.*]]
; IR: [[FLOW]]:
; IR-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP3]], %[[FLOW4]] ], [ true, %[[BB]] ]
; IR-NEXT: [[TMP5:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP4]])
; IR-NEXT: [[TMP6:%.*]] = extractvalue { i1, i64 } [[TMP5]], 0
; IR-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP5]], 1
; IR-NEXT: br i1 [[TMP6]], label %[[BB13]], label %[[BB31]]
; IR: [[BB14]]:
; IR-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP16:%.*]], %[[FLOW1:.*]] ], [ 0, %[[BB14_LR_PH]] ]
; IR-NEXT: [[MY_TMP1037:%.*]] = phi i32 [ [[MY_TMP1033]], %[[BB14_LR_PH]] ], [ [[TMP12:%.*]], %[[FLOW1]] ]
; IR-NEXT: [[MY_TMP936:%.*]] = phi <4 x i32> [ [[MY_TMP932]], %[[BB14_LR_PH]] ], [ [[TMP11:%.*]], %[[FLOW1]] ]
; IR-NEXT: [[MY_TMP15:%.*]] = icmp eq i32 [[MY_TMP1037]], 1
; IR-NEXT: [[TMP8:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP15]])
; IR-NEXT: [[TMP9:%.*]] = extractvalue { i1, i64 } [[TMP8]], 0
; IR-NEXT: [[TMP10:%.*]] = extractvalue { i1, i64 } [[TMP8]], 1
; IR-NEXT: br i1 [[TMP9]], label %[[BB16:.*]], label %[[FLOW1]]
; IR: [[BB16]]:
; IR-NEXT: [[MY_TMP17:%.*]] = bitcast i64 [[MY_TMP3]] to <2 x i32>
; IR-NEXT: br label %[[BB18:.*]]
; IR: [[FLOW1]]:
; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], %[[BB21:.*]] ], [ undef, %[[BB14]] ]
; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], %[[BB21]] ], [ undef, %[[BB14]] ]
; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], %[[BB21]] ], [ true, %[[BB14]] ]
; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], %[[BB21]] ], [ false, %[[BB14]] ]
; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, %[[BB21]] ], [ true, %[[BB14]] ]
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]])
; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]])
; IR-NEXT: br i1 [[TMP17]], label %[[FLOW2:.*]], label %[[BB14]]
; IR: [[BB18]]:
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9
; IR-NEXT: br i1 [[MY_TMP20]], label %[[BB21]], label %[[BB18]]
; IR: [[BB21]]:
; IR-NEXT: [[MY_TMP22:%.*]] = extractelement <2 x i32> [[MY_TMP17]], i64 1
; IR-NEXT: [[MY_TMP23:%.*]] = lshr i32 [[MY_TMP22]], 16
; IR-NEXT: [[MY_TMP24:%.*]] = select i1 undef, i32 undef, i32 [[MY_TMP23]]
; IR-NEXT: [[MY_TMP25:%.*]] = uitofp i32 [[MY_TMP24]] to float
; IR-NEXT: [[MY_TMP26:%.*]] = fmul float [[MY_TMP25]], 0x3EF0001000000000
; IR-NEXT: [[MY_TMP27:%.*]] = fsub float [[MY_TMP26]], undef
; IR-NEXT: [[MY_TMP28:%.*]] = fcmp olt float [[MY_TMP27]], 5.000000e-01
; IR-NEXT: [[MY_TMP29:%.*]] = select i1 [[MY_TMP28]], i64 1, i64 2
; IR-NEXT: [[MY_TMP30:%.*]] = extractelement <4 x i32> [[MY_TMP936]], i64 [[MY_TMP29]]
; IR-NEXT: [[MY_TMP7:%.*]] = zext i32 [[MY_TMP30]] to i64
; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) undef, i64 [[MY_TMP7]]
; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, ptr addrspace(1) [[MY_TMP8]], align 16
; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0
; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9
; IR-NEXT: br label %[[FLOW1]]
; IR: [[FLOW2]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])
; IR-NEXT: [[TMP18:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
; IR-NEXT: [[TMP19:%.*]] = extractvalue { i1, i64 } [[TMP18]], 0
; IR-NEXT: [[TMP20]] = extractvalue { i1, i64 } [[TMP18]], 1
; IR-NEXT: br i1 [[TMP19]], label %[[BB31_LOOPEXIT:.*]], label %[[FLOW3]]
; IR: [[BB31_LOOPEXIT]]:
; IR-NEXT: br label %[[FLOW3]]
; IR: [[BB31]]:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
; IR-NEXT: store volatile i32 0, ptr addrspace(1) undef, align 4
; IR-NEXT: ret void
bb:
  ; Guard in front of the loop: enter %bb14.lr.ph only when the volatile load
  ; is < 9, otherwise go straight to %bb13.
  %my.tmp1134 = load volatile i32, ptr addrspace(1) undef
  %my.tmp1235 = icmp slt i32 %my.tmp1134, 9
  br i1 %my.tmp1235, label %bb14.lr.ph, label %bb13

bb14.lr.ph:                                       ; preds = %bb
  ; Computes the initial values for the phis in %bb14 and the i64 that %bb16
  ; later bitcasts.
  %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %my.tmp1 = zext i32 %my.tmp to i64
  %my.tmp2 = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 %my.tmp1
  %my.tmp3 = load i64, ptr addrspace(1) %my.tmp2, align 16
  %my.tmp932 = load <4 x i32>, ptr addrspace(1) undef, align 16
  %my.tmp1033 = extractelement <4 x i32> %my.tmp932, i64 0
  br label %bb14

bb4.bb13_crit_edge:                               ; preds = %bb21
  br label %bb13

bb13:                                             ; preds = %bb4.bb13_crit_edge, %bb
  br label %bb31

bb14:                                             ; preds = %bb21, %bb14.lr.ph
  ; Outer loop header: continues into %bb16 only when %my.tmp1037 equals 1,
  ; otherwise leaves through %bb31.loopexit.
  %my.tmp1037 = phi i32 [ %my.tmp1033, %bb14.lr.ph ], [ %my.tmp10, %bb21 ]
  %my.tmp936 = phi <4 x i32> [ %my.tmp932, %bb14.lr.ph ], [ %my.tmp9, %bb21 ]
  %my.tmp15 = icmp eq i32 %my.tmp1037, 1
  br i1 %my.tmp15, label %bb16, label %bb31.loopexit

bb16:                                             ; preds = %bb14
  %my.tmp17 = bitcast i64 %my.tmp3 to <2 x i32>
  br label %bb18

bb18:                                             ; preds = %bb18, %bb16
  ; Inner self-loop: repeats until the volatile load yields a value < 9.
  %my.tmp19 = load volatile i32, ptr addrspace(1) undef
  %my.tmp20 = icmp slt i32 %my.tmp19, 9
  br i1 %my.tmp20, label %bb21, label %bb18

bb21:                                             ; preds = %bb18
  ; Produces the next %my.tmp9 / %my.tmp10 for the phis in %bb14, then takes
  ; the back edge when %my.tmp12 is true or exits via %bb4.bb13_crit_edge.
  %my.tmp22 = extractelement <2 x i32> %my.tmp17, i64 1
  %my.tmp23 = lshr i32 %my.tmp22, 16
  %my.tmp24 = select i1 undef, i32 undef, i32 %my.tmp23
  %my.tmp25 = uitofp i32 %my.tmp24 to float
  %my.tmp26 = fmul float %my.tmp25, 0x3EF0001000000000
  %my.tmp27 = fsub float %my.tmp26, undef
  %my.tmp28 = fcmp olt float %my.tmp27, 5.000000e-01
  %my.tmp29 = select i1 %my.tmp28, i64 1, i64 2
  %my.tmp30 = extractelement <4 x i32> %my.tmp936, i64 %my.tmp29
  %my.tmp7 = zext i32 %my.tmp30 to i64
  %my.tmp8 = getelementptr inbounds <4 x i32>, ptr addrspace(1) undef, i64 %my.tmp7
  %my.tmp9 = load <4 x i32>, ptr addrspace(1) %my.tmp8, align 16
  %my.tmp10 = extractelement <4 x i32> %my.tmp9, i64 0
  %my.tmp11 = load volatile i32, ptr addrspace(1) undef
  ; The expected output above has this compare as `icmp sge`, i.e. inverted
  ; (presumably by the structurizer; confirm before changing either side).
  %my.tmp12 = icmp slt i32 %my.tmp11, 9
  br i1 %my.tmp12, label %bb14, label %bb4.bb13_crit_edge

bb31.loopexit:                                    ; preds = %bb14
  br label %bb31

bb31:                                             ; preds = %bb31.loopexit, %bb13
  ; Common exit: every path through the function ends with this volatile store.
  store volatile i32 0, ptr addrspace(1) undef
  ret void
}

; Intrinsic called by both kernels to obtain the workitem id.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to both kernel definitions; #1 is attached to the intrinsic
; declaration and to its call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }