; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s

; Codegen test for variable-length llvm.memcpy on flat pointers, compiled with
; llc at -O3 for amdgcn-amd-amdhsa / gfx900. The expected assembly below shows
; each memcpy lowered to a 16-byte "loop-memcpy-expansion" loop followed by a
; byte-wise "loop-memcpy-residual" loop.
; NOTE(review): the function names suggest this is a regression test for
; issue 63986 -- confirm against the issue tracker.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Element type of the array indexed in @issue63986: eight i64 values.
%"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64] }

; Performs one memcpy whose length is a umin over a constant expression, then
; enters an endless loop that repeats a memcpy of %0 bytes. Both copies read
; from a null source and write to element %idxprom of a null-based array of
; 32 Buffer structs.
define void @issue63986(i64 %0, i64 %idxprom) {
; CHECK-LABEL: issue63986:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_mov_b32_e32 v7, s5
; CHECK-NEXT: v_mov_b32_e32 v6, s4
; CHECK-NEXT: flat_load_dwordx4 v[6:9], v[6:7]
; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s4, v4
; CHECK-NEXT: s_add_u32 s4, s4, 16
; CHECK-NEXT: v_mov_b32_e32 v11, s5
; CHECK-NEXT: s_addc_u32 s5, s5, 0
; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v5, v11, vcc
; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[10:11], v[6:9]
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
; CHECK-NEXT: s_branch .LBB0_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
; CHECK-NEXT: s_branch .LBB0_5
; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
; CHECK-NEXT: s_cbranch_execnz .LBB0_7
; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
; CHECK-NEXT: v_or_b32_e32 v2, 32, v4
; CHECK-NEXT: v_mov_b32_e32 v3, v5
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
; CHECK-NEXT: s_add_u32 s6, 32, s4
; CHECK-NEXT: s_addc_u32 s7, 0, s5
; CHECK-NEXT: v_mov_b32_e32 v6, s6
; CHECK-NEXT: v_mov_b32_e32 v7, s7
; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
; CHECK-NEXT: v_mov_b32_e32 v9, s5
; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s4, v2
; CHECK-NEXT: v_mov_b32_e32 v7, v5
; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v9, vcc
; CHECK-NEXT: s_add_u32 s4, s4, 1
; CHECK-NEXT: v_mov_b32_e32 v6, v4
; CHECK-NEXT: s_addc_u32 s5, s5, 0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_byte v[8:9], v10
; CHECK-NEXT: .LBB0_7: ; %post-loop-memcpy-expansion
; CHECK-NEXT: v_and_b32_e32 v2, 15, v0
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v1, vcc
; CHECK-NEXT: s_branch .LBB0_10
; CHECK-NEXT: .LBB0_8: ; %Flow14
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: .LBB0_9: ; %Flow16
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
; CHECK-NEXT: s_cbranch_vccz .LBB0_18
; CHECK-NEXT: .LBB0_10: ; %while.cond
; CHECK-NEXT: ; =>This Loop Header: Depth=1
; CHECK-NEXT: ; Child Loop BB0_12 Depth 2
; CHECK-NEXT: ; Child Loop BB0_16 Depth 2
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_13
; CHECK-NEXT: ; %bb.11: ; %loop-memcpy-expansion2.preheader
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT: s_mov_b64 s[10:11], 0
; CHECK-NEXT: s_mov_b64 s[12:13], 0
; CHECK-NEXT: .LBB0_12: ; %loop-memcpy-expansion2
; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
; CHECK-NEXT: v_mov_b32_e32 v8, s12
; CHECK-NEXT: v_mov_b32_e32 v9, s13
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
; CHECK-NEXT: v_mov_b32_e32 v13, s13
; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s12, v4
; CHECK-NEXT: s_add_u32 s12, s12, 16
; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v5, v13, vcc
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
; CHECK-NEXT: s_cbranch_execnz .LBB0_12
; CHECK-NEXT: .LBB0_13: ; %Flow15
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: s_cbranch_execz .LBB0_9
; CHECK-NEXT: ; %bb.14: ; %loop-memcpy-residual-header5
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
; CHECK-NEXT: s_cbranch_execz .LBB0_8
; CHECK-NEXT: ; %bb.15: ; %loop-memcpy-residual4.preheader
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT: s_mov_b64 s[12:13], 0
; CHECK-NEXT: s_mov_b64 s[14:15], 0
; CHECK-NEXT: .LBB0_16: ; %loop-memcpy-residual4
; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
; CHECK-NEXT: v_mov_b32_e32 v10, s15
; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s14, v0
; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v1, v10, vcc
; CHECK-NEXT: flat_load_ubyte v11, v[8:9]
; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s14, v6
; CHECK-NEXT: s_add_u32 s14, s14, 1
; CHECK-NEXT: s_addc_u32 s15, s15, 0
; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3]
; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v7, v10, vcc
; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_byte v[8:9], v11
; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
; CHECK-NEXT: s_cbranch_execnz .LBB0_16
; CHECK-NEXT: ; %bb.17: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
; CHECK-NEXT: s_branch .LBB0_8
; CHECK-NEXT: .LBB0_18: ; %DummyReturnBlock
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Destination: element %idxprom of a [32 x Buffer] array based at null.
  %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr null, i64 0, i64 %idxprom
  ; Copy length: umin of 56 and the difference between two addrspace(4) -> flat
  ; addrspacecast constants (integer address 32 and null), so the length is
  ; not a plain literal in the IR.
  %spec.select = tail call i64 @llvm.umin.i64(i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) inttoptr (i64 32 to ptr addrspace(4)) to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 56)
  tail call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx, ptr null, i64 %spec.select, i1 false)
  br label %while.cond

while.cond:  ; preds = %while.cond, %entry
  ; Endless loop (the block only branches back to itself): copy %0 bytes on
  ; every iteration.
  tail call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx, ptr null, i64 %0, i1 false)
  br label %while.cond
}

; Contains no memcpy call; its blocks carry the loop-memcpy-* names that also
; appear in the expected assembly of @issue63986.
; NOTE(review): judging by the name, this is presumably a reduced form of
; @issue63986 with the memcpy expansion already applied -- confirm.
define void @issue63986_reduced_expanded(i64 %idxprom) {
; CHECK-LABEL: issue63986_reduced_expanded:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ; %bb.1: ; %loop-memcpy-expansion.preheader
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
; CHECK-NEXT: s_and_b32 s4, 32, 15
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_cbranch_scc0 .LBB1_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: s_branch .LBB1_5
; CHECK-NEXT: .LBB1_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
; CHECK-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; CHECK-NEXT: s_cbranch_execnz .LBB1_8
; CHECK-NEXT: .LBB1_5: ; %loop-memcpy-residual.preheader
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: s_mov_b64 s[6:7], 0
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: .LBB1_6: ; %loop-memcpy-residual
; CHECK-NEXT: s_add_u32 s4, s6, 1
; CHECK-NEXT: s_addc_u32 s5, s7, 0
; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
; CHECK-NEXT: s_mov_b64 s[6:7], 1
; CHECK-NEXT: s_cbranch_vccnz .LBB1_6
; CHECK-NEXT: ; %bb.7: ; %Flow
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: .LBB1_8: ; %post-loop-memcpy-expansion
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_mov_b32_e32 v4, v2
; CHECK-NEXT: v_mov_b32_e32 v5, v2
; CHECK-NEXT: s_and_b64 vcc, exec, 0
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: .LBB1_9: ; %loop-memcpy-expansion2
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccz .LBB1_9
; CHECK-NEXT: ; %bb.10: ; %DummyReturnBlock
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Same umin-over-constant-expression length as in @issue63986.
  %spec.select = tail call i64 @llvm.umin.i64(i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) inttoptr (i64 32 to ptr addrspace(4)) to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 56)
  %i = trunc i64 %spec.select to i32
  ; %i1 / %i3: the length modulo 16, compared against the residual loop index.
  %i1 = urem i32 %i, 16
  %i2 = zext i32 %i to i64
  %i3 = zext i32 %i1 to i64
  %i4 = icmp ne i64 %i2, 0
  br i1 %i4, label %loop-memcpy-expansion.preheader, label %loop-memcpy-residual-header

loop-memcpy-expansion.preheader:  ; preds = %entry
  ; A non-zero length returns immediately in this function.
  ret void

loop-memcpy-residual:  ; preds = %loop-memcpy-residual.preheader, %loop-memcpy-residual
  ; The index phi is 0 on entry and the constant 1 on every back edge.
  %residual-loop-index1 = phi i64 [ 1, %loop-memcpy-residual ], [ 0, %loop-memcpy-residual.preheader ]
  %i5 = add i64 %residual-loop-index1, 1
  %i6 = icmp ult i64 %i5, %i3
  br i1 %i6, label %loop-memcpy-residual, label %post-loop-memcpy-expansion

post-loop-memcpy-expansion:  ; preds = %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge, %loop-memcpy-residual
  ; Byte offset for the store below: %.pre from the critical edge, or 0 when
  ; arriving from the residual loop.
  %.pre-phi = phi i64 [ %.pre, %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge ], [ 0, %loop-memcpy-residual ]
  br label %loop-memcpy-expansion2

loop-memcpy-expansion2:  ; preds = %loop-memcpy-expansion2, %post-loop-memcpy-expansion
  ; Endless loop: stores a zero <4 x i32> at null + %.pre-phi and branches back
  ; to itself.
  %scevgep7 = getelementptr i8, ptr null, i64 %.pre-phi
  store <4 x i32> zeroinitializer, ptr %scevgep7, align 1
  br label %loop-memcpy-expansion2

loop-memcpy-residual-header:  ; preds = %entry
  ; Enter the residual loop only when the remainder %i3 is non-zero.
  %i7 = icmp ne i64 %i3, 0
  br i1 %i7, label %loop-memcpy-residual.preheader, label %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge

loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge:  ; preds = %loop-memcpy-residual-header
  %.pre = shl i64 %idxprom, 1
  br label %post-loop-memcpy-expansion

loop-memcpy-residual.preheader:  ; preds = %loop-memcpy-residual-header
  br label %loop-memcpy-residual
}

; Intrinsics used by the functions above, with their attribute groups.
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
declare i64 @llvm.umin.i64(i64, i64) #1

attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }