1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=riscv64 -mattr=+v,+c < %s | FileCheck %s 3 4; This previously crashed when spilling a GPR because when we removed a dead 5; ADDI we weren't removing it from the LIS instruction map. Needs +c to trigger. 6 7define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) { 8; CHECK-LABEL: main: 9; CHECK: # %bb.0: # %entry 10; CHECK-NEXT: addi sp, sp, -112 11; CHECK-NEXT: .cfi_def_cfa_offset 112 12; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill 13; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill 14; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill 15; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill 16; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill 17; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill 18; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill 19; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill 20; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill 21; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill 22; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill 23; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill 24; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill 25; CHECK-NEXT: .cfi_offset ra, -8 26; CHECK-NEXT: .cfi_offset s0, -16 27; CHECK-NEXT: .cfi_offset s1, -24 28; CHECK-NEXT: .cfi_offset s2, -32 29; CHECK-NEXT: .cfi_offset s3, -40 30; CHECK-NEXT: .cfi_offset s4, -48 31; CHECK-NEXT: .cfi_offset s5, -56 32; CHECK-NEXT: .cfi_offset s6, -64 33; CHECK-NEXT: .cfi_offset s7, -72 34; CHECK-NEXT: .cfi_offset s8, -80 35; CHECK-NEXT: .cfi_offset s9, -88 36; CHECK-NEXT: .cfi_offset s10, -96 37; CHECK-NEXT: .cfi_offset s11, -104 38; CHECK-NEXT: li a6, 0 39; CHECK-NEXT: li s2, 8 40; CHECK-NEXT: li t0, 12 41; CHECK-NEXT: li s0, 4 42; CHECK-NEXT: li t1, 20 43; CHECK-NEXT: ld a1, 112(sp) 44; CHECK-NEXT: sd a1, 0(sp) # 8-byte Folded Spill 45; CHECK-NEXT: 
vsetivli zero, 8, e32, m2, ta, ma 46; CHECK-NEXT: vmv.v.i v8, 0 47; CHECK-NEXT: andi t3, a4, 1 48; CHECK-NEXT: li t2, 4 49; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i 50; CHECK-NEXT: # =>This Loop Header: Depth=1 51; CHECK-NEXT: # Child Loop BB0_2 Depth 2 52; CHECK-NEXT: # Child Loop BB0_3 Depth 3 53; CHECK-NEXT: # Child Loop BB0_4 Depth 4 54; CHECK-NEXT: # Child Loop BB0_5 Depth 5 55; CHECK-NEXT: mv t4, t1 56; CHECK-NEXT: mv t5, t2 57; CHECK-NEXT: mv t6, t0 58; CHECK-NEXT: mv a7, s2 59; CHECK-NEXT: mv s4, a6 60; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i 61; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 62; CHECK-NEXT: # => This Loop Header: Depth=2 63; CHECK-NEXT: # Child Loop BB0_3 Depth 3 64; CHECK-NEXT: # Child Loop BB0_4 Depth 4 65; CHECK-NEXT: # Child Loop BB0_5 Depth 5 66; CHECK-NEXT: mv s5, t4 67; CHECK-NEXT: mv s6, t5 68; CHECK-NEXT: mv s7, t6 69; CHECK-NEXT: mv s3, a7 70; CHECK-NEXT: mv s9, s4 71; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i 72; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 73; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 74; CHECK-NEXT: # => This Loop Header: Depth=3 75; CHECK-NEXT: # Child Loop BB0_4 Depth 4 76; CHECK-NEXT: # Child Loop BB0_5 Depth 5 77; CHECK-NEXT: mv s11, s5 78; CHECK-NEXT: mv a3, s6 79; CHECK-NEXT: mv ra, s7 80; CHECK-NEXT: mv s8, s3 81; CHECK-NEXT: mv s1, s9 82; CHECK-NEXT: .LBB0_4: # %vector.ph.i 83; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 84; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 85; CHECK-NEXT: # Parent Loop BB0_3 Depth=3 86; CHECK-NEXT: # => This Loop Header: Depth=4 87; CHECK-NEXT: # Child Loop BB0_5 Depth 5 88; CHECK-NEXT: li a1, 0 89; CHECK-NEXT: .LBB0_5: # %vector.body.i 90; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 91; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 92; CHECK-NEXT: # Parent Loop BB0_3 Depth=3 93; CHECK-NEXT: # Parent Loop BB0_4 Depth=4 94; CHECK-NEXT: # => This Inner Loop Header: Depth=5 95; CHECK-NEXT: addi a5, a1, 4 96; CHECK-NEXT: add a4, s8, a1 97; CHECK-NEXT: add a1, a1, a3 98; CHECK-NEXT: vse32.v v8, 
(a4), v0.t 99; CHECK-NEXT: vse32.v v8, (a1), v0.t 100; CHECK-NEXT: mv a1, a5 101; CHECK-NEXT: bne a5, s0, .LBB0_5 102; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i 103; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4 104; CHECK-NEXT: addi s1, s1, 4 105; CHECK-NEXT: addi s8, s8, 4 106; CHECK-NEXT: addi ra, ra, 4 107; CHECK-NEXT: addi a3, a3, 4 108; CHECK-NEXT: andi s10, a0, 1 109; CHECK-NEXT: addi s11, s11, 4 110; CHECK-NEXT: beqz s10, .LBB0_4 111; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i 112; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3 113; CHECK-NEXT: addi s9, s9, 4 114; CHECK-NEXT: addi s3, s3, 4 115; CHECK-NEXT: addi s7, s7, 4 116; CHECK-NEXT: addi s6, s6, 4 117; CHECK-NEXT: andi a1, a2, 1 118; CHECK-NEXT: addi s5, s5, 4 119; CHECK-NEXT: beqz a1, .LBB0_3 120; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i 121; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2 122; CHECK-NEXT: addi s4, s4, 4 123; CHECK-NEXT: addi a7, a7, 4 124; CHECK-NEXT: addi t6, t6, 4 125; CHECK-NEXT: addi t5, t5, 4 126; CHECK-NEXT: addi t4, t4, 4 127; CHECK-NEXT: beqz t3, .LBB0_2 128; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i 129; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 130; CHECK-NEXT: addi a6, a6, 4 131; CHECK-NEXT: addi s2, s2, 4 132; CHECK-NEXT: addi t0, t0, 4 133; CHECK-NEXT: addi t2, t2, 4 134; CHECK-NEXT: addi t1, t1, 4 135; CHECK-NEXT: beqz a1, .LBB0_1 136; CHECK-NEXT: # %bb.10: # %l.exit 137; CHECK-NEXT: li a0, 0 138; CHECK-NEXT: jalr a0 139; CHECK-NEXT: beqz s10, .LBB0_12 140; CHECK-NEXT: .LBB0_11: # %for.body7.us.14 141; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 142; CHECK-NEXT: j .LBB0_11 143; CHECK-NEXT: .LBB0_12: # %for.body7.us.19 144; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 145; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload 146; CHECK-NEXT: vmv.s.x v8, a0 147; CHECK-NEXT: vmv.v.i v16, 0 148; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma 149; CHECK-NEXT: vslideup.vi v16, v8, 1 150; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 151; CHECK-NEXT: vmsne.vi v8, v16, 0 
152; CHECK-NEXT: vmv.x.s a0, v8 153; CHECK-NEXT: snez a0, a0 154; CHECK-NEXT: sb a0, 0(zero) 155; CHECK-NEXT: li a0, 0 156; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload 157; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload 158; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload 159; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload 160; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload 161; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload 162; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload 163; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload 164; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload 165; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload 166; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload 167; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload 168; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload 169; CHECK-NEXT: .cfi_restore ra 170; CHECK-NEXT: .cfi_restore s0 171; CHECK-NEXT: .cfi_restore s1 172; CHECK-NEXT: .cfi_restore s2 173; CHECK-NEXT: .cfi_restore s3 174; CHECK-NEXT: .cfi_restore s4 175; CHECK-NEXT: .cfi_restore s5 176; CHECK-NEXT: .cfi_restore s6 177; CHECK-NEXT: .cfi_restore s7 178; CHECK-NEXT: .cfi_restore s8 179; CHECK-NEXT: .cfi_restore s9 180; CHECK-NEXT: .cfi_restore s10 181; CHECK-NEXT: .cfi_restore s11 182; CHECK-NEXT: addi sp, sp, 112 183; CHECK-NEXT: .cfi_def_cfa_offset 0 184; CHECK-NEXT: ret 185entry: 186 %0 = tail call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() 187 br label %for.cond1.preheader.i 188 189for.cond1.preheader.i: ; preds = %for.cond.cleanup3.i, %entry 190 %arg.21 = phi i64 [ 0, %entry ], [ %indvars.iv.next74.i, %for.cond.cleanup3.i ] 191 br label %for.cond5.preheader.i 192 193for.cond5.preheader.i: ; preds = %for.cond.cleanup7.i, %for.cond1.preheader.i 194 %arg.42 = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next70.i, %for.cond.cleanup7.i ] 195 %1 = add i64 %arg.42, %arg.21 196 br label %for.cond9.preheader.i 197 198for.cond.cleanup3.i: ; preds = %for.cond.cleanup7.i 199 %indvars.iv.next74.i = add i64 
%arg.21, 1 200 br i1 %arg.3, label %l.exit, label %for.cond1.preheader.i 201 202for.cond9.preheader.i: ; preds = %for.cond.cleanup11.i, %for.cond5.preheader.i 203 %arg.74 = phi i64 [ 0, %for.cond5.preheader.i ], [ %indvars.iv.next66.i, %for.cond.cleanup11.i ] 204 %2 = add i64 %1, %arg.74 205 br label %vector.ph.i 206 207for.cond.cleanup7.i: ; preds = %for.cond.cleanup11.i 208 %indvars.iv.next70.i = add i64 %arg.42, 1 209 br i1 %arg.5, label %for.cond.cleanup3.i, label %for.cond5.preheader.i 210 211vector.ph.i: ; preds = %for.cond.cleanup15.i, %for.cond9.preheader.i 212 %arg.96 = phi i64 [ 0, %for.cond9.preheader.i ], [ %indvars.iv.next62.i, %for.cond.cleanup15.i ] 213 %3 = add i64 %2, %arg.96 214 %broadcast.splatinsert.i = insertelement <vscale x 4 x i64> zeroinitializer, i64 %3, i64 0 215 %broadcast.splat.i = shufflevector <vscale x 4 x i64> %broadcast.splatinsert.i, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer 216 br label %vector.body.i 217 218vector.body.i: ; preds = %vector.body.i, %vector.ph.i 219 %index.i = phi i64 [ 0, %vector.ph.i ], [ %index.next.i, %vector.body.i ] 220 %vec.ind.i = phi <vscale x 4 x i64> [ %0, %vector.ph.i ], [ %6, %vector.body.i ] 221 %4 = add <vscale x 4 x i64> %vec.ind.i, %broadcast.splat.i 222 %5 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %4 223 tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %5, i32 4, <vscale x 4 x i1> zeroinitializer) 224 %6 = add <vscale x 4 x i64> %vec.ind.i, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) 225 %7 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %6 226 tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %7, i32 4, <vscale x 4 x i1> zeroinitializer) 227 %arg.100 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> 
insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) 228 %arg.101 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.100 229 tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.101, i32 4, <vscale x 4 x i1> %arg.6) 230 %arg.102 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) 231 %arg.103 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.102 232 tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.103, i32 4, <vscale x 4 x i1> zeroinitializer) 233 %arg.104 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) 234 %arg.105 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.104 235 tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.105, i32 4, <vscale x 4 x i1> %arg.6) 236 %arg.106 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 5, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) 237 %arg.107 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.106 238 tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.107, i32 4, <vscale x 4 x i1> zeroinitializer) 239 %index.next.i = add i64 %index.i, 1 240 %arg.108 = icmp eq i64 %index.i, 0 241 br i1 %arg.108, label %for.cond.cleanup15.i, label %vector.body.i 242 243for.cond.cleanup11.i: ; preds = %for.cond.cleanup15.i 244 %indvars.iv.next66.i = add i64 %arg.74, 1 245 br i1 %arg.3, label %for.cond.cleanup7.i, label 
%for.cond9.preheader.i 246 247for.cond.cleanup15.i: ; preds = %vector.body.i 248 %indvars.iv.next62.i = add i64 %arg.96, 1 249 br i1 %arg.1, label %for.cond.cleanup11.i, label %vector.ph.i 250 251l.exit: ; preds = %for.cond.cleanup3.i 252 tail call void null() 253 br i1 %arg.1, label %for.body7.us.14, label %for.body7.us.19 254 255for.body7.us.14: ; preds = %for.body7.us.14, %l.exit 256 br label %for.body7.us.14 257 258for.body7.us.19: ; preds = %l.exit 259 %arg.109 = insertelement <32 x i32> zeroinitializer, i32 %arg.10, i64 1 260 %8 = icmp ne <32 x i32> %arg.109, zeroinitializer 261 %9 = bitcast <32 x i1> %8 to i32 262 %op.rdx13 = icmp ne i32 %9, 0 263 %op.rdx = zext i1 %op.rdx13 to i8 264 store i8 %op.rdx, ptr null, align 1 265 ret i32 0 266} 267
; -----------------------------------------------------------------------------
; Reader's guide to @main above.
;
; Purpose: reduced reproducer for the crash described in the header comment.
; The RUN line compiles with -mtriple=riscv64 -mattr=+v,+c and checks the full
; assembly. The CHECK lines are autogenerated (see the NOTE banner); regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
;
; Arguments actually read by the body:
;   %arg.1, %arg.3, %arg.5  - i1 loop-exit conditions (see below).
;   %arg.6                  - <vscale x 4 x i1> mask for two of the scatters.
;   %arg.10                 - i32 inserted into lane 1 in %for.body7.us.19.
; %arg.2, %arg.4, %arg.7, %arg.8 and %arg.9 are never referenced. Values such
; as %arg.21, %arg.42, %arg.74 and %arg.96 are unrelated phi nodes whose names
; merely share a prefix with those parameters.
;
; Control flow (the CHECK loop annotations report Depth=1..5):
;   %for.cond1.preheader.i   outermost; left via %for.cond.cleanup3.i on %arg.3
;   %for.cond5.preheader.i   left via %for.cond.cleanup7.i  on %arg.5
;   %for.cond9.preheader.i   left via %for.cond.cleanup11.i on %arg.3
;   %vector.ph.i             left via %for.cond.cleanup15.i on %arg.1
;   %vector.body.i           innermost; leaves when %index.i == 0
; Each level has an i64 induction phi starting at 0 and stepped by 1; the sums
; %1, %2 and %3 accumulate them and %3 is written into lane 0 of
; %broadcast.splatinsert.i, which the shufflevector then splats.
;
; %vector.body.i issues six llvm.masked.scatter calls that store
; zeroinitializer through pointers formed by a GEP from null. Four use a
; zeroinitializer (all-false) mask and two use %arg.6. The expected assembly
; for the inner loop contains exactly two "vse32.v ..., v0.t" stores;
; presumably these are the two %arg.6-masked scatters - confirm when
; regenerating.
;
; %l.exit calls through a null function pointer and then branches on %arg.1 to
; either the self-looping %for.body7.us.14 or %for.body7.us.19. The latter
; inserts %arg.10 into lane 1 of a zero <32 x i32>, compares every lane
; against zero, bitcasts the <32 x i1> result to i32, tests it for non-zero,
; stores the resulting i8 to null and returns 0.
;
; The "sd a1, 0(sp) # 8-byte Folded Spill" in the prologue and the matching
; "ld a0, 0(sp) # 8-byte Folded Reload" in .LBB0_12 appear to be the GPR
; spill/reload that the header comment refers to - TODO confirm.
;
; NOTE(review): the text above carries fused line-number prefixes (for example
; "7define", "185entry:") and collapsed newlines, which looks like an
; extraction artifact; verify against the upstream copy before running llc.
; -----------------------------------------------------------------------------