; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s

; The following tests check whether inserting VSETVLI avoids inserting
; unneeded vsetvlis across basic blocks.

declare i64 @llvm.riscv.vsetvli(i64, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)

declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double>, double, i64)
declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float>, float, i64)

declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, ptr nocapture, i64)
declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, ptr nocapture, i64)

define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %if.else
; CHECK-NEXT: vfsub.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else: ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  ret <vscale x 1 x double> %c.0
}

@scratch = global i8 0, align 16

define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB1_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_2: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else: ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  ret <vscale x 1 x double> %3
}

define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a1, .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_2: # %if.else
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else: ; preds = %entry
  %2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %2)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %vl.0 = phi i64 [ %0, %if.then ], [ %2, %if.else ]
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %vl.0)
  ret <vscale x 1 x double> %4
}

define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a1, .LBB3_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: lui a1, %hi(.LCPI3_1)
; CHECK-NEXT: fld fa4, %lo(.LCPI3_1)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v10, fa5
; CHECK-NEXT: vfmv.v.f v11, fa4
; CHECK-NEXT: vfadd.vv v10, v10, v11
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse64.v v10, (a1)
; CHECK-NEXT: j .LBB3_3
; CHECK-NEXT: .LBB3_2: # %if.else
; CHECK-NEXT: lui a1, 260096
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: lui a1, 262144
; CHECK-NEXT: vmv.v.x v11, a1
; CHECK-NEXT: vfadd.vv v10, v10, v11
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse32.v v10, (a1)
; CHECK-NEXT: .LBB3_3: # %if.end
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %avl)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %avl)
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 7, i64 %avl)
  %3 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, ptr %3, i64 %avl)
  br label %if.end

if.else: ; preds = %entry
  %4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %avl)
  %5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %avl)
  %6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 7, i64 %avl)
  %7 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, ptr %7, i64 %avl)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 7, i64 %avl)
  ret <vscale x 1 x double> %8
}

define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: bnez a2, .LBB4_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: beqz a1, .LBB4_4
; CHECK-NEXT: .LBB4_2: # %if.then4
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB4_3: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: bnez a1, .LBB4_2
; CHECK-NEXT: .LBB4_4: # %if.else5
; CHECK-NEXT: vfmul.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %conv = zext i8 %cond to i32
  %and = and i32 %conv, 1
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else: ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %and2 = and i32 %conv, 2
  %tobool3 = icmp eq i32 %and2, 0
  br i1 %tobool3, label %if.else5, label %if.then4

if.then4: ; preds = %if.end
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  br label %if.end6

if.else5: ; preds = %if.end
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 7, i64 %0)
  br label %if.end6

if.end6: ; preds = %if.else5, %if.then4
  %c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
  ret <vscale x 1 x double> %c.1
}

; FIXME: The explicit vsetvli in if.then4 could be removed as it is redundant
; with the one in the entry, but we lack the ability to remove explicit
; vsetvli instructions.
define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: bnez a2, .LBB5_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v8, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: beqz a1, .LBB5_4
; CHECK-NEXT: .LBB5_2: # %if.then4
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: lui a1, %hi(.LCPI5_1)
; CHECK-NEXT: fld fa4, %lo(.LCPI5_1)(a1)
; CHECK-NEXT: vfmv.v.f v9, fa5
; CHECK-NEXT: vfmv.v.f v10, fa4
; CHECK-NEXT: vfadd.vv v9, v9, v10
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse64.v v9, (a1)
; CHECK-NEXT: j .LBB5_5
; CHECK-NEXT: .LBB5_3: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: bnez a1, .LBB5_2
; CHECK-NEXT: .LBB5_4: # %if.else5
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: lui a1, 260096
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: lui a1, 262144
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vfadd.vv v9, v9, v10
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse32.v v9, (a1)
; CHECK-NEXT: .LBB5_5: # %if.end10
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v8, v8
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %conv = zext i8 %cond to i32
  %and = and i32 %conv, 1
  %tobool = icmp eq i32 %and, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else: ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %and2 = and i32 %conv, 2
  %tobool3 = icmp eq i32 %and2, 0
  br i1 %tobool3, label %if.else5, label %if.then4

if.then4: ; preds = %if.end
  %3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 1.000000e+00, i64 %3)
  %5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(<vscale x 1 x double> undef, double 2.000000e+00, i64 %3)
  %6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 7, i64 %3)
  %7 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, ptr %7, i64 %3)
  br label %if.end10

if.else5: ; preds = %if.end
  %8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
  %9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 1.000000e+00, i64 %8)
  %10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(<vscale x 2 x float> undef, float 2.000000e+00, i64 %8)
  %11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 7, i64 %8)
  %12 = bitcast ptr @scratch to ptr
  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, ptr %12, i64 %8)
  br label %if.end10

if.end10: ; preds = %if.else5, %if.then4
  %13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 7, i64 %0)
  ret <vscale x 1 x double> %13
}

declare void @foo()

; Similar to test1, but contains a call to @foo to act as a barrier to
; analyzing VL/VTYPE.
define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_2: # %if.else
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT: vfsub.vv v8, v9, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.else: ; preds = %entry
  call void @foo()
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  ret <vscale x 1 x double> %c.0
}

; Similar to test2, but contains a call to @foo to act as a barrier to
; analyzing VL/VTYPE.
define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: j .LBB7_3
; CHECK-NEXT: .LBB7_2: # %if.else
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: .LBB7_3: # %if.end
; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, ma
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then: ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  call void @foo()
  br label %if.end

if.else: ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %0)
  br label %if.end

if.end: ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 7, i64 %0)
  ret <vscale x 1 x double> %3
}

define void @saxpy_vec(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
; CHECK-LABEL: saxpy_vec:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT: beqz a3, .LBB8_2
; CHECK-NEXT: .LBB8_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: vle32.v v16, (a2)
; CHECK-NEXT: slli a4, a3, 2
; CHECK-NEXT: sub a0, a0, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT: vfmacc.vf v16, fa0, v8
; CHECK-NEXT: vse32.v v16, (a2)
; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: bnez a3, .LBB8_1
; CHECK-NEXT: .LBB8_2: # %for.end
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
  %cmp.not13 = icmp eq i64 %0, 0
  br i1 %cmp.not13, label %for.end, label %for.body

for.body: ; preds = %for.body, %entry
  %1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
  %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
  %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
  %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
  %2 = bitcast ptr %x.addr.015 to ptr
  %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
  %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
  %4 = bitcast ptr %y.addr.014 to ptr
  %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
  %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
  tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
  %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
  %sub = sub i64 %n.addr.016, %1
  %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 2, i64 3)
  %cmp.not = icmp eq i64 %7, 0
  br i1 %cmp.not, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
  ret void
}

define void @saxpy_vec_demanded_fields(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) {
; CHECK-LABEL: saxpy_vec_demanded_fields:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma
; CHECK-NEXT: beqz a3, .LBB9_2
; CHECK-NEXT: .LBB9_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: vle32.v v16, (a2)
; CHECK-NEXT: slli a4, a3, 2
; CHECK-NEXT: sub a0, a0, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT: vfmacc.vf v16, fa0, v8
; CHECK-NEXT: vse32.v v16, (a2)
; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: bnez a3, .LBB9_1
; CHECK-NEXT: .LBB9_2: # %for.end
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)
  %cmp.not13 = icmp eq i64 %0, 0
  br i1 %cmp.not13, label %for.end, label %for.body

for.body: ; preds = %for.body, %entry
  %1 = phi i64 [ %7, %for.body ], [ %0, %entry ]
  %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ]
  %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
  %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
  %2 = bitcast ptr %x.addr.015 to ptr
  %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %2, i64 %1)
  %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1
  %4 = bitcast ptr %y.addr.014 to ptr
  %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, ptr %4, i64 %1)
  %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 7, i64 %1, i64 0)
  tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, ptr %4, i64 %1)
  %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1
  %sub = sub i64 %n.addr.016, %1
  %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 1, i64 2)
  %cmp.not = icmp eq i64 %7, 0
  br i1 %cmp.not, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
  ret void
}

declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)
declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64, i64, i64)
declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, ptr nocapture, i64)

; We need a vsetvli in the last block because the predecessors have different
; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
; we don't need to read AVL and can keep VL unchanged.
define <vscale x 2 x i32> @test_vsetvli_x0_x0(ptr %x, ptr %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: andi a3, a3, 1
; CHECK-NEXT: beqz a3, .LBB10_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwcvt.x.x.v v8, v10
; CHECK-NEXT: .LBB10_2: # %if.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 %vl)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
  %e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
  ret <vscale x 2 x i32> %e
}
declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>, ptr, i64)
declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>, ptr, i64)
declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i16>, i16, i64)
declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64)

; We can use X0, X0 vsetvli in if2 and if2.end. The merge point at if.end will
; see two different vtypes with the same SEW/LMUL ratio. At if2.end we will only
; know the SEW/LMUL ratio for the if.end predecessor and the full vtype for
; the if2 predecessor. This makes sure we can merge a SEW/LMUL predecessor with
; a predecessor we know the vtype for.
define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
; CHECK-LABEL: test_vsetvli_x0_x0_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: andi a4, a4, 1
; CHECK-NEXT: beqz a4, .LBB11_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwadd.wv v9, v9, v10
; CHECK-NEXT: .LBB11_2: # %if.end
; CHECK-NEXT: andi a5, a5, 1
; CHECK-NEXT: beqz a5, .LBB11_4
; CHECK-NEXT: # %bb.3: # %if2
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v10, (a2)
; CHECK-NEXT: vwadd.wv v9, v9, v10
; CHECK-NEXT: .LBB11_4: # %if2.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 %vl)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
  br i1 %cond2, label %if2, label %if2.end

if2:
  %e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %z, i64 %vl)
  %f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
  br label %if2.end

if2.end:
  %g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
  %h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
  ret <vscale x 2 x i32> %h
}
declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)

; We should only need 1 vsetvli for this code.
define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
; CHECK-LABEL: vlmax:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: blez a0, .LBB12_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
; CHECK-NEXT: slli a5, a6, 3
; CHECK-NEXT: .LBB12_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: vle64.v v9, (a3)
; CHECK-NEXT: add a4, a4, a6
; CHECK-NEXT: add a3, a3, a5
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a1, a1, a5
; CHECK-NEXT: add a2, a2, a5
; CHECK-NEXT: blt a4, a0, .LBB12_2
; CHECK-NEXT: .LBB12_3: # %for.end
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %i.014
  %1 = bitcast ptr %arrayidx to ptr
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %1, i64 %0)
  %arrayidx1 = getelementptr inbounds double, ptr %b, i64 %i.014
  %3 = bitcast ptr %arrayidx1 to ptr
  %4 = tail call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, ptr %3, i64 %0)
  %5 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double> %2, <vscale x 1 x double> %4, i64 7, i64 %0)
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %6 = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> %5, ptr %6, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body, %entry
  ret void
}

; A single vector store in the loop with VL controlled by VLMAX
define void @vector_init_vlmax(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vlmax:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: blez a0, .LBB13_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT: slli a4, a3, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB13_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: blt a2, a0, .LBB13_2
; CHECK-NEXT: .LBB13_3: # %for.end
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body, %entry
  ret void
}

; Same as above, but VL comes from user provided AVL value
define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_N:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: blez a0, .LBB14_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, a0, e64, m1, ta, ma
; CHECK-NEXT: slli a4, a3, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB14_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: blt a2, a0, .LBB14_2
; CHECK-NEXT: .LBB14_3: # %for.end
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %N, i64 3, i64 0)
  %cmp13 = icmp sgt i64 %N, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body, %entry
  ret void
}

; Same as above, but VL is a hard coded constant (in the preheader)
define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli a3, 4, e64, m1, ta, ma
; CHECK-NEXT: slli a4, a3, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB15_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: blt a2, a0, .LBB15_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 %0)
  %add = add nuw nsw i64 %i.014, %0
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
  ret void
}

; Same as above, but result of vsetvli in preheader isn't used, and
; constant is repeated in loop
define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB16_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: blt a2, a0, .LBB16_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: ret
entry:
  tail call i64 @llvm.riscv.vsetvli(i64 4, i64 3, i64 0)
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
  %add = add nuw nsw i64 %i.014, 4
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
  ret void
}

; Same as above, but AVL is only specified on the store intrinsic
; This case will require some form of hoisting or PRE
define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) {
; CHECK-LABEL: vector_init_vsetvli_fv3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB17_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: blt a2, a0, .LBB17_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: ret
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.014 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx2 = getelementptr inbounds double, ptr %c, i64 %i.014
  %addr = bitcast ptr %arrayidx2 to ptr
  tail call void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double> zeroinitializer, ptr %addr, i64 4)
  %add = add nuw nsw i64 %i.014, 4
  %cmp = icmp slt i64 %add, %N
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
  ret void
}

; Demonstrates a case where mutation in phase3 is problematic. We mutate the
; vsetvli without considering that it changes the compatibility result of the
; vadd in the second block.
define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
; CHECK-LABEL: cross_block_mutate:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli a0, 6, e32, m2, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT: ret
    <vscale x 4 x i1> %mask) {
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 1, i64 0)
  %vl.trunc = trunc i64 %vl to i32
  %a.mod = insertelement <vscale x 4 x i32> %a, i32 %vl.trunc, i32 0
  br label %fallthrough

fallthrough:
  %res = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
    <vscale x 4 x i32> undef, <vscale x 4 x i32> %a.mod,
    <vscale x 4 x i32> %b, <vscale x 4 x i1> %mask, i64 %vl, i64 0)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i1 %cond) nounwind {
; CHECK-LABEL: pre_lmul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl)
  br i1 %cond, label %if, label %if.end

if:
  ; Deliberately change vtype - this could be an unknown call, but the broader
  ; code quality is distractingly bad
  tail call i64 @llvm.riscv.vsetvlimax.i64(i64 2, i64 1)
  br label %if.end

if.end:
  %b = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %y, i64 %vl)
  ret <vscale x 2 x i32> %b
}

define <vscale x 1 x double> @compat_store_consistency(i1 %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b, ptr %p1, <vscale x 1 x float> %c, ptr %p2) {
; CHECK-LABEL: compat_store_consistency:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: vs1r.v v8, (a1)
; CHECK-NEXT: beqz a0, .LBB20_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vse32.v v10, (a2)
; CHECK-NEXT: .LBB20_2: # %if.end
; CHECK-NEXT: ret
entry:
  %res = fadd <vscale x 1 x double> %a, %b
  store <vscale x 1 x double> %res, ptr %p1
  br i1 %cond, label %if.then, label %if.end

if.then: ; preds = %entry
  store <vscale x 1 x float> %c, ptr %p2
  br label %if.end

if.end: ; preds = %if.else, %if.then
  ret <vscale x 1 x double> %res
}

; The next two tests (which are the same except for swapped block order) make
; sure that the demanded reasoning around vmv.s.x correctly handles a forward
; state with only a valid SEWLMULRatio. We previously had a crash bug in this
; case.
define <vscale x 2 x i32> @test_ratio_only_vmv_s_x(ptr %x, ptr %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a2, 1
; CHECK-NEXT: beqz a2, .LBB21_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vwcvt.x.x.v v8, v9
; CHECK-NEXT: j .LBB21_3
; CHECK-NEXT: .LBB21_2:
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: .LBB21_3: # %if.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
  br i1 %cond, label %if, label %if.end

if:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
  %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
  ret <vscale x 2 x i32> %e
}

define <vscale x 2 x i32> @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) nounwind {
; CHECK-LABEL: test_ratio_only_vmv_s_x2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a2, 1
; CHECK-NEXT: beqz a2, .LBB22_2
; CHECK-NEXT: # %bb.1: # %if
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: j .LBB22_3
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vwcvt.x.x.v v8, v9
; CHECK-NEXT: .LBB22_3: # %if.end
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: ret
entry:
  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, ptr %y, i64 2)
  %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i16> %b, i16 0, i64 2)
  br i1 %cond, label %if, label %if.end

if:
  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, ptr %x, i64 2)
  br label %if.end

if.end:
  %d = phi <vscale x 2 x i32> [ %a, %if ], [ %c, %entry ]
  %e = insertelement <vscale x 2 x i32> %d, i32 0, i32 0
  ret <vscale x 2 x i32> %e
}

; This case demonstrates a PRE case where the first instruction in the block
; doesn't require a state transition.
define void @pre_over_vle(ptr %A) {
; CHECK-LABEL: pre_over_vle:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi a1, a0, 800
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB23_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsext.vf4 v9, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: addi a0, a0, 8
; CHECK-NEXT: bne a0, a1, .LBB23_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: ret
entry:
  br label %vector.body

vector.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %addr = getelementptr inbounds <2 x i32>, ptr %A, i64 %iv
  %v = load <2 x i8>, ptr %addr
  %v2 = sext <2 x i8> %v to <2 x i32>
  store <2 x i32> %v2, ptr %addr
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv.next, 100
  br i1 %cmp, label %vector.body, label %exit
exit:
  ret void
}

declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, i64, i64)
declare void @llvm.riscv.vse.nxv1f64.i64(<vscale x 1 x double>, ptr nocapture, i64)
declare <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(
  <vscale x 4 x i32>,
  <vscale x 4 x i32>,
  <vscale x 4 x i32>,
  <vscale x 4 x i1>,
  i64,
  i64);

; Normally a pseudo's AVL is already live in its block, so it will already be
; live where we're inserting the vsetvli, before the pseudo. In some cases the
; AVL can be from a predecessor block, so make sure we extend its live range
; across blocks.
define <vscale x 2 x i32> @cross_block_avl_extend(i64 %avl, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: cross_block_avl_extend:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v9, v8, v9
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
  ; Get the output vl from a vsetvli
  %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 0)
  ; Force a vsetvli toggle so we need to insert a new vsetvli in exit
  %d = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %b, i64 1)
  br label %exit
exit:
  ; The use of the vl from the vsetvli will be replaced with its %avl because
  ; VLMAX is the same. So %avl, which was previously only live in %entry, will
  ; need to be extended down to %exit.
  %c = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
  ret <vscale x 2 x i32> %c
}

define void @cross_block_avl_extend_backwards(i1 %cond, <vscale x 8 x i8> %v, ptr %p, i64 %avl) {
; CHECK-LABEL: cross_block_avl_extend_backwards:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: beqz a0, .LBB25_2
; CHECK-NEXT: # %bb.1: # %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB25_2: # %bar
; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: .LBB25_3: # %foo
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: j .LBB25_3
entry:
  br i1 %cond, label %exit, label %bar
foo:
  ; Force a vl toggle
  call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 1)
  ; %add's LiveRange needs to be extended backwards to here.
  call void @llvm.riscv.vse.nxv8i8.i64(<vscale x 8 x i8> %v, ptr %p, i64 %add)
  br label %foo
exit:
  ret void
bar:
  %add = add i64 %avl, 1
  br label %foo
}

define void @vlmax_avl_phi(i1 %cmp, ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: vlmax_avl_phi:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: beqz a0, .LBB26_2
; CHECK-NEXT: # %bb.1: # %foo
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: j .LBB26_3
; CHECK-NEXT: .LBB26_2: # %bar
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT: .LBB26_3: # %exit
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
entry:
  br i1 %cmp, label %foo, label %bar

foo:
  %vl.foo = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a, i64 0, i64 0)
  br label %exit

bar:
  %vl.bar = tail call i64 @llvm.riscv.vsetvli.i64(i64 %b, i64 0, i64 0)
  br label %exit

exit:
  %phivl = phi i64 [ %vl.foo, %foo ], [ %vl.bar, %bar ]
  %1 = tail call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 0, i64 %phivl)
  call void @llvm.riscv.vse.nxv8i8(<vscale x 8 x i8> %1, ptr %p, i64 1)
  ret void
}

; Check that if we forward an AVL whose value is clobbered in its LiveInterval
; we emit a copy instead.
define <vscale x 4 x i32> @clobbered_forwarded_avl(i64 %n, <vscale x 4 x i32> %v, i1 %cmp) {
; CHECK-LABEL: clobbered_forwarded_avl:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: .LBB27_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: bnez a1, .LBB27_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: vadd.vv v10, v8, v8
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 1)
  br label %for.body

for.body:
  ; Use %n in a PHI here so its virtual register is assigned to a second time here.
  %1 = phi i64 [ %3, %for.body ], [ %n, %entry ]
  %2 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %1, i64 0, i64 0)
  %3 = add i64 %1, 1
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  %4 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %v, <vscale x 4 x i32> %v, i64 -1)
  ; VL toggle needed here: If the %n AVL was forwarded here we wouldn't be able
  ; to extend its LiveInterval because it would clobber the assignment at %1.
  %5 = tail call <vscale x 4 x i32> @llvm.riscv.vadd.nxv2f32.nxv2f32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32> %4, <vscale x 4 x i32> %v, i64 %0)
  ret <vscale x 4 x i32> %5
}