; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s

; Tests for insertion of VSETVLI/VSETIVLI instructions in code that uses the
; explicit @llvm.riscv.vsetvli and @llvm.riscv.vsetvlimax intrinsics, including
; forwarding of their AVL operands. The SEW operand is encoded as 0=e8, 1=e16,
; 2=e32, 3=e64 and the LMUL operand as 0=m1, 1=m2, 2=m4, 3=m8, 5=mf8, 6=mf4,
; 7=mf2.

declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
declare i64 @llvm.riscv.vsetvlimax(i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
  <vscale x 1 x double>,
  <vscale x 1 x double>,
  <vscale x 1 x double>,
  i64, i64)
declare <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
  <vscale x 1 x i64>,
  ptr,
  <vscale x 1 x i1>,
  i64, i64)

define <vscale x 1 x double> @test1(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 7)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %b,
    i64 7, i64 %0)
  ret <vscale x 1 x double> %1
}

define <vscale x 1 x double> @test2(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 7)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %b,
    i64 7, i64 %avl)
  ret <vscale x 1 x double> %1
}

define <vscale x 1 x i64> @test3(i64 %avl, <vscale x 1 x i64> %a, ptr %b, <vscale x 1 x i1> %c) nounwind {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a1), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %1 = call <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
    <vscale x 1 x i64> %a,
    ptr %b,
    <vscale x 1 x i1> %c,
    i64 %0, i64 1)

  ret <vscale x 1 x i64> %1
}

define <vscale x 1 x i64> @test4(i64 %avl, <vscale x 1 x i64> %a, ptr %b, <vscale x 1 x i1> %c) nounwind {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a1), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %1 = call <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
    <vscale x 1 x i64> %a,
    ptr %b,
    <vscale x 1 x i1> %c,
    i64 %avl, i64 1)

  ret <vscale x 1 x i64> %1
}

; Make sure we don't insert a vsetvli for the vmand instruction.
define <vscale x 1 x i1> @test5(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %avl) nounwind {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmseq.vv v8, v8, v9
; CHECK-NEXT:    vmand.mm v0, v8, v0
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %vl)
  %b = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> %a, <vscale x 1 x i1> %2, i64 %vl)
  ret <vscale x 1 x i1> %b
}
declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64)
declare <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1>, <vscale x 1 x i1>, i64)

; Make sure we don't insert a vsetvli for the vmor instruction.
define void @test6(ptr nocapture readonly %A, ptr nocapture %B, i64 %n) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a2, a2, e32, m1, ta, ma
; CHECK-NEXT:    beqz a2, .LBB5_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    li a3, 0
; CHECK-NEXT:  .LBB5_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    slli a4, a3, 2
; CHECK-NEXT:    add a5, a0, a4
; CHECK-NEXT:    vle32.v v8, (a5)
; CHECK-NEXT:    add a3, a3, a2
; CHECK-NEXT:    vmsle.vi v9, v8, -3
; CHECK-NEXT:    vmsgt.vi v10, v8, 2
; CHECK-NEXT:    vmor.mm v0, v9, v10
; CHECK-NEXT:    add a4, a4, a1
; CHECK-NEXT:    vse32.v v8, (a4), v0.t
; CHECK-NEXT:    vsetvli a2, a2, e32, m1, ta, ma
; CHECK-NEXT:    bnez a2, .LBB5_2
; CHECK-NEXT:  .LBB5_3: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 0)
  %cmp.not11 = icmp eq i64 %0, 0
  br i1 %cmp.not11, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %entry, %for.body
  %1 = phi i64 [ %8, %for.body ], [ %0, %entry ]
  %i.012 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %add.ptr = getelementptr inbounds i32, ptr %A, i64 %i.012
  %2 = bitcast ptr %add.ptr to ptr
  %3 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32> undef, ptr %2, i64 %1)
  %4 = tail call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i32.i32.i64(<vscale x 2 x i32> %3, i32 -2, i64 %1)
  %5 = tail call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i32.i32.i64(<vscale x 2 x i32> %3, i32 2, i64 %1)
  %6 = tail call <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1.i64(<vscale x 2 x i1> %4, <vscale x 2 x i1> %5, i64 %1)
  %add.ptr1 = getelementptr inbounds i32, ptr %B, i64 %i.012
  %7 = bitcast ptr %add.ptr1 to ptr
  tail call void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32> %3, ptr %7, <vscale x 2 x i1> %6, i64 %1)
  %add = add i64 %1, %i.012
  %8 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %1, i64 2, i64 0)
  %cmp.not = icmp eq i64 %8, 0
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}

define <vscale x 1 x i64> @test7(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
entry:
  %x = tail call i64 @llvm.riscv.vsetvlimax(i64 3, i64 0)
  %y = call <vscale x 1 x i64> @llvm.riscv.vmv.s.x.nxv1i64(
    <vscale x 1 x i64> %a,
    i64 %b, i64 1)

  ret <vscale x 1 x i64> %y
}

define <vscale x 1 x i64> @test8(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
entry:
  %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0)
  %y = call <vscale x 1 x i64> @llvm.riscv.vmv.s.x.nxv1i64(<vscale x 1 x i64> %a, i64 %b, i64 2)
  ret <vscale x 1 x i64> %y
}

define <vscale x 1 x i64> @test9(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 9, e64, m1, tu, mu
; CHECK-NEXT:    vadd.vv v8, v8, v8, v0.t
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    ret
entry:
  %x = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(
    <vscale x 1 x i64> %a,
    <vscale x 1 x i64> %a,
    <vscale x 1 x i64> %a,
    <vscale x 1 x i1> %mask,
    i64 9,
    i64 0)
  %y = call <vscale x 1 x i64> @llvm.riscv.vmv.s.x.nxv1i64(<vscale x 1 x i64> %x, i64 %b, i64 2)
  ret <vscale x 1 x i64> %y
}

define <vscale x 1 x double> @test10(<vscale x 1 x double> %a, double %b) nounwind {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
entry:
  %x = tail call i64 @llvm.riscv.vsetvlimax(i64 3, i64 0)
  %y = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(
    <vscale x 1 x double> %a, double %b, i64 1)
  ret <vscale x 1 x double> %y
}

define <vscale x 1 x double> @test11(<vscale x 1 x double> %a, double %b) nounwind {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
entry:
  %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0)
  %y = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(
    <vscale x 1 x double> %a, double %b, i64 2)
  ret <vscale x 1 x double> %y
}

define <vscale x 1 x double> @test12(<vscale x 1 x double> %a, double %b, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 9, e64, m1, tu, mu
; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    ret
entry:
  %x = call <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %a,
    <vscale x 1 x i1> %mask,
    i64 7,
    i64 9,
    i64 0)
  %y = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(
    <vscale x 1 x double> %x, double %b, i64 2)
  ret <vscale x 1 x double> %y
}

define <vscale x 1 x double> @test13(<vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %b,
    i64 7, i64 -1)
  ret <vscale x 1 x double> %0
}

define <vscale x 1 x double> @test14(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %vsetvli = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 7)
  %f1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %b,
    i64 7, i64 1)
  %f2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %f1,
    <vscale x 1 x double> %b,
    i64 7, i64 %vsetvli)
  ret <vscale x 1 x double> %f2
}

define <vscale x 1 x double> @test15(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %vsetvli = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 7)
  %f1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %b,
    i64 7, i64 %avl)
  %f2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %f1,
    <vscale x 1 x double> %b,
    i64 7, i64 %vsetvli)
  ret <vscale x 1 x double> %f2
}


@gdouble = external global double

define <vscale x 1 x double> @test16(i64 %avl, double %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, a0, e64, mf2, ta, ma
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.v.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %vsetvli = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 7)

  %head = insertelement <vscale x 1 x double> poison, double %a, i32 0
  %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
  %f2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %splat,
    <vscale x 1 x double> %b,
    i64 7, i64 %vsetvli)
  ret <vscale x 1 x double> %f2
}

define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test17:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa4, v8
; CHECK-NEXT:    fadd.d fa0, fa5, fa4
; CHECK-NEXT:    ret
entry:
  %vsetvli = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 7)
  %c1 = extractelement <vscale x 1 x double> %a, i32 0
  %f2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %b,
    i64 7, i64 %vsetvli)
  %c2 = extractelement <vscale x 1 x double> %f2, i32 0
  %c3 = fadd double %c1, %c2
  ret double %c3
}


define <vscale x 1 x double> @test18(<vscale x 1 x double> %a, double %b) nounwind {
; CHECK-LABEL: test18:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 6, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v8, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; CHECK-NEXT:    vfmv.s.f v8, fa0
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
entry:
  %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0)
  %y = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(
    <vscale x 1 x double> %a, double %b, i64 2)
  %f2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %a,
    <vscale x 1 x double> %a,
    i64 7, i64 %x)
  %y2 = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(
    <vscale x 1 x double> %f2, double %b, i64 1)
  %res = fadd <vscale x 1 x double> %y, %y2
  ret <vscale x 1 x double> %res
}

define <vscale x 1 x double> @test19(<vscale x 1 x double> %a, double %b) nounwind {
; CHECK-LABEL: test19:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e64, m8, tu, ma
; CHECK-NEXT:    vmv1r.v v9, v8
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v9, v8
; CHECK-NEXT:    ret
entry:
  %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0)
  %y = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(
    <vscale x 1 x double> %a, double %b, i64 2)
  %y2 = fadd <vscale x 1 x double> %y, %a
  ret <vscale x 1 x double> %y2
}

define i64 @avl_forward1(<vscale x 2 x i32> %v, ptr %p) nounwind {
; CHECK-LABEL: avl_forward1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli a1, 6, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 2, i64 0)
  call void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32> %v, ptr %p, i64 %vl)
  ret i64 %vl
}

; Incompatible vtype
define i64 @avl_forward1b_neg(<vscale x 2 x i32> %v, ptr %p) nounwind {
; CHECK-LABEL: avl_forward1b_neg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli a1, 6, e16, m1, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 1, i64 0)
  call void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32> %v, ptr %p, i64 %vl)
  ret i64 %vl
}

define i64 @avl_forward2(<vscale x 2 x i32> %v, ptr %p) nounwind {
; CHECK-LABEL: avl_forward2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvlimax(i64 2, i64 0)
  call void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32> %v, ptr %p, i64 %vl)
  ret i64 %vl
}


; %vl is intentionally used only once
define void @avl_forward3(<vscale x 2 x i32> %v, ptr %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 %reg, i64 2, i64 0)
  call void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32> %v, ptr %p, i64 %vl)
  ret void
}

; %vl has multiple uses
define i64 @avl_forward3b(<vscale x 2 x i32> %v, ptr %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward3b:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a1, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 %reg, i64 2, i64 0)
  call void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32> %v, ptr %p, i64 %vl)
  ret i64 %vl
}

; Like avl_forward3, but with incompatible VTYPE
define void @avl_forward4(<vscale x 2 x i32> %v, ptr %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a1, a1, e16, m1, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 %reg, i64 1, i64 0)
  call void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32> %v, ptr %p, i64 %vl)
  ret void
}

; Like avl_forward3b, but with incompatible VTYPE
define i64 @avl_forward4b(<vscale x 2 x i32> %v, ptr %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4b:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a1, a1, e16, m1, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    ret
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 %reg, i64 1, i64 0)
  call void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32> %v, ptr %p, i64 %vl)
  ret i64 %vl
}

; Fault-first loads can modify VL.
; TODO: The VSETVLI of vadd could be removed here.
define <vscale x 1 x i64> @vleNff(ptr %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64ff.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; CHECK-NEXT:    vadd.vx v8, v8, a2
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 0, i64 2)
  %1 = bitcast ptr %str to ptr
  %2 = tail call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64.i64(<vscale x 1 x i64> undef, ptr %1, i64 %0)
  %3 = extractvalue { <vscale x 1 x i64>, i64 } %2, 0
  %4 = extractvalue { <vscale x 1 x i64>, i64 } %2, 1
  %5 = tail call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.i64.i64(<vscale x 1 x i64> %3, <vscale x 1 x i64> %3, i64 %x, i64 %4)
  ret <vscale x 1 x i64> %5
}

; Similar test case, but uses the same policy for vleff and vadd.
; Note: This test may become redundant once the TODO in @vleNff is fixed.
define <vscale x 1 x i64> @vleNff2(ptr %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64ff.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a2
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 0, i64 2)
  %1 = bitcast ptr %str to ptr
  %2 = tail call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64.i64(<vscale x 1 x i64> undef, ptr %1, i64 %0)
  %3 = extractvalue { <vscale x 1 x i64>, i64 } %2, 0
  %4 = extractvalue { <vscale x 1 x i64>, i64 } %2, 1
  %5 = tail call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %3, i64 %x, i64 %4)
  ret <vscale x 1 x i64> %5
}

declare { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64.i64(
  <vscale x 1 x i64>, ptr nocapture, i64)

declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64.i64(
  <vscale x 1 x i64>, i64, i64)

; Ensure AVL register is alive when forwarding an AVL immediate that does not fit in 5 bits
define <vscale x 2 x i32> @avl_forward5(ptr %addr) {
; CHECK-LABEL: avl_forward5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %gvl = tail call i64 @llvm.riscv.vsetvli.i64(i64 32, i64 0, i64 2)
  %ret = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32> undef, ptr %addr, i64 %gvl)
  ret <vscale x 2 x i32> %ret
}

declare <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32(<vscale x 1 x double>, <vscale x 1 x float>, <vscale x 1 x float>, i64, i64)

define <vscale x 1 x double> @test20(i64 %avl, <vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c) nounwind {
; CHECK-LABEL: test20:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwadd.vv v11, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v11, v10
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 7)
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32(
    <vscale x 1 x double> undef,
    <vscale x 1 x float> %a,
    <vscale x 1 x float> %b,
    i64 7, i64 %0)
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
    <vscale x 1 x double> undef,
    <vscale x 1 x double> %1,
    <vscale x 1 x double> %c,
    i64 7, i64 %0)
  ret <vscale x 1 x double> %2
}

; This used to fail the machine verifier due to the vsetvli being removed
; while the add was still using it.
define i64 @bad_removal(<2 x i64> %arg) {
; CHECK-LABEL: bad_removal:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    vsetivli a0, 16, e64, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    ret
bb:
  %tmp = extractelement <2 x i64> %arg, i64 0
  %tmp1 = call i64 @llvm.riscv.vsetvli.i64(i64 16, i64 3, i64 0)
  %tmp2 = add i64 %tmp, %tmp1
  ret i64 %tmp2
}


define void @add_v128i8(ptr %x, ptr %y) vscale_range(2,2) {
; CHECK-LABEL: add_v128i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8r.v v8, (a0)
; CHECK-NEXT:    vl8r.v v16, (a1)
; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vs8r.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = add <128 x i8> %a, %b
  store <128 x i8> %c, ptr %x
  ret void
}

define void @add_v16i64(ptr %x, ptr %y) vscale_range(2,2) {
; CHECK-LABEL: add_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v8, (a0)
; CHECK-NEXT:    vl8re64.v v16, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vs8r.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i64>, ptr %x
  %b = load <16 x i64>, ptr %y
  %c = add <16 x i64> %a, %b
  store <16 x i64> %c, ptr %x
  ret void
}

define <vscale x 2 x float> @fp_reduction_vfmv_s_f(float %0, <vscale x 8 x float> %1, i64 %2) {
; CHECK-LABEL: fp_reduction_vfmv_s_f:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    ret
  %4 = tail call <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32.i64(<vscale x 8 x float> poison, float %0, i64 %2)
  %5 = tail call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv8f32(<vscale x 8 x float> %4, i64 0)
  %6 = tail call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv8f32.i64(<vscale x 2 x float> poison, <vscale x 8 x float> %1, <vscale x 2 x float> %5, i64 7, i64 %2)
  ret <vscale x 2 x float> %6
}

define dso_local <vscale x 2 x i32> @int_reduction_vmv_s_x(i32 signext %0, <vscale x 8 x i32> %1, i64 %2) {
; CHECK-LABEL: int_reduction_vmv_s_x:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v12, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v12
; CHECK-NEXT:    ret
  %4 = tail call <vscale x 8 x i32> @llvm.riscv.vmv.s.x.nxv8i32.i64(<vscale x 8 x i32> poison, i32 %0, i64 %2)
  %5 = tail call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %4, i64 0)
  %6 = tail call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv8i32.i64(<vscale x 2 x i32> poison, <vscale x 8 x i32> %1, <vscale x 2 x i32> %5, i64 %2)
  ret <vscale x 2 x i32> %6
}

declare <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32.i64(<vscale x 8 x float>, float, i64)
declare <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv8f32(<vscale x 8 x float>, i64)
declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv8f32.i64(<vscale x 2 x float>, <vscale x 8 x float>, <vscale x 2 x float>, i64, i64)

declare <vscale x 8 x i32> @llvm.riscv.vmv.s.x.nxv8i32.i64(<vscale x 8 x i32>, i32, i64) #1
declare <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32>, i64 immarg) #2
declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv8i32.i64(<vscale x 2 x i32>, <vscale x 8 x i32>, <vscale x 2 x i32>, i64) #1

declare <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(
  <vscale x 1 x i64>,
  <vscale x 1 x i64>,
  <vscale x 1 x i64>,
  <vscale x 1 x i1>,
  i64,
  i64);

declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.i64.i64(
  <vscale x 1 x i64>,
  <vscale x 1 x i64>,
  i64,
  i64);

declare <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
  <vscale x 1 x double>,
  <vscale x 1 x double>,
  <vscale x 1 x double>,
  <vscale x 1 x i1>,
  i64,
  i64,
  i64);

declare <vscale x 1 x i64> @llvm.riscv.vmv.s.x.nxv1i64(
  <vscale x 1 x i64>,
  i64,
  i64);

declare <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64
  (<vscale x 1 x double>,
  double,
  i64)

declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32>, ptr nocapture, i64)
declare <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i32.i32.i64(<vscale x 2 x i32>, i32, i64)
declare <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i32.i32.i64(<vscale x 2 x i32>, i32, i64)
declare <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1.i64(<vscale x 2 x i1>, <vscale x 2 x i1>, i64)
declare void @llvm.riscv.vse.mask.nxv2i32.i64(<vscale x 2 x i32>, ptr nocapture, <vscale x 2 x i1>, i64)
declare void @llvm.riscv.vse.nxv2i32.i64(<vscale x 2 x i32>, ptr nocapture, i64)

define <vscale x 2 x i32> @avl_undef1(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>) {
; CHECK-LABEL: avl_undef1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, tu, ma
; CHECK-NEXT:    vadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(
    <vscale x 2 x i32> %0,
    <vscale x 2 x i32> %1,
    <vscale x 2 x i32> %2,
    i64 undef
  )
  ret <vscale x 2 x i32> %a
}

define i64 @avl_undef2() {
; CHECK-LABEL: avl_undef2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, a0, e32, mf2, ta, ma
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.riscv.vsetvli(i64 poison, i64 2, i64 7)
  ret i64 %1
}