; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s

define <vscale x 1 x i64> @test_vp_reverse_nxv1i64_masked(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i64_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
  %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 %evl)
  ret <vscale x 1 x i64> %dst
}

define <vscale x 1 x i64> @test_vp_reverse_nxv1i64(<vscale x 1 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vrsub.vx v10, v9, a1
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret

  %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 1 x i64> %dst
}

define <vscale x 2 x i32> @test_vp_reverse_nxv2i32_masked(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i32_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
  %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x i32> %dst
}

define <vscale x 2 x i32> @test_vp_reverse_nxv2i32(<vscale x 2 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vrsub.vx v10, v9, a1
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret

  %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x i32> %dst
}

define <vscale x 4 x i16> @test_vp_reverse_nxv4i16_masked(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i16_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
  %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i16> %dst
}

define <vscale x 4 x i16> @test_vp_reverse_nxv4i16(<vscale x 4 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vrsub.vx v10, v9, a1
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret

  %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i16> %dst
}

define <vscale x 8 x i8> @test_vp_reverse_nxv8i8_masked(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i8_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vrgatherei16.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
  %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i8> %dst
}

define <vscale x 8 x i8> @test_vp_reverse_nxv8i8(<vscale x 8 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v10, v10, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vrgatherei16.vv v9, v8, v10
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret

  %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i8> %dst
}

define <vscale x 2 x i64> @test_vp_reverse_nxv2i64_masked(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i64_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x i64> %dst
}

define <vscale x 2 x i64> @test_vp_reverse_nxv2i64(<vscale x 2 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v12, v10, a1
; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret

  %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x i64> %dst
}

define <vscale x 4 x i32> @test_vp_reverse_nxv4i32_masked(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i32_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i32> %dst
}

define <vscale x 4 x i32> @test_vp_reverse_nxv4i32(<vscale x 4 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v12, v10, a1
; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret

  %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i32> %dst
}

define <vscale x 8 x i16> @test_vp_reverse_nxv8i16_masked(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i16_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i16> %dst
}

define <vscale x 8 x i16> @test_vp_reverse_nxv8i16(<vscale x 8 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vx v12, v10, a1
; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret

  %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i16> %dst
}

define <vscale x 16 x i8> @test_vp_reverse_nxv16i8_masked(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i8_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vid.v v12, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i8> %dst
}

define <vscale x 16 x i8> @test_vp_reverse_nxv16i8(<vscale x 16 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v12, v12, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret

  %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i8> %dst
}

define <vscale x 4 x i64> @test_vp_reverse_nxv4i64_masked(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i64_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vid.v v12, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
  %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i64> %dst
}

define <vscale x 4 x i64> @test_vp_reverse_nxv4i64(<vscale x 4 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v16, v12, a1
; CHECK-NEXT: vrgather.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret

  %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i64> %dst
}

define <vscale x 8 x i32> @test_vp_reverse_nxv8i32_masked(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i32_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vid.v v12, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
  %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i32> %dst
}

define <vscale x 8 x i32> @test_vp_reverse_nxv8i32(<vscale x 8 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v16, v12, a1
; CHECK-NEXT: vrgather.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret

  %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i32> %dst
}

define <vscale x 16 x i16> @test_vp_reverse_nxv16i16_masked(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i16_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vid.v v12, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
  %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i16> %dst
}

define <vscale x 16 x i16> @test_vp_reverse_nxv16i16(<vscale x 16 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vid.v v12
; CHECK-NEXT: vrsub.vx v16, v12, a1
; CHECK-NEXT: vrgather.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret

  %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i16> %dst
}

define <vscale x 32 x i8> @test_vp_reverse_nxv32i8_masked(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i8_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vid.v v16, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
  %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 %evl)
  ret <vscale x 32 x i8> %dst
}

define <vscale x 32 x i8> @test_vp_reverse_nxv32i8(<vscale x 32 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v16, v16, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret

  %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x i8> %dst
}

define <vscale x 8 x i64> @test_vp_reverse_nxv8i64_masked(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i64_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vid.v v16, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
  %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i64> %dst
}

define <vscale x 8 x i64> @test_vp_reverse_nxv8i64(<vscale x 8 x i64> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v24, v16, a1
; CHECK-NEXT: vrgather.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret

  %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i64> %dst
}

define <vscale x 16 x i32> @test_vp_reverse_nxv16i32_masked(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i32_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vid.v v16, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
  %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i32> %dst
}

define <vscale x 16 x i32> @test_vp_reverse_nxv16i32(<vscale x 16 x i32> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v24, v16, a1
; CHECK-NEXT: vrgather.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret

  %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i32> %dst
}

define <vscale x 32 x i16> @test_vp_reverse_nxv32i16_masked(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i16_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vid.v v16, v0.t
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
  %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 %evl)
  ret <vscale x 32 x i16> %dst
}

define <vscale x 32 x i16> @test_vp_reverse_nxv32i16(<vscale x 32 x i16> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vx v24, v16, a1
; CHECK-NEXT: vrgather.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret

  %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x i16> %dst
}

define <vscale x 64 x i8> @test_vp_reverse_nxv64i8_masked(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i8_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: addi a2, a1, -1
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: vrsub.vx v24, v16, a2
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vrgatherei16.vv v23, v8, v24
; CHECK-NEXT: vrgatherei16.vv v22, v9, v24
; CHECK-NEXT: vrgatherei16.vv v21, v10, v24
; CHECK-NEXT: vrgatherei16.vv v20, v11, v24
; CHECK-NEXT: vrgatherei16.vv v19, v12, v24
; CHECK-NEXT: vrgatherei16.vv v18, v13, v24
; CHECK-NEXT: vrgatherei16.vv v17, v14, v24
; CHECK-NEXT: vrgatherei16.vv v16, v15, v24
; CHECK-NEXT: sub a1, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t
; CHECK-NEXT: ret
  %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 %evl)
  ret <vscale x 64 x i8> %dst
}

define <vscale x 64 x i8> @test_vp_reverse_nxv64i8(<vscale x 64 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: addi a2, a1, -1
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: vrsub.vx v24, v16, a2
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vrgatherei16.vv v23, v8, v24
; CHECK-NEXT: vrgatherei16.vv v22, v9, v24
; CHECK-NEXT: vrgatherei16.vv v21, v10, v24
; CHECK-NEXT: vrgatherei16.vv v20, v11, v24
; CHECK-NEXT: vrgatherei16.vv v19, v12, v24
; CHECK-NEXT: vrgatherei16.vv v18, v13, v24
; CHECK-NEXT: vrgatherei16.vv v17, v14, v24
; CHECK-NEXT: vrgatherei16.vv v16, v15, v24
; CHECK-NEXT: sub a1, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v16, a1
; CHECK-NEXT: ret

  %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 64 x i8> %dst
}

define <vscale x 128 x i8> @test_vp_reverse_nxv128i8(<vscale x 128 x i8> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv128i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: addi sp, sp, -80
; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: addi s0, sp, 80
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 4
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: andi sp, sp, -64
; CHECK-NEXT: addi a3, sp, 64
; CHECK-NEXT: li a4, -1
; CHECK-NEXT: sub a5, a0, a2
; CHECK-NEXT: add a6, a0, a3
; CHECK-NEXT: sltu a0, a0, a5
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: addi a6, a6, -1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vsse8.v v8, (a6), a4
; CHECK-NEXT: sub a6, a6, a1
; CHECK-NEXT: and a0, a0, a5
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vsse8.v v16, (a6), a4
; CHECK-NEXT: vle8.v v16, (a2)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a3)
; CHECK-NEXT: addi sp, s0, -80
; CHECK-NEXT: .cfi_def_cfa sp, 80
; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: .cfi_restore ra
; CHECK-NEXT: .cfi_restore s0
; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret

  %dst = call <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8> %src, <vscale x 128 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 128 x i8> %dst
}

; LMUL = 1
declare <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64>,<vscale x 1 x i1>,i32)
declare <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8>,<vscale x 8 x i1>,i32)

; LMUL = 2
declare <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i1>,i32)

; LMUL = 4
declare <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i1>,i32)
declare <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8>,<vscale x 32 x i1>,i32)

; LMUL = 8
declare <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32>,<vscale x 16 x i1>,i32)
declare <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16>,<vscale x 32 x i1>,i32)
declare <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8>,<vscale x 64 x i1>,i32)

declare <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8>,<vscale x 128 x i1>,i32)
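
; Note on the expected codegen above (descriptive summary, not a directive):
; vp.reverse of whole-register-sized types is lowered as vid.v to materialize
; element indices, vrsub.vx to compute (evl - 1) - i, and vrgather.vv (or
; vrgatherei16.vv with e16 indices for the i8 cases) to permute the source.
; For nxv64i8 the gather is split into eight m1 vrgatherei16.vv ops followed by
; a vslidedown.vx of (VLENB * 8) - evl, and nxv128i8 falls back to a stack
; round trip using negatively strided vsse8.v stores and unit-stride vle8.v loads.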