; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v -verify-machineinstrs < %s | FileCheck %s

define <vscale x 1 x double> @test_vp_reverse_nxv1f64_masked(<vscale x 1 x double> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1f64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 1 x double> @llvm.experimental.vp.reverse.nxv1f64(<vscale x 1 x double> %src, <vscale x 1 x i1> %mask, i32 %evl)
  ret <vscale x 1 x double> %dst
}

define <vscale x 1 x double> @test_vp_reverse_nxv1f64(<vscale x 1 x double> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 1 x double> @llvm.experimental.vp.reverse.nxv1f64(<vscale x 1 x double> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 1 x double> %dst
}

define <vscale x 2 x float> @test_vp_reverse_nxv2f32_masked(<vscale x 2 x float> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2f32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %dst = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x float> %dst
}

define <vscale x 2 x float> @test_vp_reverse_nxv2f32(<vscale x 2 x float> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vrsub.vx v10, v9, a1
; CHECK-NEXT:    vrgather.vv v9, v8, v10
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret

  %dst = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x float> %dst
}

define <vscale x 2 x double> @test_vp_reverse_nxv2f64_masked(<vscale x 2 x double> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2f64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x double> %dst
}

define <vscale x 2 x double> @test_vp_reverse_nxv2f64(<vscale x 2 x double> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x double> %dst
}

define <vscale x 4 x float> @test_vp_reverse_nxv4f32_masked(<vscale x 4 x float> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4f32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x float> %dst
}

define <vscale x 4 x float> @test_vp_reverse_nxv4f32(<vscale x 4 x float> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vrsub.vx v12, v10, a1
; CHECK-NEXT:    vrgather.vv v10, v8, v12
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x float> %dst
}

define <vscale x 4 x double> @test_vp_reverse_nxv4f64_masked(<vscale x 4 x double> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4f64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x double> %dst
}

define <vscale x 4 x double> @test_vp_reverse_nxv4f64(<vscale x 4 x double> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x double> %dst
}

define <vscale x 8 x float> @test_vp_reverse_nxv8f32_masked(<vscale x 8 x float> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8f32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x float> %dst
}

define <vscale x 8 x float> @test_vp_reverse_nxv8f32(<vscale x 8 x float> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vid.v v12
; CHECK-NEXT:    vrsub.vx v16, v12, a1
; CHECK-NEXT:    vrgather.vv v12, v8, v16
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x float> %dst
}

define <vscale x 8 x double> @test_vp_reverse_nxv8f64_masked(<vscale x 8 x double> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8f64_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x double> %dst
}

define <vscale x 8 x double> @test_vp_reverse_nxv8f64(<vscale x 8 x double> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x double> %dst
}

define <vscale x 16 x float> @test_vp_reverse_nxv16f32_masked(<vscale x 16 x float> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16f32_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x float> %dst
}

define <vscale x 16 x float> @test_vp_reverse_nxv16f32(<vscale x 16 x float> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vrsub.vx v24, v16, a1
; CHECK-NEXT:    vrgather.vv v16, v8, v24
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x float> %dst
}

; LMUL = 1
declare <vscale x 1 x double> @llvm.experimental.vp.reverse.nxv1f64(<vscale x 1 x double>,<vscale x 1 x i1>,i32)
declare <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float>,<vscale x 2 x i1>,i32)

; LMUL = 2
declare <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float>,<vscale x 4 x i1>,i32)

; LMUL = 4
declare <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float>,<vscale x 8 x i1>,i32)

; LMUL = 8
declare <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float>,<vscale x 16 x i1>,i32)