; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
;
; Lowering tests for @llvm.experimental.vp.reverse on scalable mask vectors
; (<vscale x N x i1>, N = 1, 2, 4, 8, 16, 32, 64) on riscv64 with +m,+v.
;
; Every element count has two functions:
;   * test_vp_reverse_nxvNi1_masked forwards its %mask argument to the
;     intrinsic;
;   * test_vp_reverse_nxvNi1 passes an all-ones mask, splat (i1 1).
;
; Sequence recorded by the assertions for N = 1..32:
;   vmv.v.i + vmerge.vim   turn the i1 source in v0 into 0/1 bytes (e8)
;   vid.v + vrsub.vx       build e16 indices by reverse-subtracting the element
;                          index from a0 - 1 (a0 is also the AVL of vsetvli)
;   vrgatherei16.vv        permute the bytes with those indices
;   vmsne.vi               compare against 0 to get a mask result again
; In the masked variants v8 is copied into v0 (presumably the %mask argument;
; confirm against the calling convention) and the index, gather and compare
; instructions execute under v0.t.
; The index vector always uses twice the LMUL of the e8 data vector.
; N = 64 uses a different sequence, described next to those two functions.

; N = 1, masked: data at e8/mf8, indices at e16/mf4.
define <vscale x 1 x i1> @test_vp_reverse_nxv1i1_masked(<vscale x 1 x i1> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i1_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
; CHECK-NEXT:    ret
  %dst = call <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1> %src, <vscale x 1 x i1> %mask, i32 %evl)
  ret <vscale x 1 x i1> %dst
}

; N = 1, all-ones mask: same steps without v0.t; the index computation is
; interleaved with the byte expansion, hence the extra vsetvli toggles.
define <vscale x 1 x i1> @test_vp_reverse_nxv1i1(<vscale x 1 x i1> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vrgatherei16.vv v10, v9, v8
; CHECK-NEXT:    vmsne.vi v0, v10, 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 1 x i1> %dst
}

; N = 2, masked: data at e8/mf4, indices at e16/mf2.
define <vscale x 2 x i1> @test_vp_reverse_nxv2i1_masked(<vscale x 2 x i1> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i1_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
; CHECK-NEXT:    ret
  %dst = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %src, <vscale x 2 x i1> %mask, i32 %evl)
  ret <vscale x 2 x i1> %dst
}

; N = 2, all-ones mask.
define <vscale x 2 x i1> @test_vp_reverse_nxv2i1(<vscale x 2 x i1> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv2i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vrgatherei16.vv v10, v9, v8
; CHECK-NEXT:    vmsne.vi v0, v10, 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x i1> %dst
}

; N = 4, masked: data at e8/mf2, indices at e16/m1.
define <vscale x 4 x i1> @test_vp_reverse_nxv4i1_masked(<vscale x 4 x i1> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i1_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
; CHECK-NEXT:    ret
  %dst = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> %src, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i1> %dst
}

; N = 4, all-ones mask.
define <vscale x 4 x i1> @test_vp_reverse_nxv4i1(<vscale x 4 x i1> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv4i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vrgatherei16.vv v10, v9, v8
; CHECK-NEXT:    vmsne.vi v0, v10, 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i1> %dst
}

; N = 8, masked: data at e8/m1, indices at e16/m2.
define <vscale x 8 x i1> @test_vp_reverse_nxv8i1_masked(<vscale x 8 x i1> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i1_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10, v0.t
; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v12, v9, v10, v0.t
; CHECK-NEXT:    vmsne.vi v0, v12, 0, v0.t
; CHECK-NEXT:    ret
  %dst = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> %src, <vscale x 8 x i1> %mask, i32 %evl)
  ret <vscale x 8 x i1> %dst
}

; N = 8, all-ones mask.
define <vscale x 8 x i1> @test_vp_reverse_nxv8i1(<vscale x 8 x i1> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
; CHECK-NEXT:    vrgatherei16.vv v11, v10, v8
; CHECK-NEXT:    vmsne.vi v0, v11, 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 8 x i1> %dst
}

; N = 16, masked: data at e8/m2, indices at e16/m4. Unlike the smaller masked
; cases, the compare result is written to v8 and then copied into v0.
define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i1_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v12, v0.t
; CHECK-NEXT:    vrsub.vx v12, v12, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v16, v10, v12, v0.t
; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    ret
  %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 %evl)
  ret <vscale x 16 x i1> %dst
}

; N = 16, all-ones mask.
define <vscale x 16 x i1> @test_vp_reverse_nxv16i1(<vscale x 16 x i1> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v12, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
; CHECK-NEXT:    vrgatherei16.vv v14, v12, v8
; CHECK-NEXT:    vmsne.vi v0, v14, 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 16 x i1> %dst
}

; N = 32, masked: data at e8/m4, indices at e16/m8. The compare result is
; written to v8 and then copied into v0, as in the N = 16 masked case.
define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i1_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmv.v.i v12, 0
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v16, v0.t
; CHECK-NEXT:    vrsub.vx v16, v16, a0, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v24, v12, v16, v0.t
; CHECK-NEXT:    vmsne.vi v8, v24, 0, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    ret
  %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 %evl)
  ret <vscale x 32 x i1> %dst
}

; N = 32, all-ones mask.
define <vscale x 32 x i1> @test_vp_reverse_nxv32i1(<vscale x 32 x i1> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv32i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a1, a0, -1
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmv.v.i v16, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmerge.vim v16, v16, 1, v0
; CHECK-NEXT:    vrgatherei16.vv v20, v16, v8
; CHECK-NEXT:    vmsne.vi v0, v20, 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x i1> %dst
}

; N = 64, masked. The e8 data occupies an m8 group, and the recorded sequence
; works one register at a time instead of issuing a single gather:
;   * indices are built at VLMAX (vsetvli with rs1 = zero) at e16/m2 by
;     reverse-subtracting the element index from vlenb - 1;
;   * eight e8/m1 vrgatherei16.vv read v16..v23 and write v31..v24, i.e. the
;     destination registers are filled in descending order;
;   * vslidedown.vx shifts the m8 result by (vlenb << 3) - a0 with a0 as AVL;
;   * vmsne.vi produces the mask result.
; Only the slide and the compare execute under v0.t here.
define <vscale x 64 x i1> @test_vp_reverse_nxv64i1_masked(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i1_masked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vmv.v.i v16, 0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vmerge.vim v16, v16, 1, v0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v10, v10, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v31, v16, v10
; CHECK-NEXT:    vrgatherei16.vv v30, v17, v10
; CHECK-NEXT:    vrgatherei16.vv v29, v18, v10
; CHECK-NEXT:    vrgatherei16.vv v28, v19, v10
; CHECK-NEXT:    vrgatherei16.vv v27, v20, v10
; CHECK-NEXT:    vrgatherei16.vv v26, v21, v10
; CHECK-NEXT:    vrgatherei16.vv v25, v22, v10
; CHECK-NEXT:    vrgatherei16.vv v24, v23, v10
; CHECK-NEXT:    sub a1, a1, a0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v24, a1, v0.t
; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    ret
  %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 %evl)
  ret <vscale x 64 x i1> %dst
}

; N = 64, all-ones mask. Same per-register sequence as the masked N = 64 case
; without v0.t: the eight e8/m1 gathers read v8..v15 and write v23..v16, then
; the m8 result is slid down by (vlenb << 3) - a0 before the compare.
define <vscale x 64 x i1> @test_vp_reverse_nxv64i1(<vscale x 64 x i1> %src, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv64i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    addi a2, a1, -1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vsetvli a3, zero, e16, m2, ta, ma
; CHECK-NEXT:    vrsub.vx v24, v16, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v23, v8, v24
; CHECK-NEXT:    vrgatherei16.vv v22, v9, v24
; CHECK-NEXT:    vrgatherei16.vv v21, v10, v24
; CHECK-NEXT:    vrgatherei16.vv v20, v11, v24
; CHECK-NEXT:    vrgatherei16.vv v19, v12, v24
; CHECK-NEXT:    vrgatherei16.vv v18, v13, v24
; CHECK-NEXT:    vrgatherei16.vv v17, v14, v24
; CHECK-NEXT:    vrgatherei16.vv v16, v15, v24
; CHECK-NEXT:    sub a1, a1, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v16, a1
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret

  %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 64 x i1> %dst
}

; Intrinsic declarations, one per tested element count.
; Operands as used by the calls above: source vector, mask, i32 vector length.
declare <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1>,<vscale x 1 x i1>,i32)
declare <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1>,<vscale x 2 x i1>,i32)
declare <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1>,<vscale x 4 x i1>,i32)
declare <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1>,<vscale x 8 x i1>,i32)
declare <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1>,<vscale x 16 x i1>,i32)
declare <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1>,<vscale x 32 x i1>,i32)
declare <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1>,<vscale x 64 x i1>,i32)