; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vpgather_nxv1i8(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vpgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

define <vscale x 2 x i16> @vpgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i16> @vpgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vpgather_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @vpgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV32-NEXT:    vmv1r.v v12, v0
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a2, a3, 1
; RV32-NEXT:    srli a3, a3, 2
; RV32-NEXT:    sub a4, a1, a2
; RV32-NEXT:    sltu a5, a1, a4
; RV32-NEXT:    addi a5, a5, -1
; RV32-NEXT:    and a4, a5, a4
; RV32-NEXT:    vslidedown.vx v0, v0, a3
; RV32-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v10
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT:    bltu a1, a2, .LBB12_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:  .LBB12_2:
; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a3, a2, 1
; RV64-NEXT:    srli a4, a2, 2
; RV64-NEXT:    sub a5, a1, a3
; RV64-NEXT:    vslidedown.vx v13, v0, a4
; RV64-NEXT:    sltu a4, a1, a5
; RV64-NEXT:    addi a4, a4, -1
; RV64-NEXT:    and a5, a4, a5
; RV64-NEXT:    sub a4, a5, a2
; RV64-NEXT:    sltu a6, a5, a4
; RV64-NEXT:    addi a6, a6, -1
; RV64-NEXT:    and a6, a6, a4
; RV64-NEXT:    srli a4, a2, 3
; RV64-NEXT:    vsetvli a7, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v13, a4
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v11
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT:    bltu a5, a2, .LBB12_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a5, a2
; RV64-NEXT:  .LBB12_2:
; RV64-NEXT:    vsetvli zero, a5, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vmv1r.v v0, v13
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    bltu a1, a3, .LBB12_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a1, a3
; RV64-NEXT:  .LBB12_4:
; RV64-NEXT:    sub a3, a1, a2
; RV64-NEXT:    sltu a5, a1, a3
; RV64-NEXT:    addi a5, a5, -1
; RV64-NEXT:    and a3, a5, a3
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v12, a4
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
; RV64-NEXT:    bltu a1, a2, .LBB12_6
; RV64-NEXT:  # %bb.5:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB12_6:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
  %v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

declare <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vpgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i32> @vpgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vpgather_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @vpgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v8, v8
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v8, v8
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vpgather_nxv1i32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vpgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i64> @vpgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vpgather_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @vpgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 2
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 2
; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

declare <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @vpgather_nxv1i64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %v
}

declare <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @vpgather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %v
}

declare <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @vpgather_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @vpgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i64> %v
}

declare <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v16, v10, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v16, v10, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v16, v12, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v8, 0
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

declare <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vpgather_nxv1bf16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vpgather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

declare <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vpgather_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vpgather_truemask_nxv4bf16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

declare <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vpgather_nxv8bf16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vpgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v8, v8
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8bf16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v8, v8
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

declare <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vpgather_nxv1f16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
declare <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vpgather_nxv1f16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
  %v = call <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vpgather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
; RV64-NEXT: ret
  %v = call <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vpgather_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}
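; An all-true mask should fold away the mask operand: the gather below is
; emitted without v0.t.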
define <vscale x 4 x half> @vpgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x half> %v
}

declare <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwaddu.vv v10, v8, v8
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}
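; With native e16 indices, RV32 can widen and double the offsets in a single
; vwadd.vv; RV64 still sign-extends them to e64 first.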
define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}
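; For f32 gathers the RV32 index EEW matches the data EEW, so the result may
; overwrite the pointer register group in place and no vmv copy is needed.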
declare <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vpgather_nxv1f32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
  %v = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

declare <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vpgather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
  %v = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vpgather_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
  %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vpgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
  %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
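; Zero-extended i8 offsets only need e16 index EEW: both targets use
; vzext.vf2 plus vluxei16 rather than widening to XLEN.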
define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v10, v8
; RV32-NEXT: vsll.vi v12, v10, 2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v10, v8
; RV64-NEXT: vsll.vi v12, v10, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v12, v8
; RV64-NEXT: vsll.vi v8, v12, 2
; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
declare <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vpgather_nxv1f64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
  %v = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vpgather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
  %v = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vpgather_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
  %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vpgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8
; RV64-NEXT: ret
  %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x double> %v
}
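; nxv6f64 is not a power-of-two type; it is legalized in the containing m8
; register group, so the expected code matches the nxv8f64 tests.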
declare <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr>, <vscale x 6 x i1>, i32)

define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v10, v8
; RV32-NEXT: vsll.vi v16, v10, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v10, v8
; RV64-NEXT: vsll.vi v16, v10, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}
define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v12, v8
; RV64-NEXT: vsll.vi v16, v12, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}
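; With native i64 indices on RV32, GEP indices are truncated to pointer
; width, so the low halves are extracted with vnsrl.wi and the gather uses
; 32-bit offsets.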
define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v10, v8
; RV32-NEXT: vsll.vi v16, v10, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v10, v8
; RV64-NEXT: vsll.vi v16, v10, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v12, v8
; RV64-NEXT: vsll.vi v16, v12, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}
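; nxv16f64 needs more than one m8 register group, so the gather is split in
; two: the high half's EVL is usubsat(evl, VLMAX) with its mask shifted down
; by vslidedown, and the low half's EVL is evl clamped to VLMAX.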
declare <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: sub a2, a0, a1
; RV32-NEXT: srli a3, a1, 3
; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: sltu a3, a0, a2
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t
; RV32-NEXT: bltu a0, a1, .LBB111_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB111_2:
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v24
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: sub a2, a0, a1
; RV64-NEXT: srli a3, a1, 3
; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: sltu a3, a0, a2
; RV64-NEXT: addi a3, a3, -1
; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
; RV64-NEXT: bltu a0, a1, .LBB111_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB111_2:
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: vsext.vf2 v16, v8
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: vsll.vi v24, v16, 3
; RV32-NEXT: sub a3, a1, a2
; RV32-NEXT: srli a4, a2, 3
; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: sltu a4, a1, a3
; RV32-NEXT: addi a4, a4, -1
; RV32-NEXT: and a3, a4, a3
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: bltu a1, a2, .LBB112_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB112_2:
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: sub a3, a1, a2
; RV64-NEXT: srli a4, a2, 3
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: sltu a4, a1, a3
; RV64-NEXT: addi a4, a4, -1
; RV64-NEXT: and a3, a4, a3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: bltu a1, a2, .LBB112_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB112_2:
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: vsext.vf2 v16, v8
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: vsll.vi v24, v16, 3
; RV32-NEXT: sub a3, a1, a2
; RV32-NEXT: srli a4, a2, 3
; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: sltu a4, a1, a3
; RV32-NEXT: addi a4, a4, -1
; RV32-NEXT: and a3, a4, a3
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: bltu a1, a2, .LBB113_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB113_2:
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: sub a3, a1, a2
; RV64-NEXT: srli a4, a2, 3
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: sltu a4, a1, a3
; RV64-NEXT: addi a4, a4, -1
; RV64-NEXT: and a3, a4, a3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: bltu a1, a2, .LBB113_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB113_2:
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: vzext.vf2 v16, v8
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: vsll.vi v24, v16, 3
; RV32-NEXT: sub a3, a1, a2
; RV32-NEXT: srli a4, a2, 3
; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: sltu a4, a1, a3
; RV32-NEXT: addi a4, a4, -1
; RV32-NEXT: and a3, a4, a3
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: bltu a1, a2, .LBB114_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB114_2:
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: vzext.vf2 v16, v8
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: sub a3, a1, a2
; RV64-NEXT: srli a4, a2, 3
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: sltu a4, a1, a3
; RV64-NEXT: addi a4, a4, -1
; RV64-NEXT: and a3, a4, a3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV64-NEXT: bltu a1, a2, .LBB114_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB114_2:
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}