; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i8> @vpgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

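; The tests below gather i8 elements and then extend the result in IR. The
; CHECK lines show the extend is not folded into the indexed load: the
; gather itself runs at e8 under the caller's EVL, and a separate
; vsext/vzext follows under its own vsetivli.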
define <2 x i16> @vpgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_sextload_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i16> @vpgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_zextload_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i32> @vpgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_sextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @vpgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_zextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @vpgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf8 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_sextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf8 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf8 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_zextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf8 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

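; <3 x i8> is widened to the next power-of-two vector, so it gathers with
; the same e8/mf4 configuration as <4 x i8>. An all-ones mask selects the
; unmasked form of vluxei (no v0.t operand).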
declare <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr>, <3 x i1>, i32)

define <3 x i8> @vpgather_v3i8(<3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v3i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v3i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
  ret <3 x i8> %v
}

define <3 x i8> @vpgather_truemask_v3i8(<3 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v3i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v3i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
  ret <3 x i8> %v
}

declare <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i8> @vpgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vpgather_truemask_v4i8(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i8> @vpgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

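; The baseidx tests gather from a scalar base plus a vector of indices.
; The GEP indices are sign-extended to pointer width (XLEN) and, for
; element types wider than one byte, scaled by the element size before
; feeding vluxei32/vluxei64.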
define <8 x i8> @vpgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

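; 32 i8 elements need 32 i64 indices on RV64, which does not fit in an
; LMUL=8 register group, so the gather is split in two: the first half
; runs with min(EVL, 16), the second with the remainder EVL-16 saturated
; to zero (the sltu/addi/and sequence), and vslideup.vi rejoins the
; halves.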
declare <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr>, <32 x i1>, i32)

define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB13_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:  .LBB13_2:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 16
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
  %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x i8> %v
}

declare <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i16> @vpgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i32> @vpgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_sextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_sextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @vpgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_zextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_zextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @vpgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_sextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_sextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_zextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_zextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i16> @vpgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vpgather_truemask_v4i16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i16> @vpgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vpgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vpgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

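; When the i8 indices are zero-extended, pointer-width indices are not
; needed: vwaddu.vv v9, v8, v8 computes 2*zext(idx) as an EEW=16 vector in
; a single widening op, and the gather uses vluxei16. The RV32 native-i16
; case below uses vwadd.vv the same way for 2*sext(idx) at EEW=32.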
define <8 x i16> @vpgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v9, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v9, v8, v8
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vpgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

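; For 32-bit elements on RV32, the pointer vector already has EEW=32, so
; the gather uses it directly and can even overwrite it in place
; (vluxei32.v v8, (zero), v8, v0.t).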
declare <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i32> @vpgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i64> @vpgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i32_sextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i32_sextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i32_zextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i32_zextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vpgather_truemask_v4i32(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i32> @vpgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v10, v9, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v10, v9, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v8, v10, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

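; For i64 elements, only the data SEW becomes e64: RV32 keeps 32-bit
; pointer and offset vectors and gathers with vluxei32, since its
; addresses are 32 bits wide.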
declare <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i64> @vpgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i64> @vpgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vpgather_truemask_v4i64(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i64> @vpgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v12, v9, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v12, v9, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

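; Native i64 indices on RV32 are first truncated to 32 bits with
; vnsrl.wi; the discarded high bits cannot affect a 32-bit address.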
define <8 x i64> @vpgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v8, 0
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

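; A gather only moves bits, so for bfloat the +zvfbfmin extension (which
; makes the bf16 vector types legal) is sufficient here; no bf16
; arithmetic support is required.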
declare <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x bfloat> @vpgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x bfloat> %v
}

declare <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x bfloat> @vpgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x bfloat> %v
}

define <4 x bfloat> @vpgather_truemask_v4bf16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x bfloat> %v
}

declare <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x bfloat> @vpgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v9, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v9, v8, v8
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

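; Likewise for f16: the generated code is identical with +zvfh and
; +zvfhmin, which is why both RUN lines share the RV32/RV64 prefixes.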
declare <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x half> @vpgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x half> @vpgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vpgather_truemask_v4f16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x half> @vpgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v9, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v9, v8, v8
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

1572; RV32-LABEL: vpgather_baseidx_v8i8_v8f32: 1573; RV32: # %bb.0: 1574; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1575; RV32-NEXT: vsext.vf4 v10, v8 1576; RV32-NEXT: vsll.vi v8, v10, 2 1577; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1578; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1579; RV32-NEXT: ret 1580; 1581; RV64-LABEL: vpgather_baseidx_v8i8_v8f32: 1582; RV64: # %bb.0: 1583; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1584; RV64-NEXT: vsext.vf8 v12, v8 1585; RV64-NEXT: vsll.vi v12, v12, 2 1586; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1587; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t 1588; RV64-NEXT: ret 1589 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs 1590 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1591 ret <8 x float> %v 1592} 1593 1594define <8 x float> @vpgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1595; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f32: 1596; RV32: # %bb.0: 1597; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1598; RV32-NEXT: vsext.vf4 v10, v8 1599; RV32-NEXT: vsll.vi v8, v10, 2 1600; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1601; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1602; RV32-NEXT: ret 1603; 1604; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f32: 1605; RV64: # %bb.0: 1606; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1607; RV64-NEXT: vsext.vf8 v12, v8 1608; RV64-NEXT: vsll.vi v12, v12, 2 1609; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1610; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t 1611; RV64-NEXT: ret 1612 %eidxs = sext <8 x i8> %idxs to <8 x i32> 1613 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 1614 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1615 ret <8 x float> %v 1616} 1617 1618define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1619; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f32: 1620; RV32: # %bb.0: 1621; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1622; RV32-NEXT: vzext.vf2 v9, v8 1623; RV32-NEXT: vsll.vi v10, v9, 2 1624; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1625; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t 1626; RV32-NEXT: ret 1627; 1628; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32: 1629; RV64: # %bb.0: 1630; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1631; RV64-NEXT: vzext.vf2 v9, v8 1632; RV64-NEXT: vsll.vi v10, v9, 2 1633; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1634; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t 1635; RV64-NEXT: ret 1636 %eidxs = zext <8 x i8> %idxs to <8 x i32> 1637 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 1638 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1639 ret <8 x float> %v 1640} 1641 1642define <8 x float> @vpgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1643; RV32-LABEL: vpgather_baseidx_v8i16_v8f32: 1644; RV32: # %bb.0: 1645; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1646; RV32-NEXT: vsext.vf2 v10, v8 1647; RV32-NEXT: vsll.vi v8, v10, 2 1648; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1649; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1650; RV32-NEXT: ret 1651; 1652; RV64-LABEL: vpgather_baseidx_v8i16_v8f32: 1653; RV64: # %bb.0: 1654; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1655; RV64-NEXT: vsext.vf4 v12, v8 1656; RV64-NEXT: vsll.vi v12, v12, 2 1657; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1658; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t 1659; 
RV64-NEXT: ret 1660 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs 1661 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1662 ret <8 x float> %v 1663} 1664 1665define <8 x float> @vpgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1666; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f32: 1667; RV32: # %bb.0: 1668; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1669; RV32-NEXT: vsext.vf2 v10, v8 1670; RV32-NEXT: vsll.vi v8, v10, 2 1671; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1672; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1673; RV32-NEXT: ret 1674; 1675; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f32: 1676; RV64: # %bb.0: 1677; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1678; RV64-NEXT: vsext.vf4 v12, v8 1679; RV64-NEXT: vsll.vi v12, v12, 2 1680; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1681; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t 1682; RV64-NEXT: ret 1683 %eidxs = sext <8 x i16> %idxs to <8 x i32> 1684 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 1685 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1686 ret <8 x float> %v 1687} 1688 1689define <8 x float> @vpgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1690; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f32: 1691; RV32: # %bb.0: 1692; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1693; RV32-NEXT: vzext.vf2 v10, v8 1694; RV32-NEXT: vsll.vi v8, v10, 2 1695; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1696; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1697; RV32-NEXT: ret 1698; 1699; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f32: 1700; RV64: # %bb.0: 1701; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1702; RV64-NEXT: vzext.vf2 v10, v8 1703; RV64-NEXT: vsll.vi v8, v10, 2 1704; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1705; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t 1706; RV64-NEXT: ret 1707 %eidxs = zext <8 x i16> %idxs to <8 x i32> 1708 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 1709 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1710 ret <8 x float> %v 1711} 1712 1713define <8 x float> @vpgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1714; RV32-LABEL: vpgather_baseidx_v8f32: 1715; RV32: # %bb.0: 1716; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1717; RV32-NEXT: vsll.vi v8, v8, 2 1718; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1719; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1720; RV32-NEXT: ret 1721; 1722; RV64-LABEL: vpgather_baseidx_v8f32: 1723; RV64: # %bb.0: 1724; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1725; RV64-NEXT: vsext.vf2 v12, v8 1726; RV64-NEXT: vsll.vi v12, v12, 2 1727; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma 1728; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t 1729; RV64-NEXT: ret 1730 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs 1731 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1732 ret <8 x float> %v 1733} 1734 1735declare <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr>, <2 x i1>, i32) 1736 1737define <2 x double> @vpgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) { 1738; RV32-LABEL: vpgather_v2f64: 1739; RV32: # %bb.0: 1740; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1741; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 1742; RV32-NEXT: vmv.v.v v8, v9 1743; RV32-NEXT: ret 1744; 1745; RV64-LABEL: vpgather_v2f64: 1746; RV64: # %bb.0: 
1747; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1748; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 1749; RV64-NEXT: ret 1750 %v = call <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl) 1751 ret <2 x double> %v 1752} 1753 1754declare <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr>, <4 x i1>, i32) 1755 1756define <4 x double> @vpgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) { 1757; RV32-LABEL: vpgather_v4f64: 1758; RV32: # %bb.0: 1759; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1760; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 1761; RV32-NEXT: vmv.v.v v8, v10 1762; RV32-NEXT: ret 1763; 1764; RV64-LABEL: vpgather_v4f64: 1765; RV64: # %bb.0: 1766; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1767; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 1768; RV64-NEXT: ret 1769 %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl) 1770 ret <4 x double> %v 1771} 1772 1773define <4 x double> @vpgather_truemask_v4f64(<4 x ptr> %ptrs, i32 zeroext %evl) { 1774; RV32-LABEL: vpgather_truemask_v4f64: 1775; RV32: # %bb.0: 1776; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1777; RV32-NEXT: vluxei32.v v10, (zero), v8 1778; RV32-NEXT: vmv.v.v v8, v10 1779; RV32-NEXT: ret 1780; 1781; RV64-LABEL: vpgather_truemask_v4f64: 1782; RV64: # %bb.0: 1783; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1784; RV64-NEXT: vluxei64.v v8, (zero), v8 1785; RV64-NEXT: ret 1786 %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl) 1787 ret <4 x double> %v 1788} 1789 1790declare <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr>, <8 x i1>, i32) 1791 1792define <8 x double> @vpgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) { 1793; RV32-LABEL: vpgather_v8f64: 1794; RV32: # %bb.0: 1795; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1796; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t 1797; RV32-NEXT: vmv.v.v v8, v12 1798; RV32-NEXT: ret 1799; 1800; RV64-LABEL: vpgather_v8f64: 1801; RV64: # %bb.0: 1802; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1803; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 1804; RV64-NEXT: ret 1805 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1806 ret <8 x double> %v 1807} 1808 1809define <8 x double> @vpgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1810; RV32-LABEL: vpgather_baseidx_v8i8_v8f64: 1811; RV32: # %bb.0: 1812; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1813; RV32-NEXT: vsext.vf4 v10, v8 1814; RV32-NEXT: vsll.vi v12, v10, 3 1815; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1816; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1817; RV32-NEXT: ret 1818; 1819; RV64-LABEL: vpgather_baseidx_v8i8_v8f64: 1820; RV64: # %bb.0: 1821; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1822; RV64-NEXT: vsext.vf8 v12, v8 1823; RV64-NEXT: vsll.vi v8, v12, 3 1824; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1825; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1826; RV64-NEXT: ret 1827 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs 1828 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1829 ret <8 x double> %v 1830} 1831 1832define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1833; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f64: 1834; RV32: # %bb.0: 1835; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1836; RV32-NEXT: vsext.vf4 v10, v8 1837; RV32-NEXT: vsll.vi v12, v10, 3 1838; RV32-NEXT: vsetvli zero, 
a1, e64, m4, ta, ma 1839; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1840; RV32-NEXT: ret 1841; 1842; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f64: 1843; RV64: # %bb.0: 1844; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1845; RV64-NEXT: vsext.vf8 v12, v8 1846; RV64-NEXT: vsll.vi v8, v12, 3 1847; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1848; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1849; RV64-NEXT: ret 1850 %eidxs = sext <8 x i8> %idxs to <8 x i64> 1851 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 1852 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1853 ret <8 x double> %v 1854} 1855 1856define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1857; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64: 1858; RV32: # %bb.0: 1859; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1860; RV32-NEXT: vzext.vf2 v9, v8 1861; RV32-NEXT: vsll.vi v12, v9, 3 1862; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1863; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t 1864; RV32-NEXT: ret 1865; 1866; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64: 1867; RV64: # %bb.0: 1868; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1869; RV64-NEXT: vzext.vf2 v9, v8 1870; RV64-NEXT: vsll.vi v12, v9, 3 1871; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1872; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t 1873; RV64-NEXT: ret 1874 %eidxs = zext <8 x i8> %idxs to <8 x i64> 1875 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 1876 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1877 ret <8 x double> %v 1878} 1879 1880define <8 x double> @vpgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1881; RV32-LABEL: vpgather_baseidx_v8i16_v8f64: 1882; RV32: # %bb.0: 1883; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1884; RV32-NEXT: vsext.vf2 v10, v8 1885; RV32-NEXT: vsll.vi v12, v10, 3 1886; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1887; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1888; RV32-NEXT: ret 1889; 1890; RV64-LABEL: vpgather_baseidx_v8i16_v8f64: 1891; RV64: # %bb.0: 1892; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1893; RV64-NEXT: vsext.vf4 v12, v8 1894; RV64-NEXT: vsll.vi v8, v12, 3 1895; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1896; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1897; RV64-NEXT: ret 1898 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs 1899 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1900 ret <8 x double> %v 1901} 1902 1903define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1904; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f64: 1905; RV32: # %bb.0: 1906; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1907; RV32-NEXT: vsext.vf2 v10, v8 1908; RV32-NEXT: vsll.vi v12, v10, 3 1909; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1910; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1911; RV32-NEXT: ret 1912; 1913; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f64: 1914; RV64: # %bb.0: 1915; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1916; RV64-NEXT: vsext.vf4 v12, v8 1917; RV64-NEXT: vsll.vi v8, v12, 3 1918; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1919; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1920; RV64-NEXT: ret 1921 %eidxs = sext <8 x i16> %idxs to <8 x i64> 1922 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 1923 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> 
%ptrs, <8 x i1> %m, i32 %evl) 1924 ret <8 x double> %v 1925} 1926 1927define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1928; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f64: 1929; RV32: # %bb.0: 1930; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1931; RV32-NEXT: vzext.vf2 v10, v8 1932; RV32-NEXT: vsll.vi v12, v10, 3 1933; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1934; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1935; RV32-NEXT: ret 1936; 1937; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64: 1938; RV64: # %bb.0: 1939; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1940; RV64-NEXT: vzext.vf2 v10, v8 1941; RV64-NEXT: vsll.vi v12, v10, 3 1942; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1943; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t 1944; RV64-NEXT: ret 1945 %eidxs = zext <8 x i16> %idxs to <8 x i64> 1946 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 1947 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1948 ret <8 x double> %v 1949} 1950 1951define <8 x double> @vpgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1952; RV32-LABEL: vpgather_baseidx_v8i32_v8f64: 1953; RV32: # %bb.0: 1954; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1955; RV32-NEXT: vsll.vi v12, v8, 3 1956; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1957; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1958; RV32-NEXT: ret 1959; 1960; RV64-LABEL: vpgather_baseidx_v8i32_v8f64: 1961; RV64: # %bb.0: 1962; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1963; RV64-NEXT: vsext.vf2 v12, v8 1964; RV64-NEXT: vsll.vi v8, v12, 3 1965; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1966; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1967; RV64-NEXT: ret 1968 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs 1969 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1970 ret <8 x double> %v 1971} 1972 1973define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1974; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8f64: 1975; RV32: # %bb.0: 1976; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1977; RV32-NEXT: vsll.vi v12, v8, 3 1978; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1979; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1980; RV32-NEXT: ret 1981; 1982; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8f64: 1983; RV64: # %bb.0: 1984; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1985; RV64-NEXT: vsext.vf2 v12, v8 1986; RV64-NEXT: vsll.vi v8, v12, 3 1987; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 1988; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1989; RV64-NEXT: ret 1990 %eidxs = sext <8 x i32> %idxs to <8 x i64> 1991 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 1992 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 1993 ret <8 x double> %v 1994} 1995 1996define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) { 1997; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8f64: 1998; RV32: # %bb.0: 1999; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2000; RV32-NEXT: vsll.vi v12, v8, 3 2001; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 2002; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 2003; RV32-NEXT: ret 2004; 2005; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8f64: 2006; RV64: # %bb.0: 2007; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2008; RV64-NEXT: vzext.vf2 v12, v8 2009; RV64-NEXT: vsll.vi v8, 
v12, 3 2010; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 2011; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2012; RV64-NEXT: ret 2013 %eidxs = zext <8 x i32> %idxs to <8 x i64> 2014 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 2015 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 2016 ret <8 x double> %v 2017} 2018 2019define <8 x double> @vpgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) { 2020; RV32-LABEL: vpgather_baseidx_v8f64: 2021; RV32: # %bb.0: 2022; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2023; RV32-NEXT: vnsrl.wi v12, v8, 0 2024; RV32-NEXT: vsll.vi v12, v12, 3 2025; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma 2026; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 2027; RV32-NEXT: ret 2028; 2029; RV64-LABEL: vpgather_baseidx_v8f64: 2030; RV64: # %bb.0: 2031; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2032; RV64-NEXT: vsll.vi v8, v8, 3 2033; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma 2034; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2035; RV64-NEXT: ret 2036 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs 2037 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) 2038 ret <8 x double> %v 2039} 2040 2041declare <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr>, <32 x i1>, i32) 2042 2043define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) { 2044; RV32-LABEL: vpgather_v32f64: 2045; RV32: # %bb.0: 2046; RV32-NEXT: li a2, 16 2047; RV32-NEXT: mv a1, a0 2048; RV32-NEXT: bltu a0, a2, .LBB94_2 2049; RV32-NEXT: # %bb.1: 2050; RV32-NEXT: li a1, 16 2051; RV32-NEXT: .LBB94_2: 2052; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2053; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t 2054; RV32-NEXT: addi a1, a0, -16 2055; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2056; RV32-NEXT: vslidedown.vi v0, v0, 2 2057; RV32-NEXT: sltu a0, a0, a1 2058; RV32-NEXT: addi a0, a0, -1 2059; RV32-NEXT: and a0, a0, a1 2060; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2061; RV32-NEXT: vslidedown.vi v8, v8, 16 2062; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2063; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t 2064; RV32-NEXT: vmv8r.v v8, v24 2065; RV32-NEXT: ret 2066; 2067; RV64-LABEL: vpgather_v32f64: 2068; RV64: # %bb.0: 2069; RV64-NEXT: li a2, 16 2070; RV64-NEXT: mv a1, a0 2071; RV64-NEXT: bltu a0, a2, .LBB94_2 2072; RV64-NEXT: # %bb.1: 2073; RV64-NEXT: li a1, 16 2074; RV64-NEXT: .LBB94_2: 2075; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2076; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 2077; RV64-NEXT: addi a1, a0, -16 2078; RV64-NEXT: sltu a0, a0, a1 2079; RV64-NEXT: addi a0, a0, -1 2080; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2081; RV64-NEXT: vslidedown.vi v0, v0, 2 2082; RV64-NEXT: and a0, a0, a1 2083; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2084; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t 2085; RV64-NEXT: ret 2086 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2087 ret <32 x double> %v 2088} 2089 2090define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2091; RV32-LABEL: vpgather_baseidx_v32i8_v32f64: 2092; RV32: # %bb.0: 2093; RV32-NEXT: li a2, 32 2094; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2095; RV32-NEXT: vsext.vf4 v16, v8 2096; RV32-NEXT: li a3, 16 2097; RV32-NEXT: vsll.vi v16, v16, 3 2098; RV32-NEXT: mv a2, a1 2099; RV32-NEXT: bltu a1, a3, .LBB95_2 2100; RV32-NEXT: # %bb.1: 2101; RV32-NEXT: li a2, 
16 2102; RV32-NEXT: .LBB95_2: 2103; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2104; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2105; RV32-NEXT: addi a2, a1, -16 2106; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2107; RV32-NEXT: vslidedown.vi v0, v0, 2 2108; RV32-NEXT: sltu a1, a1, a2 2109; RV32-NEXT: addi a1, a1, -1 2110; RV32-NEXT: and a1, a1, a2 2111; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2112; RV32-NEXT: vslidedown.vi v24, v16, 16 2113; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2114; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2115; RV32-NEXT: ret 2116; 2117; RV64-LABEL: vpgather_baseidx_v32i8_v32f64: 2118; RV64: # %bb.0: 2119; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma 2120; RV64-NEXT: vslidedown.vi v10, v8, 16 2121; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2122; RV64-NEXT: vsext.vf8 v24, v8 2123; RV64-NEXT: li a3, 16 2124; RV64-NEXT: vsext.vf8 v16, v10 2125; RV64-NEXT: vsll.vi v16, v16, 3 2126; RV64-NEXT: vsll.vi v8, v24, 3 2127; RV64-NEXT: mv a2, a1 2128; RV64-NEXT: bltu a1, a3, .LBB95_2 2129; RV64-NEXT: # %bb.1: 2130; RV64-NEXT: li a2, 16 2131; RV64-NEXT: .LBB95_2: 2132; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2133; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2134; RV64-NEXT: addi a2, a1, -16 2135; RV64-NEXT: sltu a1, a1, a2 2136; RV64-NEXT: addi a1, a1, -1 2137; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2138; RV64-NEXT: vslidedown.vi v0, v0, 2 2139; RV64-NEXT: and a1, a1, a2 2140; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2141; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2142; RV64-NEXT: ret 2143 %ptrs = getelementptr inbounds double, ptr %base, <32 x i8> %idxs 2144 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2145 ret <32 x double> %v 2146} 2147 2148define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2149; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64: 2150; RV32: # %bb.0: 2151; RV32-NEXT: li a2, 32 2152; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2153; RV32-NEXT: vsext.vf4 v16, v8 2154; RV32-NEXT: li a3, 16 2155; RV32-NEXT: vsll.vi v16, v16, 3 2156; RV32-NEXT: mv a2, a1 2157; RV32-NEXT: bltu a1, a3, .LBB96_2 2158; RV32-NEXT: # %bb.1: 2159; RV32-NEXT: li a2, 16 2160; RV32-NEXT: .LBB96_2: 2161; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2162; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2163; RV32-NEXT: addi a2, a1, -16 2164; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2165; RV32-NEXT: vslidedown.vi v0, v0, 2 2166; RV32-NEXT: sltu a1, a1, a2 2167; RV32-NEXT: addi a1, a1, -1 2168; RV32-NEXT: and a1, a1, a2 2169; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2170; RV32-NEXT: vslidedown.vi v24, v16, 16 2171; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2172; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2173; RV32-NEXT: ret 2174; 2175; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64: 2176; RV64: # %bb.0: 2177; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2178; RV64-NEXT: vsext.vf8 v24, v8 2179; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma 2180; RV64-NEXT: vslidedown.vi v8, v8, 16 2181; RV64-NEXT: li a3, 16 2182; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2183; RV64-NEXT: vsext.vf8 v16, v8 2184; RV64-NEXT: vsll.vi v16, v16, 3 2185; RV64-NEXT: vsll.vi v8, v24, 3 2186; RV64-NEXT: mv a2, a1 2187; RV64-NEXT: bltu a1, a3, .LBB96_2 2188; RV64-NEXT: # %bb.1: 2189; RV64-NEXT: li a2, 16 2190; RV64-NEXT: .LBB96_2: 2191; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2192; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2193; RV64-NEXT: addi a2, a1, -16 2194; 
RV64-NEXT: sltu a1, a1, a2 2195; RV64-NEXT: addi a1, a1, -1 2196; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2197; RV64-NEXT: vslidedown.vi v0, v0, 2 2198; RV64-NEXT: and a1, a1, a2 2199; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2200; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2201; RV64-NEXT: ret 2202 %eidxs = sext <32 x i8> %idxs to <32 x i64> 2203 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs 2204 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2205 ret <32 x double> %v 2206} 2207 2208define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2209; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64: 2210; RV32: # %bb.0: 2211; RV32-NEXT: li a2, 32 2212; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma 2213; RV32-NEXT: vzext.vf2 v12, v8 2214; RV32-NEXT: li a3, 16 2215; RV32-NEXT: vsll.vi v16, v12, 3 2216; RV32-NEXT: mv a2, a1 2217; RV32-NEXT: bltu a1, a3, .LBB97_2 2218; RV32-NEXT: # %bb.1: 2219; RV32-NEXT: li a2, 16 2220; RV32-NEXT: .LBB97_2: 2221; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2222; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t 2223; RV32-NEXT: addi a2, a1, -16 2224; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2225; RV32-NEXT: vslidedown.vi v0, v0, 2 2226; RV32-NEXT: sltu a1, a1, a2 2227; RV32-NEXT: addi a1, a1, -1 2228; RV32-NEXT: and a1, a1, a2 2229; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma 2230; RV32-NEXT: vslidedown.vi v24, v16, 16 2231; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2232; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t 2233; RV32-NEXT: ret 2234; 2235; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: 2236; RV64: # %bb.0: 2237; RV64-NEXT: li a2, 32 2238; RV64-NEXT: vsetvli zero, a2, e16, m4, ta, ma 2239; RV64-NEXT: vzext.vf2 v12, v8 2240; RV64-NEXT: li a3, 16 2241; RV64-NEXT: vsll.vi v16, v12, 3 2242; RV64-NEXT: mv a2, a1 2243; RV64-NEXT: bltu a1, a3, .LBB97_2 2244; RV64-NEXT: # %bb.1: 2245; RV64-NEXT: li a2, 16 2246; RV64-NEXT: .LBB97_2: 2247; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2248; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t 2249; RV64-NEXT: addi a2, a1, -16 2250; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2251; RV64-NEXT: vslidedown.vi v0, v0, 2 2252; RV64-NEXT: sltu a1, a1, a2 2253; RV64-NEXT: addi a1, a1, -1 2254; RV64-NEXT: and a1, a1, a2 2255; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma 2256; RV64-NEXT: vslidedown.vi v24, v16, 16 2257; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2258; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t 2259; RV64-NEXT: ret 2260 %eidxs = zext <32 x i8> %idxs to <32 x i64> 2261 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs 2262 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2263 ret <32 x double> %v 2264} 2265 2266define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2267; RV32-LABEL: vpgather_baseidx_v32i16_v32f64: 2268; RV32: # %bb.0: 2269; RV32-NEXT: li a2, 32 2270; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2271; RV32-NEXT: vsext.vf2 v16, v8 2272; RV32-NEXT: li a3, 16 2273; RV32-NEXT: vsll.vi v16, v16, 3 2274; RV32-NEXT: mv a2, a1 2275; RV32-NEXT: bltu a1, a3, .LBB98_2 2276; RV32-NEXT: # %bb.1: 2277; RV32-NEXT: li a2, 16 2278; RV32-NEXT: .LBB98_2: 2279; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2280; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2281; RV32-NEXT: addi a2, a1, -16 2282; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2283; RV32-NEXT: 
vslidedown.vi v0, v0, 2 2284; RV32-NEXT: sltu a1, a1, a2 2285; RV32-NEXT: addi a1, a1, -1 2286; RV32-NEXT: and a1, a1, a2 2287; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2288; RV32-NEXT: vslidedown.vi v24, v16, 16 2289; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2290; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2291; RV32-NEXT: ret 2292; 2293; RV64-LABEL: vpgather_baseidx_v32i16_v32f64: 2294; RV64: # %bb.0: 2295; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma 2296; RV64-NEXT: vslidedown.vi v12, v8, 16 2297; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2298; RV64-NEXT: vsext.vf4 v24, v8 2299; RV64-NEXT: li a3, 16 2300; RV64-NEXT: vsext.vf4 v16, v12 2301; RV64-NEXT: vsll.vi v16, v16, 3 2302; RV64-NEXT: vsll.vi v8, v24, 3 2303; RV64-NEXT: mv a2, a1 2304; RV64-NEXT: bltu a1, a3, .LBB98_2 2305; RV64-NEXT: # %bb.1: 2306; RV64-NEXT: li a2, 16 2307; RV64-NEXT: .LBB98_2: 2308; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2309; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2310; RV64-NEXT: addi a2, a1, -16 2311; RV64-NEXT: sltu a1, a1, a2 2312; RV64-NEXT: addi a1, a1, -1 2313; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2314; RV64-NEXT: vslidedown.vi v0, v0, 2 2315; RV64-NEXT: and a1, a1, a2 2316; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2317; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2318; RV64-NEXT: ret 2319 %ptrs = getelementptr inbounds double, ptr %base, <32 x i16> %idxs 2320 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2321 ret <32 x double> %v 2322} 2323 2324define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2325; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64: 2326; RV32: # %bb.0: 2327; RV32-NEXT: li a2, 32 2328; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2329; RV32-NEXT: vsext.vf2 v16, v8 2330; RV32-NEXT: li a3, 16 2331; RV32-NEXT: vsll.vi v16, v16, 3 2332; RV32-NEXT: mv a2, a1 2333; RV32-NEXT: bltu a1, a3, .LBB99_2 2334; RV32-NEXT: # %bb.1: 2335; RV32-NEXT: li a2, 16 2336; RV32-NEXT: .LBB99_2: 2337; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2338; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2339; RV32-NEXT: addi a2, a1, -16 2340; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2341; RV32-NEXT: vslidedown.vi v0, v0, 2 2342; RV32-NEXT: sltu a1, a1, a2 2343; RV32-NEXT: addi a1, a1, -1 2344; RV32-NEXT: and a1, a1, a2 2345; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2346; RV32-NEXT: vslidedown.vi v24, v16, 16 2347; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2348; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2349; RV32-NEXT: ret 2350; 2351; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: 2352; RV64: # %bb.0: 2353; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2354; RV64-NEXT: vsext.vf4 v24, v8 2355; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma 2356; RV64-NEXT: vslidedown.vi v8, v8, 16 2357; RV64-NEXT: li a3, 16 2358; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2359; RV64-NEXT: vsext.vf4 v16, v8 2360; RV64-NEXT: vsll.vi v16, v16, 3 2361; RV64-NEXT: vsll.vi v8, v24, 3 2362; RV64-NEXT: mv a2, a1 2363; RV64-NEXT: bltu a1, a3, .LBB99_2 2364; RV64-NEXT: # %bb.1: 2365; RV64-NEXT: li a2, 16 2366; RV64-NEXT: .LBB99_2: 2367; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2368; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2369; RV64-NEXT: addi a2, a1, -16 2370; RV64-NEXT: sltu a1, a1, a2 2371; RV64-NEXT: addi a1, a1, -1 2372; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2373; RV64-NEXT: vslidedown.vi v0, v0, 2 2374; RV64-NEXT: and a1, a1, a2 2375; RV64-NEXT: vsetvli zero, a1, e64, 
m8, ta, ma 2376; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2377; RV64-NEXT: ret 2378 %eidxs = sext <32 x i16> %idxs to <32 x i64> 2379 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs 2380 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2381 ret <32 x double> %v 2382} 2383 2384define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2385; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64: 2386; RV32: # %bb.0: 2387; RV32-NEXT: li a2, 32 2388; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2389; RV32-NEXT: vzext.vf2 v16, v8 2390; RV32-NEXT: li a3, 16 2391; RV32-NEXT: vsll.vi v16, v16, 3 2392; RV32-NEXT: mv a2, a1 2393; RV32-NEXT: bltu a1, a3, .LBB100_2 2394; RV32-NEXT: # %bb.1: 2395; RV32-NEXT: li a2, 16 2396; RV32-NEXT: .LBB100_2: 2397; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2398; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2399; RV32-NEXT: addi a2, a1, -16 2400; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2401; RV32-NEXT: vslidedown.vi v0, v0, 2 2402; RV32-NEXT: sltu a1, a1, a2 2403; RV32-NEXT: addi a1, a1, -1 2404; RV32-NEXT: and a1, a1, a2 2405; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2406; RV32-NEXT: vslidedown.vi v24, v16, 16 2407; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2408; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2409; RV32-NEXT: ret 2410; 2411; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: 2412; RV64: # %bb.0: 2413; RV64-NEXT: li a2, 32 2414; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2415; RV64-NEXT: vzext.vf2 v16, v8 2416; RV64-NEXT: li a3, 16 2417; RV64-NEXT: vsll.vi v16, v16, 3 2418; RV64-NEXT: mv a2, a1 2419; RV64-NEXT: bltu a1, a3, .LBB100_2 2420; RV64-NEXT: # %bb.1: 2421; RV64-NEXT: li a2, 16 2422; RV64-NEXT: .LBB100_2: 2423; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2424; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t 2425; RV64-NEXT: addi a2, a1, -16 2426; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2427; RV64-NEXT: vslidedown.vi v0, v0, 2 2428; RV64-NEXT: sltu a1, a1, a2 2429; RV64-NEXT: addi a1, a1, -1 2430; RV64-NEXT: and a1, a1, a2 2431; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2432; RV64-NEXT: vslidedown.vi v24, v16, 16 2433; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2434; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t 2435; RV64-NEXT: ret 2436 %eidxs = zext <32 x i16> %idxs to <32 x i64> 2437 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs 2438 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2439 ret <32 x double> %v 2440} 2441 2442define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2443; RV32-LABEL: vpgather_baseidx_v32i32_v32f64: 2444; RV32: # %bb.0: 2445; RV32-NEXT: li a2, 32 2446; RV32-NEXT: li a3, 16 2447; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2448; RV32-NEXT: vsll.vi v16, v8, 3 2449; RV32-NEXT: mv a2, a1 2450; RV32-NEXT: bltu a1, a3, .LBB101_2 2451; RV32-NEXT: # %bb.1: 2452; RV32-NEXT: li a2, 16 2453; RV32-NEXT: .LBB101_2: 2454; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2455; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2456; RV32-NEXT: addi a2, a1, -16 2457; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2458; RV32-NEXT: vslidedown.vi v0, v0, 2 2459; RV32-NEXT: sltu a1, a1, a2 2460; RV32-NEXT: addi a1, a1, -1 2461; RV32-NEXT: and a1, a1, a2 2462; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2463; RV32-NEXT: vslidedown.vi v24, v16, 16 2464; RV32-NEXT: vsetvli zero, a1, e64, 
m8, ta, ma 2465; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2466; RV32-NEXT: ret 2467; 2468; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: 2469; RV64: # %bb.0: 2470; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2471; RV64-NEXT: vslidedown.vi v16, v8, 16 2472; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2473; RV64-NEXT: vsext.vf2 v24, v8 2474; RV64-NEXT: li a3, 16 2475; RV64-NEXT: vsext.vf2 v8, v16 2476; RV64-NEXT: vsll.vi v16, v8, 3 2477; RV64-NEXT: vsll.vi v8, v24, 3 2478; RV64-NEXT: mv a2, a1 2479; RV64-NEXT: bltu a1, a3, .LBB101_2 2480; RV64-NEXT: # %bb.1: 2481; RV64-NEXT: li a2, 16 2482; RV64-NEXT: .LBB101_2: 2483; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2484; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2485; RV64-NEXT: addi a2, a1, -16 2486; RV64-NEXT: sltu a1, a1, a2 2487; RV64-NEXT: addi a1, a1, -1 2488; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2489; RV64-NEXT: vslidedown.vi v0, v0, 2 2490; RV64-NEXT: and a1, a1, a2 2491; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2492; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2493; RV64-NEXT: ret 2494 %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs 2495 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2496 ret <32 x double> %v 2497} 2498 2499define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2500; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64: 2501; RV32: # %bb.0: 2502; RV32-NEXT: li a2, 32 2503; RV32-NEXT: li a3, 16 2504; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2505; RV32-NEXT: vsll.vi v16, v8, 3 2506; RV32-NEXT: mv a2, a1 2507; RV32-NEXT: bltu a1, a3, .LBB102_2 2508; RV32-NEXT: # %bb.1: 2509; RV32-NEXT: li a2, 16 2510; RV32-NEXT: .LBB102_2: 2511; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2512; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2513; RV32-NEXT: addi a2, a1, -16 2514; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2515; RV32-NEXT: vslidedown.vi v0, v0, 2 2516; RV32-NEXT: sltu a1, a1, a2 2517; RV32-NEXT: addi a1, a1, -1 2518; RV32-NEXT: and a1, a1, a2 2519; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2520; RV32-NEXT: vslidedown.vi v24, v16, 16 2521; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2522; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2523; RV32-NEXT: ret 2524; 2525; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: 2526; RV64: # %bb.0: 2527; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2528; RV64-NEXT: vsext.vf2 v24, v8 2529; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2530; RV64-NEXT: vslidedown.vi v8, v8, 16 2531; RV64-NEXT: li a3, 16 2532; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2533; RV64-NEXT: vsext.vf2 v16, v8 2534; RV64-NEXT: vsll.vi v16, v16, 3 2535; RV64-NEXT: vsll.vi v8, v24, 3 2536; RV64-NEXT: mv a2, a1 2537; RV64-NEXT: bltu a1, a3, .LBB102_2 2538; RV64-NEXT: # %bb.1: 2539; RV64-NEXT: li a2, 16 2540; RV64-NEXT: .LBB102_2: 2541; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2542; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2543; RV64-NEXT: addi a2, a1, -16 2544; RV64-NEXT: sltu a1, a1, a2 2545; RV64-NEXT: addi a1, a1, -1 2546; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2547; RV64-NEXT: vslidedown.vi v0, v0, 2 2548; RV64-NEXT: and a1, a1, a2 2549; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2550; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2551; RV64-NEXT: ret 2552 %eidxs = sext <32 x i32> %idxs to <32 x i64> 2553 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs 2554 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> 
%m, i32 %evl) 2555 ret <32 x double> %v 2556} 2557 2558define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2559; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64: 2560; RV32: # %bb.0: 2561; RV32-NEXT: li a2, 32 2562; RV32-NEXT: li a3, 16 2563; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2564; RV32-NEXT: vsll.vi v16, v8, 3 2565; RV32-NEXT: mv a2, a1 2566; RV32-NEXT: bltu a1, a3, .LBB103_2 2567; RV32-NEXT: # %bb.1: 2568; RV32-NEXT: li a2, 16 2569; RV32-NEXT: .LBB103_2: 2570; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2571; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2572; RV32-NEXT: addi a2, a1, -16 2573; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2574; RV32-NEXT: vslidedown.vi v0, v0, 2 2575; RV32-NEXT: sltu a1, a1, a2 2576; RV32-NEXT: addi a1, a1, -1 2577; RV32-NEXT: and a1, a1, a2 2578; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2579; RV32-NEXT: vslidedown.vi v24, v16, 16 2580; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2581; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2582; RV32-NEXT: ret 2583; 2584; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: 2585; RV64: # %bb.0: 2586; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2587; RV64-NEXT: vzext.vf2 v24, v8 2588; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2589; RV64-NEXT: vslidedown.vi v8, v8, 16 2590; RV64-NEXT: li a3, 16 2591; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2592; RV64-NEXT: vzext.vf2 v16, v8 2593; RV64-NEXT: vsll.vi v16, v16, 3 2594; RV64-NEXT: vsll.vi v8, v24, 3 2595; RV64-NEXT: mv a2, a1 2596; RV64-NEXT: bltu a1, a3, .LBB103_2 2597; RV64-NEXT: # %bb.1: 2598; RV64-NEXT: li a2, 16 2599; RV64-NEXT: .LBB103_2: 2600; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2601; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2602; RV64-NEXT: addi a2, a1, -16 2603; RV64-NEXT: sltu a1, a1, a2 2604; RV64-NEXT: addi a1, a1, -1 2605; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2606; RV64-NEXT: vslidedown.vi v0, v0, 2 2607; RV64-NEXT: and a1, a1, a2 2608; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2609; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2610; RV64-NEXT: ret 2611 %eidxs = zext <32 x i32> %idxs to <32 x i64> 2612 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs 2613 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2614 ret <32 x double> %v 2615} 2616 2617define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) { 2618; RV32-LABEL: vpgather_baseidx_v32f64: 2619; RV32: # %bb.0: 2620; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 2621; RV32-NEXT: vmv1r.v v7, v0 2622; RV32-NEXT: vnsrl.wi v24, v16, 0 2623; RV32-NEXT: vnsrl.wi v16, v8, 0 2624; RV32-NEXT: li a2, 32 2625; RV32-NEXT: addi a3, a1, -16 2626; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2627; RV32-NEXT: vslidedown.vi v0, v0, 2 2628; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma 2629; RV32-NEXT: vslideup.vi v16, v24, 16 2630; RV32-NEXT: vsll.vi v24, v16, 3 2631; RV32-NEXT: sltu a2, a1, a3 2632; RV32-NEXT: addi a2, a2, -1 2633; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma 2634; RV32-NEXT: vslidedown.vi v8, v24, 16 2635; RV32-NEXT: and a2, a2, a3 2636; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2637; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t 2638; RV32-NEXT: li a2, 16 2639; RV32-NEXT: bltu a1, a2, .LBB104_2 2640; RV32-NEXT: # %bb.1: 2641; RV32-NEXT: li a1, 16 2642; RV32-NEXT: .LBB104_2: 2643; RV32-NEXT: vmv1r.v v0, v7 2644; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2645; RV32-NEXT: vluxei32.v 
v8, (a0), v24, v0.t 2646; RV32-NEXT: ret 2647; 2648; RV64-LABEL: vpgather_baseidx_v32f64: 2649; RV64: # %bb.0: 2650; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2651; RV64-NEXT: vsll.vi v16, v16, 3 2652; RV64-NEXT: li a3, 16 2653; RV64-NEXT: vsll.vi v8, v8, 3 2654; RV64-NEXT: mv a2, a1 2655; RV64-NEXT: bltu a1, a3, .LBB104_2 2656; RV64-NEXT: # %bb.1: 2657; RV64-NEXT: li a2, 16 2658; RV64-NEXT: .LBB104_2: 2659; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2660; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2661; RV64-NEXT: addi a2, a1, -16 2662; RV64-NEXT: sltu a1, a1, a2 2663; RV64-NEXT: addi a1, a1, -1 2664; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2665; RV64-NEXT: vslidedown.vi v0, v0, 2 2666; RV64-NEXT: and a1, a1, a2 2667; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2668; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2669; RV64-NEXT: ret 2670 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %idxs 2671 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) 2672 ret <32 x double> %v 2673} 2674