; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare void @llvm.vp.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v3i16.v3p0(<3 x i16>, <3 x ptr>, <3 x i1>, i32)

define void @vpscatter_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v3i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v3i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v3i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v3i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v9, v9
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i32>
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v11, v10
; RV32-NEXT:    vsll.vi v10, v11, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v11, v10
; RV64-NEXT:    vsll.vi v10, v11, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v10, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v10, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v13, v12
; RV32-NEXT:    vsll.vi v12, v13, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v13, v12
; RV64-NEXT:    vsll.vi v12, v13, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v12, 0
; RV32-NEXT:    vsll.vi v12, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v12, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2bf16.v2p0(<2 x bfloat>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2bf16.v2p0(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4bf16.v4p0(<4 x bfloat>, <4 x ptr>, <4 x i1>, i32)
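
; A bfloat scatter only moves the raw 16-bit pattern to memory, so with
; zvfbfmin the bfloat tests below expect the same e16 indexed-store sequences
; that are checked for the i16 and f16 cases above.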

define void @vpscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8bf16.v8p0(<8 x bfloat>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v9, v9
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v11, v10
; RV32-NEXT:    vsll.vi v10, v11, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v11, v10
; RV64-NEXT:    vsll.vi v10, v11, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v10, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v10, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vzext.vf2 v13, v12
; RV32-NEXT: vsll.vi v12, v13, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vzext.vf2 v13, v12
; RV64-NEXT: vsll.vi v12, v13, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vzext.vf2 v14, v12
; RV64-NEXT: vsll.vi v12, v14, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vzext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vnsrl.wi v16, v12, 0
; RV32-NEXT: vsll.vi v12, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsll.vi v12, v12, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v32f64.v32p0(<32 x double>, <32 x ptr>, <32 x i1>, i32)
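; v32f64 is wider than the largest legal vector, so the scatter is split in
; two: the first half stores with EVL clamped to 16, the second with
; max(EVL - 16, 0) computed branchlessly via sltu/addi/and, using the upper
; half of the mask (vslidedown.vi v0, v0, 2). On RV64 the 32 pointers are
; passed in memory and loaded in two vle64 halves; v16 is spilled to free an
; m8 register group.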
define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a0)
; RV32-NEXT: li a2, 16
; RV32-NEXT: mv a0, a1
; RV32-NEXT: bltu a1, a2, .LBB83_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB83_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT: addi a0, a1, -16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: sltu a1, a1, a0
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: li a1, 16
; RV64-NEXT: mv a0, a2
; RV64-NEXT: bltu a2, a1, .LBB83_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB83_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: addi a0, a2, -16
; RV64-NEXT: sltu a1, a2, a0
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}
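; With a shared base and i32 offsets, RV32 can use the loaded indices
; directly (vle32 + vsll), while RV64 must sign-extend them to i64 in two m8
; halves, at the cost of also spilling the mask (v0) and the upper data half
; (v16).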
define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB84_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB84_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: sltu a2, a2, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a4, a3, 3
; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v0, v24
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB84_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB84_2:
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: sltu a2, a2, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a1, a0, 3
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}
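; An explicit sext of the i32 offsets produces the same split scatter as the
; plain i32-index case above, differing only in register allocation and
; spill layout.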
define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB85_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB85_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: sltu a2, a2, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v24
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v8, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v8
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB85_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB85_2:
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: sltu a2, a2, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %eidxs = sext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}
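; The zext variant only swaps vsext.vf2 for vzext.vf2 on RV64; the RV32 code
; is unchanged, since offsets are truncated to the 32-bit pointer width
; anyway.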
define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB86_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB86_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: sltu a2, a2, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v16, v24
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v8, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v8
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB86_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB86_2:
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: sltu a2, a2, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %eidxs = zext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}