; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

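; The truncating-scatter tests below check that the payload is narrowed with
; vnsrl.wi before the indexed store, one shift per halving of SEW: a two-step
; chain for i32->i8 and a three-step chain for i64->i8.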
define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

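; The baseidx tests store through getelementptr(%base, %idxs): the index
; vector is extended to the index EEW before feeding vsoxei (vsext.vf4 to
; e32 on RV32, vsext.vf8 to e64 on RV64 for the i8 indices here).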
define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

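; For 2-byte elements the index must also be scaled by 2, selected as a plain
; or widening add; a zero-extended i8 index can keep a 16-bit index EEW and
; use vsoxei16.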
define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

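; The vpsext/vpzext tests extend the indices with masked VP intrinsics; the
; mask is allowed to fold into the widening add (note the v0.t operands).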
declare <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10, v0.t
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10, v0.t
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwadd.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwaddu.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

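; For 4-byte elements the index is scaled with vsll.vi 2; an explicit sext of
; the i8 indices produces the same code as the plain gep form above.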
define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 2
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 2
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

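; For the i64-element scatters RV32 still only needs a 32-bit index vector,
; since its pointers are 32 bits: note vsoxei32 with e64 data throughout.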
declare void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v18, v16
; RV32-NEXT:    vsll.vi v16, v18, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v18, v16
; RV64-NEXT:    vsll.vi v16, v18, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

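; With zero-extended indices the scaled offset is known to fit in a narrower
; type, so the index EEW can shrink: the zext i8 tests use vsoxei16, and the
; zext i16 test below uses vsoxei32 even on RV64.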
define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v20, v16
; RV64-NEXT:    vsll.vi v16, v20, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1bf16(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2bf16(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

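; The bf16 scatters require only zvfbfmin: the 16-bit payload is stored as-is,
; so the code matches the equivalent i16 cases.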
declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

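; The f16 tests run under both +zvfh and +zvfhmin with shared RV32/RV64 check
; prefixes, so the scatter lowering must be identical for either extension.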
declare void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v10
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwaddu.vv v12, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwaddu.vv v12, v10, v10
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwadd.vv v12, v10, v10
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v14, v12
; RV64-NEXT: vsll.vi v12, v14, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 2
; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double>, <vscale x 6 x ptr>, <vscale x 6 x i1>, i32)

define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v20, v16
; RV64-NEXT: vsll.vi v16, v20, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v20, v16
; RV64-NEXT: vsll.vi v16, v20, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, <vscale x 16 x i1>, i32)

define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl8re32.v v24, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: mv a2, a1
; RV32-NEXT: bltu a1, a0, .LBB108_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a2, a0
; RV32-NEXT: .LBB108_2:
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT: sub a2, a1, a0
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: sltu a1, a1, a2
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a0
; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a3, a1, 3
; RV64-NEXT: add a3, a0, a3
; RV64-NEXT: vl8re64.v v16, (a3)
; RV64-NEXT: vl8re64.v v24, (a0)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: bltu a2, a1, .LBB108_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB108_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: sub a0, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a0
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB109_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB109_2:
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vsext.vf4 v16, v24
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB109_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB109_2:
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB110_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB110_2:
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vsext.vf4 v16, v24
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB110_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB110_2:
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vzext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB111_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB111_2:
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV64-NEXT: vzext.vf2 v0, v24
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB111_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB111_2:
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}