; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,RV64

declare void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wi v11, v8, 0
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vnsrl.wi v8, v11, 0
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wi v12, v8, 0
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT: vnsrl.wi v8, v12, 0
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v9
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v9
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wi v11, v8, 0
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vnsrl.wi v8, v11, 0
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wi v12, v8, 0
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT: vnsrl.wi v8, v12, 0
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v10
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v10
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v12, v10, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT: vwadd.vv v12, v10, v10
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wi v11, v8, 0
; RV32-NEXT: vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wi v12, v8, 0
; RV64-NEXT: vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT: ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v14, v12
; CHECK-NEXT: vsll.vi v12, v14, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vzext.vf2 v16, v12
; CHECK-NEXT: vsll.vi v12, v16, 2
; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v18, v16
; CHECK-NEXT: vsll.vi v16, v18, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
;
RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 848; RV64-NEXT: ret 849 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs 850 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 851 ret void 852} 853 854define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 855; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: 856; RV32: # %bb.0: 857; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 858; RV32-NEXT: vsext.vf2 v20, v16 859; RV32-NEXT: vsll.vi v16, v20, 3 860; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 861; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 862; RV32-NEXT: ret 863; 864; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: 865; RV64: # %bb.0: 866; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 867; RV64-NEXT: vsext.vf4 v24, v16 868; RV64-NEXT: vsll.vi v16, v24, 3 869; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 870; RV64-NEXT: ret 871 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 872 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs 873 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 874 ret void 875} 876 877define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 878; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: 879; CHECK: # %bb.0: 880; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma 881; CHECK-NEXT: vzext.vf2 v20, v16 882; CHECK-NEXT: vsll.vi v16, v20, 3 883; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 884; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t 885; CHECK-NEXT: ret 886 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 887 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs 888 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 
8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 889 ret void 890} 891 892define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 893; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: 894; RV32: # %bb.0: 895; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 896; RV32-NEXT: vsll.vi v16, v16, 3 897; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 898; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 899; RV32-NEXT: ret 900; 901; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: 902; RV64: # %bb.0: 903; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 904; RV64-NEXT: vsext.vf2 v24, v16 905; RV64-NEXT: vsll.vi v16, v24, 3 906; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 907; RV64-NEXT: ret 908 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs 909 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 910 ret void 911} 912 913define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 914; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: 915; RV32: # %bb.0: 916; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 917; RV32-NEXT: vsll.vi v16, v16, 3 918; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 919; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 920; RV32-NEXT: ret 921; 922; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: 923; RV64: # %bb.0: 924; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 925; RV64-NEXT: vsext.vf2 v24, v16 926; RV64-NEXT: vsll.vi v16, v24, 3 927; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 928; RV64-NEXT: ret 929 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 930 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs 931 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 932 ret void 933} 934 935define void 
@mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 936; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: 937; RV32: # %bb.0: 938; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 939; RV32-NEXT: vsll.vi v16, v16, 3 940; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 941; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 942; RV32-NEXT: ret 943; 944; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: 945; RV64: # %bb.0: 946; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 947; RV64-NEXT: vzext.vf2 v24, v16 948; RV64-NEXT: vsll.vi v16, v24, 3 949; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 950; RV64-NEXT: ret 951 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 952 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs 953 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 954 ret void 955} 956 957define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) { 958; RV32-LABEL: mscatter_baseidx_nxv8i64: 959; RV32: # %bb.0: 960; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 961; RV32-NEXT: vnsrl.wi v24, v16, 0 962; RV32-NEXT: vsll.vi v16, v24, 3 963; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 964; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 965; RV32-NEXT: ret 966; 967; RV64-LABEL: mscatter_baseidx_nxv8i64: 968; RV64: # %bb.0: 969; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 970; RV64-NEXT: vsll.vi v16, v16, 3 971; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 972; RV64-NEXT: ret 973 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs 974 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 975 ret void 976} 977 978declare void @llvm.masked.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>) 979 980define void 
@mscatter_nxv1bf16(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) { 981; RV32-LABEL: mscatter_nxv1bf16: 982; RV32: # %bb.0: 983; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma 984; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 985; RV32-NEXT: ret 986; 987; RV64-LABEL: mscatter_nxv1bf16: 988; RV64: # %bb.0: 989; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma 990; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 991; RV64-NEXT: ret 992 call void @llvm.masked.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m) 993 ret void 994} 995 996declare void @llvm.masked.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>) 997 998define void @mscatter_nxv2bf16(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) { 999; RV32-LABEL: mscatter_nxv2bf16: 1000; RV32: # %bb.0: 1001; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma 1002; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1003; RV32-NEXT: ret 1004; 1005; RV64-LABEL: mscatter_nxv2bf16: 1006; RV64: # %bb.0: 1007; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma 1008; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1009; RV64-NEXT: ret 1010 call void @llvm.masked.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m) 1011 ret void 1012} 1013 1014declare void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>) 1015 1016define void @mscatter_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) { 1017; RV32-LABEL: mscatter_nxv4bf16: 1018; RV32: # %bb.0: 1019; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1020; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1021; RV32-NEXT: ret 1022; 1023; RV64-LABEL: mscatter_nxv4bf16: 1024; RV64: # %bb.0: 1025; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1026; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1027; RV64-NEXT: ret 1028 call void 
@llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m) 1029 ret void 1030} 1031 1032define void @mscatter_truemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs) { 1033; RV32-LABEL: mscatter_truemask_nxv4bf16: 1034; RV32: # %bb.0: 1035; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1036; RV32-NEXT: vsoxei32.v v8, (zero), v10 1037; RV32-NEXT: ret 1038; 1039; RV64-LABEL: mscatter_truemask_nxv4bf16: 1040; RV64: # %bb.0: 1041; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1042; RV64-NEXT: vsoxei64.v v8, (zero), v12 1043; RV64-NEXT: ret 1044 call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1)) 1045 ret void 1046} 1047 1048define void @mscatter_falsemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs) { 1049; CHECK-LABEL: mscatter_falsemask_nxv4bf16: 1050; CHECK: # %bb.0: 1051; CHECK-NEXT: ret 1052 call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer) 1053 ret void 1054} 1055 1056declare void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>) 1057 1058define void @mscatter_nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) { 1059; RV32-LABEL: mscatter_nxv8bf16: 1060; RV32: # %bb.0: 1061; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma 1062; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1063; RV32-NEXT: ret 1064; 1065; RV64-LABEL: mscatter_nxv8bf16: 1066; RV64: # %bb.0: 1067; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma 1068; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1069; RV64-NEXT: ret 1070 call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1071 ret void 1072} 1073 1074define void @mscatter_baseidx_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr 
%base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1075; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: 1076; RV32: # %bb.0: 1077; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1078; RV32-NEXT: vsext.vf4 v12, v10 1079; RV32-NEXT: vadd.vv v12, v12, v12 1080; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1081; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1082; RV32-NEXT: ret 1083; 1084; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: 1085; RV64: # %bb.0: 1086; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1087; RV64-NEXT: vsext.vf8 v16, v10 1088; RV64-NEXT: vadd.vv v16, v16, v16 1089; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1090; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1091; RV64-NEXT: ret 1092 %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs 1093 call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1094 ret void 1095} 1096 1097define void @mscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1098; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: 1099; RV32: # %bb.0: 1100; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1101; RV32-NEXT: vsext.vf4 v12, v10 1102; RV32-NEXT: vadd.vv v12, v12, v12 1103; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1104; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1105; RV32-NEXT: ret 1106; 1107; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: 1108; RV64: # %bb.0: 1109; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1110; RV64-NEXT: vsext.vf8 v16, v10 1111; RV64-NEXT: vadd.vv v16, v16, v16 1112; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1113; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1114; RV64-NEXT: ret 1115 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1116 %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs 1117 call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 
2, <vscale x 8 x i1> %m) 1118 ret void 1119} 1120 1121define void @mscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1122; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8bf16: 1123; CHECK: # %bb.0: 1124; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma 1125; CHECK-NEXT: vwaddu.vv v12, v10, v10 1126; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1127; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t 1128; CHECK-NEXT: ret 1129 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1130 %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs 1131 call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1132 ret void 1133} 1134 1135define void @mscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1136; RV32-LABEL: mscatter_baseidx_nxv8bf16: 1137; RV32: # %bb.0: 1138; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma 1139; RV32-NEXT: vwadd.vv v12, v10, v10 1140; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1141; RV32-NEXT: ret 1142; 1143; RV64-LABEL: mscatter_baseidx_nxv8bf16: 1144; RV64: # %bb.0: 1145; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1146; RV64-NEXT: vsext.vf4 v16, v10 1147; RV64-NEXT: vadd.vv v16, v16, v16 1148; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1149; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1150; RV64-NEXT: ret 1151 %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs 1152 call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1153 ret void 1154} 1155 1156declare void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>) 1157 1158define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) { 1159; RV32-LABEL: mscatter_nxv1f16: 1160; RV32: # 
%bb.0: 1161; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma 1162; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1163; RV32-NEXT: ret 1164; 1165; RV64-LABEL: mscatter_nxv1f16: 1166; RV64: # %bb.0: 1167; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma 1168; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1169; RV64-NEXT: ret 1170 call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m) 1171 ret void 1172} 1173 1174declare void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>) 1175 1176define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) { 1177; RV32-LABEL: mscatter_nxv2f16: 1178; RV32: # %bb.0: 1179; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma 1180; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1181; RV32-NEXT: ret 1182; 1183; RV64-LABEL: mscatter_nxv2f16: 1184; RV64: # %bb.0: 1185; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma 1186; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1187; RV64-NEXT: ret 1188 call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m) 1189 ret void 1190} 1191 1192declare void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>) 1193 1194define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) { 1195; RV32-LABEL: mscatter_nxv4f16: 1196; RV32: # %bb.0: 1197; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1198; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1199; RV32-NEXT: ret 1200; 1201; RV64-LABEL: mscatter_nxv4f16: 1202; RV64: # %bb.0: 1203; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1204; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1205; RV64-NEXT: ret 1206 call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m) 1207 ret void 1208} 1209 1210define void 
@mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) { 1211; RV32-LABEL: mscatter_truemask_nxv4f16: 1212; RV32: # %bb.0: 1213; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1214; RV32-NEXT: vsoxei32.v v8, (zero), v10 1215; RV32-NEXT: ret 1216; 1217; RV64-LABEL: mscatter_truemask_nxv4f16: 1218; RV64: # %bb.0: 1219; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma 1220; RV64-NEXT: vsoxei64.v v8, (zero), v12 1221; RV64-NEXT: ret 1222 call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1)) 1223 ret void 1224} 1225 1226define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) { 1227; CHECK-LABEL: mscatter_falsemask_nxv4f16: 1228; CHECK: # %bb.0: 1229; CHECK-NEXT: ret 1230 call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer) 1231 ret void 1232} 1233 1234declare void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>) 1235 1236define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) { 1237; RV32-LABEL: mscatter_nxv8f16: 1238; RV32: # %bb.0: 1239; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma 1240; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1241; RV32-NEXT: ret 1242; 1243; RV64-LABEL: mscatter_nxv8f16: 1244; RV64: # %bb.0: 1245; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma 1246; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1247; RV64-NEXT: ret 1248 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1249 ret void 1250} 1251 1252define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1253; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16: 1254; RV32: # %bb.0: 1255; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1256; 
RV32-NEXT: vsext.vf4 v12, v10 1257; RV32-NEXT: vadd.vv v12, v12, v12 1258; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1259; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1260; RV32-NEXT: ret 1261; 1262; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16: 1263; RV64: # %bb.0: 1264; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1265; RV64-NEXT: vsext.vf8 v16, v10 1266; RV64-NEXT: vadd.vv v16, v16, v16 1267; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1268; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1269; RV64-NEXT: ret 1270 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs 1271 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1272 ret void 1273} 1274 1275define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1276; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16: 1277; RV32: # %bb.0: 1278; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1279; RV32-NEXT: vsext.vf4 v12, v10 1280; RV32-NEXT: vadd.vv v12, v12, v12 1281; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1282; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1283; RV32-NEXT: ret 1284; 1285; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16: 1286; RV64: # %bb.0: 1287; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1288; RV64-NEXT: vsext.vf8 v16, v10 1289; RV64-NEXT: vadd.vv v16, v16, v16 1290; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1291; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1292; RV64-NEXT: ret 1293 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1294 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs 1295 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1296 ret void 1297} 1298 1299define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1300; 
CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: 1301; CHECK: # %bb.0: 1302; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma 1303; CHECK-NEXT: vwaddu.vv v12, v10, v10 1304; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1305; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t 1306; CHECK-NEXT: ret 1307 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1308 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs 1309 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1310 ret void 1311} 1312 1313define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1314; RV32-LABEL: mscatter_baseidx_nxv8f16: 1315; RV32: # %bb.0: 1316; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma 1317; RV32-NEXT: vwadd.vv v12, v10, v10 1318; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1319; RV32-NEXT: ret 1320; 1321; RV64-LABEL: mscatter_baseidx_nxv8f16: 1322; RV64: # %bb.0: 1323; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1324; RV64-NEXT: vsext.vf4 v16, v10 1325; RV64-NEXT: vadd.vv v16, v16, v16 1326; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1327; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1328; RV64-NEXT: ret 1329 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs 1330 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) 1331 ret void 1332} 1333 1334declare void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>) 1335 1336define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) { 1337; RV32-LABEL: mscatter_nxv1f32: 1338; RV32: # %bb.0: 1339; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma 1340; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1341; RV32-NEXT: ret 1342; 1343; RV64-LABEL: mscatter_nxv1f32: 1344; RV64: # %bb.0: 1345; RV64-NEXT: 
vsetvli a0, zero, e32, mf2, ta, ma 1346; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1347; RV64-NEXT: ret 1348 call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m) 1349 ret void 1350} 1351 1352declare void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>) 1353 1354define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) { 1355; RV32-LABEL: mscatter_nxv2f32: 1356; RV32: # %bb.0: 1357; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma 1358; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1359; RV32-NEXT: ret 1360; 1361; RV64-LABEL: mscatter_nxv2f32: 1362; RV64: # %bb.0: 1363; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma 1364; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1365; RV64-NEXT: ret 1366 call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m) 1367 ret void 1368} 1369 1370declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>) 1371 1372define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) { 1373; RV32-LABEL: mscatter_nxv4f32: 1374; RV32: # %bb.0: 1375; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma 1376; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1377; RV32-NEXT: ret 1378; 1379; RV64-LABEL: mscatter_nxv4f32: 1380; RV64: # %bb.0: 1381; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma 1382; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1383; RV64-NEXT: ret 1384 call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m) 1385 ret void 1386} 1387 1388define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) { 1389; RV32-LABEL: mscatter_truemask_nxv4f32: 1390; RV32: # %bb.0: 1391; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma 1392; 
RV32-NEXT: vsoxei32.v v8, (zero), v10 1393; RV32-NEXT: ret 1394; 1395; RV64-LABEL: mscatter_truemask_nxv4f32: 1396; RV64: # %bb.0: 1397; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma 1398; RV64-NEXT: vsoxei64.v v8, (zero), v12 1399; RV64-NEXT: ret 1400 call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1)) 1401 ret void 1402} 1403 1404define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) { 1405; CHECK-LABEL: mscatter_falsemask_nxv4f32: 1406; CHECK: # %bb.0: 1407; CHECK-NEXT: ret 1408 call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer) 1409 ret void 1410} 1411 1412declare void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>) 1413 1414define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) { 1415; RV32-LABEL: mscatter_nxv8f32: 1416; RV32: # %bb.0: 1417; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma 1418; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1419; RV32-NEXT: ret 1420; 1421; RV64-LABEL: mscatter_nxv8f32: 1422; RV64: # %bb.0: 1423; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma 1424; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1425; RV64-NEXT: ret 1426 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1427 ret void 1428} 1429 1430define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1431; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: 1432; RV32: # %bb.0: 1433; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1434; RV32-NEXT: vsext.vf4 v16, v12 1435; RV32-NEXT: vsll.vi v12, v16, 2 1436; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1437; RV32-NEXT: ret 1438; 1439; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: 1440; 
RV64: # %bb.0: 1441; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1442; RV64-NEXT: vsext.vf8 v16, v12 1443; RV64-NEXT: vsll.vi v16, v16, 2 1444; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1445; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1446; RV64-NEXT: ret 1447 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs 1448 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1449 ret void 1450} 1451 1452define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1453; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: 1454; RV32: # %bb.0: 1455; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1456; RV32-NEXT: vsext.vf4 v16, v12 1457; RV32-NEXT: vsll.vi v12, v16, 2 1458; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1459; RV32-NEXT: ret 1460; 1461; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: 1462; RV64: # %bb.0: 1463; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1464; RV64-NEXT: vsext.vf8 v16, v12 1465; RV64-NEXT: vsll.vi v16, v16, 2 1466; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1467; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1468; RV64-NEXT: ret 1469 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1470 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs 1471 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1472 ret void 1473} 1474 1475define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1476; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: 1477; CHECK: # %bb.0: 1478; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma 1479; CHECK-NEXT: vzext.vf2 v14, v12 1480; CHECK-NEXT: vsll.vi v12, v14, 2 1481; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1482; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t 1483; 
CHECK-NEXT: ret 1484 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1485 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs 1486 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1487 ret void 1488} 1489 1490define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1491; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: 1492; RV32: # %bb.0: 1493; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1494; RV32-NEXT: vsext.vf2 v16, v12 1495; RV32-NEXT: vsll.vi v12, v16, 2 1496; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1497; RV32-NEXT: ret 1498; 1499; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: 1500; RV64: # %bb.0: 1501; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1502; RV64-NEXT: vsext.vf4 v16, v12 1503; RV64-NEXT: vsll.vi v16, v16, 2 1504; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1505; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1506; RV64-NEXT: ret 1507 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs 1508 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1509 ret void 1510} 1511 1512define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1513; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: 1514; RV32: # %bb.0: 1515; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1516; RV32-NEXT: vsext.vf2 v16, v12 1517; RV32-NEXT: vsll.vi v12, v16, 2 1518; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1519; RV32-NEXT: ret 1520; 1521; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: 1522; RV64: # %bb.0: 1523; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1524; RV64-NEXT: vsext.vf4 v16, v12 1525; RV64-NEXT: vsll.vi v16, v16, 2 1526; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1527; RV64-NEXT: vsoxei64.v v8, (a0), 
v16, v0.t 1528; RV64-NEXT: ret 1529 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1530 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs 1531 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1532 ret void 1533} 1534 1535define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1536; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: 1537; CHECK: # %bb.0: 1538; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1539; CHECK-NEXT: vzext.vf2 v16, v12 1540; CHECK-NEXT: vsll.vi v12, v16, 2 1541; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1542; CHECK-NEXT: ret 1543 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1544 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs 1545 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1546 ret void 1547} 1548 1549define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 1550; RV32-LABEL: mscatter_baseidx_nxv8f32: 1551; RV32: # %bb.0: 1552; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1553; RV32-NEXT: vsll.vi v12, v12, 2 1554; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1555; RV32-NEXT: ret 1556; 1557; RV64-LABEL: mscatter_baseidx_nxv8f32: 1558; RV64: # %bb.0: 1559; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1560; RV64-NEXT: vsext.vf2 v16, v12 1561; RV64-NEXT: vsll.vi v16, v16, 2 1562; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1563; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1564; RV64-NEXT: ret 1565 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs 1566 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m) 1567 ret void 1568} 1569 1570declare void 
@llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>) 1571 1572define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) { 1573; RV32-LABEL: mscatter_nxv1f64: 1574; RV32: # %bb.0: 1575; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma 1576; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1577; RV32-NEXT: ret 1578; 1579; RV64-LABEL: mscatter_nxv1f64: 1580; RV64: # %bb.0: 1581; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma 1582; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1583; RV64-NEXT: ret 1584 call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m) 1585 ret void 1586} 1587 1588declare void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>) 1589 1590define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) { 1591; RV32-LABEL: mscatter_nxv2f64: 1592; RV32: # %bb.0: 1593; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma 1594; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1595; RV32-NEXT: ret 1596; 1597; RV64-LABEL: mscatter_nxv2f64: 1598; RV64: # %bb.0: 1599; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma 1600; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1601; RV64-NEXT: ret 1602 call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m) 1603 ret void 1604} 1605 1606declare void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>) 1607 1608define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) { 1609; RV32-LABEL: mscatter_nxv4f64: 1610; RV32: # %bb.0: 1611; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1612; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1613; RV32-NEXT: ret 1614; 1615; RV64-LABEL: mscatter_nxv4f64: 1616; RV64: # %bb.0: 1617; RV64-NEXT: 
vsetvli a0, zero, e64, m4, ta, ma 1618; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1619; RV64-NEXT: ret 1620 call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m) 1621 ret void 1622} 1623 1624define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) { 1625; RV32-LABEL: mscatter_truemask_nxv4f64: 1626; RV32: # %bb.0: 1627; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1628; RV32-NEXT: vsoxei32.v v8, (zero), v12 1629; RV32-NEXT: ret 1630; 1631; RV64-LABEL: mscatter_truemask_nxv4f64: 1632; RV64: # %bb.0: 1633; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma 1634; RV64-NEXT: vsoxei64.v v8, (zero), v12 1635; RV64-NEXT: ret 1636 call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1)) 1637 ret void 1638} 1639 1640define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) { 1641; CHECK-LABEL: mscatter_falsemask_nxv4f64: 1642; CHECK: # %bb.0: 1643; CHECK-NEXT: ret 1644 call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer) 1645 ret void 1646} 1647 1648declare void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>) 1649 1650define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) { 1651; RV32-LABEL: mscatter_nxv8f64: 1652; RV32: # %bb.0: 1653; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma 1654; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t 1655; RV32-NEXT: ret 1656; 1657; RV64-LABEL: mscatter_nxv8f64: 1658; RV64: # %bb.0: 1659; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma 1660; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1661; RV64-NEXT: ret 1662 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 
1663 ret void 1664} 1665 1666define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1667; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64: 1668; RV32: # %bb.0: 1669; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1670; RV32-NEXT: vsext.vf4 v20, v16 1671; RV32-NEXT: vsll.vi v16, v20, 3 1672; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1673; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1674; RV32-NEXT: ret 1675; 1676; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64: 1677; RV64: # %bb.0: 1678; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1679; RV64-NEXT: vsext.vf8 v24, v16 1680; RV64-NEXT: vsll.vi v16, v24, 3 1681; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1682; RV64-NEXT: ret 1683 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs 1684 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1685 ret void 1686} 1687 1688define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1689; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64: 1690; RV32: # %bb.0: 1691; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1692; RV32-NEXT: vsext.vf4 v20, v16 1693; RV32-NEXT: vsll.vi v16, v20, 3 1694; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1695; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1696; RV32-NEXT: ret 1697; 1698; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64: 1699; RV64: # %bb.0: 1700; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1701; RV64-NEXT: vsext.vf8 v24, v16 1702; RV64-NEXT: vsll.vi v16, v24, 3 1703; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1704; RV64-NEXT: ret 1705 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 1706 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs 1707 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 
x i1> %m) 1708 ret void 1709} 1710 1711define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1712; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64: 1713; CHECK: # %bb.0: 1714; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma 1715; CHECK-NEXT: vzext.vf2 v18, v16 1716; CHECK-NEXT: vsll.vi v16, v18, 3 1717; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1718; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t 1719; CHECK-NEXT: ret 1720 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 1721 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs 1722 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1723 ret void 1724} 1725 1726define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1727; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64: 1728; RV32: # %bb.0: 1729; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1730; RV32-NEXT: vsext.vf2 v20, v16 1731; RV32-NEXT: vsll.vi v16, v20, 3 1732; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1733; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1734; RV32-NEXT: ret 1735; 1736; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64: 1737; RV64: # %bb.0: 1738; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1739; RV64-NEXT: vsext.vf4 v24, v16 1740; RV64-NEXT: vsll.vi v16, v24, 3 1741; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1742; RV64-NEXT: ret 1743 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs 1744 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1745 ret void 1746} 1747 1748define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1749; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: 1750; RV32: # 
%bb.0: 1751; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1752; RV32-NEXT: vsext.vf2 v20, v16 1753; RV32-NEXT: vsll.vi v16, v20, 3 1754; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1755; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1756; RV32-NEXT: ret 1757; 1758; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: 1759; RV64: # %bb.0: 1760; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1761; RV64-NEXT: vsext.vf4 v24, v16 1762; RV64-NEXT: vsll.vi v16, v24, 3 1763; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1764; RV64-NEXT: ret 1765 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 1766 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs 1767 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1768 ret void 1769} 1770 1771define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1772; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64: 1773; CHECK: # %bb.0: 1774; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1775; CHECK-NEXT: vzext.vf2 v20, v16 1776; CHECK-NEXT: vsll.vi v16, v20, 3 1777; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1778; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1779; CHECK-NEXT: ret 1780 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 1781 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs 1782 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1783 ret void 1784} 1785 1786define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 1787; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64: 1788; RV32: # %bb.0: 1789; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1790; RV32-NEXT: vsll.vi v16, v16, 3 1791; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1792; RV32-NEXT: vsoxei32.v 
v8, (a0), v16, v0.t 1793; RV32-NEXT: ret 1794; 1795; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64: 1796; RV64: # %bb.0: 1797; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1798; RV64-NEXT: vsext.vf2 v24, v16 1799; RV64-NEXT: vsll.vi v16, v24, 3 1800; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1801; RV64-NEXT: ret 1802 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs 1803 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1804 ret void 1805} 1806 1807define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 1808; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64: 1809; RV32: # %bb.0: 1810; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1811; RV32-NEXT: vsll.vi v16, v16, 3 1812; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1813; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1814; RV32-NEXT: ret 1815; 1816; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64: 1817; RV64: # %bb.0: 1818; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1819; RV64-NEXT: vsext.vf2 v24, v16 1820; RV64-NEXT: vsll.vi v16, v24, 3 1821; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1822; RV64-NEXT: ret 1823 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1824 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs 1825 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1826 ret void 1827} 1828 1829define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 1830; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64: 1831; RV32: # %bb.0: 1832; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1833; RV32-NEXT: vsll.vi v16, v16, 3 1834; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1835; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1836; RV32-NEXT: ret 
1837; 1838; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64: 1839; RV64: # %bb.0: 1840; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1841; RV64-NEXT: vzext.vf2 v24, v16 1842; RV64-NEXT: vsll.vi v16, v24, 3 1843; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1844; RV64-NEXT: ret 1845 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1846 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs 1847 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1848 ret void 1849} 1850 1851define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) { 1852; RV32-LABEL: mscatter_baseidx_nxv8f64: 1853; RV32: # %bb.0: 1854; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 1855; RV32-NEXT: vnsrl.wi v24, v16, 0 1856; RV32-NEXT: vsll.vi v16, v24, 3 1857; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma 1858; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1859; RV32-NEXT: ret 1860; 1861; RV64-LABEL: mscatter_baseidx_nxv8f64: 1862; RV64: # %bb.0: 1863; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1864; RV64-NEXT: vsll.vi v16, v16, 3 1865; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1866; RV64-NEXT: ret 1867 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs 1868 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m) 1869 ret void 1870} 1871 1872declare void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>) 1873 1874declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64) 1875declare <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr>, <vscale x 8 x ptr>, i64) 1876 1877define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x ptr> %ptrs0, <vscale x 8 x 
ptr> %ptrs1, <vscale x 16 x i1> %m) { 1878; RV32-LABEL: mscatter_nxv16f64: 1879; RV32: # %bb.0: 1880; RV32-NEXT: vl4re32.v v28, (a1) 1881; RV32-NEXT: vl4re32.v v4, (a0) 1882; RV32-NEXT: csrr a0, vlenb 1883; RV32-NEXT: srli a0, a0, 3 1884; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 1885; RV32-NEXT: vslidedown.vx v24, v0, a0 1886; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma 1887; RV32-NEXT: vsoxei32.v v8, (zero), v4, v0.t 1888; RV32-NEXT: vmv1r.v v0, v24 1889; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t 1890; RV32-NEXT: ret 1891; 1892; RV64-LABEL: mscatter_nxv16f64: 1893; RV64: # %bb.0: 1894; RV64-NEXT: addi sp, sp, -16 1895; RV64-NEXT: .cfi_def_cfa_offset 16 1896; RV64-NEXT: csrr a2, vlenb 1897; RV64-NEXT: slli a2, a2, 5 1898; RV64-NEXT: sub sp, sp, a2 1899; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 1900; RV64-NEXT: csrr a2, vlenb 1901; RV64-NEXT: li a3, 24 1902; RV64-NEXT: mul a2, a2, a3 1903; RV64-NEXT: add a2, sp, a2 1904; RV64-NEXT: addi a2, a2, 16 1905; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill 1906; RV64-NEXT: csrr a2, vlenb 1907; RV64-NEXT: slli a2, a2, 4 1908; RV64-NEXT: add a2, sp, a2 1909; RV64-NEXT: addi a2, a2, 16 1910; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 1911; RV64-NEXT: vl8re64.v v8, (a0) 1912; RV64-NEXT: csrr a0, vlenb 1913; RV64-NEXT: slli a0, a0, 3 1914; RV64-NEXT: add a0, sp, a0 1915; RV64-NEXT: addi a0, a0, 16 1916; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 1917; RV64-NEXT: csrr a0, vlenb 1918; RV64-NEXT: vl8re64.v v8, (a1) 1919; RV64-NEXT: addi a1, sp, 16 1920; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1921; RV64-NEXT: srli a0, a0, 3 1922; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 1923; RV64-NEXT: vslidedown.vx v24, v0, a0 1924; RV64-NEXT: csrr a0, vlenb 1925; RV64-NEXT: slli a0, a0, 4 1926; RV64-NEXT: add a0, sp, a0 1927; RV64-NEXT: addi a0, a0, 16 1928; RV64-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload 1929; RV64-NEXT: csrr a0, vlenb 1930; RV64-NEXT: slli a0, a0, 3 1931; RV64-NEXT: add a0, sp, a0 1932; RV64-NEXT: addi a0, a0, 16 1933; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 1934; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma 1935; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t 1936; RV64-NEXT: vmv1r.v v0, v24 1937; RV64-NEXT: csrr a0, vlenb 1938; RV64-NEXT: li a1, 24 1939; RV64-NEXT: mul a0, a0, a1 1940; RV64-NEXT: add a0, sp, a0 1941; RV64-NEXT: addi a0, a0, 16 1942; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1943; RV64-NEXT: addi a0, sp, 16 1944; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 1945; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t 1946; RV64-NEXT: csrr a0, vlenb 1947; RV64-NEXT: slli a0, a0, 5 1948; RV64-NEXT: add sp, sp, a0 1949; RV64-NEXT: .cfi_def_cfa sp, 16 1950; RV64-NEXT: addi sp, sp, 16 1951; RV64-NEXT: .cfi_def_cfa_offset 0 1952; RV64-NEXT: ret 1953 %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0) 1954 %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8) 1955 %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0) 1956 %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8) 1957 call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %p1, i32 8, <vscale x 16 x i1> %m) 1958 ret void 1959} 1960 1961define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) { 1962; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64: 1963; RV32: # %bb.0: 1964; RV32-NEXT: vl2r.v v4, (a1) 1965; RV32-NEXT: csrr a1, vlenb 1966; RV32-NEXT: srli a1, a1, 3 1967; 
RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 1968; RV32-NEXT: vslidedown.vx v7, v0, a1 1969; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 1970; RV32-NEXT: vsext.vf4 v24, v4 1971; RV32-NEXT: vsll.vi v24, v24, 3 1972; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1973; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1974; RV32-NEXT: vmv1r.v v0, v7 1975; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t 1976; RV32-NEXT: ret 1977; 1978; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64: 1979; RV64: # %bb.0: 1980; RV64-NEXT: vl2r.v v6, (a1) 1981; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1982; RV64-NEXT: vsext.vf8 v24, v6 1983; RV64-NEXT: vsll.vi v24, v24, 3 1984; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t 1985; RV64-NEXT: csrr a1, vlenb 1986; RV64-NEXT: srli a1, a1, 3 1987; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 1988; RV64-NEXT: vslidedown.vx v0, v0, a1 1989; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 1990; RV64-NEXT: vsext.vf8 v8, v7 1991; RV64-NEXT: vsll.vi v8, v8, 3 1992; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t 1993; RV64-NEXT: ret 1994 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs 1995 %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0) 1996 %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8) 1997 call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m) 1998 ret void 1999} 2000 2001define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) { 2002; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: 2003; RV32: # %bb.0: 2004; RV32-NEXT: addi sp, sp, -16 2005; RV32-NEXT: .cfi_def_cfa_offset 16 2006; RV32-NEXT: csrr a2, vlenb 2007; RV32-NEXT: slli a2, a2, 3 2008; RV32-NEXT: sub sp, sp, a2 2009; 
RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 2010; RV32-NEXT: addi a2, sp, 16 2011; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill 2012; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 2013; RV32-NEXT: vmv8r.v v16, v8 2014; RV32-NEXT: vl4re16.v v8, (a1) 2015; RV32-NEXT: csrr a1, vlenb 2016; RV32-NEXT: srli a1, a1, 3 2017; RV32-NEXT: vslidedown.vx v7, v0, a1 2018; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma 2019; RV32-NEXT: vsext.vf2 v24, v8 2020; RV32-NEXT: vsll.vi v8, v24, 3 2021; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma 2022; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t 2023; RV32-NEXT: vmv1r.v v0, v7 2024; RV32-NEXT: addi a1, sp, 16 2025; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2026; RV32-NEXT: vsoxei32.v v16, (a0), v12, v0.t 2027; RV32-NEXT: csrr a0, vlenb 2028; RV32-NEXT: slli a0, a0, 3 2029; RV32-NEXT: add sp, sp, a0 2030; RV32-NEXT: .cfi_def_cfa sp, 16 2031; RV32-NEXT: addi sp, sp, 16 2032; RV32-NEXT: .cfi_def_cfa_offset 0 2033; RV32-NEXT: ret 2034; 2035; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: 2036; RV64: # %bb.0: 2037; RV64-NEXT: vl4re16.v v4, (a1) 2038; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 2039; RV64-NEXT: vsext.vf4 v24, v4 2040; RV64-NEXT: vsll.vi v24, v24, 3 2041; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t 2042; RV64-NEXT: csrr a1, vlenb 2043; RV64-NEXT: srli a1, a1, 3 2044; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 2045; RV64-NEXT: vslidedown.vx v0, v0, a1 2046; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma 2047; RV64-NEXT: vsext.vf4 v8, v6 2048; RV64-NEXT: vsll.vi v8, v8, 3 2049; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t 2050; RV64-NEXT: ret 2051 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs 2052 %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0) 2053 %v1 = call <vscale x 16 x double> 
@llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8) 2054 call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m) 2055 ret void 2056} 2057 2058define void @mscatter_baseidx_zext_nxv1i1_nxv1i8(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i1> %idxs, <vscale x 1 x i1> %m) { 2059; CHECK-LABEL: mscatter_baseidx_zext_nxv1i1_nxv1i8: 2060; CHECK: # %bb.0: 2061; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma 2062; CHECK-NEXT: vmv.v.i v10, 0 2063; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 2064; CHECK-NEXT: vmv1r.v v0, v9 2065; CHECK-NEXT: vsoxei8.v v8, (a0), v10, v0.t 2066; CHECK-NEXT: ret 2067 %eidxs = zext <vscale x 1 x i1> %idxs to <vscale x 1 x i8> 2068 %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 1 x i8> %eidxs 2069 call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m) 2070 ret void 2071} 2072