; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN

; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN

declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v1i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v1i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v1i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vfirst.m a1, v0
; RV64ZVE32F-NEXT:    bnez a1, .LBB0_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:  .LBB0_2: # %else
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> %val, <1 x ptr> %ptrs, i32 1, <1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v2i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    bnez a3, .LBB1_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB1_4
; RV64ZVE32F-NEXT:  .LBB1_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB1_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB1_2
; RV64ZVE32F-NEXT:  .LBB1_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    bnez a3, .LBB2_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB2_4
; RV64ZVE32F-NEXT:  .LBB2_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB2_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB2_2
; RV64ZVE32F-NEXT:  .LBB2_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    bnez a3, .LBB3_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB3_4
; RV64ZVE32F-NEXT:  .LBB3_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB3_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB3_2
; RV64ZVE32F-NEXT:  .LBB3_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    lw a1, 8(a0)
; RV32ZVE32F-NEXT:    lw a0, 0(a0)
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
; RV32ZVE32F-NEXT:    vmv.s.x v10, a0
; RV32ZVE32F-NEXT:    vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT:    vsoxei32.v v10, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a1
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
; RV64ZVE32F-NEXT:    andi a1, a0, 1
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    bnez a1, .LBB4_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB4_4
; RV64ZVE32F-NEXT:  .LBB4_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB4_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a2)
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB4_2
; RV64ZVE32F-NEXT:  .LBB4_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a3)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_v4i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v4i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a4, 8(a0)
; RV64ZVE32F-NEXT:    ld a2, 16(a0)
; RV64ZVE32F-NEXT:    ld a1, 24(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a3, v0
; RV64ZVE32F-NEXT:    andi a5, a3, 1
; RV64ZVE32F-NEXT:    bnez a5, .LBB5_5
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB5_6
; RV64ZVE32F-NEXT:  .LBB5_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB5_7
; RV64ZVE32F-NEXT:  .LBB5_3: # %else4
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    bnez a3, .LBB5_8
; RV64ZVE32F-NEXT:  .LBB5_4: # %else6
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB5_5: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB5_2
; RV64ZVE32F-NEXT:  .LBB5_6: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v9, (a4)
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB5_3
; RV64ZVE32F-NEXT:  .LBB5_7: # %cond.store3
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    beqz a3, .LBB5_4
; RV64ZVE32F-NEXT:  .LBB5_8: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %m)
  ret void
}

define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_truemask_v4i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 0(a0)
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    ld a3, 16(a0)
; RV64ZVE32F-NEXT:    ld a0, 24(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v9, (a3)
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_v8i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v8i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a3, 40(a0)
; RV64ZVE32F-NEXT:    ld a2, 48(a0)
; RV64ZVE32F-NEXT:    ld a1, 56(a0)
; RV64ZVE32F-NEXT:    ld t0, 8(a0)
; RV64ZVE32F-NEXT:    ld a7, 16(a0)
; RV64ZVE32F-NEXT:    ld a6, 24(a0)
; RV64ZVE32F-NEXT:    ld a5, 32(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
; RV64ZVE32F-NEXT:    andi t1, a4, 1
; RV64ZVE32F-NEXT:    bnez t1, .LBB8_9
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_10
; RV64ZVE32F-NEXT:  .LBB8_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_11
; RV64ZVE32F-NEXT:  .LBB8_3: # %else4
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_12
; RV64ZVE32F-NEXT:  .LBB8_4: # %else6
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_13
; RV64ZVE32F-NEXT:  .LBB8_5: # %else8
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_14
; RV64ZVE32F-NEXT:  .LBB8_6: # %else10
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_15
; RV64ZVE32F-NEXT:  .LBB8_7: # %else12
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_16
; RV64ZVE32F-NEXT:  .LBB8_8: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB8_9: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_2
; RV64ZVE32F-NEXT:  .LBB8_10: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v9, (t0)
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_3
; RV64ZVE32F-NEXT:  .LBB8_11: # %cond.store3
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse8.v v9, (a7)
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_4
; RV64ZVE32F-NEXT:  .LBB8_12: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v9, (a6)
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_5
; RV64ZVE32F-NEXT:  .LBB8_13: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse8.v v9, (a5)
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_6
; RV64ZVE32F-NEXT:  .LBB8_14: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse8.v v9, (a3)
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_7
; RV64ZVE32F-NEXT:  .LBB8_15: # %cond.store11
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_8
; RV64ZVE32F-NEXT:  .LBB8_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf8 v12, v9
; RV64V-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a2)
; RV64ZVE32F-NEXT:  .LBB9_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v10, (a2)
; RV64ZVE32F-NEXT:  .LBB9_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_13
; RV64ZVE32F-NEXT:  .LBB9_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_14
; RV64ZVE32F-NEXT:  .LBB9_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_9
; RV64ZVE32F-NEXT:  .LBB9_8: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:  .LBB9_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB9_16
; RV64ZVE32F-NEXT:  .LBB9_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB9_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT:    vse8.v v11, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_6
; RV64ZVE32F-NEXT:  .LBB9_13: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_7
; RV64ZVE32F-NEXT:  .LBB9_14: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_8
; RV64ZVE32F-NEXT:    j .LBB9_9
; RV64ZVE32F-NEXT:  .LBB9_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT:    vse8.v v10, (a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB9_11
; RV64ZVE32F-NEXT:  .LBB9_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v1i16.v1p0(<1 x i16>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v1i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v1i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v1i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vfirst.m a1, v0
; RV64ZVE32F-NEXT:    bnez a1, .LBB10_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:  .LBB10_2: # %else
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v2i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    bnez a3, .LBB11_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_4
; RV64ZVE32F-NEXT:  .LBB11_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB11_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_2
; RV64ZVE32F-NEXT:  .LBB11_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    bnez a3, .LBB12_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB12_4
; RV64ZVE32F-NEXT:  .LBB12_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB12_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_2
; RV64ZVE32F-NEXT:  .LBB12_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT:    vnsrl.wi v8, v8, 0
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    lw a1, 8(a0)
; RV32ZVE32F-NEXT:    lw a0, 0(a0)
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
; RV32ZVE32F-NEXT:    vmv.s.x v10, a0
; RV32ZVE32F-NEXT:    vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT:    vsoxei32.v v10, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a1
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
; RV64ZVE32F-NEXT:    andi a1, a0, 1
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    bnez a1, .LBB13_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB13_4
; RV64ZVE32F-NEXT:  .LBB13_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB13_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a2)
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB13_2
; RV64ZVE32F-NEXT:  .LBB13_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v8, (a3)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_v4i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v4i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a4, 8(a0)
; RV64ZVE32F-NEXT:    ld a2, 16(a0)
; RV64ZVE32F-NEXT:    ld a1, 24(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a3, v0
; RV64ZVE32F-NEXT:    andi a5, a3, 1
; RV64ZVE32F-NEXT:    bnez a5, .LBB14_5
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB14_6
; RV64ZVE32F-NEXT:  .LBB14_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB14_7
; RV64ZVE32F-NEXT:  .LBB14_3: # %else4
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    bnez a3, .LBB14_8
; RV64ZVE32F-NEXT:  .LBB14_4: # %else6
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB14_5: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB14_2
; RV64ZVE32F-NEXT:  .LBB14_6: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v9, (a4)
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB14_3
; RV64ZVE32F-NEXT:  .LBB14_7: # %cond.store3
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    beqz a3, .LBB14_4
; RV64ZVE32F-NEXT:  .LBB14_8: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
  ret void
}

define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_truemask_v4i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v10
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 0(a0)
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    ld a3, 16(a0)
; RV64ZVE32F-NEXT:    ld a0, 24(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v9, (a3)
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_v8i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v8i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a3, 40(a0)
; RV64ZVE32F-NEXT:    ld a2, 48(a0)
; RV64ZVE32F-NEXT:    ld a1, 56(a0)
; RV64ZVE32F-NEXT:    ld t0, 8(a0)
; RV64ZVE32F-NEXT:    ld a7, 16(a0)
; RV64ZVE32F-NEXT:    ld a6, 24(a0)
; RV64ZVE32F-NEXT:    ld a5, 32(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
; RV64ZVE32F-NEXT:    andi t1, a4, 1
; RV64ZVE32F-NEXT:    bnez t1, .LBB17_9
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB17_10
; RV64ZVE32F-NEXT:  .LBB17_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB17_11
; RV64ZVE32F-NEXT:  .LBB17_3: # %else4
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    bnez a0, .LBB17_12
; RV64ZVE32F-NEXT:  .LBB17_4: # %else6
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    bnez a0, .LBB17_13
; RV64ZVE32F-NEXT:  .LBB17_5: # %else8
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    bnez a0, .LBB17_14
; RV64ZVE32F-NEXT:  .LBB17_6: # %else10
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    bnez a0, .LBB17_15
; RV64ZVE32F-NEXT:  .LBB17_7: # %else12
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    bnez a0, .LBB17_16
; RV64ZVE32F-NEXT:  .LBB17_8: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB17_9: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB17_2
; RV64ZVE32F-NEXT:  .LBB17_10: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v9, (t0)
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB17_3
; RV64ZVE32F-NEXT:  .LBB17_11: # %cond.store3
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v9, (a7)
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    beqz a0, .LBB17_4
; RV64ZVE32F-NEXT:  .LBB17_12: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v9, (a6)
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    beqz a0, .LBB17_5
; RV64ZVE32F-NEXT:  .LBB17_13: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse16.v v9, (a5)
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    beqz a0, .LBB17_6
; RV64ZVE32F-NEXT:  .LBB17_14: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse16.v v9, (a3)
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    beqz a0, .LBB17_7
; RV64ZVE32F-NEXT:  .LBB17_15: # %cond.store11
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    beqz a0, .LBB17_8
; RV64ZVE32F-NEXT:  .LBB17_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf8 v12, v9
; RV64V-NEXT:    vadd.vv v12, v12, v12
; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB18_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a2)
; RV64ZVE32F-NEXT:  .LBB18_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB18_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:  .LBB18_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB18_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB18_13
; RV64ZVE32F-NEXT:  .LBB18_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB18_14
; RV64ZVE32F-NEXT:  .LBB18_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB18_9
; RV64ZVE32F-NEXT:  .LBB18_8: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:  .LBB18_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB18_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB18_16
; RV64ZVE32F-NEXT:  .LBB18_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB18_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v11, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB18_6
; RV64ZVE32F-NEXT:  .LBB18_13: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB18_7
; RV64ZVE32F-NEXT:  .LBB18_14: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB18_8
; RV64ZVE32F-NEXT:    j .LBB18_9
; RV64ZVE32F-NEXT:  .LBB18_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB18_11
; RV64ZVE32F-NEXT:  .LBB18_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
; RV64ZVE32F-NEXT:    slli a1, a1, 1
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf8 v12, v9
; RV64V-NEXT:    vadd.vv v12, v12, v12
; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB19_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a2)
; RV64ZVE32F-NEXT:  .LBB19_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB19_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:  .LBB19_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_13
; RV64ZVE32F-NEXT:  .LBB19_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_14
; RV64ZVE32F-NEXT:  .LBB19_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB19_9
; RV64ZVE32F-NEXT:  .LBB19_8: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:  .LBB19_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB19_16
; RV64ZVE32F-NEXT:  .LBB19_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB19_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v11, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB19_6
; RV64ZVE32F-NEXT:  .LBB19_13: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB19_7
; RV64ZVE32F-NEXT:  .LBB19_14: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB19_8
; RV64ZVE32F-NEXT:    j .LBB19_9
; RV64ZVE32F-NEXT:  .LBB19_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB19_11
; RV64ZVE32F-NEXT:  .LBB19_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
; RV64ZVE32F-NEXT:    slli a1, a1, 1
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT:    vwaddu.vv v10, v9, v9
; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB20_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a2)
; RV64ZVE32F-NEXT:  .LBB20_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB20_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:  .LBB20_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB20_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB20_13
; RV64ZVE32F-NEXT:  .LBB20_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB20_14
; RV64ZVE32F-NEXT:  .LBB20_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB20_9
; RV64ZVE32F-NEXT:  .LBB20_8: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:  .LBB20_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB20_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB20_16
; RV64ZVE32F-NEXT:  .LBB20_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB20_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v11, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB20_6
; RV64ZVE32F-NEXT:  .LBB20_13: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB20_7
; RV64ZVE32F-NEXT:  .LBB20_14: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB20_8
; RV64ZVE32F-NEXT:    j .LBB20_9
; RV64ZVE32F-NEXT:  .LBB20_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB20_11
; RV64ZVE32F-NEXT:  .LBB20_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
; RV64ZVE32F-NEXT:    andi a1, a1, 255
; RV64ZVE32F-NEXT:    slli a1, a1, 1
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v9, v9
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_v8i16:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf4 v12, v9
; RV64V-NEXT:    vadd.vv v12, v12, v12
; RV64V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB21_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vse16.v v8, (a2)
; RV64ZVE32F-NEXT:  .LBB21_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB21_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:  .LBB21_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB21_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB21_13
; RV64ZVE32F-NEXT:  .LBB21_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB21_14
; RV64ZVE32F-NEXT:  .LBB21_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB21_9
; RV64ZVE32F-NEXT:  .LBB21_8: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:  .LBB21_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB21_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB21_16
; RV64ZVE32F-NEXT:  .LBB21_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB21_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v11, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB21_6
; RV64ZVE32F-NEXT:  .LBB21_13: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB21_7
; RV64ZVE32F-NEXT:  .LBB21_14: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB21_8
; RV64ZVE32F-NEXT:    j .LBB21_9
; RV64ZVE32F-NEXT:  .LBB21_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 1
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT:    vse16.v v10, (a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB21_11
; RV64ZVE32F-NEXT:  .LBB21_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
; RV64ZVE32F-NEXT:    slli a1, a1, 1
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v1i32.v1p0(<1 x i32>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i32:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v1i32:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v1i32:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v1i32:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vfirst.m a1, v0
; RV64ZVE32F-NEXT:    bnez a1, .LBB22_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT:    vse32.v v8, (a0)
; RV64ZVE32F-NEXT:  .LBB22_2: # %else
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
  ret void
}

declare
void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>) 1565 1566define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) { 1567; RV32V-LABEL: mscatter_v2i32: 1568; RV32V: # %bb.0: 1569; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 1570; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1571; RV32V-NEXT: ret 1572; 1573; RV64V-LABEL: mscatter_v2i32: 1574; RV64V: # %bb.0: 1575; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 1576; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1577; RV64V-NEXT: ret 1578; 1579; RV32ZVE32F-LABEL: mscatter_v2i32: 1580; RV32ZVE32F: # %bb.0: 1581; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 1582; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1583; RV32ZVE32F-NEXT: ret 1584; 1585; RV64ZVE32F-LABEL: mscatter_v2i32: 1586; RV64ZVE32F: # %bb.0: 1587; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1588; RV64ZVE32F-NEXT: vmv.x.s a2, v0 1589; RV64ZVE32F-NEXT: andi a3, a2, 1 1590; RV64ZVE32F-NEXT: bnez a3, .LBB23_3 1591; RV64ZVE32F-NEXT: # %bb.1: # %else 1592; RV64ZVE32F-NEXT: andi a2, a2, 2 1593; RV64ZVE32F-NEXT: bnez a2, .LBB23_4 1594; RV64ZVE32F-NEXT: .LBB23_2: # %else2 1595; RV64ZVE32F-NEXT: ret 1596; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store 1597; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1598; RV64ZVE32F-NEXT: vse32.v v8, (a0) 1599; RV64ZVE32F-NEXT: andi a2, a2, 2 1600; RV64ZVE32F-NEXT: beqz a2, .LBB23_2 1601; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1 1602; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1603; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1604; RV64ZVE32F-NEXT: vse32.v v8, (a1) 1605; RV64ZVE32F-NEXT: ret 1606 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m) 1607 ret void 1608} 1609 1610define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) { 1611; RV32V-LABEL: mscatter_v2i64_truncstore_v2i32: 1612; RV32V: # %bb.0: 1613; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 1614; RV32V-NEXT: vnsrl.wi v8, v8, 0 1615; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1616; RV32V-NEXT: ret 1617; 1618; RV64V-LABEL: mscatter_v2i64_truncstore_v2i32: 1619; RV64V: # %bb.0: 1620; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma 1621; RV64V-NEXT: vnsrl.wi v8, v8, 0 1622; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1623; RV64V-NEXT: ret 1624; 1625; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32: 1626; RV32ZVE32F: # %bb.0: 1627; RV32ZVE32F-NEXT: lw a1, 0(a0) 1628; RV32ZVE32F-NEXT: lw a0, 8(a0) 1629; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 1630; RV32ZVE32F-NEXT: vmv.v.x v9, a1 1631; RV32ZVE32F-NEXT: vslide1down.vx v9, v9, a0 1632; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t 1633; RV32ZVE32F-NEXT: ret 1634; 1635; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32: 1636; RV64ZVE32F: # %bb.0: 1637; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 1638; RV64ZVE32F-NEXT: vmv.v.x v8, a0 1639; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma 1640; RV64ZVE32F-NEXT: vmv.x.s a0, v0 1641; RV64ZVE32F-NEXT: andi a4, a0, 1 1642; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 1643; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 1644; RV64ZVE32F-NEXT: bnez a4, .LBB24_3 1645; RV64ZVE32F-NEXT: # %bb.1: # %else 1646; RV64ZVE32F-NEXT: andi a0, a0, 2 1647; RV64ZVE32F-NEXT: bnez a0, .LBB24_4 1648; RV64ZVE32F-NEXT: .LBB24_2: # %else2 1649; RV64ZVE32F-NEXT: ret 1650; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store 1651; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1652; RV64ZVE32F-NEXT: vse32.v v8, (a2) 1653; RV64ZVE32F-NEXT: andi a0, a0, 2 1654; 
RV64ZVE32F-NEXT: beqz a0, .LBB24_2 1655; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1 1656; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1657; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1658; RV64ZVE32F-NEXT: vse32.v v8, (a3) 1659; RV64ZVE32F-NEXT: ret 1660 %tval = trunc <2 x i64> %val to <2 x i32> 1661 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, i32 4, <2 x i1> %m) 1662 ret void 1663} 1664 1665declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) 1666 1667define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) { 1668; RV32-LABEL: mscatter_v4i32: 1669; RV32: # %bb.0: 1670; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1671; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1672; RV32-NEXT: ret 1673; 1674; RV64V-LABEL: mscatter_v4i32: 1675; RV64V: # %bb.0: 1676; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1677; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1678; RV64V-NEXT: ret 1679; 1680; RV64ZVE32F-LABEL: mscatter_v4i32: 1681; RV64ZVE32F: # %bb.0: 1682; RV64ZVE32F-NEXT: ld a4, 8(a0) 1683; RV64ZVE32F-NEXT: ld a2, 16(a0) 1684; RV64ZVE32F-NEXT: ld a1, 24(a0) 1685; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1686; RV64ZVE32F-NEXT: vmv.x.s a3, v0 1687; RV64ZVE32F-NEXT: andi a5, a3, 1 1688; RV64ZVE32F-NEXT: bnez a5, .LBB25_5 1689; RV64ZVE32F-NEXT: # %bb.1: # %else 1690; RV64ZVE32F-NEXT: andi a0, a3, 2 1691; RV64ZVE32F-NEXT: bnez a0, .LBB25_6 1692; RV64ZVE32F-NEXT: .LBB25_2: # %else2 1693; RV64ZVE32F-NEXT: andi a0, a3, 4 1694; RV64ZVE32F-NEXT: bnez a0, .LBB25_7 1695; RV64ZVE32F-NEXT: .LBB25_3: # %else4 1696; RV64ZVE32F-NEXT: andi a3, a3, 8 1697; RV64ZVE32F-NEXT: bnez a3, .LBB25_8 1698; RV64ZVE32F-NEXT: .LBB25_4: # %else6 1699; RV64ZVE32F-NEXT: ret 1700; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store 1701; RV64ZVE32F-NEXT: ld a0, 0(a0) 1702; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1703; RV64ZVE32F-NEXT: vse32.v v8, (a0) 1704; RV64ZVE32F-NEXT: andi a0, a3, 2 1705; RV64ZVE32F-NEXT: beqz a0, .LBB25_2 1706; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1 1707; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1708; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 1709; RV64ZVE32F-NEXT: vse32.v v9, (a4) 1710; RV64ZVE32F-NEXT: andi a0, a3, 4 1711; RV64ZVE32F-NEXT: beqz a0, .LBB25_3 1712; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3 1713; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1714; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 1715; RV64ZVE32F-NEXT: vse32.v v9, (a2) 1716; RV64ZVE32F-NEXT: andi a3, a3, 8 1717; RV64ZVE32F-NEXT: beqz a3, .LBB25_4 1718; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5 1719; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1720; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 1721; RV64ZVE32F-NEXT: vse32.v v8, (a1) 1722; RV64ZVE32F-NEXT: ret 1723 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m) 1724 ret void 1725} 1726 1727define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { 1728; RV32-LABEL: mscatter_truemask_v4i32: 1729; RV32: # %bb.0: 1730; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1731; RV32-NEXT: vsoxei32.v v8, (zero), v9 1732; RV32-NEXT: ret 1733; 1734; RV64V-LABEL: mscatter_truemask_v4i32: 1735; RV64V: # %bb.0: 1736; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1737; RV64V-NEXT: vsoxei64.v v8, (zero), v10 1738; RV64V-NEXT: ret 1739; 1740; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: 1741; RV64ZVE32F: # %bb.0: 1742; RV64ZVE32F-NEXT: ld a1, 0(a0) 1743; RV64ZVE32F-NEXT: ld a2, 8(a0) 1744; RV64ZVE32F-NEXT: ld a3, 16(a0) 1745; RV64ZVE32F-NEXT: 
ld a0, 24(a0) 1746; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1747; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 1748; RV64ZVE32F-NEXT: vse32.v v8, (a1) 1749; RV64ZVE32F-NEXT: vse32.v v9, (a2) 1750; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 1751; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 1752; RV64ZVE32F-NEXT: vse32.v v9, (a3) 1753; RV64ZVE32F-NEXT: vse32.v v8, (a0) 1754; RV64ZVE32F-NEXT: ret 1755 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) 1756 ret void 1757} 1758 1759define void @mscatter_falsemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { 1760; CHECK-LABEL: mscatter_falsemask_v4i32: 1761; CHECK: # %bb.0: 1762; CHECK-NEXT: ret 1763 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer) 1764 ret void 1765} 1766 1767declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>) 1768 1769define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) { 1770; RV32-LABEL: mscatter_v8i32: 1771; RV32: # %bb.0: 1772; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1773; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1774; RV32-NEXT: ret 1775; 1776; RV64V-LABEL: mscatter_v8i32: 1777; RV64V: # %bb.0: 1778; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1779; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1780; RV64V-NEXT: ret 1781; 1782; RV64ZVE32F-LABEL: mscatter_v8i32: 1783; RV64ZVE32F: # %bb.0: 1784; RV64ZVE32F-NEXT: ld a3, 40(a0) 1785; RV64ZVE32F-NEXT: ld a2, 48(a0) 1786; RV64ZVE32F-NEXT: ld a1, 56(a0) 1787; RV64ZVE32F-NEXT: ld t0, 8(a0) 1788; RV64ZVE32F-NEXT: ld a7, 16(a0) 1789; RV64ZVE32F-NEXT: ld a6, 24(a0) 1790; RV64ZVE32F-NEXT: ld a5, 32(a0) 1791; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1792; RV64ZVE32F-NEXT: vmv.x.s a4, v0 1793; RV64ZVE32F-NEXT: andi t1, a4, 1 1794; RV64ZVE32F-NEXT: bnez t1, .LBB28_9 1795; RV64ZVE32F-NEXT: # %bb.1: # %else 1796; RV64ZVE32F-NEXT: andi a0, a4, 2 1797; RV64ZVE32F-NEXT: bnez a0, .LBB28_10 1798; RV64ZVE32F-NEXT: .LBB28_2: # %else2 1799; RV64ZVE32F-NEXT: andi a0, a4, 4 1800; RV64ZVE32F-NEXT: bnez a0, .LBB28_11 1801; RV64ZVE32F-NEXT: .LBB28_3: # %else4 1802; RV64ZVE32F-NEXT: andi a0, a4, 8 1803; RV64ZVE32F-NEXT: bnez a0, .LBB28_12 1804; RV64ZVE32F-NEXT: .LBB28_4: # %else6 1805; RV64ZVE32F-NEXT: andi a0, a4, 16 1806; RV64ZVE32F-NEXT: bnez a0, .LBB28_13 1807; RV64ZVE32F-NEXT: .LBB28_5: # %else8 1808; RV64ZVE32F-NEXT: andi a0, a4, 32 1809; RV64ZVE32F-NEXT: bnez a0, .LBB28_14 1810; RV64ZVE32F-NEXT: .LBB28_6: # %else10 1811; RV64ZVE32F-NEXT: andi a0, a4, 64 1812; RV64ZVE32F-NEXT: bnez a0, .LBB28_15 1813; RV64ZVE32F-NEXT: .LBB28_7: # %else12 1814; RV64ZVE32F-NEXT: andi a0, a4, -128 1815; RV64ZVE32F-NEXT: bnez a0, .LBB28_16 1816; RV64ZVE32F-NEXT: .LBB28_8: # %else14 1817; RV64ZVE32F-NEXT: ret 1818; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store 1819; RV64ZVE32F-NEXT: ld a0, 0(a0) 1820; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1821; RV64ZVE32F-NEXT: vse32.v v8, (a0) 1822; RV64ZVE32F-NEXT: andi a0, a4, 2 1823; RV64ZVE32F-NEXT: beqz a0, .LBB28_2 1824; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1 1825; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1826; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 1827; RV64ZVE32F-NEXT: vse32.v v10, (t0) 1828; RV64ZVE32F-NEXT: andi a0, a4, 4 1829; RV64ZVE32F-NEXT: beqz a0, .LBB28_3 1830; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3 1831; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1832; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 1833; RV64ZVE32F-NEXT: vse32.v v10, (a7) 1834; 
RV64ZVE32F-NEXT: andi a0, a4, 8 1835; RV64ZVE32F-NEXT: beqz a0, .LBB28_4 1836; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5 1837; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1838; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 1839; RV64ZVE32F-NEXT: vse32.v v10, (a6) 1840; RV64ZVE32F-NEXT: andi a0, a4, 16 1841; RV64ZVE32F-NEXT: beqz a0, .LBB28_5 1842; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7 1843; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 1844; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 1845; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1846; RV64ZVE32F-NEXT: vse32.v v10, (a5) 1847; RV64ZVE32F-NEXT: andi a0, a4, 32 1848; RV64ZVE32F-NEXT: beqz a0, .LBB28_6 1849; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9 1850; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 1851; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 1852; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1853; RV64ZVE32F-NEXT: vse32.v v10, (a3) 1854; RV64ZVE32F-NEXT: andi a0, a4, 64 1855; RV64ZVE32F-NEXT: beqz a0, .LBB28_7 1856; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11 1857; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 1858; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 1859; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1860; RV64ZVE32F-NEXT: vse32.v v10, (a2) 1861; RV64ZVE32F-NEXT: andi a0, a4, -128 1862; RV64ZVE32F-NEXT: beqz a0, .LBB28_8 1863; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13 1864; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 1865; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 1866; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1867; RV64ZVE32F-NEXT: vse32.v v8, (a1) 1868; RV64ZVE32F-NEXT: ret 1869 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 1870 ret void 1871} 1872 1873define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 1874; RV32-LABEL: mscatter_baseidx_v8i8_v8i32: 1875; RV32: # %bb.0: 1876; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1877; RV32-NEXT: vsext.vf4 v12, v10 1878; RV32-NEXT: vsll.vi v10, v12, 2 1879; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 1880; RV32-NEXT: ret 1881; 1882; RV64V-LABEL: mscatter_baseidx_v8i8_v8i32: 1883; RV64V: # %bb.0: 1884; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1885; RV64V-NEXT: vsext.vf8 v12, v10 1886; RV64V-NEXT: vsll.vi v12, v12, 2 1887; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1888; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 1889; RV64V-NEXT: ret 1890; 1891; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32: 1892; RV64ZVE32F: # %bb.0: 1893; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1894; RV64ZVE32F-NEXT: vmv.x.s a1, v0 1895; RV64ZVE32F-NEXT: andi a2, a1, 1 1896; RV64ZVE32F-NEXT: beqz a2, .LBB29_2 1897; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 1898; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1899; RV64ZVE32F-NEXT: slli a2, a2, 2 1900; RV64ZVE32F-NEXT: add a2, a0, a2 1901; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1902; RV64ZVE32F-NEXT: vse32.v v8, (a2) 1903; RV64ZVE32F-NEXT: .LBB29_2: # %else 1904; RV64ZVE32F-NEXT: andi a2, a1, 2 1905; RV64ZVE32F-NEXT: beqz a2, .LBB29_4 1906; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 1907; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1908; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 1909; RV64ZVE32F-NEXT: vmv.x.s a2, v11 1910; RV64ZVE32F-NEXT: slli a2, a2, 2 1911; RV64ZVE32F-NEXT: add a2, a0, a2 1912; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 1913; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 1914; RV64ZVE32F-NEXT: vse32.v v11, (a2) 1915; RV64ZVE32F-NEXT: .LBB29_4: # %else2 1916; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 1917; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 1918; RV64ZVE32F-NEXT: andi a2, a1, 4 1919; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1920; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 1921; RV64ZVE32F-NEXT: bnez a2, .LBB29_12 1922; RV64ZVE32F-NEXT: # %bb.5: # %else4 1923; RV64ZVE32F-NEXT: andi a2, a1, 8 1924; RV64ZVE32F-NEXT: bnez a2, .LBB29_13 1925; RV64ZVE32F-NEXT: .LBB29_6: # %else6 1926; RV64ZVE32F-NEXT: andi a2, a1, 16 1927; RV64ZVE32F-NEXT: bnez a2, .LBB29_14 1928; RV64ZVE32F-NEXT: .LBB29_7: # %else8 1929; RV64ZVE32F-NEXT: andi a2, a1, 32 1930; RV64ZVE32F-NEXT: beqz a2, .LBB29_9 1931; RV64ZVE32F-NEXT: .LBB29_8: # %cond.store9 1932; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1933; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 1934; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1935; RV64ZVE32F-NEXT: slli a2, a2, 2 1936; RV64ZVE32F-NEXT: add a2, a0, a2 1937; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 1938; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 1939; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1940; RV64ZVE32F-NEXT: vse32.v v12, (a2) 1941; RV64ZVE32F-NEXT: .LBB29_9: # %else10 1942; RV64ZVE32F-NEXT: andi a2, a1, 64 1943; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1944; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 1945; RV64ZVE32F-NEXT: bnez a2, .LBB29_15 1946; RV64ZVE32F-NEXT: # %bb.10: # %else12 1947; RV64ZVE32F-NEXT: andi a1, a1, -128 1948; RV64ZVE32F-NEXT: bnez a1, .LBB29_16 1949; RV64ZVE32F-NEXT: .LBB29_11: # %else14 1950; RV64ZVE32F-NEXT: ret 1951; RV64ZVE32F-NEXT: .LBB29_12: # %cond.store3 1952; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1953; RV64ZVE32F-NEXT: slli a2, a2, 2 1954; RV64ZVE32F-NEXT: add a2, a0, a2 1955; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 1956; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 1957; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1958; RV64ZVE32F-NEXT: vse32.v v12, (a2) 1959; RV64ZVE32F-NEXT: andi a2, a1, 8 1960; RV64ZVE32F-NEXT: beqz a2, .LBB29_6 1961; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5 1962; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1963; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 1964; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1965; RV64ZVE32F-NEXT: slli a2, a2, 2 1966; RV64ZVE32F-NEXT: add a2, a0, a2 1967; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 1968; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 1969; RV64ZVE32F-NEXT: vse32.v v10, (a2) 1970; RV64ZVE32F-NEXT: andi a2, a1, 16 1971; RV64ZVE32F-NEXT: beqz a2, .LBB29_7 1972; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7 1973; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1974; RV64ZVE32F-NEXT: vmv.x.s a2, v11 1975; RV64ZVE32F-NEXT: slli a2, a2, 2 1976; RV64ZVE32F-NEXT: add a2, a0, a2 1977; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 1978; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 1979; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1980; RV64ZVE32F-NEXT: vse32.v v12, (a2) 1981; RV64ZVE32F-NEXT: andi a2, a1, 32 1982; RV64ZVE32F-NEXT: bnez a2, .LBB29_8 1983; RV64ZVE32F-NEXT: j .LBB29_9 1984; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11 1985; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1986; RV64ZVE32F-NEXT: slli a2, a2, 2 1987; RV64ZVE32F-NEXT: add a2, a0, a2 1988; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 1989; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 1990; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 1991; RV64ZVE32F-NEXT: vse32.v v12, (a2) 1992; RV64ZVE32F-NEXT: andi a1, a1, -128 1993; RV64ZVE32F-NEXT: beqz a1, .LBB29_11 1994; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13 1995; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1996; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 1997; RV64ZVE32F-NEXT: vmv.x.s a1, v10 1998; RV64ZVE32F-NEXT: slli a1, a1, 2 1999; RV64ZVE32F-NEXT: add a0, a0, a1 2000; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2001; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 2002; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2003; RV64ZVE32F-NEXT: vse32.v v8, (a0) 2004; RV64ZVE32F-NEXT: ret 2005 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs 2006 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 2007 ret void 2008} 2009 2010define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 2011; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i32: 2012; RV32: # %bb.0: 2013; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2014; RV32-NEXT: vsext.vf4 v12, v10 2015; RV32-NEXT: vsll.vi v10, v12, 2 2016; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 2017; RV32-NEXT: ret 2018; 2019; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i32: 2020; RV64V: # %bb.0: 2021; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2022; RV64V-NEXT: vsext.vf8 v12, v10 2023; RV64V-NEXT: vsll.vi v12, v12, 2 2024; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2025; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 2026; RV64V-NEXT: ret 2027; 2028; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32: 2029; RV64ZVE32F: # %bb.0: 2030; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2031; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2032; RV64ZVE32F-NEXT: andi a2, a1, 1 2033; RV64ZVE32F-NEXT: beqz a2, .LBB30_2 2034; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 2035; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2036; RV64ZVE32F-NEXT: slli a2, a2, 2 2037; RV64ZVE32F-NEXT: add a2, a0, a2 2038; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2039; RV64ZVE32F-NEXT: vse32.v v8, (a2) 2040; RV64ZVE32F-NEXT: .LBB30_2: # %else 2041; RV64ZVE32F-NEXT: andi a2, a1, 2 2042; RV64ZVE32F-NEXT: beqz a2, .LBB30_4 2043; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 2044; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2045; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 2046; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2047; RV64ZVE32F-NEXT: slli a2, a2, 2 2048; RV64ZVE32F-NEXT: add a2, a0, a2 2049; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2050; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 2051; RV64ZVE32F-NEXT: vse32.v v11, (a2) 2052; RV64ZVE32F-NEXT: .LBB30_4: # %else2 2053; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 2054; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 2055; RV64ZVE32F-NEXT: andi a2, a1, 4 2056; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2057; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 2058; RV64ZVE32F-NEXT: bnez a2, .LBB30_12 2059; RV64ZVE32F-NEXT: # %bb.5: # %else4 2060; RV64ZVE32F-NEXT: andi a2, a1, 8 2061; RV64ZVE32F-NEXT: bnez a2, .LBB30_13 2062; RV64ZVE32F-NEXT: .LBB30_6: # %else6 2063; RV64ZVE32F-NEXT: andi a2, a1, 16 2064; RV64ZVE32F-NEXT: bnez a2, .LBB30_14 2065; RV64ZVE32F-NEXT: .LBB30_7: # %else8 2066; RV64ZVE32F-NEXT: andi a2, a1, 32 2067; RV64ZVE32F-NEXT: beqz a2, .LBB30_9 2068; RV64ZVE32F-NEXT: .LBB30_8: # %cond.store9 2069; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2070; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 2071; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2072; RV64ZVE32F-NEXT: slli a2, a2, 2 2073; RV64ZVE32F-NEXT: add a2, a0, a2 2074; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2075; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 2076; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2077; RV64ZVE32F-NEXT: 
vse32.v v12, (a2) 2078; RV64ZVE32F-NEXT: .LBB30_9: # %else10 2079; RV64ZVE32F-NEXT: andi a2, a1, 64 2080; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2081; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 2082; RV64ZVE32F-NEXT: bnez a2, .LBB30_15 2083; RV64ZVE32F-NEXT: # %bb.10: # %else12 2084; RV64ZVE32F-NEXT: andi a1, a1, -128 2085; RV64ZVE32F-NEXT: bnez a1, .LBB30_16 2086; RV64ZVE32F-NEXT: .LBB30_11: # %else14 2087; RV64ZVE32F-NEXT: ret 2088; RV64ZVE32F-NEXT: .LBB30_12: # %cond.store3 2089; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2090; RV64ZVE32F-NEXT: slli a2, a2, 2 2091; RV64ZVE32F-NEXT: add a2, a0, a2 2092; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2093; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 2094; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2095; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2096; RV64ZVE32F-NEXT: andi a2, a1, 8 2097; RV64ZVE32F-NEXT: beqz a2, .LBB30_6 2098; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5 2099; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2100; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2101; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2102; RV64ZVE32F-NEXT: slli a2, a2, 2 2103; RV64ZVE32F-NEXT: add a2, a0, a2 2104; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2105; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 2106; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2107; RV64ZVE32F-NEXT: andi a2, a1, 16 2108; RV64ZVE32F-NEXT: beqz a2, .LBB30_7 2109; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7 2110; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2111; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2112; RV64ZVE32F-NEXT: slli a2, a2, 2 2113; RV64ZVE32F-NEXT: add a2, a0, a2 2114; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2115; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 2116; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2117; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2118; RV64ZVE32F-NEXT: andi a2, a1, 32 2119; RV64ZVE32F-NEXT: bnez a2, .LBB30_8 2120; RV64ZVE32F-NEXT: j .LBB30_9 2121; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11 2122; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2123; RV64ZVE32F-NEXT: slli a2, a2, 2 2124; RV64ZVE32F-NEXT: add a2, a0, a2 2125; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2126; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 2127; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2128; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2129; RV64ZVE32F-NEXT: andi a1, a1, -128 2130; RV64ZVE32F-NEXT: beqz a1, .LBB30_11 2131; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13 2132; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2133; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2134; RV64ZVE32F-NEXT: vmv.x.s a1, v10 2135; RV64ZVE32F-NEXT: slli a1, a1, 2 2136; RV64ZVE32F-NEXT: add a0, a0, a1 2137; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2138; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 2139; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2140; RV64ZVE32F-NEXT: vse32.v v8, (a0) 2141; RV64ZVE32F-NEXT: ret 2142 %eidxs = sext <8 x i8> %idxs to <8 x i32> 2143 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 2144 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 2145 ret void 2146} 2147 2148define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 2149; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32: 2150; RV32: # %bb.0: 2151; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2152; RV32-NEXT: vzext.vf2 v11, v10 2153; RV32-NEXT: vsll.vi v10, v11, 2 2154; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2155; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t 2156; RV32-NEXT: ret 2157; 
2158; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i32: 2159; RV64V: # %bb.0: 2160; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2161; RV64V-NEXT: vzext.vf2 v11, v10 2162; RV64V-NEXT: vsll.vi v10, v11, 2 2163; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2164; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t 2165; RV64V-NEXT: ret 2166; 2167; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32: 2168; RV64ZVE32F: # %bb.0: 2169; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2170; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2171; RV64ZVE32F-NEXT: andi a2, a1, 1 2172; RV64ZVE32F-NEXT: beqz a2, .LBB31_2 2173; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 2174; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2175; RV64ZVE32F-NEXT: andi a2, a2, 255 2176; RV64ZVE32F-NEXT: slli a2, a2, 2 2177; RV64ZVE32F-NEXT: add a2, a0, a2 2178; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2179; RV64ZVE32F-NEXT: vse32.v v8, (a2) 2180; RV64ZVE32F-NEXT: .LBB31_2: # %else 2181; RV64ZVE32F-NEXT: andi a2, a1, 2 2182; RV64ZVE32F-NEXT: beqz a2, .LBB31_4 2183; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 2184; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2185; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 2186; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2187; RV64ZVE32F-NEXT: andi a2, a2, 255 2188; RV64ZVE32F-NEXT: slli a2, a2, 2 2189; RV64ZVE32F-NEXT: add a2, a0, a2 2190; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2191; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 2192; RV64ZVE32F-NEXT: vse32.v v11, (a2) 2193; RV64ZVE32F-NEXT: .LBB31_4: # %else2 2194; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 2195; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 2196; RV64ZVE32F-NEXT: andi a2, a1, 4 2197; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2198; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 2199; RV64ZVE32F-NEXT: bnez a2, .LBB31_12 2200; RV64ZVE32F-NEXT: # %bb.5: # %else4 2201; RV64ZVE32F-NEXT: andi a2, a1, 8 2202; RV64ZVE32F-NEXT: bnez a2, .LBB31_13 2203; RV64ZVE32F-NEXT: .LBB31_6: # %else6 2204; RV64ZVE32F-NEXT: andi a2, a1, 16 2205; RV64ZVE32F-NEXT: bnez a2, .LBB31_14 2206; RV64ZVE32F-NEXT: .LBB31_7: # %else8 2207; RV64ZVE32F-NEXT: andi a2, a1, 32 2208; RV64ZVE32F-NEXT: beqz a2, .LBB31_9 2209; RV64ZVE32F-NEXT: .LBB31_8: # %cond.store9 2210; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2211; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 2212; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2213; RV64ZVE32F-NEXT: andi a2, a2, 255 2214; RV64ZVE32F-NEXT: slli a2, a2, 2 2215; RV64ZVE32F-NEXT: add a2, a0, a2 2216; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2217; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 2218; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2219; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2220; RV64ZVE32F-NEXT: .LBB31_9: # %else10 2221; RV64ZVE32F-NEXT: andi a2, a1, 64 2222; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2223; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 2224; RV64ZVE32F-NEXT: bnez a2, .LBB31_15 2225; RV64ZVE32F-NEXT: # %bb.10: # %else12 2226; RV64ZVE32F-NEXT: andi a1, a1, -128 2227; RV64ZVE32F-NEXT: bnez a1, .LBB31_16 2228; RV64ZVE32F-NEXT: .LBB31_11: # %else14 2229; RV64ZVE32F-NEXT: ret 2230; RV64ZVE32F-NEXT: .LBB31_12: # %cond.store3 2231; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2232; RV64ZVE32F-NEXT: andi a2, a2, 255 2233; RV64ZVE32F-NEXT: slli a2, a2, 2 2234; RV64ZVE32F-NEXT: add a2, a0, a2 2235; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2236; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 2237; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2238; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2239; RV64ZVE32F-NEXT: andi a2, a1, 8 
2240; RV64ZVE32F-NEXT: beqz a2, .LBB31_6 2241; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5 2242; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2243; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2244; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2245; RV64ZVE32F-NEXT: andi a2, a2, 255 2246; RV64ZVE32F-NEXT: slli a2, a2, 2 2247; RV64ZVE32F-NEXT: add a2, a0, a2 2248; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2249; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 2250; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2251; RV64ZVE32F-NEXT: andi a2, a1, 16 2252; RV64ZVE32F-NEXT: beqz a2, .LBB31_7 2253; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7 2254; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2255; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2256; RV64ZVE32F-NEXT: andi a2, a2, 255 2257; RV64ZVE32F-NEXT: slli a2, a2, 2 2258; RV64ZVE32F-NEXT: add a2, a0, a2 2259; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2260; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 2261; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2262; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2263; RV64ZVE32F-NEXT: andi a2, a1, 32 2264; RV64ZVE32F-NEXT: bnez a2, .LBB31_8 2265; RV64ZVE32F-NEXT: j .LBB31_9 2266; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11 2267; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2268; RV64ZVE32F-NEXT: andi a2, a2, 255 2269; RV64ZVE32F-NEXT: slli a2, a2, 2 2270; RV64ZVE32F-NEXT: add a2, a0, a2 2271; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2272; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 2273; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2274; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2275; RV64ZVE32F-NEXT: andi a1, a1, -128 2276; RV64ZVE32F-NEXT: beqz a1, .LBB31_11 2277; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13 2278; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2279; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2280; RV64ZVE32F-NEXT: vmv.x.s a1, v10 2281; RV64ZVE32F-NEXT: andi a1, a1, 255 2282; RV64ZVE32F-NEXT: slli a1, a1, 2 2283; RV64ZVE32F-NEXT: add a0, a0, a1 2284; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2285; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 2286; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2287; RV64ZVE32F-NEXT: vse32.v v8, (a0) 2288; RV64ZVE32F-NEXT: ret 2289 %eidxs = zext <8 x i8> %idxs to <8 x i32> 2290 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 2291 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 2292 ret void 2293} 2294 2295define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { 2296; RV32-LABEL: mscatter_baseidx_v8i16_v8i32: 2297; RV32: # %bb.0: 2298; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2299; RV32-NEXT: vsext.vf2 v12, v10 2300; RV32-NEXT: vsll.vi v10, v12, 2 2301; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 2302; RV32-NEXT: ret 2303; 2304; RV64V-LABEL: mscatter_baseidx_v8i16_v8i32: 2305; RV64V: # %bb.0: 2306; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2307; RV64V-NEXT: vsext.vf4 v12, v10 2308; RV64V-NEXT: vsll.vi v12, v12, 2 2309; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2310; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 2311; RV64V-NEXT: ret 2312; 2313; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32: 2314; RV64ZVE32F: # %bb.0: 2315; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2316; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2317; RV64ZVE32F-NEXT: andi a2, a1, 1 2318; RV64ZVE32F-NEXT: beqz a2, .LBB32_2 2319; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 2320; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2321; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2322; RV64ZVE32F-NEXT: slli 
a2, a2, 2 2323; RV64ZVE32F-NEXT: add a2, a0, a2 2324; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2325; RV64ZVE32F-NEXT: vse32.v v8, (a2) 2326; RV64ZVE32F-NEXT: .LBB32_2: # %else 2327; RV64ZVE32F-NEXT: andi a2, a1, 2 2328; RV64ZVE32F-NEXT: beqz a2, .LBB32_4 2329; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 2330; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2331; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 2332; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2333; RV64ZVE32F-NEXT: slli a2, a2, 2 2334; RV64ZVE32F-NEXT: add a2, a0, a2 2335; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2336; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 2337; RV64ZVE32F-NEXT: vse32.v v11, (a2) 2338; RV64ZVE32F-NEXT: .LBB32_4: # %else2 2339; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 2340; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 2341; RV64ZVE32F-NEXT: andi a2, a1, 4 2342; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 2343; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 2344; RV64ZVE32F-NEXT: bnez a2, .LBB32_12 2345; RV64ZVE32F-NEXT: # %bb.5: # %else4 2346; RV64ZVE32F-NEXT: andi a2, a1, 8 2347; RV64ZVE32F-NEXT: bnez a2, .LBB32_13 2348; RV64ZVE32F-NEXT: .LBB32_6: # %else6 2349; RV64ZVE32F-NEXT: andi a2, a1, 16 2350; RV64ZVE32F-NEXT: bnez a2, .LBB32_14 2351; RV64ZVE32F-NEXT: .LBB32_7: # %else8 2352; RV64ZVE32F-NEXT: andi a2, a1, 32 2353; RV64ZVE32F-NEXT: beqz a2, .LBB32_9 2354; RV64ZVE32F-NEXT: .LBB32_8: # %cond.store9 2355; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2356; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 2357; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2358; RV64ZVE32F-NEXT: slli a2, a2, 2 2359; RV64ZVE32F-NEXT: add a2, a0, a2 2360; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2361; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 2362; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2363; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2364; RV64ZVE32F-NEXT: .LBB32_9: # %else10 2365; RV64ZVE32F-NEXT: andi a2, a1, 64 2366; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 2367; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 2368; RV64ZVE32F-NEXT: bnez a2, .LBB32_15 2369; RV64ZVE32F-NEXT: # %bb.10: # %else12 2370; RV64ZVE32F-NEXT: andi a1, a1, -128 2371; RV64ZVE32F-NEXT: bnez a1, .LBB32_16 2372; RV64ZVE32F-NEXT: .LBB32_11: # %else14 2373; RV64ZVE32F-NEXT: ret 2374; RV64ZVE32F-NEXT: .LBB32_12: # %cond.store3 2375; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2376; RV64ZVE32F-NEXT: slli a2, a2, 2 2377; RV64ZVE32F-NEXT: add a2, a0, a2 2378; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2379; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 2380; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2381; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2382; RV64ZVE32F-NEXT: andi a2, a1, 8 2383; RV64ZVE32F-NEXT: beqz a2, .LBB32_6 2384; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5 2385; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2386; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2387; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2388; RV64ZVE32F-NEXT: slli a2, a2, 2 2389; RV64ZVE32F-NEXT: add a2, a0, a2 2390; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2391; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 2392; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2393; RV64ZVE32F-NEXT: andi a2, a1, 16 2394; RV64ZVE32F-NEXT: beqz a2, .LBB32_7 2395; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7 2396; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 2397; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2398; RV64ZVE32F-NEXT: slli a2, a2, 2 2399; RV64ZVE32F-NEXT: add a2, a0, a2 2400; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2401; RV64ZVE32F-NEXT: vslidedown.vi v12, 
v8, 4 2402; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2403; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2404; RV64ZVE32F-NEXT: andi a2, a1, 32 2405; RV64ZVE32F-NEXT: bnez a2, .LBB32_8 2406; RV64ZVE32F-NEXT: j .LBB32_9 2407; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11 2408; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2409; RV64ZVE32F-NEXT: slli a2, a2, 2 2410; RV64ZVE32F-NEXT: add a2, a0, a2 2411; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2412; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 2413; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2414; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2415; RV64ZVE32F-NEXT: andi a1, a1, -128 2416; RV64ZVE32F-NEXT: beqz a1, .LBB32_11 2417; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13 2418; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2419; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2420; RV64ZVE32F-NEXT: vmv.x.s a1, v10 2421; RV64ZVE32F-NEXT: slli a1, a1, 2 2422; RV64ZVE32F-NEXT: add a0, a0, a1 2423; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2424; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 2425; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2426; RV64ZVE32F-NEXT: vse32.v v8, (a0) 2427; RV64ZVE32F-NEXT: ret 2428 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs 2429 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 2430 ret void 2431} 2432 2433define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { 2434; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i32: 2435; RV32: # %bb.0: 2436; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2437; RV32-NEXT: vsext.vf2 v12, v10 2438; RV32-NEXT: vsll.vi v10, v12, 2 2439; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 2440; RV32-NEXT: ret 2441; 2442; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i32: 2443; RV64V: # %bb.0: 2444; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2445; RV64V-NEXT: vsext.vf4 v12, v10 2446; RV64V-NEXT: vsll.vi v12, v12, 2 2447; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2448; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 2449; RV64V-NEXT: ret 2450; 2451; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32: 2452; RV64ZVE32F: # %bb.0: 2453; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2454; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2455; RV64ZVE32F-NEXT: andi a2, a1, 1 2456; RV64ZVE32F-NEXT: beqz a2, .LBB33_2 2457; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 2458; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2459; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2460; RV64ZVE32F-NEXT: slli a2, a2, 2 2461; RV64ZVE32F-NEXT: add a2, a0, a2 2462; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2463; RV64ZVE32F-NEXT: vse32.v v8, (a2) 2464; RV64ZVE32F-NEXT: .LBB33_2: # %else 2465; RV64ZVE32F-NEXT: andi a2, a1, 2 2466; RV64ZVE32F-NEXT: beqz a2, .LBB33_4 2467; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 2468; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2469; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 2470; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2471; RV64ZVE32F-NEXT: slli a2, a2, 2 2472; RV64ZVE32F-NEXT: add a2, a0, a2 2473; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2474; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 2475; RV64ZVE32F-NEXT: vse32.v v11, (a2) 2476; RV64ZVE32F-NEXT: .LBB33_4: # %else2 2477; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 2478; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 2479; RV64ZVE32F-NEXT: andi a2, a1, 4 2480; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 2481; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 2482; RV64ZVE32F-NEXT: bnez a2, .LBB33_12 2483; RV64ZVE32F-NEXT: # 
%bb.5: # %else4 2484; RV64ZVE32F-NEXT: andi a2, a1, 8 2485; RV64ZVE32F-NEXT: bnez a2, .LBB33_13 2486; RV64ZVE32F-NEXT: .LBB33_6: # %else6 2487; RV64ZVE32F-NEXT: andi a2, a1, 16 2488; RV64ZVE32F-NEXT: bnez a2, .LBB33_14 2489; RV64ZVE32F-NEXT: .LBB33_7: # %else8 2490; RV64ZVE32F-NEXT: andi a2, a1, 32 2491; RV64ZVE32F-NEXT: beqz a2, .LBB33_9 2492; RV64ZVE32F-NEXT: .LBB33_8: # %cond.store9 2493; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2494; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 2495; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2496; RV64ZVE32F-NEXT: slli a2, a2, 2 2497; RV64ZVE32F-NEXT: add a2, a0, a2 2498; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2499; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 2500; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2501; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2502; RV64ZVE32F-NEXT: .LBB33_9: # %else10 2503; RV64ZVE32F-NEXT: andi a2, a1, 64 2504; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 2505; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 2506; RV64ZVE32F-NEXT: bnez a2, .LBB33_15 2507; RV64ZVE32F-NEXT: # %bb.10: # %else12 2508; RV64ZVE32F-NEXT: andi a1, a1, -128 2509; RV64ZVE32F-NEXT: bnez a1, .LBB33_16 2510; RV64ZVE32F-NEXT: .LBB33_11: # %else14 2511; RV64ZVE32F-NEXT: ret 2512; RV64ZVE32F-NEXT: .LBB33_12: # %cond.store3 2513; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2514; RV64ZVE32F-NEXT: slli a2, a2, 2 2515; RV64ZVE32F-NEXT: add a2, a0, a2 2516; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2517; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 2518; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2519; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2520; RV64ZVE32F-NEXT: andi a2, a1, 8 2521; RV64ZVE32F-NEXT: beqz a2, .LBB33_6 2522; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5 2523; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2524; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2525; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2526; RV64ZVE32F-NEXT: slli a2, a2, 2 2527; RV64ZVE32F-NEXT: add a2, a0, a2 2528; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2529; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 2530; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2531; RV64ZVE32F-NEXT: andi a2, a1, 16 2532; RV64ZVE32F-NEXT: beqz a2, .LBB33_7 2533; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7 2534; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 2535; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2536; RV64ZVE32F-NEXT: slli a2, a2, 2 2537; RV64ZVE32F-NEXT: add a2, a0, a2 2538; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2539; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 2540; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2541; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2542; RV64ZVE32F-NEXT: andi a2, a1, 32 2543; RV64ZVE32F-NEXT: bnez a2, .LBB33_8 2544; RV64ZVE32F-NEXT: j .LBB33_9 2545; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11 2546; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2547; RV64ZVE32F-NEXT: slli a2, a2, 2 2548; RV64ZVE32F-NEXT: add a2, a0, a2 2549; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2550; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 2551; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2552; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2553; RV64ZVE32F-NEXT: andi a1, a1, -128 2554; RV64ZVE32F-NEXT: beqz a1, .LBB33_11 2555; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13 2556; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2557; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2558; RV64ZVE32F-NEXT: vmv.x.s a1, v10 2559; RV64ZVE32F-NEXT: slli a1, a1, 2 2560; RV64ZVE32F-NEXT: add a0, a0, a1 2561; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2562; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 
7 2563; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2564; RV64ZVE32F-NEXT: vse32.v v8, (a0) 2565; RV64ZVE32F-NEXT: ret 2566 %eidxs = sext <8 x i16> %idxs to <8 x i32> 2567 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 2568 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 2569 ret void 2570} 2571 2572define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { 2573; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i32: 2574; RV32: # %bb.0: 2575; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2576; RV32-NEXT: vzext.vf2 v12, v10 2577; RV32-NEXT: vsll.vi v10, v12, 2 2578; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 2579; RV32-NEXT: ret 2580; 2581; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i32: 2582; RV64V: # %bb.0: 2583; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2584; RV64V-NEXT: vzext.vf2 v12, v10 2585; RV64V-NEXT: vsll.vi v10, v12, 2 2586; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t 2587; RV64V-NEXT: ret 2588; 2589; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32: 2590; RV64ZVE32F: # %bb.0: 2591; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2592; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2593; RV64ZVE32F-NEXT: andi a2, a1, 1 2594; RV64ZVE32F-NEXT: beqz a2, .LBB34_2 2595; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 2596; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2597; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2598; RV64ZVE32F-NEXT: slli a2, a2, 48 2599; RV64ZVE32F-NEXT: srli a2, a2, 46 2600; RV64ZVE32F-NEXT: add a2, a0, a2 2601; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2602; RV64ZVE32F-NEXT: vse32.v v8, (a2) 2603; RV64ZVE32F-NEXT: .LBB34_2: # %else 2604; RV64ZVE32F-NEXT: andi a2, a1, 2 2605; RV64ZVE32F-NEXT: beqz a2, .LBB34_4 2606; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 2607; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2608; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 2609; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2610; RV64ZVE32F-NEXT: slli a2, a2, 48 2611; RV64ZVE32F-NEXT: srli a2, a2, 46 2612; RV64ZVE32F-NEXT: add a2, a0, a2 2613; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2614; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 2615; RV64ZVE32F-NEXT: vse32.v v11, (a2) 2616; RV64ZVE32F-NEXT: .LBB34_4: # %else2 2617; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 2618; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 2619; RV64ZVE32F-NEXT: andi a2, a1, 4 2620; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 2621; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 2622; RV64ZVE32F-NEXT: bnez a2, .LBB34_12 2623; RV64ZVE32F-NEXT: # %bb.5: # %else4 2624; RV64ZVE32F-NEXT: andi a2, a1, 8 2625; RV64ZVE32F-NEXT: bnez a2, .LBB34_13 2626; RV64ZVE32F-NEXT: .LBB34_6: # %else6 2627; RV64ZVE32F-NEXT: andi a2, a1, 16 2628; RV64ZVE32F-NEXT: bnez a2, .LBB34_14 2629; RV64ZVE32F-NEXT: .LBB34_7: # %else8 2630; RV64ZVE32F-NEXT: andi a2, a1, 32 2631; RV64ZVE32F-NEXT: beqz a2, .LBB34_9 2632; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9 2633; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2634; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 2635; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2636; RV64ZVE32F-NEXT: slli a2, a2, 48 2637; RV64ZVE32F-NEXT: srli a2, a2, 46 2638; RV64ZVE32F-NEXT: add a2, a0, a2 2639; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2640; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 2641; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2642; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2643; RV64ZVE32F-NEXT: .LBB34_9: # %else10 2644; RV64ZVE32F-NEXT: andi a2, a1, 64 2645; RV64ZVE32F-NEXT: vsetivli 
zero, 2, e16, mf2, ta, ma 2646; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 2647; RV64ZVE32F-NEXT: bnez a2, .LBB34_15 2648; RV64ZVE32F-NEXT: # %bb.10: # %else12 2649; RV64ZVE32F-NEXT: andi a1, a1, -128 2650; RV64ZVE32F-NEXT: bnez a1, .LBB34_16 2651; RV64ZVE32F-NEXT: .LBB34_11: # %else14 2652; RV64ZVE32F-NEXT: ret 2653; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3 2654; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2655; RV64ZVE32F-NEXT: slli a2, a2, 48 2656; RV64ZVE32F-NEXT: srli a2, a2, 46 2657; RV64ZVE32F-NEXT: add a2, a0, a2 2658; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2659; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 2660; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2661; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2662; RV64ZVE32F-NEXT: andi a2, a1, 8 2663; RV64ZVE32F-NEXT: beqz a2, .LBB34_6 2664; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5 2665; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2666; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2667; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2668; RV64ZVE32F-NEXT: slli a2, a2, 48 2669; RV64ZVE32F-NEXT: srli a2, a2, 46 2670; RV64ZVE32F-NEXT: add a2, a0, a2 2671; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2672; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 2673; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2674; RV64ZVE32F-NEXT: andi a2, a1, 16 2675; RV64ZVE32F-NEXT: beqz a2, .LBB34_7 2676; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7 2677; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 2678; RV64ZVE32F-NEXT: vmv.x.s a2, v11 2679; RV64ZVE32F-NEXT: slli a2, a2, 48 2680; RV64ZVE32F-NEXT: srli a2, a2, 46 2681; RV64ZVE32F-NEXT: add a2, a0, a2 2682; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2683; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 2684; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2685; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2686; RV64ZVE32F-NEXT: andi a2, a1, 32 2687; RV64ZVE32F-NEXT: bnez a2, .LBB34_8 2688; RV64ZVE32F-NEXT: j .LBB34_9 2689; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11 2690; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2691; RV64ZVE32F-NEXT: slli a2, a2, 48 2692; RV64ZVE32F-NEXT: srli a2, a2, 46 2693; RV64ZVE32F-NEXT: add a2, a0, a2 2694; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2695; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 2696; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2697; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2698; RV64ZVE32F-NEXT: andi a1, a1, -128 2699; RV64ZVE32F-NEXT: beqz a1, .LBB34_11 2700; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13 2701; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2702; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2703; RV64ZVE32F-NEXT: vmv.x.s a1, v10 2704; RV64ZVE32F-NEXT: slli a1, a1, 48 2705; RV64ZVE32F-NEXT: srli a1, a1, 46 2706; RV64ZVE32F-NEXT: add a0, a0, a1 2707; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2708; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 2709; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2710; RV64ZVE32F-NEXT: vse32.v v8, (a0) 2711; RV64ZVE32F-NEXT: ret 2712 %eidxs = zext <8 x i16> %idxs to <8 x i32> 2713 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 2714 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 2715 ret void 2716} 2717 2718define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) { 2719; RV32-LABEL: mscatter_baseidx_v8i32: 2720; RV32: # %bb.0: 2721; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2722; RV32-NEXT: vsll.vi v10, v10, 2 2723; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 2724; RV32-NEXT: ret 2725; 2726; RV64V-LABEL: 
mscatter_baseidx_v8i32: 2727; RV64V: # %bb.0: 2728; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2729; RV64V-NEXT: vsext.vf2 v12, v10 2730; RV64V-NEXT: vsll.vi v12, v12, 2 2731; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2732; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 2733; RV64V-NEXT: ret 2734; 2735; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32: 2736; RV64ZVE32F: # %bb.0: 2737; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2738; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2739; RV64ZVE32F-NEXT: andi a2, a1, 1 2740; RV64ZVE32F-NEXT: beqz a2, .LBB35_2 2741; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 2742; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2743; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2744; RV64ZVE32F-NEXT: slli a2, a2, 2 2745; RV64ZVE32F-NEXT: add a2, a0, a2 2746; RV64ZVE32F-NEXT: vse32.v v8, (a2) 2747; RV64ZVE32F-NEXT: .LBB35_2: # %else 2748; RV64ZVE32F-NEXT: andi a2, a1, 2 2749; RV64ZVE32F-NEXT: beqz a2, .LBB35_4 2750; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 2751; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2752; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 2753; RV64ZVE32F-NEXT: vmv.x.s a2, v12 2754; RV64ZVE32F-NEXT: slli a2, a2, 2 2755; RV64ZVE32F-NEXT: add a2, a0, a2 2756; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 2757; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2758; RV64ZVE32F-NEXT: .LBB35_4: # %else2 2759; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 2760; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 2761; RV64ZVE32F-NEXT: andi a2, a1, 4 2762; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 2763; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 2764; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 2765; RV64ZVE32F-NEXT: # %bb.5: # %else4 2766; RV64ZVE32F-NEXT: andi a2, a1, 8 2767; RV64ZVE32F-NEXT: bnez a2, .LBB35_13 2768; RV64ZVE32F-NEXT: .LBB35_6: # %else6 2769; RV64ZVE32F-NEXT: andi a2, a1, 16 2770; RV64ZVE32F-NEXT: bnez a2, .LBB35_14 2771; RV64ZVE32F-NEXT: .LBB35_7: # %else8 2772; RV64ZVE32F-NEXT: andi a2, a1, 32 2773; RV64ZVE32F-NEXT: beqz a2, .LBB35_9 2774; RV64ZVE32F-NEXT: .LBB35_8: # %cond.store9 2775; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2776; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1 2777; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2778; RV64ZVE32F-NEXT: slli a2, a2, 2 2779; RV64ZVE32F-NEXT: add a2, a0, a2 2780; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2781; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 2782; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2783; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2784; RV64ZVE32F-NEXT: .LBB35_9: # %else10 2785; RV64ZVE32F-NEXT: andi a2, a1, 64 2786; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 2787; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 2788; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 2789; RV64ZVE32F-NEXT: # %bb.10: # %else12 2790; RV64ZVE32F-NEXT: andi a1, a1, -128 2791; RV64ZVE32F-NEXT: bnez a1, .LBB35_16 2792; RV64ZVE32F-NEXT: .LBB35_11: # %else14 2793; RV64ZVE32F-NEXT: ret 2794; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3 2795; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2796; RV64ZVE32F-NEXT: slli a2, a2, 2 2797; RV64ZVE32F-NEXT: add a2, a0, a2 2798; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 2799; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2800; RV64ZVE32F-NEXT: vse32.v v11, (a2) 2801; RV64ZVE32F-NEXT: andi a2, a1, 8 2802; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 2803; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5 2804; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2805; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2806; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2807; RV64ZVE32F-NEXT: slli a2, a2, 2 2808; RV64ZVE32F-NEXT: add a2, a0, a2 2809; 
RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 2810; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2811; RV64ZVE32F-NEXT: andi a2, a1, 16 2812; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 2813; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7 2814; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2815; RV64ZVE32F-NEXT: vmv.x.s a2, v12 2816; RV64ZVE32F-NEXT: slli a2, a2, 2 2817; RV64ZVE32F-NEXT: add a2, a0, a2 2818; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 2819; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2820; RV64ZVE32F-NEXT: vse32.v v10, (a2) 2821; RV64ZVE32F-NEXT: andi a2, a1, 32 2822; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 2823; RV64ZVE32F-NEXT: j .LBB35_9 2824; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11 2825; RV64ZVE32F-NEXT: vmv.x.s a2, v10 2826; RV64ZVE32F-NEXT: slli a2, a2, 2 2827; RV64ZVE32F-NEXT: add a2, a0, a2 2828; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2829; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 2830; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2831; RV64ZVE32F-NEXT: vse32.v v12, (a2) 2832; RV64ZVE32F-NEXT: andi a1, a1, -128 2833; RV64ZVE32F-NEXT: beqz a1, .LBB35_11 2834; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13 2835; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2836; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 2837; RV64ZVE32F-NEXT: vmv.x.s a1, v10 2838; RV64ZVE32F-NEXT: slli a1, a1, 2 2839; RV64ZVE32F-NEXT: add a0, a0, a1 2840; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 2841; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 2842; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2843; RV64ZVE32F-NEXT: vse32.v v8, (a0) 2844; RV64ZVE32F-NEXT: ret 2845 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs 2846 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 2847 ret void 2848} 2849 2850declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>) 2851 2852define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) { 2853; RV32V-LABEL: mscatter_v1i64: 2854; RV32V: # %bb.0: 2855; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma 2856; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t 2857; RV32V-NEXT: ret 2858; 2859; RV64V-LABEL: mscatter_v1i64: 2860; RV64V: # %bb.0: 2861; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma 2862; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t 2863; RV64V-NEXT: ret 2864; 2865; RV32ZVE32F-LABEL: mscatter_v1i64: 2866; RV32ZVE32F: # %bb.0: 2867; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 2868; RV32ZVE32F-NEXT: vfirst.m a2, v0 2869; RV32ZVE32F-NEXT: bnez a2, .LBB36_2 2870; RV32ZVE32F-NEXT: # %bb.1: # %cond.store 2871; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2872; RV32ZVE32F-NEXT: vmv.x.s a2, v8 2873; RV32ZVE32F-NEXT: sw a0, 0(a2) 2874; RV32ZVE32F-NEXT: sw a1, 4(a2) 2875; RV32ZVE32F-NEXT: .LBB36_2: # %else 2876; RV32ZVE32F-NEXT: ret 2877; 2878; RV64ZVE32F-LABEL: mscatter_v1i64: 2879; RV64ZVE32F: # %bb.0: 2880; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 2881; RV64ZVE32F-NEXT: vfirst.m a2, v0 2882; RV64ZVE32F-NEXT: bnez a2, .LBB36_2 2883; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 2884; RV64ZVE32F-NEXT: sd a0, 0(a1) 2885; RV64ZVE32F-NEXT: .LBB36_2: # %else 2886; RV64ZVE32F-NEXT: ret 2887 call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m) 2888 ret void 2889} 2890 2891declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>) 2892 2893define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) { 2894; RV32V-LABEL: mscatter_v2i64: 2895; RV32V: # %bb.0: 2896; RV32V-NEXT: 
vsetivli zero, 2, e64, m1, ta, ma 2897; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t 2898; RV32V-NEXT: ret 2899; 2900; RV64V-LABEL: mscatter_v2i64: 2901; RV64V: # %bb.0: 2902; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2903; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t 2904; RV64V-NEXT: ret 2905; 2906; RV32ZVE32F-LABEL: mscatter_v2i64: 2907; RV32ZVE32F: # %bb.0: 2908; RV32ZVE32F-NEXT: lw a1, 8(a0) 2909; RV32ZVE32F-NEXT: lw a2, 12(a0) 2910; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2911; RV32ZVE32F-NEXT: vmv.x.s a3, v0 2912; RV32ZVE32F-NEXT: andi a4, a3, 1 2913; RV32ZVE32F-NEXT: bnez a4, .LBB37_3 2914; RV32ZVE32F-NEXT: # %bb.1: # %else 2915; RV32ZVE32F-NEXT: andi a3, a3, 2 2916; RV32ZVE32F-NEXT: bnez a3, .LBB37_4 2917; RV32ZVE32F-NEXT: .LBB37_2: # %else2 2918; RV32ZVE32F-NEXT: ret 2919; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store 2920; RV32ZVE32F-NEXT: lw a4, 0(a0) 2921; RV32ZVE32F-NEXT: lw a0, 4(a0) 2922; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2923; RV32ZVE32F-NEXT: vmv.x.s a5, v8 2924; RV32ZVE32F-NEXT: sw a4, 0(a5) 2925; RV32ZVE32F-NEXT: sw a0, 4(a5) 2926; RV32ZVE32F-NEXT: andi a3, a3, 2 2927; RV32ZVE32F-NEXT: beqz a3, .LBB37_2 2928; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1 2929; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2930; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2931; RV32ZVE32F-NEXT: vmv.x.s a0, v8 2932; RV32ZVE32F-NEXT: sw a1, 0(a0) 2933; RV32ZVE32F-NEXT: sw a2, 4(a0) 2934; RV32ZVE32F-NEXT: ret 2935; 2936; RV64ZVE32F-LABEL: mscatter_v2i64: 2937; RV64ZVE32F: # %bb.0: 2938; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2939; RV64ZVE32F-NEXT: vmv.x.s a4, v0 2940; RV64ZVE32F-NEXT: andi a5, a4, 1 2941; RV64ZVE32F-NEXT: bnez a5, .LBB37_3 2942; RV64ZVE32F-NEXT: # %bb.1: # %else 2943; RV64ZVE32F-NEXT: andi a4, a4, 2 2944; RV64ZVE32F-NEXT: bnez a4, .LBB37_4 2945; RV64ZVE32F-NEXT: .LBB37_2: # %else2 2946; RV64ZVE32F-NEXT: ret 2947; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store 2948; RV64ZVE32F-NEXT: sd a0, 0(a2) 2949; RV64ZVE32F-NEXT: andi a4, a4, 2 2950; RV64ZVE32F-NEXT: beqz a4, .LBB37_2 2951; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1 2952; RV64ZVE32F-NEXT: sd a1, 0(a3) 2953; RV64ZVE32F-NEXT: ret 2954 call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m) 2955 ret void 2956} 2957 2958declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>) 2959 2960define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) { 2961; RV32V-LABEL: mscatter_v4i64: 2962; RV32V: # %bb.0: 2963; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2964; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t 2965; RV32V-NEXT: ret 2966; 2967; RV64V-LABEL: mscatter_v4i64: 2968; RV64V: # %bb.0: 2969; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2970; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t 2971; RV64V-NEXT: ret 2972; 2973; RV32ZVE32F-LABEL: mscatter_v4i64: 2974; RV32ZVE32F: # %bb.0: 2975; RV32ZVE32F-NEXT: lw a1, 24(a0) 2976; RV32ZVE32F-NEXT: lw a2, 28(a0) 2977; RV32ZVE32F-NEXT: lw a6, 8(a0) 2978; RV32ZVE32F-NEXT: lw a7, 12(a0) 2979; RV32ZVE32F-NEXT: lw a3, 16(a0) 2980; RV32ZVE32F-NEXT: lw a4, 20(a0) 2981; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2982; RV32ZVE32F-NEXT: vmv.x.s a5, v0 2983; RV32ZVE32F-NEXT: andi t0, a5, 1 2984; RV32ZVE32F-NEXT: bnez t0, .LBB38_5 2985; RV32ZVE32F-NEXT: # %bb.1: # %else 2986; RV32ZVE32F-NEXT: andi a0, a5, 2 2987; RV32ZVE32F-NEXT: bnez a0, .LBB38_6 2988; RV32ZVE32F-NEXT: .LBB38_2: # %else2 2989; RV32ZVE32F-NEXT: andi a0, a5, 4 2990; RV32ZVE32F-NEXT: bnez a0, .LBB38_7 2991; 
RV32ZVE32F-NEXT: .LBB38_3: # %else4 2992; RV32ZVE32F-NEXT: andi a5, a5, 8 2993; RV32ZVE32F-NEXT: bnez a5, .LBB38_8 2994; RV32ZVE32F-NEXT: .LBB38_4: # %else6 2995; RV32ZVE32F-NEXT: ret 2996; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store 2997; RV32ZVE32F-NEXT: lw t0, 0(a0) 2998; RV32ZVE32F-NEXT: lw a0, 4(a0) 2999; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 3000; RV32ZVE32F-NEXT: vmv.x.s t1, v8 3001; RV32ZVE32F-NEXT: sw t0, 0(t1) 3002; RV32ZVE32F-NEXT: sw a0, 4(t1) 3003; RV32ZVE32F-NEXT: andi a0, a5, 2 3004; RV32ZVE32F-NEXT: beqz a0, .LBB38_2 3005; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1 3006; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3007; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3008; RV32ZVE32F-NEXT: vmv.x.s a0, v9 3009; RV32ZVE32F-NEXT: sw a6, 0(a0) 3010; RV32ZVE32F-NEXT: sw a7, 4(a0) 3011; RV32ZVE32F-NEXT: andi a0, a5, 4 3012; RV32ZVE32F-NEXT: beqz a0, .LBB38_3 3013; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3 3014; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3015; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 3016; RV32ZVE32F-NEXT: vmv.x.s a0, v9 3017; RV32ZVE32F-NEXT: sw a3, 0(a0) 3018; RV32ZVE32F-NEXT: sw a4, 4(a0) 3019; RV32ZVE32F-NEXT: andi a5, a5, 8 3020; RV32ZVE32F-NEXT: beqz a5, .LBB38_4 3021; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5 3022; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3023; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 3024; RV32ZVE32F-NEXT: vmv.x.s a0, v8 3025; RV32ZVE32F-NEXT: sw a1, 0(a0) 3026; RV32ZVE32F-NEXT: sw a2, 4(a0) 3027; RV32ZVE32F-NEXT: ret 3028; 3029; RV64ZVE32F-LABEL: mscatter_v4i64: 3030; RV64ZVE32F: # %bb.0: 3031; RV64ZVE32F-NEXT: ld a6, 8(a1) 3032; RV64ZVE32F-NEXT: ld a4, 16(a1) 3033; RV64ZVE32F-NEXT: ld a2, 24(a1) 3034; RV64ZVE32F-NEXT: ld t0, 8(a0) 3035; RV64ZVE32F-NEXT: ld a5, 16(a0) 3036; RV64ZVE32F-NEXT: ld a3, 24(a0) 3037; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3038; RV64ZVE32F-NEXT: vmv.x.s a7, v0 3039; RV64ZVE32F-NEXT: andi t1, a7, 1 3040; RV64ZVE32F-NEXT: bnez t1, .LBB38_5 3041; RV64ZVE32F-NEXT: # %bb.1: # %else 3042; RV64ZVE32F-NEXT: andi a0, a7, 2 3043; RV64ZVE32F-NEXT: bnez a0, .LBB38_6 3044; RV64ZVE32F-NEXT: .LBB38_2: # %else2 3045; RV64ZVE32F-NEXT: andi a0, a7, 4 3046; RV64ZVE32F-NEXT: bnez a0, .LBB38_7 3047; RV64ZVE32F-NEXT: .LBB38_3: # %else4 3048; RV64ZVE32F-NEXT: andi a0, a7, 8 3049; RV64ZVE32F-NEXT: bnez a0, .LBB38_8 3050; RV64ZVE32F-NEXT: .LBB38_4: # %else6 3051; RV64ZVE32F-NEXT: ret 3052; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store 3053; RV64ZVE32F-NEXT: ld a1, 0(a1) 3054; RV64ZVE32F-NEXT: ld a0, 0(a0) 3055; RV64ZVE32F-NEXT: sd a0, 0(a1) 3056; RV64ZVE32F-NEXT: andi a0, a7, 2 3057; RV64ZVE32F-NEXT: beqz a0, .LBB38_2 3058; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1 3059; RV64ZVE32F-NEXT: sd t0, 0(a6) 3060; RV64ZVE32F-NEXT: andi a0, a7, 4 3061; RV64ZVE32F-NEXT: beqz a0, .LBB38_3 3062; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3 3063; RV64ZVE32F-NEXT: sd a5, 0(a4) 3064; RV64ZVE32F-NEXT: andi a0, a7, 8 3065; RV64ZVE32F-NEXT: beqz a0, .LBB38_4 3066; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5 3067; RV64ZVE32F-NEXT: sd a3, 0(a2) 3068; RV64ZVE32F-NEXT: ret 3069 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m) 3070 ret void 3071} 3072 3073define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { 3074; RV32V-LABEL: mscatter_truemask_v4i64: 3075; RV32V: # %bb.0: 3076; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 3077; RV32V-NEXT: vsoxei32.v v8, (zero), v10 3078; RV32V-NEXT: ret 3079; 3080; RV64V-LABEL: mscatter_truemask_v4i64: 3081; RV64V: # %bb.0: 3082; 
RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 3083; RV64V-NEXT: vsoxei64.v v8, (zero), v10 3084; RV64V-NEXT: ret 3085; 3086; RV32ZVE32F-LABEL: mscatter_truemask_v4i64: 3087; RV32ZVE32F: # %bb.0: 3088; RV32ZVE32F-NEXT: lw a1, 16(a0) 3089; RV32ZVE32F-NEXT: lw a2, 20(a0) 3090; RV32ZVE32F-NEXT: lw a3, 24(a0) 3091; RV32ZVE32F-NEXT: lw a4, 28(a0) 3092; RV32ZVE32F-NEXT: lw a5, 0(a0) 3093; RV32ZVE32F-NEXT: lw a6, 4(a0) 3094; RV32ZVE32F-NEXT: lw a7, 8(a0) 3095; RV32ZVE32F-NEXT: lw a0, 12(a0) 3096; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3097; RV32ZVE32F-NEXT: vmv.x.s t0, v8 3098; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3099; RV32ZVE32F-NEXT: vmv.x.s t1, v9 3100; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 3101; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 3102; RV32ZVE32F-NEXT: sw a5, 0(t0) 3103; RV32ZVE32F-NEXT: sw a6, 4(t0) 3104; RV32ZVE32F-NEXT: vmv.x.s a5, v9 3105; RV32ZVE32F-NEXT: vmv.x.s a6, v8 3106; RV32ZVE32F-NEXT: sw a7, 0(t1) 3107; RV32ZVE32F-NEXT: sw a0, 4(t1) 3108; RV32ZVE32F-NEXT: sw a1, 0(a5) 3109; RV32ZVE32F-NEXT: sw a2, 4(a5) 3110; RV32ZVE32F-NEXT: sw a3, 0(a6) 3111; RV32ZVE32F-NEXT: sw a4, 4(a6) 3112; RV32ZVE32F-NEXT: ret 3113; 3114; RV64ZVE32F-LABEL: mscatter_truemask_v4i64: 3115; RV64ZVE32F: # %bb.0: 3116; RV64ZVE32F-NEXT: ld a2, 0(a1) 3117; RV64ZVE32F-NEXT: ld a3, 8(a1) 3118; RV64ZVE32F-NEXT: ld a4, 16(a1) 3119; RV64ZVE32F-NEXT: ld a1, 24(a1) 3120; RV64ZVE32F-NEXT: ld a5, 0(a0) 3121; RV64ZVE32F-NEXT: ld a6, 8(a0) 3122; RV64ZVE32F-NEXT: ld a7, 16(a0) 3123; RV64ZVE32F-NEXT: ld a0, 24(a0) 3124; RV64ZVE32F-NEXT: sd a5, 0(a2) 3125; RV64ZVE32F-NEXT: sd a6, 0(a3) 3126; RV64ZVE32F-NEXT: sd a7, 0(a4) 3127; RV64ZVE32F-NEXT: sd a0, 0(a1) 3128; RV64ZVE32F-NEXT: ret 3129 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1)) 3130 ret void 3131} 3132 3133define void @mscatter_falsemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { 3134; CHECK-LABEL: mscatter_falsemask_v4i64: 3135; CHECK: # %bb.0: 3136; CHECK-NEXT: ret 3137 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer) 3138 ret void 3139} 3140 3141declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>) 3142 3143define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) { 3144; RV32V-LABEL: mscatter_v8i64: 3145; RV32V: # %bb.0: 3146; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 3147; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t 3148; RV32V-NEXT: ret 3149; 3150; RV64V-LABEL: mscatter_v8i64: 3151; RV64V: # %bb.0: 3152; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 3153; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t 3154; RV64V-NEXT: ret 3155; 3156; RV32ZVE32F-LABEL: mscatter_v8i64: 3157; RV32ZVE32F: # %bb.0: 3158; RV32ZVE32F-NEXT: addi sp, sp, -16 3159; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 3160; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 3161; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 3162; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill 3163; RV32ZVE32F-NEXT: .cfi_offset s0, -4 3164; RV32ZVE32F-NEXT: .cfi_offset s1, -8 3165; RV32ZVE32F-NEXT: .cfi_offset s2, -12 3166; RV32ZVE32F-NEXT: .cfi_remember_state 3167; RV32ZVE32F-NEXT: lw a1, 56(a0) 3168; RV32ZVE32F-NEXT: lw a2, 60(a0) 3169; RV32ZVE32F-NEXT: lw a5, 40(a0) 3170; RV32ZVE32F-NEXT: lw a6, 44(a0) 3171; RV32ZVE32F-NEXT: lw a3, 48(a0) 3172; RV32ZVE32F-NEXT: lw a4, 52(a0) 3173; RV32ZVE32F-NEXT: lw t2, 24(a0) 3174; RV32ZVE32F-NEXT: lw t3, 28(a0) 3175; RV32ZVE32F-NEXT: lw t0, 32(a0) 3176; RV32ZVE32F-NEXT: lw t1, 36(a0) 
3177; RV32ZVE32F-NEXT: lw t6, 8(a0) 3178; RV32ZVE32F-NEXT: lw s0, 12(a0) 3179; RV32ZVE32F-NEXT: lw t4, 16(a0) 3180; RV32ZVE32F-NEXT: lw t5, 20(a0) 3181; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3182; RV32ZVE32F-NEXT: vmv.x.s a7, v0 3183; RV32ZVE32F-NEXT: andi s1, a7, 1 3184; RV32ZVE32F-NEXT: bnez s1, .LBB41_10 3185; RV32ZVE32F-NEXT: # %bb.1: # %else 3186; RV32ZVE32F-NEXT: andi a0, a7, 2 3187; RV32ZVE32F-NEXT: bnez a0, .LBB41_11 3188; RV32ZVE32F-NEXT: .LBB41_2: # %else2 3189; RV32ZVE32F-NEXT: andi a0, a7, 4 3190; RV32ZVE32F-NEXT: bnez a0, .LBB41_12 3191; RV32ZVE32F-NEXT: .LBB41_3: # %else4 3192; RV32ZVE32F-NEXT: andi a0, a7, 8 3193; RV32ZVE32F-NEXT: bnez a0, .LBB41_13 3194; RV32ZVE32F-NEXT: .LBB41_4: # %else6 3195; RV32ZVE32F-NEXT: andi a0, a7, 16 3196; RV32ZVE32F-NEXT: bnez a0, .LBB41_14 3197; RV32ZVE32F-NEXT: .LBB41_5: # %else8 3198; RV32ZVE32F-NEXT: andi a0, a7, 32 3199; RV32ZVE32F-NEXT: bnez a0, .LBB41_15 3200; RV32ZVE32F-NEXT: .LBB41_6: # %else10 3201; RV32ZVE32F-NEXT: andi a0, a7, 64 3202; RV32ZVE32F-NEXT: bnez a0, .LBB41_16 3203; RV32ZVE32F-NEXT: .LBB41_7: # %else12 3204; RV32ZVE32F-NEXT: andi a0, a7, -128 3205; RV32ZVE32F-NEXT: beqz a0, .LBB41_9 3206; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13 3207; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3208; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 3209; RV32ZVE32F-NEXT: vmv.x.s a0, v8 3210; RV32ZVE32F-NEXT: sw a1, 0(a0) 3211; RV32ZVE32F-NEXT: sw a2, 4(a0) 3212; RV32ZVE32F-NEXT: .LBB41_9: # %else14 3213; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 3214; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 3215; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload 3216; RV32ZVE32F-NEXT: .cfi_restore s0 3217; RV32ZVE32F-NEXT: .cfi_restore s1 3218; RV32ZVE32F-NEXT: .cfi_restore s2 3219; RV32ZVE32F-NEXT: addi sp, sp, 16 3220; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 3221; RV32ZVE32F-NEXT: ret 3222; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store 3223; RV32ZVE32F-NEXT: .cfi_restore_state 3224; RV32ZVE32F-NEXT: lw s1, 0(a0) 3225; RV32ZVE32F-NEXT: lw a0, 4(a0) 3226; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 3227; RV32ZVE32F-NEXT: vmv.x.s s2, v8 3228; RV32ZVE32F-NEXT: sw s1, 0(s2) 3229; RV32ZVE32F-NEXT: sw a0, 4(s2) 3230; RV32ZVE32F-NEXT: andi a0, a7, 2 3231; RV32ZVE32F-NEXT: beqz a0, .LBB41_2 3232; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1 3233; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3234; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 3235; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3236; RV32ZVE32F-NEXT: sw t6, 0(a0) 3237; RV32ZVE32F-NEXT: sw s0, 4(a0) 3238; RV32ZVE32F-NEXT: andi a0, a7, 4 3239; RV32ZVE32F-NEXT: beqz a0, .LBB41_3 3240; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3 3241; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3242; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 3243; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3244; RV32ZVE32F-NEXT: sw t4, 0(a0) 3245; RV32ZVE32F-NEXT: sw t5, 4(a0) 3246; RV32ZVE32F-NEXT: andi a0, a7, 8 3247; RV32ZVE32F-NEXT: beqz a0, .LBB41_4 3248; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5 3249; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3250; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 3251; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3252; RV32ZVE32F-NEXT: sw t2, 0(a0) 3253; RV32ZVE32F-NEXT: sw t3, 4(a0) 3254; RV32ZVE32F-NEXT: andi a0, a7, 16 3255; RV32ZVE32F-NEXT: beqz a0, .LBB41_5 3256; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7 3257; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3258; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 3259; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3260; RV32ZVE32F-NEXT: sw 
t0, 0(a0) 3261; RV32ZVE32F-NEXT: sw t1, 4(a0) 3262; RV32ZVE32F-NEXT: andi a0, a7, 32 3263; RV32ZVE32F-NEXT: beqz a0, .LBB41_6 3264; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9 3265; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3266; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 3267; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3268; RV32ZVE32F-NEXT: sw a5, 0(a0) 3269; RV32ZVE32F-NEXT: sw a6, 4(a0) 3270; RV32ZVE32F-NEXT: andi a0, a7, 64 3271; RV32ZVE32F-NEXT: beqz a0, .LBB41_7 3272; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11 3273; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3274; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 3275; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3276; RV32ZVE32F-NEXT: sw a3, 0(a0) 3277; RV32ZVE32F-NEXT: sw a4, 4(a0) 3278; RV32ZVE32F-NEXT: andi a0, a7, -128 3279; RV32ZVE32F-NEXT: bnez a0, .LBB41_8 3280; RV32ZVE32F-NEXT: j .LBB41_9 3281; 3282; RV64ZVE32F-LABEL: mscatter_v8i64: 3283; RV64ZVE32F: # %bb.0: 3284; RV64ZVE32F-NEXT: addi sp, sp, -32 3285; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32 3286; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill 3287; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill 3288; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill 3289; RV64ZVE32F-NEXT: .cfi_offset s0, -8 3290; RV64ZVE32F-NEXT: .cfi_offset s1, -16 3291; RV64ZVE32F-NEXT: .cfi_offset s2, -24 3292; RV64ZVE32F-NEXT: .cfi_remember_state 3293; RV64ZVE32F-NEXT: ld a4, 40(a1) 3294; RV64ZVE32F-NEXT: ld a3, 48(a1) 3295; RV64ZVE32F-NEXT: ld a2, 56(a1) 3296; RV64ZVE32F-NEXT: ld t5, 8(a1) 3297; RV64ZVE32F-NEXT: ld t3, 16(a1) 3298; RV64ZVE32F-NEXT: ld t2, 24(a1) 3299; RV64ZVE32F-NEXT: ld t0, 32(a1) 3300; RV64ZVE32F-NEXT: ld a7, 40(a0) 3301; RV64ZVE32F-NEXT: ld a6, 48(a0) 3302; RV64ZVE32F-NEXT: ld a5, 56(a0) 3303; RV64ZVE32F-NEXT: ld s1, 8(a0) 3304; RV64ZVE32F-NEXT: ld s0, 16(a0) 3305; RV64ZVE32F-NEXT: ld t6, 24(a0) 3306; RV64ZVE32F-NEXT: ld t4, 32(a0) 3307; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3308; RV64ZVE32F-NEXT: vmv.x.s t1, v0 3309; RV64ZVE32F-NEXT: andi s2, t1, 1 3310; RV64ZVE32F-NEXT: bnez s2, .LBB41_10 3311; RV64ZVE32F-NEXT: # %bb.1: # %else 3312; RV64ZVE32F-NEXT: andi a0, t1, 2 3313; RV64ZVE32F-NEXT: bnez a0, .LBB41_11 3314; RV64ZVE32F-NEXT: .LBB41_2: # %else2 3315; RV64ZVE32F-NEXT: andi a0, t1, 4 3316; RV64ZVE32F-NEXT: bnez a0, .LBB41_12 3317; RV64ZVE32F-NEXT: .LBB41_3: # %else4 3318; RV64ZVE32F-NEXT: andi a0, t1, 8 3319; RV64ZVE32F-NEXT: bnez a0, .LBB41_13 3320; RV64ZVE32F-NEXT: .LBB41_4: # %else6 3321; RV64ZVE32F-NEXT: andi a0, t1, 16 3322; RV64ZVE32F-NEXT: bnez a0, .LBB41_14 3323; RV64ZVE32F-NEXT: .LBB41_5: # %else8 3324; RV64ZVE32F-NEXT: andi a0, t1, 32 3325; RV64ZVE32F-NEXT: bnez a0, .LBB41_15 3326; RV64ZVE32F-NEXT: .LBB41_6: # %else10 3327; RV64ZVE32F-NEXT: andi a0, t1, 64 3328; RV64ZVE32F-NEXT: bnez a0, .LBB41_16 3329; RV64ZVE32F-NEXT: .LBB41_7: # %else12 3330; RV64ZVE32F-NEXT: andi a0, t1, -128 3331; RV64ZVE32F-NEXT: beqz a0, .LBB41_9 3332; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13 3333; RV64ZVE32F-NEXT: sd a5, 0(a2) 3334; RV64ZVE32F-NEXT: .LBB41_9: # %else14 3335; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload 3336; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload 3337; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload 3338; RV64ZVE32F-NEXT: .cfi_restore s0 3339; RV64ZVE32F-NEXT: .cfi_restore s1 3340; RV64ZVE32F-NEXT: .cfi_restore s2 3341; RV64ZVE32F-NEXT: addi sp, sp, 32 3342; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0 3343; RV64ZVE32F-NEXT: ret 3344; RV64ZVE32F-NEXT: .LBB41_10: # %cond.store 3345; RV64ZVE32F-NEXT: .cfi_restore_state 3346; 
RV64ZVE32F-NEXT: ld a1, 0(a1) 3347; RV64ZVE32F-NEXT: ld a0, 0(a0) 3348; RV64ZVE32F-NEXT: sd a0, 0(a1) 3349; RV64ZVE32F-NEXT: andi a0, t1, 2 3350; RV64ZVE32F-NEXT: beqz a0, .LBB41_2 3351; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1 3352; RV64ZVE32F-NEXT: sd s1, 0(t5) 3353; RV64ZVE32F-NEXT: andi a0, t1, 4 3354; RV64ZVE32F-NEXT: beqz a0, .LBB41_3 3355; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3 3356; RV64ZVE32F-NEXT: sd s0, 0(t3) 3357; RV64ZVE32F-NEXT: andi a0, t1, 8 3358; RV64ZVE32F-NEXT: beqz a0, .LBB41_4 3359; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5 3360; RV64ZVE32F-NEXT: sd t6, 0(t2) 3361; RV64ZVE32F-NEXT: andi a0, t1, 16 3362; RV64ZVE32F-NEXT: beqz a0, .LBB41_5 3363; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7 3364; RV64ZVE32F-NEXT: sd t4, 0(t0) 3365; RV64ZVE32F-NEXT: andi a0, t1, 32 3366; RV64ZVE32F-NEXT: beqz a0, .LBB41_6 3367; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9 3368; RV64ZVE32F-NEXT: sd a7, 0(a4) 3369; RV64ZVE32F-NEXT: andi a0, t1, 64 3370; RV64ZVE32F-NEXT: beqz a0, .LBB41_7 3371; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11 3372; RV64ZVE32F-NEXT: sd a6, 0(a3) 3373; RV64ZVE32F-NEXT: andi a0, t1, -128 3374; RV64ZVE32F-NEXT: bnez a0, .LBB41_8 3375; RV64ZVE32F-NEXT: j .LBB41_9 3376 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 3377 ret void 3378} 3379 3380define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 3381; RV32V-LABEL: mscatter_baseidx_v8i8_v8i64: 3382; RV32V: # %bb.0: 3383; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3384; RV32V-NEXT: vsext.vf4 v14, v12 3385; RV32V-NEXT: vsll.vi v12, v14, 3 3386; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 3387; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t 3388; RV32V-NEXT: ret 3389; 3390; RV64V-LABEL: mscatter_baseidx_v8i8_v8i64: 3391; RV64V: # %bb.0: 3392; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 3393; RV64V-NEXT: vsext.vf8 v16, v12 3394; RV64V-NEXT: vsll.vi v12, v16, 3 3395; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 3396; RV64V-NEXT: ret 3397; 3398; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64: 3399; RV32ZVE32F: # %bb.0: 3400; RV32ZVE32F-NEXT: addi sp, sp, -16 3401; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 3402; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 3403; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 3404; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill 3405; RV32ZVE32F-NEXT: .cfi_offset s0, -4 3406; RV32ZVE32F-NEXT: .cfi_offset s1, -8 3407; RV32ZVE32F-NEXT: .cfi_offset s2, -12 3408; RV32ZVE32F-NEXT: .cfi_remember_state 3409; RV32ZVE32F-NEXT: lw a2, 56(a0) 3410; RV32ZVE32F-NEXT: lw a3, 60(a0) 3411; RV32ZVE32F-NEXT: lw a6, 40(a0) 3412; RV32ZVE32F-NEXT: lw a7, 44(a0) 3413; RV32ZVE32F-NEXT: lw a4, 48(a0) 3414; RV32ZVE32F-NEXT: lw a5, 52(a0) 3415; RV32ZVE32F-NEXT: lw t3, 24(a0) 3416; RV32ZVE32F-NEXT: lw t4, 28(a0) 3417; RV32ZVE32F-NEXT: lw t1, 32(a0) 3418; RV32ZVE32F-NEXT: lw t2, 36(a0) 3419; RV32ZVE32F-NEXT: lw s0, 8(a0) 3420; RV32ZVE32F-NEXT: lw s1, 12(a0) 3421; RV32ZVE32F-NEXT: lw t5, 16(a0) 3422; RV32ZVE32F-NEXT: lw t6, 20(a0) 3423; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3424; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 3425; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 3426; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 3427; RV32ZVE32F-NEXT: vmv.x.s t0, v0 3428; RV32ZVE32F-NEXT: andi s2, t0, 1 3429; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3430; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 3431; RV32ZVE32F-NEXT: bnez s2, .LBB42_10 3432; RV32ZVE32F-NEXT: # %bb.1: # %else 3433; RV32ZVE32F-NEXT: andi a0, 
t0, 2 3434; RV32ZVE32F-NEXT: bnez a0, .LBB42_11 3435; RV32ZVE32F-NEXT: .LBB42_2: # %else2 3436; RV32ZVE32F-NEXT: andi a0, t0, 4 3437; RV32ZVE32F-NEXT: bnez a0, .LBB42_12 3438; RV32ZVE32F-NEXT: .LBB42_3: # %else4 3439; RV32ZVE32F-NEXT: andi a0, t0, 8 3440; RV32ZVE32F-NEXT: bnez a0, .LBB42_13 3441; RV32ZVE32F-NEXT: .LBB42_4: # %else6 3442; RV32ZVE32F-NEXT: andi a0, t0, 16 3443; RV32ZVE32F-NEXT: bnez a0, .LBB42_14 3444; RV32ZVE32F-NEXT: .LBB42_5: # %else8 3445; RV32ZVE32F-NEXT: andi a0, t0, 32 3446; RV32ZVE32F-NEXT: bnez a0, .LBB42_15 3447; RV32ZVE32F-NEXT: .LBB42_6: # %else10 3448; RV32ZVE32F-NEXT: andi a0, t0, 64 3449; RV32ZVE32F-NEXT: bnez a0, .LBB42_16 3450; RV32ZVE32F-NEXT: .LBB42_7: # %else12 3451; RV32ZVE32F-NEXT: andi a0, t0, -128 3452; RV32ZVE32F-NEXT: beqz a0, .LBB42_9 3453; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13 3454; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3455; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 3456; RV32ZVE32F-NEXT: vmv.x.s a0, v8 3457; RV32ZVE32F-NEXT: sw a2, 0(a0) 3458; RV32ZVE32F-NEXT: sw a3, 4(a0) 3459; RV32ZVE32F-NEXT: .LBB42_9: # %else14 3460; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 3461; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 3462; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload 3463; RV32ZVE32F-NEXT: .cfi_restore s0 3464; RV32ZVE32F-NEXT: .cfi_restore s1 3465; RV32ZVE32F-NEXT: .cfi_restore s2 3466; RV32ZVE32F-NEXT: addi sp, sp, 16 3467; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 3468; RV32ZVE32F-NEXT: ret 3469; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store 3470; RV32ZVE32F-NEXT: .cfi_restore_state 3471; RV32ZVE32F-NEXT: lw a1, 0(a0) 3472; RV32ZVE32F-NEXT: lw a0, 4(a0) 3473; RV32ZVE32F-NEXT: vmv.x.s s2, v8 3474; RV32ZVE32F-NEXT: sw a1, 0(s2) 3475; RV32ZVE32F-NEXT: sw a0, 4(s2) 3476; RV32ZVE32F-NEXT: andi a0, t0, 2 3477; RV32ZVE32F-NEXT: beqz a0, .LBB42_2 3478; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1 3479; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3480; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 3481; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3482; RV32ZVE32F-NEXT: sw s0, 0(a0) 3483; RV32ZVE32F-NEXT: sw s1, 4(a0) 3484; RV32ZVE32F-NEXT: andi a0, t0, 4 3485; RV32ZVE32F-NEXT: beqz a0, .LBB42_3 3486; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3 3487; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3488; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 3489; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3490; RV32ZVE32F-NEXT: sw t5, 0(a0) 3491; RV32ZVE32F-NEXT: sw t6, 4(a0) 3492; RV32ZVE32F-NEXT: andi a0, t0, 8 3493; RV32ZVE32F-NEXT: beqz a0, .LBB42_4 3494; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5 3495; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3496; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 3497; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3498; RV32ZVE32F-NEXT: sw t3, 0(a0) 3499; RV32ZVE32F-NEXT: sw t4, 4(a0) 3500; RV32ZVE32F-NEXT: andi a0, t0, 16 3501; RV32ZVE32F-NEXT: beqz a0, .LBB42_5 3502; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7 3503; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3504; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 3505; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3506; RV32ZVE32F-NEXT: sw t1, 0(a0) 3507; RV32ZVE32F-NEXT: sw t2, 4(a0) 3508; RV32ZVE32F-NEXT: andi a0, t0, 32 3509; RV32ZVE32F-NEXT: beqz a0, .LBB42_6 3510; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9 3511; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3512; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 3513; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3514; RV32ZVE32F-NEXT: sw a6, 0(a0) 3515; RV32ZVE32F-NEXT: sw a7, 4(a0) 3516; RV32ZVE32F-NEXT: andi a0, t0, 64 3517; RV32ZVE32F-NEXT: 
beqz a0, .LBB42_7 3518; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11 3519; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3520; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 3521; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3522; RV32ZVE32F-NEXT: sw a4, 0(a0) 3523; RV32ZVE32F-NEXT: sw a5, 4(a0) 3524; RV32ZVE32F-NEXT: andi a0, t0, -128 3525; RV32ZVE32F-NEXT: bnez a0, .LBB42_8 3526; RV32ZVE32F-NEXT: j .LBB42_9 3527; 3528; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64: 3529; RV64ZVE32F: # %bb.0: 3530; RV64ZVE32F-NEXT: ld a4, 40(a0) 3531; RV64ZVE32F-NEXT: ld a3, 48(a0) 3532; RV64ZVE32F-NEXT: ld a2, 56(a0) 3533; RV64ZVE32F-NEXT: ld t1, 8(a0) 3534; RV64ZVE32F-NEXT: ld t0, 16(a0) 3535; RV64ZVE32F-NEXT: ld a7, 24(a0) 3536; RV64ZVE32F-NEXT: ld a6, 32(a0) 3537; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3538; RV64ZVE32F-NEXT: vmv.x.s a5, v0 3539; RV64ZVE32F-NEXT: andi t2, a5, 1 3540; RV64ZVE32F-NEXT: beqz t2, .LBB42_2 3541; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 3542; RV64ZVE32F-NEXT: ld a0, 0(a0) 3543; RV64ZVE32F-NEXT: vmv.x.s t2, v8 3544; RV64ZVE32F-NEXT: slli t2, t2, 3 3545; RV64ZVE32F-NEXT: add t2, a1, t2 3546; RV64ZVE32F-NEXT: sd a0, 0(t2) 3547; RV64ZVE32F-NEXT: .LBB42_2: # %else 3548; RV64ZVE32F-NEXT: andi a0, a5, 2 3549; RV64ZVE32F-NEXT: beqz a0, .LBB42_4 3550; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 3551; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 3552; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3553; RV64ZVE32F-NEXT: vmv.x.s a0, v9 3554; RV64ZVE32F-NEXT: slli a0, a0, 3 3555; RV64ZVE32F-NEXT: add a0, a1, a0 3556; RV64ZVE32F-NEXT: sd t1, 0(a0) 3557; RV64ZVE32F-NEXT: .LBB42_4: # %else2 3558; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 3559; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 3560; RV64ZVE32F-NEXT: andi a0, a5, 4 3561; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 3562; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 3563; RV64ZVE32F-NEXT: bnez a0, .LBB42_12 3564; RV64ZVE32F-NEXT: # %bb.5: # %else4 3565; RV64ZVE32F-NEXT: andi a0, a5, 8 3566; RV64ZVE32F-NEXT: bnez a0, .LBB42_13 3567; RV64ZVE32F-NEXT: .LBB42_6: # %else6 3568; RV64ZVE32F-NEXT: andi a0, a5, 16 3569; RV64ZVE32F-NEXT: bnez a0, .LBB42_14 3570; RV64ZVE32F-NEXT: .LBB42_7: # %else8 3571; RV64ZVE32F-NEXT: andi a0, a5, 32 3572; RV64ZVE32F-NEXT: beqz a0, .LBB42_9 3573; RV64ZVE32F-NEXT: .LBB42_8: # %cond.store9 3574; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 3575; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3576; RV64ZVE32F-NEXT: slli a0, a0, 3 3577; RV64ZVE32F-NEXT: add a0, a1, a0 3578; RV64ZVE32F-NEXT: sd a4, 0(a0) 3579; RV64ZVE32F-NEXT: .LBB42_9: # %else10 3580; RV64ZVE32F-NEXT: andi a0, a5, 64 3581; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 3582; RV64ZVE32F-NEXT: bnez a0, .LBB42_15 3583; RV64ZVE32F-NEXT: # %bb.10: # %else12 3584; RV64ZVE32F-NEXT: andi a0, a5, -128 3585; RV64ZVE32F-NEXT: bnez a0, .LBB42_16 3586; RV64ZVE32F-NEXT: .LBB42_11: # %else14 3587; RV64ZVE32F-NEXT: ret 3588; RV64ZVE32F-NEXT: .LBB42_12: # %cond.store3 3589; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3590; RV64ZVE32F-NEXT: slli a0, a0, 3 3591; RV64ZVE32F-NEXT: add a0, a1, a0 3592; RV64ZVE32F-NEXT: sd t0, 0(a0) 3593; RV64ZVE32F-NEXT: andi a0, a5, 8 3594; RV64ZVE32F-NEXT: beqz a0, .LBB42_6 3595; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5 3596; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3597; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3598; RV64ZVE32F-NEXT: slli a0, a0, 3 3599; RV64ZVE32F-NEXT: add a0, a1, a0 3600; RV64ZVE32F-NEXT: sd a7, 0(a0) 3601; RV64ZVE32F-NEXT: andi a0, a5, 16 3602; RV64ZVE32F-NEXT: beqz a0, .LBB42_7 3603; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7 3604; RV64ZVE32F-NEXT: 
vmv.x.s a0, v9 3605; RV64ZVE32F-NEXT: slli a0, a0, 3 3606; RV64ZVE32F-NEXT: add a0, a1, a0 3607; RV64ZVE32F-NEXT: sd a6, 0(a0) 3608; RV64ZVE32F-NEXT: andi a0, a5, 32 3609; RV64ZVE32F-NEXT: bnez a0, .LBB42_8 3610; RV64ZVE32F-NEXT: j .LBB42_9 3611; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11 3612; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3613; RV64ZVE32F-NEXT: slli a0, a0, 3 3614; RV64ZVE32F-NEXT: add a0, a1, a0 3615; RV64ZVE32F-NEXT: sd a3, 0(a0) 3616; RV64ZVE32F-NEXT: andi a0, a5, -128 3617; RV64ZVE32F-NEXT: beqz a0, .LBB42_11 3618; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13 3619; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3620; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3621; RV64ZVE32F-NEXT: slli a0, a0, 3 3622; RV64ZVE32F-NEXT: add a0, a1, a0 3623; RV64ZVE32F-NEXT: sd a2, 0(a0) 3624; RV64ZVE32F-NEXT: ret 3625 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs 3626 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 3627 ret void 3628} 3629 3630define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 3631; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8i64: 3632; RV32V: # %bb.0: 3633; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3634; RV32V-NEXT: vsext.vf4 v14, v12 3635; RV32V-NEXT: vsll.vi v12, v14, 3 3636; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 3637; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t 3638; RV32V-NEXT: ret 3639; 3640; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i64: 3641; RV64V: # %bb.0: 3642; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 3643; RV64V-NEXT: vsext.vf8 v16, v12 3644; RV64V-NEXT: vsll.vi v12, v16, 3 3645; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 3646; RV64V-NEXT: ret 3647; 3648; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64: 3649; RV32ZVE32F: # %bb.0: 3650; RV32ZVE32F-NEXT: addi sp, sp, -16 3651; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 3652; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 3653; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 3654; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill 3655; RV32ZVE32F-NEXT: .cfi_offset s0, -4 3656; RV32ZVE32F-NEXT: .cfi_offset s1, -8 3657; RV32ZVE32F-NEXT: .cfi_offset s2, -12 3658; RV32ZVE32F-NEXT: .cfi_remember_state 3659; RV32ZVE32F-NEXT: lw a2, 56(a0) 3660; RV32ZVE32F-NEXT: lw a3, 60(a0) 3661; RV32ZVE32F-NEXT: lw a6, 40(a0) 3662; RV32ZVE32F-NEXT: lw a7, 44(a0) 3663; RV32ZVE32F-NEXT: lw a4, 48(a0) 3664; RV32ZVE32F-NEXT: lw a5, 52(a0) 3665; RV32ZVE32F-NEXT: lw t3, 24(a0) 3666; RV32ZVE32F-NEXT: lw t4, 28(a0) 3667; RV32ZVE32F-NEXT: lw t1, 32(a0) 3668; RV32ZVE32F-NEXT: lw t2, 36(a0) 3669; RV32ZVE32F-NEXT: lw s0, 8(a0) 3670; RV32ZVE32F-NEXT: lw s1, 12(a0) 3671; RV32ZVE32F-NEXT: lw t5, 16(a0) 3672; RV32ZVE32F-NEXT: lw t6, 20(a0) 3673; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3674; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 3675; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 3676; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 3677; RV32ZVE32F-NEXT: vmv.x.s t0, v0 3678; RV32ZVE32F-NEXT: andi s2, t0, 1 3679; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3680; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 3681; RV32ZVE32F-NEXT: bnez s2, .LBB43_10 3682; RV32ZVE32F-NEXT: # %bb.1: # %else 3683; RV32ZVE32F-NEXT: andi a0, t0, 2 3684; RV32ZVE32F-NEXT: bnez a0, .LBB43_11 3685; RV32ZVE32F-NEXT: .LBB43_2: # %else2 3686; RV32ZVE32F-NEXT: andi a0, t0, 4 3687; RV32ZVE32F-NEXT: bnez a0, .LBB43_12 3688; RV32ZVE32F-NEXT: .LBB43_3: # %else4 3689; RV32ZVE32F-NEXT: andi a0, t0, 8 3690; RV32ZVE32F-NEXT: bnez a0, .LBB43_13 3691; RV32ZVE32F-NEXT: 
.LBB43_4: # %else6 3692; RV32ZVE32F-NEXT: andi a0, t0, 16 3693; RV32ZVE32F-NEXT: bnez a0, .LBB43_14 3694; RV32ZVE32F-NEXT: .LBB43_5: # %else8 3695; RV32ZVE32F-NEXT: andi a0, t0, 32 3696; RV32ZVE32F-NEXT: bnez a0, .LBB43_15 3697; RV32ZVE32F-NEXT: .LBB43_6: # %else10 3698; RV32ZVE32F-NEXT: andi a0, t0, 64 3699; RV32ZVE32F-NEXT: bnez a0, .LBB43_16 3700; RV32ZVE32F-NEXT: .LBB43_7: # %else12 3701; RV32ZVE32F-NEXT: andi a0, t0, -128 3702; RV32ZVE32F-NEXT: beqz a0, .LBB43_9 3703; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13 3704; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3705; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 3706; RV32ZVE32F-NEXT: vmv.x.s a0, v8 3707; RV32ZVE32F-NEXT: sw a2, 0(a0) 3708; RV32ZVE32F-NEXT: sw a3, 4(a0) 3709; RV32ZVE32F-NEXT: .LBB43_9: # %else14 3710; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 3711; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 3712; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload 3713; RV32ZVE32F-NEXT: .cfi_restore s0 3714; RV32ZVE32F-NEXT: .cfi_restore s1 3715; RV32ZVE32F-NEXT: .cfi_restore s2 3716; RV32ZVE32F-NEXT: addi sp, sp, 16 3717; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 3718; RV32ZVE32F-NEXT: ret 3719; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store 3720; RV32ZVE32F-NEXT: .cfi_restore_state 3721; RV32ZVE32F-NEXT: lw a1, 0(a0) 3722; RV32ZVE32F-NEXT: lw a0, 4(a0) 3723; RV32ZVE32F-NEXT: vmv.x.s s2, v8 3724; RV32ZVE32F-NEXT: sw a1, 0(s2) 3725; RV32ZVE32F-NEXT: sw a0, 4(s2) 3726; RV32ZVE32F-NEXT: andi a0, t0, 2 3727; RV32ZVE32F-NEXT: beqz a0, .LBB43_2 3728; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1 3729; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3730; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 3731; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3732; RV32ZVE32F-NEXT: sw s0, 0(a0) 3733; RV32ZVE32F-NEXT: sw s1, 4(a0) 3734; RV32ZVE32F-NEXT: andi a0, t0, 4 3735; RV32ZVE32F-NEXT: beqz a0, .LBB43_3 3736; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3 3737; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3738; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 3739; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3740; RV32ZVE32F-NEXT: sw t5, 0(a0) 3741; RV32ZVE32F-NEXT: sw t6, 4(a0) 3742; RV32ZVE32F-NEXT: andi a0, t0, 8 3743; RV32ZVE32F-NEXT: beqz a0, .LBB43_4 3744; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5 3745; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3746; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 3747; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3748; RV32ZVE32F-NEXT: sw t3, 0(a0) 3749; RV32ZVE32F-NEXT: sw t4, 4(a0) 3750; RV32ZVE32F-NEXT: andi a0, t0, 16 3751; RV32ZVE32F-NEXT: beqz a0, .LBB43_5 3752; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7 3753; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3754; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 3755; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3756; RV32ZVE32F-NEXT: sw t1, 0(a0) 3757; RV32ZVE32F-NEXT: sw t2, 4(a0) 3758; RV32ZVE32F-NEXT: andi a0, t0, 32 3759; RV32ZVE32F-NEXT: beqz a0, .LBB43_6 3760; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9 3761; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3762; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 3763; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3764; RV32ZVE32F-NEXT: sw a6, 0(a0) 3765; RV32ZVE32F-NEXT: sw a7, 4(a0) 3766; RV32ZVE32F-NEXT: andi a0, t0, 64 3767; RV32ZVE32F-NEXT: beqz a0, .LBB43_7 3768; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11 3769; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3770; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 3771; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3772; RV32ZVE32F-NEXT: sw a4, 0(a0) 3773; RV32ZVE32F-NEXT: sw a5, 4(a0) 3774; RV32ZVE32F-NEXT: andi 
a0, t0, -128 3775; RV32ZVE32F-NEXT: bnez a0, .LBB43_8 3776; RV32ZVE32F-NEXT: j .LBB43_9 3777; 3778; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64: 3779; RV64ZVE32F: # %bb.0: 3780; RV64ZVE32F-NEXT: ld a4, 40(a0) 3781; RV64ZVE32F-NEXT: ld a3, 48(a0) 3782; RV64ZVE32F-NEXT: ld a2, 56(a0) 3783; RV64ZVE32F-NEXT: ld t1, 8(a0) 3784; RV64ZVE32F-NEXT: ld t0, 16(a0) 3785; RV64ZVE32F-NEXT: ld a7, 24(a0) 3786; RV64ZVE32F-NEXT: ld a6, 32(a0) 3787; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3788; RV64ZVE32F-NEXT: vmv.x.s a5, v0 3789; RV64ZVE32F-NEXT: andi t2, a5, 1 3790; RV64ZVE32F-NEXT: beqz t2, .LBB43_2 3791; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 3792; RV64ZVE32F-NEXT: ld a0, 0(a0) 3793; RV64ZVE32F-NEXT: vmv.x.s t2, v8 3794; RV64ZVE32F-NEXT: slli t2, t2, 3 3795; RV64ZVE32F-NEXT: add t2, a1, t2 3796; RV64ZVE32F-NEXT: sd a0, 0(t2) 3797; RV64ZVE32F-NEXT: .LBB43_2: # %else 3798; RV64ZVE32F-NEXT: andi a0, a5, 2 3799; RV64ZVE32F-NEXT: beqz a0, .LBB43_4 3800; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 3801; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 3802; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3803; RV64ZVE32F-NEXT: vmv.x.s a0, v9 3804; RV64ZVE32F-NEXT: slli a0, a0, 3 3805; RV64ZVE32F-NEXT: add a0, a1, a0 3806; RV64ZVE32F-NEXT: sd t1, 0(a0) 3807; RV64ZVE32F-NEXT: .LBB43_4: # %else2 3808; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 3809; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 3810; RV64ZVE32F-NEXT: andi a0, a5, 4 3811; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 3812; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 3813; RV64ZVE32F-NEXT: bnez a0, .LBB43_12 3814; RV64ZVE32F-NEXT: # %bb.5: # %else4 3815; RV64ZVE32F-NEXT: andi a0, a5, 8 3816; RV64ZVE32F-NEXT: bnez a0, .LBB43_13 3817; RV64ZVE32F-NEXT: .LBB43_6: # %else6 3818; RV64ZVE32F-NEXT: andi a0, a5, 16 3819; RV64ZVE32F-NEXT: bnez a0, .LBB43_14 3820; RV64ZVE32F-NEXT: .LBB43_7: # %else8 3821; RV64ZVE32F-NEXT: andi a0, a5, 32 3822; RV64ZVE32F-NEXT: beqz a0, .LBB43_9 3823; RV64ZVE32F-NEXT: .LBB43_8: # %cond.store9 3824; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 3825; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3826; RV64ZVE32F-NEXT: slli a0, a0, 3 3827; RV64ZVE32F-NEXT: add a0, a1, a0 3828; RV64ZVE32F-NEXT: sd a4, 0(a0) 3829; RV64ZVE32F-NEXT: .LBB43_9: # %else10 3830; RV64ZVE32F-NEXT: andi a0, a5, 64 3831; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 3832; RV64ZVE32F-NEXT: bnez a0, .LBB43_15 3833; RV64ZVE32F-NEXT: # %bb.10: # %else12 3834; RV64ZVE32F-NEXT: andi a0, a5, -128 3835; RV64ZVE32F-NEXT: bnez a0, .LBB43_16 3836; RV64ZVE32F-NEXT: .LBB43_11: # %else14 3837; RV64ZVE32F-NEXT: ret 3838; RV64ZVE32F-NEXT: .LBB43_12: # %cond.store3 3839; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3840; RV64ZVE32F-NEXT: slli a0, a0, 3 3841; RV64ZVE32F-NEXT: add a0, a1, a0 3842; RV64ZVE32F-NEXT: sd t0, 0(a0) 3843; RV64ZVE32F-NEXT: andi a0, a5, 8 3844; RV64ZVE32F-NEXT: beqz a0, .LBB43_6 3845; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5 3846; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3847; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3848; RV64ZVE32F-NEXT: slli a0, a0, 3 3849; RV64ZVE32F-NEXT: add a0, a1, a0 3850; RV64ZVE32F-NEXT: sd a7, 0(a0) 3851; RV64ZVE32F-NEXT: andi a0, a5, 16 3852; RV64ZVE32F-NEXT: beqz a0, .LBB43_7 3853; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7 3854; RV64ZVE32F-NEXT: vmv.x.s a0, v9 3855; RV64ZVE32F-NEXT: slli a0, a0, 3 3856; RV64ZVE32F-NEXT: add a0, a1, a0 3857; RV64ZVE32F-NEXT: sd a6, 0(a0) 3858; RV64ZVE32F-NEXT: andi a0, a5, 32 3859; RV64ZVE32F-NEXT: bnez a0, .LBB43_8 3860; RV64ZVE32F-NEXT: j .LBB43_9 3861; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11 3862; 
RV64ZVE32F-NEXT: vmv.x.s a0, v8 3863; RV64ZVE32F-NEXT: slli a0, a0, 3 3864; RV64ZVE32F-NEXT: add a0, a1, a0 3865; RV64ZVE32F-NEXT: sd a3, 0(a0) 3866; RV64ZVE32F-NEXT: andi a0, a5, -128 3867; RV64ZVE32F-NEXT: beqz a0, .LBB43_11 3868; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13 3869; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3870; RV64ZVE32F-NEXT: vmv.x.s a0, v8 3871; RV64ZVE32F-NEXT: slli a0, a0, 3 3872; RV64ZVE32F-NEXT: add a0, a1, a0 3873; RV64ZVE32F-NEXT: sd a2, 0(a0) 3874; RV64ZVE32F-NEXT: ret 3875 %eidxs = sext <8 x i8> %idxs to <8 x i64> 3876 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 3877 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 3878 ret void 3879} 3880 3881define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 3882; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: 3883; RV32V: # %bb.0: 3884; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3885; RV32V-NEXT: vzext.vf2 v13, v12 3886; RV32V-NEXT: vsll.vi v12, v13, 3 3887; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 3888; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t 3889; RV32V-NEXT: ret 3890; 3891; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: 3892; RV64V: # %bb.0: 3893; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3894; RV64V-NEXT: vzext.vf2 v13, v12 3895; RV64V-NEXT: vsll.vi v12, v13, 3 3896; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 3897; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t 3898; RV64V-NEXT: ret 3899; 3900; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: 3901; RV32ZVE32F: # %bb.0: 3902; RV32ZVE32F-NEXT: addi sp, sp, -16 3903; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 3904; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 3905; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 3906; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill 3907; RV32ZVE32F-NEXT: .cfi_offset s0, -4 3908; RV32ZVE32F-NEXT: .cfi_offset s1, -8 3909; RV32ZVE32F-NEXT: .cfi_offset s2, -12 3910; RV32ZVE32F-NEXT: .cfi_remember_state 3911; RV32ZVE32F-NEXT: lw a2, 56(a0) 3912; RV32ZVE32F-NEXT: lw a3, 60(a0) 3913; RV32ZVE32F-NEXT: lw a6, 40(a0) 3914; RV32ZVE32F-NEXT: lw a7, 44(a0) 3915; RV32ZVE32F-NEXT: lw a4, 48(a0) 3916; RV32ZVE32F-NEXT: lw a5, 52(a0) 3917; RV32ZVE32F-NEXT: lw t3, 24(a0) 3918; RV32ZVE32F-NEXT: lw t4, 28(a0) 3919; RV32ZVE32F-NEXT: lw t1, 32(a0) 3920; RV32ZVE32F-NEXT: lw t2, 36(a0) 3921; RV32ZVE32F-NEXT: lw s0, 8(a0) 3922; RV32ZVE32F-NEXT: lw s1, 12(a0) 3923; RV32ZVE32F-NEXT: lw t5, 16(a0) 3924; RV32ZVE32F-NEXT: lw t6, 20(a0) 3925; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3926; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 3927; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 3928; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 3929; RV32ZVE32F-NEXT: vmv.x.s t0, v0 3930; RV32ZVE32F-NEXT: andi s2, t0, 1 3931; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3932; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 3933; RV32ZVE32F-NEXT: bnez s2, .LBB44_10 3934; RV32ZVE32F-NEXT: # %bb.1: # %else 3935; RV32ZVE32F-NEXT: andi a0, t0, 2 3936; RV32ZVE32F-NEXT: bnez a0, .LBB44_11 3937; RV32ZVE32F-NEXT: .LBB44_2: # %else2 3938; RV32ZVE32F-NEXT: andi a0, t0, 4 3939; RV32ZVE32F-NEXT: bnez a0, .LBB44_12 3940; RV32ZVE32F-NEXT: .LBB44_3: # %else4 3941; RV32ZVE32F-NEXT: andi a0, t0, 8 3942; RV32ZVE32F-NEXT: bnez a0, .LBB44_13 3943; RV32ZVE32F-NEXT: .LBB44_4: # %else6 3944; RV32ZVE32F-NEXT: andi a0, t0, 16 3945; RV32ZVE32F-NEXT: bnez a0, .LBB44_14 3946; RV32ZVE32F-NEXT: .LBB44_5: # %else8 3947; RV32ZVE32F-NEXT: andi a0, t0, 32 3948; 
RV32ZVE32F-NEXT: bnez a0, .LBB44_15 3949; RV32ZVE32F-NEXT: .LBB44_6: # %else10 3950; RV32ZVE32F-NEXT: andi a0, t0, 64 3951; RV32ZVE32F-NEXT: bnez a0, .LBB44_16 3952; RV32ZVE32F-NEXT: .LBB44_7: # %else12 3953; RV32ZVE32F-NEXT: andi a0, t0, -128 3954; RV32ZVE32F-NEXT: beqz a0, .LBB44_9 3955; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13 3956; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3957; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 3958; RV32ZVE32F-NEXT: vmv.x.s a0, v8 3959; RV32ZVE32F-NEXT: sw a2, 0(a0) 3960; RV32ZVE32F-NEXT: sw a3, 4(a0) 3961; RV32ZVE32F-NEXT: .LBB44_9: # %else14 3962; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 3963; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 3964; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload 3965; RV32ZVE32F-NEXT: .cfi_restore s0 3966; RV32ZVE32F-NEXT: .cfi_restore s1 3967; RV32ZVE32F-NEXT: .cfi_restore s2 3968; RV32ZVE32F-NEXT: addi sp, sp, 16 3969; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 3970; RV32ZVE32F-NEXT: ret 3971; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store 3972; RV32ZVE32F-NEXT: .cfi_restore_state 3973; RV32ZVE32F-NEXT: lw a1, 0(a0) 3974; RV32ZVE32F-NEXT: lw a0, 4(a0) 3975; RV32ZVE32F-NEXT: vmv.x.s s2, v8 3976; RV32ZVE32F-NEXT: sw a1, 0(s2) 3977; RV32ZVE32F-NEXT: sw a0, 4(s2) 3978; RV32ZVE32F-NEXT: andi a0, t0, 2 3979; RV32ZVE32F-NEXT: beqz a0, .LBB44_2 3980; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1 3981; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3982; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 3983; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3984; RV32ZVE32F-NEXT: sw s0, 0(a0) 3985; RV32ZVE32F-NEXT: sw s1, 4(a0) 3986; RV32ZVE32F-NEXT: andi a0, t0, 4 3987; RV32ZVE32F-NEXT: beqz a0, .LBB44_3 3988; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3 3989; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3990; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 3991; RV32ZVE32F-NEXT: vmv.x.s a0, v10 3992; RV32ZVE32F-NEXT: sw t5, 0(a0) 3993; RV32ZVE32F-NEXT: sw t6, 4(a0) 3994; RV32ZVE32F-NEXT: andi a0, t0, 8 3995; RV32ZVE32F-NEXT: beqz a0, .LBB44_4 3996; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5 3997; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3998; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 3999; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4000; RV32ZVE32F-NEXT: sw t3, 0(a0) 4001; RV32ZVE32F-NEXT: sw t4, 4(a0) 4002; RV32ZVE32F-NEXT: andi a0, t0, 16 4003; RV32ZVE32F-NEXT: beqz a0, .LBB44_5 4004; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7 4005; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4006; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 4007; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4008; RV32ZVE32F-NEXT: sw t1, 0(a0) 4009; RV32ZVE32F-NEXT: sw t2, 4(a0) 4010; RV32ZVE32F-NEXT: andi a0, t0, 32 4011; RV32ZVE32F-NEXT: beqz a0, .LBB44_6 4012; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9 4013; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4014; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 4015; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4016; RV32ZVE32F-NEXT: sw a6, 0(a0) 4017; RV32ZVE32F-NEXT: sw a7, 4(a0) 4018; RV32ZVE32F-NEXT: andi a0, t0, 64 4019; RV32ZVE32F-NEXT: beqz a0, .LBB44_7 4020; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11 4021; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4022; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 4023; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4024; RV32ZVE32F-NEXT: sw a4, 0(a0) 4025; RV32ZVE32F-NEXT: sw a5, 4(a0) 4026; RV32ZVE32F-NEXT: andi a0, t0, -128 4027; RV32ZVE32F-NEXT: bnez a0, .LBB44_8 4028; RV32ZVE32F-NEXT: j .LBB44_9 4029; 4030; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: 4031; RV64ZVE32F: # %bb.0: 4032; 
RV64ZVE32F-NEXT: ld a4, 40(a0) 4033; RV64ZVE32F-NEXT: ld a3, 48(a0) 4034; RV64ZVE32F-NEXT: ld a2, 56(a0) 4035; RV64ZVE32F-NEXT: ld t1, 8(a0) 4036; RV64ZVE32F-NEXT: ld t0, 16(a0) 4037; RV64ZVE32F-NEXT: ld a7, 24(a0) 4038; RV64ZVE32F-NEXT: ld a6, 32(a0) 4039; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 4040; RV64ZVE32F-NEXT: vmv.x.s a5, v0 4041; RV64ZVE32F-NEXT: andi t2, a5, 1 4042; RV64ZVE32F-NEXT: beqz t2, .LBB44_2 4043; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 4044; RV64ZVE32F-NEXT: ld a0, 0(a0) 4045; RV64ZVE32F-NEXT: vmv.x.s t2, v8 4046; RV64ZVE32F-NEXT: andi t2, t2, 255 4047; RV64ZVE32F-NEXT: slli t2, t2, 3 4048; RV64ZVE32F-NEXT: add t2, a1, t2 4049; RV64ZVE32F-NEXT: sd a0, 0(t2) 4050; RV64ZVE32F-NEXT: .LBB44_2: # %else 4051; RV64ZVE32F-NEXT: andi a0, a5, 2 4052; RV64ZVE32F-NEXT: beqz a0, .LBB44_4 4053; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 4054; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 4055; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 4056; RV64ZVE32F-NEXT: vmv.x.s a0, v9 4057; RV64ZVE32F-NEXT: andi a0, a0, 255 4058; RV64ZVE32F-NEXT: slli a0, a0, 3 4059; RV64ZVE32F-NEXT: add a0, a1, a0 4060; RV64ZVE32F-NEXT: sd t1, 0(a0) 4061; RV64ZVE32F-NEXT: .LBB44_4: # %else2 4062; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 4063; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 4064; RV64ZVE32F-NEXT: andi a0, a5, 4 4065; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 4066; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 4067; RV64ZVE32F-NEXT: bnez a0, .LBB44_12 4068; RV64ZVE32F-NEXT: # %bb.5: # %else4 4069; RV64ZVE32F-NEXT: andi a0, a5, 8 4070; RV64ZVE32F-NEXT: bnez a0, .LBB44_13 4071; RV64ZVE32F-NEXT: .LBB44_6: # %else6 4072; RV64ZVE32F-NEXT: andi a0, a5, 16 4073; RV64ZVE32F-NEXT: bnez a0, .LBB44_14 4074; RV64ZVE32F-NEXT: .LBB44_7: # %else8 4075; RV64ZVE32F-NEXT: andi a0, a5, 32 4076; RV64ZVE32F-NEXT: beqz a0, .LBB44_9 4077; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9 4078; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 4079; RV64ZVE32F-NEXT: vmv.x.s a0, v8 4080; RV64ZVE32F-NEXT: andi a0, a0, 255 4081; RV64ZVE32F-NEXT: slli a0, a0, 3 4082; RV64ZVE32F-NEXT: add a0, a1, a0 4083; RV64ZVE32F-NEXT: sd a4, 0(a0) 4084; RV64ZVE32F-NEXT: .LBB44_9: # %else10 4085; RV64ZVE32F-NEXT: andi a0, a5, 64 4086; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 4087; RV64ZVE32F-NEXT: bnez a0, .LBB44_15 4088; RV64ZVE32F-NEXT: # %bb.10: # %else12 4089; RV64ZVE32F-NEXT: andi a0, a5, -128 4090; RV64ZVE32F-NEXT: bnez a0, .LBB44_16 4091; RV64ZVE32F-NEXT: .LBB44_11: # %else14 4092; RV64ZVE32F-NEXT: ret 4093; RV64ZVE32F-NEXT: .LBB44_12: # %cond.store3 4094; RV64ZVE32F-NEXT: vmv.x.s a0, v8 4095; RV64ZVE32F-NEXT: andi a0, a0, 255 4096; RV64ZVE32F-NEXT: slli a0, a0, 3 4097; RV64ZVE32F-NEXT: add a0, a1, a0 4098; RV64ZVE32F-NEXT: sd t0, 0(a0) 4099; RV64ZVE32F-NEXT: andi a0, a5, 8 4100; RV64ZVE32F-NEXT: beqz a0, .LBB44_6 4101; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5 4102; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4103; RV64ZVE32F-NEXT: vmv.x.s a0, v8 4104; RV64ZVE32F-NEXT: andi a0, a0, 255 4105; RV64ZVE32F-NEXT: slli a0, a0, 3 4106; RV64ZVE32F-NEXT: add a0, a1, a0 4107; RV64ZVE32F-NEXT: sd a7, 0(a0) 4108; RV64ZVE32F-NEXT: andi a0, a5, 16 4109; RV64ZVE32F-NEXT: beqz a0, .LBB44_7 4110; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7 4111; RV64ZVE32F-NEXT: vmv.x.s a0, v9 4112; RV64ZVE32F-NEXT: andi a0, a0, 255 4113; RV64ZVE32F-NEXT: slli a0, a0, 3 4114; RV64ZVE32F-NEXT: add a0, a1, a0 4115; RV64ZVE32F-NEXT: sd a6, 0(a0) 4116; RV64ZVE32F-NEXT: andi a0, a5, 32 4117; RV64ZVE32F-NEXT: bnez a0, .LBB44_8 4118; RV64ZVE32F-NEXT: j .LBB44_9 4119; 
RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11 4120; RV64ZVE32F-NEXT: vmv.x.s a0, v8 4121; RV64ZVE32F-NEXT: andi a0, a0, 255 4122; RV64ZVE32F-NEXT: slli a0, a0, 3 4123; RV64ZVE32F-NEXT: add a0, a1, a0 4124; RV64ZVE32F-NEXT: sd a3, 0(a0) 4125; RV64ZVE32F-NEXT: andi a0, a5, -128 4126; RV64ZVE32F-NEXT: beqz a0, .LBB44_11 4127; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13 4128; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4129; RV64ZVE32F-NEXT: vmv.x.s a0, v8 4130; RV64ZVE32F-NEXT: andi a0, a0, 255 4131; RV64ZVE32F-NEXT: slli a0, a0, 3 4132; RV64ZVE32F-NEXT: add a0, a1, a0 4133; RV64ZVE32F-NEXT: sd a2, 0(a0) 4134; RV64ZVE32F-NEXT: ret 4135 %eidxs = zext <8 x i8> %idxs to <8 x i64> 4136 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 4137 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 4138 ret void 4139} 4140 4141define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { 4142; RV32V-LABEL: mscatter_baseidx_v8i16_v8i64: 4143; RV32V: # %bb.0: 4144; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4145; RV32V-NEXT: vsext.vf2 v14, v12 4146; RV32V-NEXT: vsll.vi v12, v14, 3 4147; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 4148; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t 4149; RV32V-NEXT: ret 4150; 4151; RV64V-LABEL: mscatter_baseidx_v8i16_v8i64: 4152; RV64V: # %bb.0: 4153; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 4154; RV64V-NEXT: vsext.vf4 v16, v12 4155; RV64V-NEXT: vsll.vi v12, v16, 3 4156; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 4157; RV64V-NEXT: ret 4158; 4159; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64: 4160; RV32ZVE32F: # %bb.0: 4161; RV32ZVE32F-NEXT: addi sp, sp, -16 4162; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 4163; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 4164; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 4165; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill 4166; RV32ZVE32F-NEXT: .cfi_offset s0, -4 4167; RV32ZVE32F-NEXT: .cfi_offset s1, -8 4168; RV32ZVE32F-NEXT: .cfi_offset s2, -12 4169; RV32ZVE32F-NEXT: .cfi_remember_state 4170; RV32ZVE32F-NEXT: lw a2, 56(a0) 4171; RV32ZVE32F-NEXT: lw a3, 60(a0) 4172; RV32ZVE32F-NEXT: lw a6, 40(a0) 4173; RV32ZVE32F-NEXT: lw a7, 44(a0) 4174; RV32ZVE32F-NEXT: lw a4, 48(a0) 4175; RV32ZVE32F-NEXT: lw a5, 52(a0) 4176; RV32ZVE32F-NEXT: lw t3, 24(a0) 4177; RV32ZVE32F-NEXT: lw t4, 28(a0) 4178; RV32ZVE32F-NEXT: lw t1, 32(a0) 4179; RV32ZVE32F-NEXT: lw t2, 36(a0) 4180; RV32ZVE32F-NEXT: lw s0, 8(a0) 4181; RV32ZVE32F-NEXT: lw s1, 12(a0) 4182; RV32ZVE32F-NEXT: lw t5, 16(a0) 4183; RV32ZVE32F-NEXT: lw t6, 20(a0) 4184; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4185; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 4186; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 4187; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 4188; RV32ZVE32F-NEXT: vmv.x.s t0, v0 4189; RV32ZVE32F-NEXT: andi s2, t0, 1 4190; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4191; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 4192; RV32ZVE32F-NEXT: bnez s2, .LBB45_10 4193; RV32ZVE32F-NEXT: # %bb.1: # %else 4194; RV32ZVE32F-NEXT: andi a0, t0, 2 4195; RV32ZVE32F-NEXT: bnez a0, .LBB45_11 4196; RV32ZVE32F-NEXT: .LBB45_2: # %else2 4197; RV32ZVE32F-NEXT: andi a0, t0, 4 4198; RV32ZVE32F-NEXT: bnez a0, .LBB45_12 4199; RV32ZVE32F-NEXT: .LBB45_3: # %else4 4200; RV32ZVE32F-NEXT: andi a0, t0, 8 4201; RV32ZVE32F-NEXT: bnez a0, .LBB45_13 4202; RV32ZVE32F-NEXT: .LBB45_4: # %else6 4203; RV32ZVE32F-NEXT: andi a0, t0, 16 4204; RV32ZVE32F-NEXT: bnez a0, .LBB45_14 4205; RV32ZVE32F-NEXT: .LBB45_5: # 
%else8 4206; RV32ZVE32F-NEXT: andi a0, t0, 32 4207; RV32ZVE32F-NEXT: bnez a0, .LBB45_15 4208; RV32ZVE32F-NEXT: .LBB45_6: # %else10 4209; RV32ZVE32F-NEXT: andi a0, t0, 64 4210; RV32ZVE32F-NEXT: bnez a0, .LBB45_16 4211; RV32ZVE32F-NEXT: .LBB45_7: # %else12 4212; RV32ZVE32F-NEXT: andi a0, t0, -128 4213; RV32ZVE32F-NEXT: beqz a0, .LBB45_9 4214; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13 4215; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4216; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 4217; RV32ZVE32F-NEXT: vmv.x.s a0, v8 4218; RV32ZVE32F-NEXT: sw a2, 0(a0) 4219; RV32ZVE32F-NEXT: sw a3, 4(a0) 4220; RV32ZVE32F-NEXT: .LBB45_9: # %else14 4221; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 4222; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 4223; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload 4224; RV32ZVE32F-NEXT: .cfi_restore s0 4225; RV32ZVE32F-NEXT: .cfi_restore s1 4226; RV32ZVE32F-NEXT: .cfi_restore s2 4227; RV32ZVE32F-NEXT: addi sp, sp, 16 4228; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 4229; RV32ZVE32F-NEXT: ret 4230; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store 4231; RV32ZVE32F-NEXT: .cfi_restore_state 4232; RV32ZVE32F-NEXT: lw a1, 0(a0) 4233; RV32ZVE32F-NEXT: lw a0, 4(a0) 4234; RV32ZVE32F-NEXT: vmv.x.s s2, v8 4235; RV32ZVE32F-NEXT: sw a1, 0(s2) 4236; RV32ZVE32F-NEXT: sw a0, 4(s2) 4237; RV32ZVE32F-NEXT: andi a0, t0, 2 4238; RV32ZVE32F-NEXT: beqz a0, .LBB45_2 4239; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1 4240; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4241; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 4242; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4243; RV32ZVE32F-NEXT: sw s0, 0(a0) 4244; RV32ZVE32F-NEXT: sw s1, 4(a0) 4245; RV32ZVE32F-NEXT: andi a0, t0, 4 4246; RV32ZVE32F-NEXT: beqz a0, .LBB45_3 4247; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3 4248; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4249; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 4250; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4251; RV32ZVE32F-NEXT: sw t5, 0(a0) 4252; RV32ZVE32F-NEXT: sw t6, 4(a0) 4253; RV32ZVE32F-NEXT: andi a0, t0, 8 4254; RV32ZVE32F-NEXT: beqz a0, .LBB45_4 4255; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5 4256; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4257; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 4258; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4259; RV32ZVE32F-NEXT: sw t3, 0(a0) 4260; RV32ZVE32F-NEXT: sw t4, 4(a0) 4261; RV32ZVE32F-NEXT: andi a0, t0, 16 4262; RV32ZVE32F-NEXT: beqz a0, .LBB45_5 4263; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7 4264; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4265; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 4266; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4267; RV32ZVE32F-NEXT: sw t1, 0(a0) 4268; RV32ZVE32F-NEXT: sw t2, 4(a0) 4269; RV32ZVE32F-NEXT: andi a0, t0, 32 4270; RV32ZVE32F-NEXT: beqz a0, .LBB45_6 4271; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9 4272; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4273; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 4274; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4275; RV32ZVE32F-NEXT: sw a6, 0(a0) 4276; RV32ZVE32F-NEXT: sw a7, 4(a0) 4277; RV32ZVE32F-NEXT: andi a0, t0, 64 4278; RV32ZVE32F-NEXT: beqz a0, .LBB45_7 4279; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11 4280; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4281; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 4282; RV32ZVE32F-NEXT: vmv.x.s a0, v10 4283; RV32ZVE32F-NEXT: sw a4, 0(a0) 4284; RV32ZVE32F-NEXT: sw a5, 4(a0) 4285; RV32ZVE32F-NEXT: andi a0, t0, -128 4286; RV32ZVE32F-NEXT: bnez a0, .LBB45_8 4287; RV32ZVE32F-NEXT: j .LBB45_9 4288; 4289; RV64ZVE32F-LABEL: 
mscatter_baseidx_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB45_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB45_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB45_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB45_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
; RV64ZVE32F-NEXT: .LBB45_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB45_14
; RV64ZVE32F-NEXT: .LBB45_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB45_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
; RV64ZVE32F-NEXT: .LBB45_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB45_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB45_7
; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB45_8
; RV64ZVE32F-NEXT: j .LBB45_9
; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB45_11
; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

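; The same scatter with the i16 indices explicitly sign-extended to i64 in the
; IR. The vector configurations fold the extension into vsext + vsll; the
; RV64ZVE32F scalar fallback matches the plain-i16 case because vmv.x.s already
; sign-extends the e16 element to XLEN.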
define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB46_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB46_11
; RV32ZVE32F-NEXT: .LBB46_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB46_12
; RV32ZVE32F-NEXT: .LBB46_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB46_13
; RV32ZVE32F-NEXT: .LBB46_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB46_14
; RV32ZVE32F-NEXT: .LBB46_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB46_15
; RV32ZVE32F-NEXT: .LBB46_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB46_16
; RV32ZVE32F-NEXT: .LBB46_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB46_9
; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB46_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
; RV32ZVE32F-NEXT: j .LBB46_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB46_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB46_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB46_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB46_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
; RV64ZVE32F-NEXT: .LBB46_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB46_14
; RV64ZVE32F-NEXT: .LBB46_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB46_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
; RV64ZVE32F-NEXT: .LBB46_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB46_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB46_7
; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB46_8
; RV64ZVE32F-NEXT: j .LBB46_9
; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB46_11
; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

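; Zero-extended i16 indices. Here the RV64ZVE32F path cannot rely on the sign
; extension performed by vmv.x.s, so each index is zero-extended and scaled by
; 8 in a single slli 48 / srli 45 shift pair.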
define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vzext.vf2 v14, v12
; RV64V-NEXT: vsll.vi v12, v14, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB47_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB47_11
; RV32ZVE32F-NEXT: .LBB47_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB47_12
; RV32ZVE32F-NEXT: .LBB47_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB47_13
; RV32ZVE32F-NEXT: .LBB47_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB47_14
; RV32ZVE32F-NEXT: .LBB47_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB47_15
; RV32ZVE32F-NEXT: .LBB47_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB47_16
; RV32ZVE32F-NEXT: .LBB47_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB47_9
; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB47_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
; RV32ZVE32F-NEXT: j .LBB47_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB47_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 48
; RV64ZVE32F-NEXT: srli t2, t2, 45
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB47_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB47_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB47_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
; RV64ZVE32F-NEXT: .LBB47_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB47_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
; RV64ZVE32F-NEXT: .LBB47_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
; RV64ZVE32F-NEXT: j .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

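; i32 indices. RV32V can use them directly as offsets for vsoxei32, while RV64V
; first sign-extends them to i64 with vsext.vf2.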
define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB48_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB48_12
; RV32ZVE32F-NEXT: .LBB48_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB48_13
; RV32ZVE32F-NEXT: .LBB48_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB48_14
; RV32ZVE32F-NEXT: .LBB48_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB48_15
; RV32ZVE32F-NEXT: .LBB48_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB48_16
; RV32ZVE32F-NEXT: .LBB48_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB48_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
; RV32ZVE32F-NEXT: j .LBB48_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB48_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB48_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB48_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB48_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
; RV64ZVE32F-NEXT: .LBB48_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
; RV64ZVE32F-NEXT: .LBB48_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB48_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
; RV64ZVE32F-NEXT: .LBB48_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB48_8
; RV64ZVE32F-NEXT: j .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

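; Sign-extending the i32 indices is expected to be a no-op for codegen: all four
; configurations produce the same sequences as the plain-i32 test above.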
define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB49_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB49_12
; RV32ZVE32F-NEXT: .LBB49_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB49_13
; RV32ZVE32F-NEXT: .LBB49_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB49_14
; RV32ZVE32F-NEXT: .LBB49_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB49_15
; RV32ZVE32F-NEXT: .LBB49_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB49_16
; RV32ZVE32F-NEXT: .LBB49_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB49_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
; RV32ZVE32F-NEXT: j .LBB49_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB49_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB49_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB49_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB49_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
; RV64ZVE32F-NEXT: .LBB49_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB49_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
; RV64ZVE32F-NEXT: .LBB49_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB49_8
; RV64ZVE32F-NEXT: j .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

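; Zero-extended i32 indices: RV64V switches to vzext.vf2, and the RV64ZVE32F
; scalar path zero-extends and scales each index with slli 32 / srli 29.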
define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB50_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB50_12
; RV32ZVE32F-NEXT: .LBB50_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB50_13
; RV32ZVE32F-NEXT: .LBB50_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB50_14
; RV32ZVE32F-NEXT: .LBB50_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB50_15
; RV32ZVE32F-NEXT: .LBB50_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB50_16
; RV32ZVE32F-NEXT: .LBB50_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB50_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
; RV32ZVE32F-NEXT: j .LBB50_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB50_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 32
; RV64ZVE32F-NEXT: srli t2, t2, 29
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB50_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB50_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB50_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
; RV64ZVE32F-NEXT: .LBB50_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB50_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
; RV64ZVE32F-NEXT: .LBB50_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB50_8
; RV64ZVE32F-NEXT: j .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

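; Native i64 indices do not fit in a ZVE32F vector register. RV32ZVE32F rebuilds
; the truncated low halves with vslide1down.vx before the vector add, while
; RV64ZVE32F keeps values and indices entirely in scalar registers, which is why
; it spills s0-s3.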
define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vnsrl.wi v16, v12, 0
; RV32V-NEXT: vsll.vi v12, v16, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsll.vi v12, v12, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -48
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48
; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_offset s3, -16
; RV32ZVE32F-NEXT: .cfi_offset s4, -20
; RV32ZVE32F-NEXT: .cfi_offset s5, -24
; RV32ZVE32F-NEXT: .cfi_offset s6, -28
; RV32ZVE32F-NEXT: .cfi_offset s7, -32
; RV32ZVE32F-NEXT: .cfi_offset s8, -36
; RV32ZVE32F-NEXT: .cfi_offset s9, -40
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a3, 56(a0)
; RV32ZVE32F-NEXT: lw a4, 60(a0)
; RV32ZVE32F-NEXT: lw a7, 40(a0)
; RV32ZVE32F-NEXT: lw t0, 44(a0)
; RV32ZVE32F-NEXT: lw a5, 48(a0)
; RV32ZVE32F-NEXT: lw a6, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: lw s2, 32(a2)
; RV32ZVE32F-NEXT: lw s3, 40(a2)
; RV32ZVE32F-NEXT: lw s4, 48(a2)
; RV32ZVE32F-NEXT: lw s5, 56(a2)
; RV32ZVE32F-NEXT: lw s6, 0(a2)
; RV32ZVE32F-NEXT: lw s7, 8(a2)
; RV32ZVE32F-NEXT: lw s8, 16(a2)
; RV32ZVE32F-NEXT: lw s9, 24(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.v.x v8, s6
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s9
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: andi s2, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB51_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a2, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a2, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB51_12
; RV32ZVE32F-NEXT: .LBB51_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a2, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB51_13
; RV32ZVE32F-NEXT: .LBB51_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a2, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB51_14
; RV32ZVE32F-NEXT: .LBB51_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a2, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB51_15
; RV32ZVE32F-NEXT: .LBB51_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a2, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB51_16
; RV32ZVE32F-NEXT: .LBB51_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a2, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: .LBB51_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: .cfi_restore s3
; RV32ZVE32F-NEXT: .cfi_restore s4
; RV32ZVE32F-NEXT: .cfi_restore s5
; RV32ZVE32F-NEXT: .cfi_restore s6
; RV32ZVE32F-NEXT: .cfi_restore s7
; RV32ZVE32F-NEXT: .cfi_restore s8
; RV32ZVE32F-NEXT: .cfi_restore s9
; RV32ZVE32F-NEXT: addi sp, sp, 48
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, a2, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a7, 0(a0)
; RV32ZVE32F-NEXT: sw t0, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
; RV32ZVE32F-NEXT: j .LBB51_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi sp, sp, -32
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s3, 0(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: .cfi_offset s0, -8
; RV64ZVE32F-NEXT: .cfi_offset s1, -16
; RV64ZVE32F-NEXT: .cfi_offset s2, -24
; RV64ZVE32F-NEXT: .cfi_offset s3, -32
; RV64ZVE32F-NEXT: .cfi_remember_state
; RV64ZVE32F-NEXT: ld a5, 40(a0)
; RV64ZVE32F-NEXT: ld a4, 48(a0)
; RV64ZVE32F-NEXT: ld a3, 56(a0)
; RV64ZVE32F-NEXT: ld s0, 8(a0)
; RV64ZVE32F-NEXT: ld t5, 16(a0)
; RV64ZVE32F-NEXT: ld t3, 24(a0)
; RV64ZVE32F-NEXT: ld t1, 32(a0)
; RV64ZVE32F-NEXT: ld s2, 8(a2)
; RV64ZVE32F-NEXT: ld s1, 16(a2)
; RV64ZVE32F-NEXT: ld t6, 24(a2)
; RV64ZVE32F-NEXT: ld t4, 32(a2)
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: ld t0, 48(a2)
; RV64ZVE32F-NEXT: ld a6, 56(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi s3, a7, 1
; RV64ZVE32F-NEXT: bnez s3, .LBB51_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB51_11
; RV64ZVE32F-NEXT: .LBB51_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB51_12
; RV64ZVE32F-NEXT: .LBB51_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB51_13
; RV64ZVE32F-NEXT: .LBB51_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a7, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB51_14
; RV64ZVE32F-NEXT: .LBB51_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a7, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB51_15
; RV64ZVE32F-NEXT: .LBB51_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a7, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB51_16
; RV64ZVE32F-NEXT: .LBB51_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a7, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a1, a1, a6
; RV64ZVE32F-NEXT: sd a3, 0(a1)
; RV64ZVE32F-NEXT: .LBB51_9: # %else14
; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s3, 0(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: .cfi_restore s0
; RV64ZVE32F-NEXT: .cfi_restore s1
; RV64ZVE32F-NEXT: .cfi_restore s2
; RV64ZVE32F-NEXT: .cfi_restore s3
; RV64ZVE32F-NEXT: addi sp, sp, 32
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store
; RV64ZVE32F-NEXT: .cfi_restore_state
; RV64ZVE32F-NEXT: ld a2, 0(a2)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a1, a2
; RV64ZVE32F-NEXT: sd a0, 0(a2)
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB51_2
; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV64ZVE32F-NEXT: slli s2, s2, 3
; RV64ZVE32F-NEXT: add s2, a1, s2
; RV64ZVE32F-NEXT: sd s0, 0(s2)
; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV64ZVE32F-NEXT: slli s1, s1, 3
; RV64ZVE32F-NEXT: add s1, a1, s1
; RV64ZVE32F-NEXT: sd t5, 0(s1)
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV64ZVE32F-NEXT: slli t6, t6, 3
; RV64ZVE32F-NEXT: add t6, a1, t6
; RV64ZVE32F-NEXT: sd t3, 0(t6)
; RV64ZVE32F-NEXT: andi a0, a7, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV64ZVE32F-NEXT: slli t4, t4, 3
; RV64ZVE32F-NEXT: add t4, a1, t4
; RV64ZVE32F-NEXT: sd t1, 0(t4)
; RV64ZVE32F-NEXT: andi a0, a7, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB51_6
; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a5, 0(t2)
; RV64ZVE32F-NEXT: andi a0, a7, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: sd a4, 0(t0)
; RV64ZVE32F-NEXT: andi a0, a7, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB51_8
; RV64ZVE32F-NEXT: j .LBB51_9
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

5909; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload 5910; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload 5911; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload 5912; RV64ZVE32F-NEXT: ld s3, 0(sp) # 8-byte Folded Reload 5913; RV64ZVE32F-NEXT: .cfi_restore s0 5914; RV64ZVE32F-NEXT: .cfi_restore s1 5915; RV64ZVE32F-NEXT: .cfi_restore s2 5916; RV64ZVE32F-NEXT: .cfi_restore s3 5917; RV64ZVE32F-NEXT: addi sp, sp, 32 5918; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0 5919; RV64ZVE32F-NEXT: ret 5920; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store 5921; RV64ZVE32F-NEXT: .cfi_restore_state 5922; RV64ZVE32F-NEXT: ld a2, 0(a2) 5923; RV64ZVE32F-NEXT: ld a0, 0(a0) 5924; RV64ZVE32F-NEXT: slli a2, a2, 3 5925; RV64ZVE32F-NEXT: add a2, a1, a2 5926; RV64ZVE32F-NEXT: sd a0, 0(a2) 5927; RV64ZVE32F-NEXT: andi a0, a7, 2 5928; RV64ZVE32F-NEXT: beqz a0, .LBB51_2 5929; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1 5930; RV64ZVE32F-NEXT: slli s2, s2, 3 5931; RV64ZVE32F-NEXT: add s2, a1, s2 5932; RV64ZVE32F-NEXT: sd s0, 0(s2) 5933; RV64ZVE32F-NEXT: andi a0, a7, 4 5934; RV64ZVE32F-NEXT: beqz a0, .LBB51_3 5935; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3 5936; RV64ZVE32F-NEXT: slli s1, s1, 3 5937; RV64ZVE32F-NEXT: add s1, a1, s1 5938; RV64ZVE32F-NEXT: sd t5, 0(s1) 5939; RV64ZVE32F-NEXT: andi a0, a7, 8 5940; RV64ZVE32F-NEXT: beqz a0, .LBB51_4 5941; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5 5942; RV64ZVE32F-NEXT: slli t6, t6, 3 5943; RV64ZVE32F-NEXT: add t6, a1, t6 5944; RV64ZVE32F-NEXT: sd t3, 0(t6) 5945; RV64ZVE32F-NEXT: andi a0, a7, 16 5946; RV64ZVE32F-NEXT: beqz a0, .LBB51_5 5947; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7 5948; RV64ZVE32F-NEXT: slli t4, t4, 3 5949; RV64ZVE32F-NEXT: add t4, a1, t4 5950; RV64ZVE32F-NEXT: sd t1, 0(t4) 5951; RV64ZVE32F-NEXT: andi a0, a7, 32 5952; RV64ZVE32F-NEXT: beqz a0, .LBB51_6 5953; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9 5954; RV64ZVE32F-NEXT: slli t2, t2, 3 5955; RV64ZVE32F-NEXT: add t2, a1, t2 5956; RV64ZVE32F-NEXT: sd a5, 0(t2) 5957; RV64ZVE32F-NEXT: andi a0, a7, 64 5958; RV64ZVE32F-NEXT: beqz a0, .LBB51_7 5959; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11 5960; RV64ZVE32F-NEXT: slli t0, t0, 3 5961; RV64ZVE32F-NEXT: add t0, a1, t0 5962; RV64ZVE32F-NEXT: sd a4, 0(t0) 5963; RV64ZVE32F-NEXT: andi a0, a7, -128 5964; RV64ZVE32F-NEXT: bnez a0, .LBB51_8 5965; RV64ZVE32F-NEXT: j .LBB51_9 5966 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs 5967 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 5968 ret void 5969} 5970 5971declare void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat>, <1 x ptr>, i32, <1 x i1>) 5972 5973define void @mscatter_v1bf16(<1 x bfloat> %val, <1 x ptr> %ptrs, <1 x i1> %m) { 5974; RV32V-LABEL: mscatter_v1bf16: 5975; RV32V: # %bb.0: 5976; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma 5977; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t 5978; RV32V-NEXT: ret 5979; 5980; RV64V-LABEL: mscatter_v1bf16: 5981; RV64V: # %bb.0: 5982; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma 5983; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t 5984; RV64V-NEXT: ret 5985; 5986; RV32ZVE32F-LABEL: mscatter_v1bf16: 5987; RV32ZVE32F: # %bb.0: 5988; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 5989; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t 5990; RV32ZVE32F-NEXT: ret 5991; 5992; RV64ZVE32F-LABEL: mscatter_v1bf16: 5993; RV64ZVE32F: # %bb.0: 5994; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 5995; RV64ZVE32F-NEXT: vfirst.m a1, v0 5996; RV64ZVE32F-NEXT: bnez a1, .LBB52_2 5997; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 
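; Scatters of bfloat elements. As the RV64ZVE32F checks below show, this
; configuration scalarizes the scatter (64-bit pointers cannot be held in a
; 32-bit-ELEN vector register), and each active lane is extracted with
; vmv.x.s and stored through fsh after an fmv.h.x round trip.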
declare void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1bf16(<1 x bfloat> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1bf16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1bf16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB52_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: .LBB52_2: # %else
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2bf16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2bf16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB53_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB53_4
; RV64ZVE32F-NEXT: .LBB53_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB54_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB54_6
; RV64ZVE32F-NEXT: .LBB54_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB54_7
; RV64ZVE32F-NEXT: .LBB54_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB54_8
; RV64ZVE32F-NEXT: .LBB54_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
  ret void
}
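; Degenerate masks: with an all-ones mask the v0.t predication should drop
; from the indexed store, and with an all-zeros mask the whole scatter should
; fold away to a plain ret.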
define void @mscatter_truemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a6, 24(a0)
; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB57_11
; RV64ZVE32F-NEXT: .LBB57_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB57_12
; RV64ZVE32F-NEXT: .LBB57_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB57_13
; RV64ZVE32F-NEXT: .LBB57_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB57_14
; RV64ZVE32F-NEXT: .LBB57_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB57_15
; RV64ZVE32F-NEXT: .LBB57_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
; RV64ZVE32F-NEXT: .LBB57_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}
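; The baseidx variants store to %base + idx * 2. The sext test should lower
; exactly like the plain v8i8 index test, while the zext test can keep the
; offsets in 16 bits (vwaddu.vv doubles the index while widening) and use
; vsoxei16.v.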
define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB58_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB58_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB58_13
; RV64ZVE32F-NEXT: .LBB58_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB58_14
; RV64ZVE32F-NEXT: .LBB58_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB58_9
; RV64ZVE32F-NEXT: .LBB58_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
; RV64ZVE32F-NEXT: .LBB58_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB58_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB58_8
; RV64ZVE32F-NEXT: j .LBB58_9
; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB58_11
; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB59_13
; RV64ZVE32F-NEXT: .LBB59_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB59_14
; RV64ZVE32F-NEXT: .LBB59_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB59_9
; RV64ZVE32F-NEXT: .LBB59_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
; RV64ZVE32F-NEXT: .LBB59_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB59_8
; RV64ZVE32F-NEXT: j .LBB59_9
; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB59_11
; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vwaddu.vv v10, v9, v9
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vwaddu.vv v10, v9, v9
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB60_13
; RV64ZVE32F-NEXT: .LBB60_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB60_14
; RV64ZVE32F-NEXT: .LBB60_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB60_9
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
; RV64ZVE32F-NEXT: .LBB60_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB60_8
; RV64ZVE32F-NEXT: j .LBB60_9
; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB60_11
; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB61_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB61_13
; RV64ZVE32F-NEXT: .LBB61_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB61_14
; RV64ZVE32F-NEXT: .LBB61_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB61_9
; RV64ZVE32F-NEXT: .LBB61_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
; RV64ZVE32F-NEXT: .LBB61_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB61_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB61_7
; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB61_8
; RV64ZVE32F-NEXT: j .LBB61_9
; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB61_11
; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
  call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}
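; Half scatters repeat the bfloat tests under both FP16 configurations, hence
; the split check prefixes: with +zvfh the extracted element is stored
; directly with vse16.v (RV64ZVE32F-ZVFH), while with +zvfhmin it takes the
; same vmv.x.s/fmv.h.x/fsh path as bfloat (RV64ZVE32F-ZVFHMIN).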
declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v1f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vfirst.m a1, v0
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB62_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: .LBB62_2: # %else
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v1f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vfirst.m a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB62_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB62_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v2f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB63_3
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB63_4
; RV64ZVE32F-ZVFH-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-ZVFH-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v2f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB63_3
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB63_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v4f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: ld a4, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a1, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFH-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a5, .LBB64_5
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFH-NEXT: .LBB64_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_7
; RV64ZVE32F-ZVFH-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB64_8
; RV64ZVE32F-ZVFH-NEXT: .LBB64_4: # %else6
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_2
; RV64ZVE32F-ZVFH-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a4)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_3
; RV64ZVE32F-ZVFH-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a3, .LBB64_4
; RV64ZVE32F-ZVFH-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v4f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a4, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a5, .LBB64_5
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB64_8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a4)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a3, .LBB64_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
  ret void
}
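; The same all-ones / all-zeros mask folds as above, this time for f16.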
v8, v8, 3 7177; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1) 7178; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9 7179; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a4 7180; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v8 7181; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) 7182; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1 7183; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3) 7184; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a4 7185; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0) 7186; RV64ZVE32F-ZVFHMIN-NEXT: ret 7187 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) 7188 ret void 7189} 7190 7191define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { 7192; CHECK-LABEL: mscatter_falsemask_v4f16: 7193; CHECK: # %bb.0: 7194; CHECK-NEXT: ret 7195 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer) 7196 ret void 7197} 7198 7199declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>) 7200 7201define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) { 7202; RV32-LABEL: mscatter_v8f16: 7203; RV32: # %bb.0: 7204; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 7205; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 7206; RV32-NEXT: ret 7207; 7208; RV64V-LABEL: mscatter_v8f16: 7209; RV64V: # %bb.0: 7210; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 7211; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t 7212; RV64V-NEXT: ret 7213; 7214; RV64ZVE32F-ZVFH-LABEL: mscatter_v8f16: 7215; RV64ZVE32F-ZVFH: # %bb.0: 7216; RV64ZVE32F-ZVFH-NEXT: ld a3, 40(a0) 7217; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0) 7218; RV64ZVE32F-ZVFH-NEXT: ld a1, 56(a0) 7219; RV64ZVE32F-ZVFH-NEXT: ld t0, 8(a0) 7220; RV64ZVE32F-ZVFH-NEXT: ld a7, 16(a0) 7221; RV64ZVE32F-ZVFH-NEXT: ld a6, 24(a0) 7222; RV64ZVE32F-ZVFH-NEXT: ld a5, 32(a0) 7223; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7224; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a4, v0 7225; RV64ZVE32F-ZVFH-NEXT: andi t1, a4, 1 7226; RV64ZVE32F-ZVFH-NEXT: bnez t1, .LBB67_9 7227; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else 7228; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2 7229; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_10 7230; RV64ZVE32F-ZVFH-NEXT: .LBB67_2: # %else2 7231; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4 7232; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_11 7233; RV64ZVE32F-ZVFH-NEXT: .LBB67_3: # %else4 7234; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8 7235; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_12 7236; RV64ZVE32F-ZVFH-NEXT: .LBB67_4: # %else6 7237; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16 7238; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_13 7239; RV64ZVE32F-ZVFH-NEXT: .LBB67_5: # %else8 7240; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32 7241; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_14 7242; RV64ZVE32F-ZVFH-NEXT: .LBB67_6: # %else10 7243; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64 7244; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_15 7245; RV64ZVE32F-ZVFH-NEXT: .LBB67_7: # %else12 7246; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128 7247; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_16 7248; RV64ZVE32F-ZVFH-NEXT: .LBB67_8: # %else14 7249; RV64ZVE32F-ZVFH-NEXT: ret 7250; RV64ZVE32F-ZVFH-NEXT: .LBB67_9: # %cond.store 7251; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0) 7252; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma 7253; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0) 7254; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2 7255; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_2 7256; RV64ZVE32F-ZVFH-NEXT: .LBB67_10: # %cond.store1 7257; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma 7258; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1 7259; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, 
define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)

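; For the 8-element scatter the vector targets still use a single masked
; indexed store. RV64ZVE32F receives the <8 x ptr> operand in memory, so it
; loads each pointer with ld and branches on the matching mask bit; with
; ZVFH a lane is stored via vslidedown + vse16.v, while ZVFHMIN moves the
; bits through a GPR (vmv.x.s, fmv.h.x) and stores them with fsh.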
define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: ld a3, 40(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a1, 56(a0)
; RV64ZVE32F-ZVFH-NEXT: ld t0, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a7, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a6, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a5, 32(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFH-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFH-NEXT: bnez t1, .LBB67_9
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFH-NEXT: .LBB67_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_11
; RV64ZVE32F-ZVFH-NEXT: .LBB67_3: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_12
; RV64ZVE32F-ZVFH-NEXT: .LBB67_4: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_13
; RV64ZVE32F-ZVFH-NEXT: .LBB67_5: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_14
; RV64ZVE32F-ZVFH-NEXT: .LBB67_6: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_15
; RV64ZVE32F-ZVFH-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_16
; RV64ZVE32F-ZVFH-NEXT: .LBB67_8: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_2
; RV64ZVE32F-ZVFH-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (t0)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_3
; RV64ZVE32F-ZVFH-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a7)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_4
; RV64ZVE32F-ZVFH-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a6)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_5
; RV64ZVE32F-ZVFH-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a5)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_6
; RV64ZVE32F-ZVFH-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_7
; RV64ZVE32F-ZVFH-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_8
; RV64ZVE32F-ZVFH-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 40(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 56(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld t0, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a7, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a6, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a5, 32(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez t1, .LBB67_9
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_3: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_12
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_4: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_5: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_6: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_15
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(t0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a7)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a6)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a5)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

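; The baseidx variants exercise the index computation. With i8 indices the
; vector targets sign-extend (vsext.vf4/vf8) and then double the indices
; (vadd.vv) to scale by the 2-byte f16 element size; the RV64ZVE32F
; fallback scales each extracted index with slli by 1 instead.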
define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB68_2: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_4
; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB68_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_12
; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_13
; RV64ZVE32F-ZVFH-NEXT: .LBB68_6: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_14
; RV64ZVE32F-ZVFH-NEXT: .LBB68_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_9
; RV64ZVE32F-ZVFH-NEXT: .LBB68_8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB68_9: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15
; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16
; RV64ZVE32F-ZVFH-NEXT: .LBB68_11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB68_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_6
; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_7
; RV64ZVE32F-ZVFH-NEXT: .LBB68_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_8
; RV64ZVE32F-ZVFH-NEXT: j .LBB68_9
; RV64ZVE32F-ZVFH-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB68_11
; RV64ZVE32F-ZVFH-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB68_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

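; Explicitly sign-extending the i8 indices to i16 is expected to produce
; the same code as the plain i8-index case above.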
define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_4
; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_12
; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_13
; RV64ZVE32F-ZVFH-NEXT: .LBB69_6: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_14
; RV64ZVE32F-ZVFH-NEXT: .LBB69_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_9
; RV64ZVE32F-ZVFH-NEXT: .LBB69_8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB69_9: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15
; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16
; RV64ZVE32F-ZVFH-NEXT: .LBB69_11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB69_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_6
; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_7
; RV64ZVE32F-ZVFH-NEXT: .LBB69_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_8
; RV64ZVE32F-ZVFH-NEXT: j .LBB69_9
; RV64ZVE32F-ZVFH-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB69_11
; RV64ZVE32F-ZVFH-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB69_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

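; With zero-extended i8 indices the vector targets can widen and double in
; a single vwaddu.vv and then use 16-bit offsets (vsoxei16); the scalar
; fallback clears the upper bits with andi 255 before scaling.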
define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vwaddu.vv v10, v9, v9
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vwaddu.vv v10, v9, v9
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_4
; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_12
; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_13
; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_14
; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_9
; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB70_9: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15
; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16
; RV64ZVE32F-ZVFH-NEXT: .LBB70_11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB70_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_6
; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_7
; RV64ZVE32F-ZVFH-NEXT: .LBB70_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_8
; RV64ZVE32F-ZVFH-NEXT: j .LBB70_9
; RV64ZVE32F-ZVFH-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_11
; RV64ZVE32F-ZVFH-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

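; Native i16 indices only need the doubling: RV32 widens and doubles in one
; vwadd.vv to get 32-bit offsets, while RV64V still sign-extends the
; indices to 64 bits first.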
define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB71_2: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_4
; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB71_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_12
; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_13
; RV64ZVE32F-ZVFH-NEXT: .LBB71_6: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_14
; RV64ZVE32F-ZVFH-NEXT: .LBB71_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_9
; RV64ZVE32F-ZVFH-NEXT: .LBB71_8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB71_9: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15
; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16
; RV64ZVE32F-ZVFH-NEXT: .LBB71_11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB71_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_6
; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_7
; RV64ZVE32F-ZVFH-NEXT: .LBB71_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_8
; RV64ZVE32F-ZVFH-NEXT: j .LBB71_9
; RV64ZVE32F-ZVFH-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB71_11
; RV64ZVE32F-ZVFH-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB71_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
  call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

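; The f32 scatters repeat the same pattern at e32. On RV32 the 32-bit
; pointers still fit in zve32f registers, so RV32ZVE32F keeps the indexed
; store and only RV64ZVE32F scalarizes; the v1 case tests the single-bit
; mask with vfirst.m.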
RV32ZVE32F-LABEL: mscatter_v2f32: 8506; RV32ZVE32F: # %bb.0: 8507; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 8508; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t 8509; RV32ZVE32F-NEXT: ret 8510; 8511; RV64ZVE32F-LABEL: mscatter_v2f32: 8512; RV64ZVE32F: # %bb.0: 8513; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8514; RV64ZVE32F-NEXT: vmv.x.s a2, v0 8515; RV64ZVE32F-NEXT: andi a3, a2, 1 8516; RV64ZVE32F-NEXT: bnez a3, .LBB73_3 8517; RV64ZVE32F-NEXT: # %bb.1: # %else 8518; RV64ZVE32F-NEXT: andi a2, a2, 2 8519; RV64ZVE32F-NEXT: bnez a2, .LBB73_4 8520; RV64ZVE32F-NEXT: .LBB73_2: # %else2 8521; RV64ZVE32F-NEXT: ret 8522; RV64ZVE32F-NEXT: .LBB73_3: # %cond.store 8523; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8524; RV64ZVE32F-NEXT: vse32.v v8, (a0) 8525; RV64ZVE32F-NEXT: andi a2, a2, 2 8526; RV64ZVE32F-NEXT: beqz a2, .LBB73_2 8527; RV64ZVE32F-NEXT: .LBB73_4: # %cond.store1 8528; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8529; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 8530; RV64ZVE32F-NEXT: vse32.v v8, (a1) 8531; RV64ZVE32F-NEXT: ret 8532 call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m) 8533 ret void 8534} 8535 8536declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) 8537 8538define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) { 8539; RV32-LABEL: mscatter_v4f32: 8540; RV32: # %bb.0: 8541; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 8542; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 8543; RV32-NEXT: ret 8544; 8545; RV64V-LABEL: mscatter_v4f32: 8546; RV64V: # %bb.0: 8547; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 8548; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t 8549; RV64V-NEXT: ret 8550; 8551; RV64ZVE32F-LABEL: mscatter_v4f32: 8552; RV64ZVE32F: # %bb.0: 8553; RV64ZVE32F-NEXT: ld a4, 8(a0) 8554; RV64ZVE32F-NEXT: ld a2, 16(a0) 8555; RV64ZVE32F-NEXT: ld a1, 24(a0) 8556; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8557; RV64ZVE32F-NEXT: vmv.x.s a3, v0 8558; RV64ZVE32F-NEXT: andi a5, a3, 1 8559; RV64ZVE32F-NEXT: bnez a5, .LBB74_5 8560; RV64ZVE32F-NEXT: # %bb.1: # %else 8561; RV64ZVE32F-NEXT: andi a0, a3, 2 8562; RV64ZVE32F-NEXT: bnez a0, .LBB74_6 8563; RV64ZVE32F-NEXT: .LBB74_2: # %else2 8564; RV64ZVE32F-NEXT: andi a0, a3, 4 8565; RV64ZVE32F-NEXT: bnez a0, .LBB74_7 8566; RV64ZVE32F-NEXT: .LBB74_3: # %else4 8567; RV64ZVE32F-NEXT: andi a3, a3, 8 8568; RV64ZVE32F-NEXT: bnez a3, .LBB74_8 8569; RV64ZVE32F-NEXT: .LBB74_4: # %else6 8570; RV64ZVE32F-NEXT: ret 8571; RV64ZVE32F-NEXT: .LBB74_5: # %cond.store 8572; RV64ZVE32F-NEXT: ld a0, 0(a0) 8573; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8574; RV64ZVE32F-NEXT: vse32.v v8, (a0) 8575; RV64ZVE32F-NEXT: andi a0, a3, 2 8576; RV64ZVE32F-NEXT: beqz a0, .LBB74_2 8577; RV64ZVE32F-NEXT: .LBB74_6: # %cond.store1 8578; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8579; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 8580; RV64ZVE32F-NEXT: vse32.v v9, (a4) 8581; RV64ZVE32F-NEXT: andi a0, a3, 4 8582; RV64ZVE32F-NEXT: beqz a0, .LBB74_3 8583; RV64ZVE32F-NEXT: .LBB74_7: # %cond.store3 8584; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8585; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 8586; RV64ZVE32F-NEXT: vse32.v v9, (a2) 8587; RV64ZVE32F-NEXT: andi a3, a3, 8 8588; RV64ZVE32F-NEXT: beqz a3, .LBB74_4 8589; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store5 8590; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8591; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 8592; RV64ZVE32F-NEXT: vse32.v v8, (a1) 8593; RV64ZVE32F-NEXT: ret 8594 
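; With RV64ZVE32F the <4 x ptr> argument cannot live in a vector register
; (Zve32f has no 64-bit elements), so the pointers are loaded through a0 and
; each lane is stored under a scalar branch on its mask bit, as checked above.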
call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m) 8595 ret void 8596} 8597 8598define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { 8599; RV32-LABEL: mscatter_truemask_v4f32: 8600; RV32: # %bb.0: 8601; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 8602; RV32-NEXT: vsoxei32.v v8, (zero), v9 8603; RV32-NEXT: ret 8604; 8605; RV64V-LABEL: mscatter_truemask_v4f32: 8606; RV64V: # %bb.0: 8607; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 8608; RV64V-NEXT: vsoxei64.v v8, (zero), v10 8609; RV64V-NEXT: ret 8610; 8611; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: 8612; RV64ZVE32F: # %bb.0: 8613; RV64ZVE32F-NEXT: ld a1, 0(a0) 8614; RV64ZVE32F-NEXT: ld a2, 8(a0) 8615; RV64ZVE32F-NEXT: ld a3, 16(a0) 8616; RV64ZVE32F-NEXT: ld a0, 24(a0) 8617; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8618; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 8619; RV64ZVE32F-NEXT: vse32.v v8, (a1) 8620; RV64ZVE32F-NEXT: vse32.v v9, (a2) 8621; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 8622; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 8623; RV64ZVE32F-NEXT: vse32.v v9, (a3) 8624; RV64ZVE32F-NEXT: vse32.v v8, (a0) 8625; RV64ZVE32F-NEXT: ret 8626 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) 8627 ret void 8628} 8629 8630define void @mscatter_falsemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { 8631; CHECK-LABEL: mscatter_falsemask_v4f32: 8632; CHECK: # %bb.0: 8633; CHECK-NEXT: ret 8634 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer) 8635 ret void 8636} 8637 8638declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>) 8639 8640define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) { 8641; RV32-LABEL: mscatter_v8f32: 8642; RV32: # %bb.0: 8643; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 8644; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 8645; RV32-NEXT: ret 8646; 8647; RV64V-LABEL: mscatter_v8f32: 8648; RV64V: # %bb.0: 8649; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 8650; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t 8651; RV64V-NEXT: ret 8652; 8653; RV64ZVE32F-LABEL: mscatter_v8f32: 8654; RV64ZVE32F: # %bb.0: 8655; RV64ZVE32F-NEXT: ld a3, 40(a0) 8656; RV64ZVE32F-NEXT: ld a2, 48(a0) 8657; RV64ZVE32F-NEXT: ld a1, 56(a0) 8658; RV64ZVE32F-NEXT: ld t0, 8(a0) 8659; RV64ZVE32F-NEXT: ld a7, 16(a0) 8660; RV64ZVE32F-NEXT: ld a6, 24(a0) 8661; RV64ZVE32F-NEXT: ld a5, 32(a0) 8662; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8663; RV64ZVE32F-NEXT: vmv.x.s a4, v0 8664; RV64ZVE32F-NEXT: andi t1, a4, 1 8665; RV64ZVE32F-NEXT: bnez t1, .LBB77_9 8666; RV64ZVE32F-NEXT: # %bb.1: # %else 8667; RV64ZVE32F-NEXT: andi a0, a4, 2 8668; RV64ZVE32F-NEXT: bnez a0, .LBB77_10 8669; RV64ZVE32F-NEXT: .LBB77_2: # %else2 8670; RV64ZVE32F-NEXT: andi a0, a4, 4 8671; RV64ZVE32F-NEXT: bnez a0, .LBB77_11 8672; RV64ZVE32F-NEXT: .LBB77_3: # %else4 8673; RV64ZVE32F-NEXT: andi a0, a4, 8 8674; RV64ZVE32F-NEXT: bnez a0, .LBB77_12 8675; RV64ZVE32F-NEXT: .LBB77_4: # %else6 8676; RV64ZVE32F-NEXT: andi a0, a4, 16 8677; RV64ZVE32F-NEXT: bnez a0, .LBB77_13 8678; RV64ZVE32F-NEXT: .LBB77_5: # %else8 8679; RV64ZVE32F-NEXT: andi a0, a4, 32 8680; RV64ZVE32F-NEXT: bnez a0, .LBB77_14 8681; RV64ZVE32F-NEXT: .LBB77_6: # %else10 8682; RV64ZVE32F-NEXT: andi a0, a4, 64 8683; RV64ZVE32F-NEXT: bnez a0, .LBB77_15 8684; RV64ZVE32F-NEXT: .LBB77_7: # %else12 8685; RV64ZVE32F-NEXT: andi a0, a4, -128 8686; RV64ZVE32F-NEXT: bnez a0, .LBB77_16 8687; RV64ZVE32F-NEXT: 
.LBB77_8: # %else14 8688; RV64ZVE32F-NEXT: ret 8689; RV64ZVE32F-NEXT: .LBB77_9: # %cond.store 8690; RV64ZVE32F-NEXT: ld a0, 0(a0) 8691; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8692; RV64ZVE32F-NEXT: vse32.v v8, (a0) 8693; RV64ZVE32F-NEXT: andi a0, a4, 2 8694; RV64ZVE32F-NEXT: beqz a0, .LBB77_2 8695; RV64ZVE32F-NEXT: .LBB77_10: # %cond.store1 8696; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8697; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 8698; RV64ZVE32F-NEXT: vse32.v v10, (t0) 8699; RV64ZVE32F-NEXT: andi a0, a4, 4 8700; RV64ZVE32F-NEXT: beqz a0, .LBB77_3 8701; RV64ZVE32F-NEXT: .LBB77_11: # %cond.store3 8702; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8703; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 8704; RV64ZVE32F-NEXT: vse32.v v10, (a7) 8705; RV64ZVE32F-NEXT: andi a0, a4, 8 8706; RV64ZVE32F-NEXT: beqz a0, .LBB77_4 8707; RV64ZVE32F-NEXT: .LBB77_12: # %cond.store5 8708; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8709; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 8710; RV64ZVE32F-NEXT: vse32.v v10, (a6) 8711; RV64ZVE32F-NEXT: andi a0, a4, 16 8712; RV64ZVE32F-NEXT: beqz a0, .LBB77_5 8713; RV64ZVE32F-NEXT: .LBB77_13: # %cond.store7 8714; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8715; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 8716; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8717; RV64ZVE32F-NEXT: vse32.v v10, (a5) 8718; RV64ZVE32F-NEXT: andi a0, a4, 32 8719; RV64ZVE32F-NEXT: beqz a0, .LBB77_6 8720; RV64ZVE32F-NEXT: .LBB77_14: # %cond.store9 8721; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8722; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 8723; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8724; RV64ZVE32F-NEXT: vse32.v v10, (a3) 8725; RV64ZVE32F-NEXT: andi a0, a4, 64 8726; RV64ZVE32F-NEXT: beqz a0, .LBB77_7 8727; RV64ZVE32F-NEXT: .LBB77_15: # %cond.store11 8728; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8729; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 8730; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8731; RV64ZVE32F-NEXT: vse32.v v10, (a2) 8732; RV64ZVE32F-NEXT: andi a0, a4, -128 8733; RV64ZVE32F-NEXT: beqz a0, .LBB77_8 8734; RV64ZVE32F-NEXT: .LBB77_16: # %cond.store13 8735; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8736; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 8737; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8738; RV64ZVE32F-NEXT: vse32.v v8, (a1) 8739; RV64ZVE32F-NEXT: ret 8740 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 8741 ret void 8742} 8743 8744define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 8745; RV32-LABEL: mscatter_baseidx_v8i8_v8f32: 8746; RV32: # %bb.0: 8747; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 8748; RV32-NEXT: vsext.vf4 v12, v10 8749; RV32-NEXT: vsll.vi v10, v12, 2 8750; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 8751; RV32-NEXT: ret 8752; 8753; RV64V-LABEL: mscatter_baseidx_v8i8_v8f32: 8754; RV64V: # %bb.0: 8755; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 8756; RV64V-NEXT: vsext.vf8 v12, v10 8757; RV64V-NEXT: vsll.vi v12, v12, 2 8758; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 8759; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 8760; RV64V-NEXT: ret 8761; 8762; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32: 8763; RV64ZVE32F: # %bb.0: 8764; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8765; RV64ZVE32F-NEXT: vmv.x.s a1, v0 8766; RV64ZVE32F-NEXT: andi a2, a1, 1 8767; RV64ZVE32F-NEXT: beqz a2, .LBB78_2 8768; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 8769; RV64ZVE32F-NEXT: 
vmv.x.s a2, v10 8770; RV64ZVE32F-NEXT: slli a2, a2, 2 8771; RV64ZVE32F-NEXT: add a2, a0, a2 8772; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8773; RV64ZVE32F-NEXT: vse32.v v8, (a2) 8774; RV64ZVE32F-NEXT: .LBB78_2: # %else 8775; RV64ZVE32F-NEXT: andi a2, a1, 2 8776; RV64ZVE32F-NEXT: beqz a2, .LBB78_4 8777; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 8778; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8779; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 8780; RV64ZVE32F-NEXT: vmv.x.s a2, v11 8781; RV64ZVE32F-NEXT: slli a2, a2, 2 8782; RV64ZVE32F-NEXT: add a2, a0, a2 8783; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 8784; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 8785; RV64ZVE32F-NEXT: vse32.v v11, (a2) 8786; RV64ZVE32F-NEXT: .LBB78_4: # %else2 8787; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 8788; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 8789; RV64ZVE32F-NEXT: andi a2, a1, 4 8790; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8791; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 8792; RV64ZVE32F-NEXT: bnez a2, .LBB78_12 8793; RV64ZVE32F-NEXT: # %bb.5: # %else4 8794; RV64ZVE32F-NEXT: andi a2, a1, 8 8795; RV64ZVE32F-NEXT: bnez a2, .LBB78_13 8796; RV64ZVE32F-NEXT: .LBB78_6: # %else6 8797; RV64ZVE32F-NEXT: andi a2, a1, 16 8798; RV64ZVE32F-NEXT: bnez a2, .LBB78_14 8799; RV64ZVE32F-NEXT: .LBB78_7: # %else8 8800; RV64ZVE32F-NEXT: andi a2, a1, 32 8801; RV64ZVE32F-NEXT: beqz a2, .LBB78_9 8802; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store9 8803; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8804; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 8805; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8806; RV64ZVE32F-NEXT: slli a2, a2, 2 8807; RV64ZVE32F-NEXT: add a2, a0, a2 8808; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8809; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 8810; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8811; RV64ZVE32F-NEXT: vse32.v v12, (a2) 8812; RV64ZVE32F-NEXT: .LBB78_9: # %else10 8813; RV64ZVE32F-NEXT: andi a2, a1, 64 8814; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8815; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 8816; RV64ZVE32F-NEXT: bnez a2, .LBB78_15 8817; RV64ZVE32F-NEXT: # %bb.10: # %else12 8818; RV64ZVE32F-NEXT: andi a1, a1, -128 8819; RV64ZVE32F-NEXT: bnez a1, .LBB78_16 8820; RV64ZVE32F-NEXT: .LBB78_11: # %else14 8821; RV64ZVE32F-NEXT: ret 8822; RV64ZVE32F-NEXT: .LBB78_12: # %cond.store3 8823; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8824; RV64ZVE32F-NEXT: slli a2, a2, 2 8825; RV64ZVE32F-NEXT: add a2, a0, a2 8826; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 8827; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 8828; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8829; RV64ZVE32F-NEXT: vse32.v v12, (a2) 8830; RV64ZVE32F-NEXT: andi a2, a1, 8 8831; RV64ZVE32F-NEXT: beqz a2, .LBB78_6 8832; RV64ZVE32F-NEXT: .LBB78_13: # %cond.store5 8833; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8834; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 8835; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8836; RV64ZVE32F-NEXT: slli a2, a2, 2 8837; RV64ZVE32F-NEXT: add a2, a0, a2 8838; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 8839; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 8840; RV64ZVE32F-NEXT: vse32.v v10, (a2) 8841; RV64ZVE32F-NEXT: andi a2, a1, 16 8842; RV64ZVE32F-NEXT: beqz a2, .LBB78_7 8843; RV64ZVE32F-NEXT: .LBB78_14: # %cond.store7 8844; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8845; RV64ZVE32F-NEXT: vmv.x.s a2, v11 8846; RV64ZVE32F-NEXT: slli a2, a2, 2 8847; RV64ZVE32F-NEXT: add a2, a0, a2 8848; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8849; 
RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 8850; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8851; RV64ZVE32F-NEXT: vse32.v v12, (a2) 8852; RV64ZVE32F-NEXT: andi a2, a1, 32 8853; RV64ZVE32F-NEXT: bnez a2, .LBB78_8 8854; RV64ZVE32F-NEXT: j .LBB78_9 8855; RV64ZVE32F-NEXT: .LBB78_15: # %cond.store11 8856; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8857; RV64ZVE32F-NEXT: slli a2, a2, 2 8858; RV64ZVE32F-NEXT: add a2, a0, a2 8859; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8860; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 8861; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8862; RV64ZVE32F-NEXT: vse32.v v12, (a2) 8863; RV64ZVE32F-NEXT: andi a1, a1, -128 8864; RV64ZVE32F-NEXT: beqz a1, .LBB78_11 8865; RV64ZVE32F-NEXT: .LBB78_16: # %cond.store13 8866; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8867; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 8868; RV64ZVE32F-NEXT: vmv.x.s a1, v10 8869; RV64ZVE32F-NEXT: slli a1, a1, 2 8870; RV64ZVE32F-NEXT: add a0, a0, a1 8871; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8872; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 8873; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8874; RV64ZVE32F-NEXT: vse32.v v8, (a0) 8875; RV64ZVE32F-NEXT: ret 8876 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs 8877 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 8878 ret void 8879} 8880 8881define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 8882; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f32: 8883; RV32: # %bb.0: 8884; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 8885; RV32-NEXT: vsext.vf4 v12, v10 8886; RV32-NEXT: vsll.vi v10, v12, 2 8887; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 8888; RV32-NEXT: ret 8889; 8890; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f32: 8891; RV64V: # %bb.0: 8892; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 8893; RV64V-NEXT: vsext.vf8 v12, v10 8894; RV64V-NEXT: vsll.vi v12, v12, 2 8895; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 8896; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 8897; RV64V-NEXT: ret 8898; 8899; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32: 8900; RV64ZVE32F: # %bb.0: 8901; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8902; RV64ZVE32F-NEXT: vmv.x.s a1, v0 8903; RV64ZVE32F-NEXT: andi a2, a1, 1 8904; RV64ZVE32F-NEXT: beqz a2, .LBB79_2 8905; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 8906; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8907; RV64ZVE32F-NEXT: slli a2, a2, 2 8908; RV64ZVE32F-NEXT: add a2, a0, a2 8909; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8910; RV64ZVE32F-NEXT: vse32.v v8, (a2) 8911; RV64ZVE32F-NEXT: .LBB79_2: # %else 8912; RV64ZVE32F-NEXT: andi a2, a1, 2 8913; RV64ZVE32F-NEXT: beqz a2, .LBB79_4 8914; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 8915; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8916; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 8917; RV64ZVE32F-NEXT: vmv.x.s a2, v11 8918; RV64ZVE32F-NEXT: slli a2, a2, 2 8919; RV64ZVE32F-NEXT: add a2, a0, a2 8920; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 8921; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 8922; RV64ZVE32F-NEXT: vse32.v v11, (a2) 8923; RV64ZVE32F-NEXT: .LBB79_4: # %else2 8924; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 8925; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 8926; RV64ZVE32F-NEXT: andi a2, a1, 4 8927; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8928; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 8929; RV64ZVE32F-NEXT: bnez a2, .LBB79_12 8930; RV64ZVE32F-NEXT: # %bb.5: # %else4 8931; 
RV64ZVE32F-NEXT: andi a2, a1, 8 8932; RV64ZVE32F-NEXT: bnez a2, .LBB79_13 8933; RV64ZVE32F-NEXT: .LBB79_6: # %else6 8934; RV64ZVE32F-NEXT: andi a2, a1, 16 8935; RV64ZVE32F-NEXT: bnez a2, .LBB79_14 8936; RV64ZVE32F-NEXT: .LBB79_7: # %else8 8937; RV64ZVE32F-NEXT: andi a2, a1, 32 8938; RV64ZVE32F-NEXT: beqz a2, .LBB79_9 8939; RV64ZVE32F-NEXT: .LBB79_8: # %cond.store9 8940; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8941; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 8942; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8943; RV64ZVE32F-NEXT: slli a2, a2, 2 8944; RV64ZVE32F-NEXT: add a2, a0, a2 8945; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8946; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 8947; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8948; RV64ZVE32F-NEXT: vse32.v v12, (a2) 8949; RV64ZVE32F-NEXT: .LBB79_9: # %else10 8950; RV64ZVE32F-NEXT: andi a2, a1, 64 8951; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8952; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 8953; RV64ZVE32F-NEXT: bnez a2, .LBB79_15 8954; RV64ZVE32F-NEXT: # %bb.10: # %else12 8955; RV64ZVE32F-NEXT: andi a1, a1, -128 8956; RV64ZVE32F-NEXT: bnez a1, .LBB79_16 8957; RV64ZVE32F-NEXT: .LBB79_11: # %else14 8958; RV64ZVE32F-NEXT: ret 8959; RV64ZVE32F-NEXT: .LBB79_12: # %cond.store3 8960; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8961; RV64ZVE32F-NEXT: slli a2, a2, 2 8962; RV64ZVE32F-NEXT: add a2, a0, a2 8963; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 8964; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 8965; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8966; RV64ZVE32F-NEXT: vse32.v v12, (a2) 8967; RV64ZVE32F-NEXT: andi a2, a1, 8 8968; RV64ZVE32F-NEXT: beqz a2, .LBB79_6 8969; RV64ZVE32F-NEXT: .LBB79_13: # %cond.store5 8970; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8971; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 8972; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8973; RV64ZVE32F-NEXT: slli a2, a2, 2 8974; RV64ZVE32F-NEXT: add a2, a0, a2 8975; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 8976; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 8977; RV64ZVE32F-NEXT: vse32.v v10, (a2) 8978; RV64ZVE32F-NEXT: andi a2, a1, 16 8979; RV64ZVE32F-NEXT: beqz a2, .LBB79_7 8980; RV64ZVE32F-NEXT: .LBB79_14: # %cond.store7 8981; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8982; RV64ZVE32F-NEXT: vmv.x.s a2, v11 8983; RV64ZVE32F-NEXT: slli a2, a2, 2 8984; RV64ZVE32F-NEXT: add a2, a0, a2 8985; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8986; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 8987; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8988; RV64ZVE32F-NEXT: vse32.v v12, (a2) 8989; RV64ZVE32F-NEXT: andi a2, a1, 32 8990; RV64ZVE32F-NEXT: bnez a2, .LBB79_8 8991; RV64ZVE32F-NEXT: j .LBB79_9 8992; RV64ZVE32F-NEXT: .LBB79_15: # %cond.store11 8993; RV64ZVE32F-NEXT: vmv.x.s a2, v10 8994; RV64ZVE32F-NEXT: slli a2, a2, 2 8995; RV64ZVE32F-NEXT: add a2, a0, a2 8996; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 8997; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 8998; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 8999; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9000; RV64ZVE32F-NEXT: andi a1, a1, -128 9001; RV64ZVE32F-NEXT: beqz a1, .LBB79_11 9002; RV64ZVE32F-NEXT: .LBB79_16: # %cond.store13 9003; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9004; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9005; RV64ZVE32F-NEXT: vmv.x.s a1, v10 9006; RV64ZVE32F-NEXT: slli a1, a1, 2 9007; RV64ZVE32F-NEXT: add a0, a0, a1 9008; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9009; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 9010; RV64ZVE32F-NEXT: 
vsetivli zero, 1, e32, m1, ta, ma 9011; RV64ZVE32F-NEXT: vse32.v v8, (a0) 9012; RV64ZVE32F-NEXT: ret 9013 %eidxs = sext <8 x i8> %idxs to <8 x i32> 9014 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 9015 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 9016 ret void 9017} 9018 9019define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) { 9020; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32: 9021; RV32: # %bb.0: 9022; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 9023; RV32-NEXT: vzext.vf2 v11, v10 9024; RV32-NEXT: vsll.vi v10, v11, 2 9025; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9026; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t 9027; RV32-NEXT: ret 9028; 9029; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f32: 9030; RV64V: # %bb.0: 9031; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 9032; RV64V-NEXT: vzext.vf2 v11, v10 9033; RV64V-NEXT: vsll.vi v10, v11, 2 9034; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9035; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t 9036; RV64V-NEXT: ret 9037; 9038; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32: 9039; RV64ZVE32F: # %bb.0: 9040; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9041; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9042; RV64ZVE32F-NEXT: andi a2, a1, 1 9043; RV64ZVE32F-NEXT: beqz a2, .LBB80_2 9044; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 9045; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9046; RV64ZVE32F-NEXT: andi a2, a2, 255 9047; RV64ZVE32F-NEXT: slli a2, a2, 2 9048; RV64ZVE32F-NEXT: add a2, a0, a2 9049; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9050; RV64ZVE32F-NEXT: vse32.v v8, (a2) 9051; RV64ZVE32F-NEXT: .LBB80_2: # %else 9052; RV64ZVE32F-NEXT: andi a2, a1, 2 9053; RV64ZVE32F-NEXT: beqz a2, .LBB80_4 9054; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 9055; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9056; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 9057; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9058; RV64ZVE32F-NEXT: andi a2, a2, 255 9059; RV64ZVE32F-NEXT: slli a2, a2, 2 9060; RV64ZVE32F-NEXT: add a2, a0, a2 9061; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9062; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 9063; RV64ZVE32F-NEXT: vse32.v v11, (a2) 9064; RV64ZVE32F-NEXT: .LBB80_4: # %else2 9065; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 9066; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 9067; RV64ZVE32F-NEXT: andi a2, a1, 4 9068; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9069; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 9070; RV64ZVE32F-NEXT: bnez a2, .LBB80_12 9071; RV64ZVE32F-NEXT: # %bb.5: # %else4 9072; RV64ZVE32F-NEXT: andi a2, a1, 8 9073; RV64ZVE32F-NEXT: bnez a2, .LBB80_13 9074; RV64ZVE32F-NEXT: .LBB80_6: # %else6 9075; RV64ZVE32F-NEXT: andi a2, a1, 16 9076; RV64ZVE32F-NEXT: bnez a2, .LBB80_14 9077; RV64ZVE32F-NEXT: .LBB80_7: # %else8 9078; RV64ZVE32F-NEXT: andi a2, a1, 32 9079; RV64ZVE32F-NEXT: beqz a2, .LBB80_9 9080; RV64ZVE32F-NEXT: .LBB80_8: # %cond.store9 9081; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9082; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 9083; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9084; RV64ZVE32F-NEXT: andi a2, a2, 255 9085; RV64ZVE32F-NEXT: slli a2, a2, 2 9086; RV64ZVE32F-NEXT: add a2, a0, a2 9087; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9088; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 9089; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9090; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9091; RV64ZVE32F-NEXT: .LBB80_9: # %else10 9092; RV64ZVE32F-NEXT: andi a2, a1, 64 9093; 
RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9094; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 9095; RV64ZVE32F-NEXT: bnez a2, .LBB80_15 9096; RV64ZVE32F-NEXT: # %bb.10: # %else12 9097; RV64ZVE32F-NEXT: andi a1, a1, -128 9098; RV64ZVE32F-NEXT: bnez a1, .LBB80_16 9099; RV64ZVE32F-NEXT: .LBB80_11: # %else14 9100; RV64ZVE32F-NEXT: ret 9101; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store3 9102; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9103; RV64ZVE32F-NEXT: andi a2, a2, 255 9104; RV64ZVE32F-NEXT: slli a2, a2, 2 9105; RV64ZVE32F-NEXT: add a2, a0, a2 9106; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9107; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 9108; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9109; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9110; RV64ZVE32F-NEXT: andi a2, a1, 8 9111; RV64ZVE32F-NEXT: beqz a2, .LBB80_6 9112; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store5 9113; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9114; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9115; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9116; RV64ZVE32F-NEXT: andi a2, a2, 255 9117; RV64ZVE32F-NEXT: slli a2, a2, 2 9118; RV64ZVE32F-NEXT: add a2, a0, a2 9119; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9120; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 9121; RV64ZVE32F-NEXT: vse32.v v10, (a2) 9122; RV64ZVE32F-NEXT: andi a2, a1, 16 9123; RV64ZVE32F-NEXT: beqz a2, .LBB80_7 9124; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store7 9125; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9126; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9127; RV64ZVE32F-NEXT: andi a2, a2, 255 9128; RV64ZVE32F-NEXT: slli a2, a2, 2 9129; RV64ZVE32F-NEXT: add a2, a0, a2 9130; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9131; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 9132; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9133; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9134; RV64ZVE32F-NEXT: andi a2, a1, 32 9135; RV64ZVE32F-NEXT: bnez a2, .LBB80_8 9136; RV64ZVE32F-NEXT: j .LBB80_9 9137; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11 9138; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9139; RV64ZVE32F-NEXT: andi a2, a2, 255 9140; RV64ZVE32F-NEXT: slli a2, a2, 2 9141; RV64ZVE32F-NEXT: add a2, a0, a2 9142; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9143; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 9144; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9145; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9146; RV64ZVE32F-NEXT: andi a1, a1, -128 9147; RV64ZVE32F-NEXT: beqz a1, .LBB80_11 9148; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13 9149; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9150; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9151; RV64ZVE32F-NEXT: vmv.x.s a1, v10 9152; RV64ZVE32F-NEXT: andi a1, a1, 255 9153; RV64ZVE32F-NEXT: slli a1, a1, 2 9154; RV64ZVE32F-NEXT: add a0, a0, a1 9155; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9156; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 9157; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9158; RV64ZVE32F-NEXT: vse32.v v8, (a0) 9159; RV64ZVE32F-NEXT: ret 9160 %eidxs = zext <8 x i8> %idxs to <8 x i32> 9161 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 9162 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 9163 ret void 9164} 9165 9166define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { 9167; RV32-LABEL: mscatter_baseidx_v8i16_v8f32: 9168; RV32: # %bb.0: 9169; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9170; RV32-NEXT: vsext.vf2 v12, v10 9171; RV32-NEXT: vsll.vi v10, v12, 2 9172; RV32-NEXT: vsoxei32.v v8, (a0), 
v10, v0.t 9173; RV32-NEXT: ret 9174; 9175; RV64V-LABEL: mscatter_baseidx_v8i16_v8f32: 9176; RV64V: # %bb.0: 9177; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 9178; RV64V-NEXT: vsext.vf4 v12, v10 9179; RV64V-NEXT: vsll.vi v12, v12, 2 9180; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9181; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 9182; RV64V-NEXT: ret 9183; 9184; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32: 9185; RV64ZVE32F: # %bb.0: 9186; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9187; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9188; RV64ZVE32F-NEXT: andi a2, a1, 1 9189; RV64ZVE32F-NEXT: beqz a2, .LBB81_2 9190; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 9191; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 9192; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9193; RV64ZVE32F-NEXT: slli a2, a2, 2 9194; RV64ZVE32F-NEXT: add a2, a0, a2 9195; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9196; RV64ZVE32F-NEXT: vse32.v v8, (a2) 9197; RV64ZVE32F-NEXT: .LBB81_2: # %else 9198; RV64ZVE32F-NEXT: andi a2, a1, 2 9199; RV64ZVE32F-NEXT: beqz a2, .LBB81_4 9200; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 9201; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9202; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 9203; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9204; RV64ZVE32F-NEXT: slli a2, a2, 2 9205; RV64ZVE32F-NEXT: add a2, a0, a2 9206; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9207; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 9208; RV64ZVE32F-NEXT: vse32.v v11, (a2) 9209; RV64ZVE32F-NEXT: .LBB81_4: # %else2 9210; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 9211; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 9212; RV64ZVE32F-NEXT: andi a2, a1, 4 9213; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9214; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 9215; RV64ZVE32F-NEXT: bnez a2, .LBB81_12 9216; RV64ZVE32F-NEXT: # %bb.5: # %else4 9217; RV64ZVE32F-NEXT: andi a2, a1, 8 9218; RV64ZVE32F-NEXT: bnez a2, .LBB81_13 9219; RV64ZVE32F-NEXT: .LBB81_6: # %else6 9220; RV64ZVE32F-NEXT: andi a2, a1, 16 9221; RV64ZVE32F-NEXT: bnez a2, .LBB81_14 9222; RV64ZVE32F-NEXT: .LBB81_7: # %else8 9223; RV64ZVE32F-NEXT: andi a2, a1, 32 9224; RV64ZVE32F-NEXT: beqz a2, .LBB81_9 9225; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9 9226; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9227; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 9228; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9229; RV64ZVE32F-NEXT: slli a2, a2, 2 9230; RV64ZVE32F-NEXT: add a2, a0, a2 9231; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9232; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 9233; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9234; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9235; RV64ZVE32F-NEXT: .LBB81_9: # %else10 9236; RV64ZVE32F-NEXT: andi a2, a1, 64 9237; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9238; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 9239; RV64ZVE32F-NEXT: bnez a2, .LBB81_15 9240; RV64ZVE32F-NEXT: # %bb.10: # %else12 9241; RV64ZVE32F-NEXT: andi a1, a1, -128 9242; RV64ZVE32F-NEXT: bnez a1, .LBB81_16 9243; RV64ZVE32F-NEXT: .LBB81_11: # %else14 9244; RV64ZVE32F-NEXT: ret 9245; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3 9246; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9247; RV64ZVE32F-NEXT: slli a2, a2, 2 9248; RV64ZVE32F-NEXT: add a2, a0, a2 9249; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9250; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 9251; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9252; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9253; RV64ZVE32F-NEXT: andi a2, a1, 8 9254; RV64ZVE32F-NEXT: beqz a2, .LBB81_6 9255; RV64ZVE32F-NEXT: 
.LBB81_13: # %cond.store5 9256; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9257; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9258; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9259; RV64ZVE32F-NEXT: slli a2, a2, 2 9260; RV64ZVE32F-NEXT: add a2, a0, a2 9261; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9262; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 9263; RV64ZVE32F-NEXT: vse32.v v10, (a2) 9264; RV64ZVE32F-NEXT: andi a2, a1, 16 9265; RV64ZVE32F-NEXT: beqz a2, .LBB81_7 9266; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7 9267; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 9268; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9269; RV64ZVE32F-NEXT: slli a2, a2, 2 9270; RV64ZVE32F-NEXT: add a2, a0, a2 9271; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9272; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 9273; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9274; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9275; RV64ZVE32F-NEXT: andi a2, a1, 32 9276; RV64ZVE32F-NEXT: bnez a2, .LBB81_8 9277; RV64ZVE32F-NEXT: j .LBB81_9 9278; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11 9279; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9280; RV64ZVE32F-NEXT: slli a2, a2, 2 9281; RV64ZVE32F-NEXT: add a2, a0, a2 9282; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9283; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 9284; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9285; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9286; RV64ZVE32F-NEXT: andi a1, a1, -128 9287; RV64ZVE32F-NEXT: beqz a1, .LBB81_11 9288; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13 9289; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9290; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9291; RV64ZVE32F-NEXT: vmv.x.s a1, v10 9292; RV64ZVE32F-NEXT: slli a1, a1, 2 9293; RV64ZVE32F-NEXT: add a0, a0, a1 9294; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9295; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 9296; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9297; RV64ZVE32F-NEXT: vse32.v v8, (a0) 9298; RV64ZVE32F-NEXT: ret 9299 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs 9300 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 9301 ret void 9302} 9303 9304define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { 9305; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f32: 9306; RV32: # %bb.0: 9307; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9308; RV32-NEXT: vsext.vf2 v12, v10 9309; RV32-NEXT: vsll.vi v10, v12, 2 9310; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 9311; RV32-NEXT: ret 9312; 9313; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f32: 9314; RV64V: # %bb.0: 9315; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 9316; RV64V-NEXT: vsext.vf4 v12, v10 9317; RV64V-NEXT: vsll.vi v12, v12, 2 9318; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9319; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 9320; RV64V-NEXT: ret 9321; 9322; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32: 9323; RV64ZVE32F: # %bb.0: 9324; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9325; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9326; RV64ZVE32F-NEXT: andi a2, a1, 1 9327; RV64ZVE32F-NEXT: beqz a2, .LBB82_2 9328; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 9329; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 9330; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9331; RV64ZVE32F-NEXT: slli a2, a2, 2 9332; RV64ZVE32F-NEXT: add a2, a0, a2 9333; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9334; RV64ZVE32F-NEXT: vse32.v v8, (a2) 9335; RV64ZVE32F-NEXT: .LBB82_2: # %else 9336; RV64ZVE32F-NEXT: andi a2, a1, 2 9337; 
RV64ZVE32F-NEXT: beqz a2, .LBB82_4 9338; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 9339; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9340; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 9341; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9342; RV64ZVE32F-NEXT: slli a2, a2, 2 9343; RV64ZVE32F-NEXT: add a2, a0, a2 9344; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9345; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 9346; RV64ZVE32F-NEXT: vse32.v v11, (a2) 9347; RV64ZVE32F-NEXT: .LBB82_4: # %else2 9348; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 9349; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 9350; RV64ZVE32F-NEXT: andi a2, a1, 4 9351; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9352; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 9353; RV64ZVE32F-NEXT: bnez a2, .LBB82_12 9354; RV64ZVE32F-NEXT: # %bb.5: # %else4 9355; RV64ZVE32F-NEXT: andi a2, a1, 8 9356; RV64ZVE32F-NEXT: bnez a2, .LBB82_13 9357; RV64ZVE32F-NEXT: .LBB82_6: # %else6 9358; RV64ZVE32F-NEXT: andi a2, a1, 16 9359; RV64ZVE32F-NEXT: bnez a2, .LBB82_14 9360; RV64ZVE32F-NEXT: .LBB82_7: # %else8 9361; RV64ZVE32F-NEXT: andi a2, a1, 32 9362; RV64ZVE32F-NEXT: beqz a2, .LBB82_9 9363; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9 9364; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9365; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 9366; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9367; RV64ZVE32F-NEXT: slli a2, a2, 2 9368; RV64ZVE32F-NEXT: add a2, a0, a2 9369; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9370; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 9371; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9372; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9373; RV64ZVE32F-NEXT: .LBB82_9: # %else10 9374; RV64ZVE32F-NEXT: andi a2, a1, 64 9375; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9376; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 9377; RV64ZVE32F-NEXT: bnez a2, .LBB82_15 9378; RV64ZVE32F-NEXT: # %bb.10: # %else12 9379; RV64ZVE32F-NEXT: andi a1, a1, -128 9380; RV64ZVE32F-NEXT: bnez a1, .LBB82_16 9381; RV64ZVE32F-NEXT: .LBB82_11: # %else14 9382; RV64ZVE32F-NEXT: ret 9383; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3 9384; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9385; RV64ZVE32F-NEXT: slli a2, a2, 2 9386; RV64ZVE32F-NEXT: add a2, a0, a2 9387; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9388; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 9389; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9390; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9391; RV64ZVE32F-NEXT: andi a2, a1, 8 9392; RV64ZVE32F-NEXT: beqz a2, .LBB82_6 9393; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5 9394; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9395; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9396; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9397; RV64ZVE32F-NEXT: slli a2, a2, 2 9398; RV64ZVE32F-NEXT: add a2, a0, a2 9399; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9400; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 9401; RV64ZVE32F-NEXT: vse32.v v10, (a2) 9402; RV64ZVE32F-NEXT: andi a2, a1, 16 9403; RV64ZVE32F-NEXT: beqz a2, .LBB82_7 9404; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7 9405; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 9406; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9407; RV64ZVE32F-NEXT: slli a2, a2, 2 9408; RV64ZVE32F-NEXT: add a2, a0, a2 9409; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9410; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 9411; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9412; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9413; RV64ZVE32F-NEXT: andi a2, a1, 32 9414; RV64ZVE32F-NEXT: bnez a2, .LBB82_8 9415; RV64ZVE32F-NEXT: j .LBB82_9 9416; 
RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11 9417; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9418; RV64ZVE32F-NEXT: slli a2, a2, 2 9419; RV64ZVE32F-NEXT: add a2, a0, a2 9420; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9421; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 9422; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9423; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9424; RV64ZVE32F-NEXT: andi a1, a1, -128 9425; RV64ZVE32F-NEXT: beqz a1, .LBB82_11 9426; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13 9427; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9428; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9429; RV64ZVE32F-NEXT: vmv.x.s a1, v10 9430; RV64ZVE32F-NEXT: slli a1, a1, 2 9431; RV64ZVE32F-NEXT: add a0, a0, a1 9432; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9433; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 9434; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9435; RV64ZVE32F-NEXT: vse32.v v8, (a0) 9436; RV64ZVE32F-NEXT: ret 9437 %eidxs = sext <8 x i16> %idxs to <8 x i32> 9438 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 9439 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 9440 ret void 9441} 9442 9443define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) { 9444; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f32: 9445; RV32: # %bb.0: 9446; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9447; RV32-NEXT: vzext.vf2 v12, v10 9448; RV32-NEXT: vsll.vi v10, v12, 2 9449; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 9450; RV32-NEXT: ret 9451; 9452; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f32: 9453; RV64V: # %bb.0: 9454; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9455; RV64V-NEXT: vzext.vf2 v12, v10 9456; RV64V-NEXT: vsll.vi v10, v12, 2 9457; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t 9458; RV64V-NEXT: ret 9459; 9460; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32: 9461; RV64ZVE32F: # %bb.0: 9462; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9463; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9464; RV64ZVE32F-NEXT: andi a2, a1, 1 9465; RV64ZVE32F-NEXT: beqz a2, .LBB83_2 9466; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 9467; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 9468; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9469; RV64ZVE32F-NEXT: slli a2, a2, 48 9470; RV64ZVE32F-NEXT: srli a2, a2, 46 9471; RV64ZVE32F-NEXT: add a2, a0, a2 9472; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9473; RV64ZVE32F-NEXT: vse32.v v8, (a2) 9474; RV64ZVE32F-NEXT: .LBB83_2: # %else 9475; RV64ZVE32F-NEXT: andi a2, a1, 2 9476; RV64ZVE32F-NEXT: beqz a2, .LBB83_4 9477; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 9478; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9479; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 9480; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9481; RV64ZVE32F-NEXT: slli a2, a2, 48 9482; RV64ZVE32F-NEXT: srli a2, a2, 46 9483; RV64ZVE32F-NEXT: add a2, a0, a2 9484; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9485; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 9486; RV64ZVE32F-NEXT: vse32.v v11, (a2) 9487; RV64ZVE32F-NEXT: .LBB83_4: # %else2 9488; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 9489; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 9490; RV64ZVE32F-NEXT: andi a2, a1, 4 9491; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9492; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 9493; RV64ZVE32F-NEXT: bnez a2, .LBB83_12 9494; RV64ZVE32F-NEXT: # %bb.5: # %else4 9495; RV64ZVE32F-NEXT: andi a2, a1, 8 9496; RV64ZVE32F-NEXT: bnez a2, .LBB83_13 9497; RV64ZVE32F-NEXT: .LBB83_6: # %else6 9498; 
RV64ZVE32F-NEXT: andi a2, a1, 16 9499; RV64ZVE32F-NEXT: bnez a2, .LBB83_14 9500; RV64ZVE32F-NEXT: .LBB83_7: # %else8 9501; RV64ZVE32F-NEXT: andi a2, a1, 32 9502; RV64ZVE32F-NEXT: beqz a2, .LBB83_9 9503; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9 9504; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9505; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 9506; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9507; RV64ZVE32F-NEXT: slli a2, a2, 48 9508; RV64ZVE32F-NEXT: srli a2, a2, 46 9509; RV64ZVE32F-NEXT: add a2, a0, a2 9510; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9511; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 9512; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9513; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9514; RV64ZVE32F-NEXT: .LBB83_9: # %else10 9515; RV64ZVE32F-NEXT: andi a2, a1, 64 9516; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9517; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 9518; RV64ZVE32F-NEXT: bnez a2, .LBB83_15 9519; RV64ZVE32F-NEXT: # %bb.10: # %else12 9520; RV64ZVE32F-NEXT: andi a1, a1, -128 9521; RV64ZVE32F-NEXT: bnez a1, .LBB83_16 9522; RV64ZVE32F-NEXT: .LBB83_11: # %else14 9523; RV64ZVE32F-NEXT: ret 9524; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3 9525; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9526; RV64ZVE32F-NEXT: slli a2, a2, 48 9527; RV64ZVE32F-NEXT: srli a2, a2, 46 9528; RV64ZVE32F-NEXT: add a2, a0, a2 9529; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9530; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 9531; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9532; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9533; RV64ZVE32F-NEXT: andi a2, a1, 8 9534; RV64ZVE32F-NEXT: beqz a2, .LBB83_6 9535; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5 9536; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9537; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9538; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9539; RV64ZVE32F-NEXT: slli a2, a2, 48 9540; RV64ZVE32F-NEXT: srli a2, a2, 46 9541; RV64ZVE32F-NEXT: add a2, a0, a2 9542; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9543; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 9544; RV64ZVE32F-NEXT: vse32.v v10, (a2) 9545; RV64ZVE32F-NEXT: andi a2, a1, 16 9546; RV64ZVE32F-NEXT: beqz a2, .LBB83_7 9547; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7 9548; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 9549; RV64ZVE32F-NEXT: vmv.x.s a2, v11 9550; RV64ZVE32F-NEXT: slli a2, a2, 48 9551; RV64ZVE32F-NEXT: srli a2, a2, 46 9552; RV64ZVE32F-NEXT: add a2, a0, a2 9553; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9554; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 9555; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9556; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9557; RV64ZVE32F-NEXT: andi a2, a1, 32 9558; RV64ZVE32F-NEXT: bnez a2, .LBB83_8 9559; RV64ZVE32F-NEXT: j .LBB83_9 9560; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11 9561; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9562; RV64ZVE32F-NEXT: slli a2, a2, 48 9563; RV64ZVE32F-NEXT: srli a2, a2, 46 9564; RV64ZVE32F-NEXT: add a2, a0, a2 9565; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9566; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 9567; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9568; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9569; RV64ZVE32F-NEXT: andi a1, a1, -128 9570; RV64ZVE32F-NEXT: beqz a1, .LBB83_11 9571; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13 9572; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9573; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9574; RV64ZVE32F-NEXT: vmv.x.s a1, v10 9575; RV64ZVE32F-NEXT: slli a1, a1, 48 9576; RV64ZVE32F-NEXT: srli a1, a1, 46 9577; RV64ZVE32F-NEXT: add a0, a0, a1 9578; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9579; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 9580; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9581; RV64ZVE32F-NEXT: vse32.v v8, (a0) 9582; RV64ZVE32F-NEXT: ret 9583 %eidxs = zext <8 x i16> %idxs to <8 x i32> 9584 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 9585 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 9586 ret void 9587} 9588 9589define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) { 9590; RV32-LABEL: mscatter_baseidx_v8f32: 9591; RV32: # %bb.0: 9592; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9593; RV32-NEXT: vsll.vi v10, v10, 2 9594; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t 9595; RV32-NEXT: ret 9596; 9597; RV64V-LABEL: mscatter_baseidx_v8f32: 9598; RV64V: # %bb.0: 9599; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 9600; RV64V-NEXT: vsext.vf2 v12, v10 9601; RV64V-NEXT: vsll.vi v12, v12, 2 9602; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma 9603; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 9604; RV64V-NEXT: ret 9605; 9606; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32: 9607; RV64ZVE32F: # %bb.0: 9608; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9609; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9610; RV64ZVE32F-NEXT: andi a2, a1, 1 9611; RV64ZVE32F-NEXT: beqz a2, .LBB84_2 9612; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 9613; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9614; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9615; RV64ZVE32F-NEXT: slli a2, a2, 2 9616; RV64ZVE32F-NEXT: add a2, a0, a2 9617; RV64ZVE32F-NEXT: vse32.v v8, (a2) 9618; RV64ZVE32F-NEXT: .LBB84_2: # %else 9619; RV64ZVE32F-NEXT: andi a2, a1, 2 9620; RV64ZVE32F-NEXT: beqz a2, .LBB84_4 9621; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 9622; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9623; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 9624; RV64ZVE32F-NEXT: vmv.x.s a2, v12 9625; RV64ZVE32F-NEXT: slli a2, a2, 2 9626; RV64ZVE32F-NEXT: add a2, a0, a2 9627; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 9628; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9629; RV64ZVE32F-NEXT: .LBB84_4: # %else2 9630; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 9631; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 9632; RV64ZVE32F-NEXT: andi a2, a1, 4 9633; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 9634; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 9635; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 9636; RV64ZVE32F-NEXT: # %bb.5: # %else4 9637; RV64ZVE32F-NEXT: andi a2, a1, 8 9638; RV64ZVE32F-NEXT: bnez a2, .LBB84_13 9639; RV64ZVE32F-NEXT: .LBB84_6: # %else6 9640; RV64ZVE32F-NEXT: andi a2, a1, 16 9641; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 9642; RV64ZVE32F-NEXT: .LBB84_7: # %else8 9643; RV64ZVE32F-NEXT: andi a2, a1, 32 9644; RV64ZVE32F-NEXT: beqz a2, .LBB84_9 9645; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9 9646; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9647; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1 9648; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9649; RV64ZVE32F-NEXT: slli a2, a2, 2 9650; RV64ZVE32F-NEXT: add a2, a0, a2 9651; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9652; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 9653; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9654; RV64ZVE32F-NEXT: vse32.v v10, (a2) 9655; RV64ZVE32F-NEXT: .LBB84_9: # %else10 9656; RV64ZVE32F-NEXT: andi a2, a1, 64 9657; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 9658; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 9659; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 9660; RV64ZVE32F-NEXT: # %bb.10: # %else12 9661; 
RV64ZVE32F-NEXT: andi a1, a1, -128 9662; RV64ZVE32F-NEXT: bnez a1, .LBB84_16 9663; RV64ZVE32F-NEXT: .LBB84_11: # %else14 9664; RV64ZVE32F-NEXT: ret 9665; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3 9666; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9667; RV64ZVE32F-NEXT: slli a2, a2, 2 9668; RV64ZVE32F-NEXT: add a2, a0, a2 9669; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 9670; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9671; RV64ZVE32F-NEXT: vse32.v v11, (a2) 9672; RV64ZVE32F-NEXT: andi a2, a1, 8 9673; RV64ZVE32F-NEXT: beqz a2, .LBB84_6 9674; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5 9675; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9676; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9677; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9678; RV64ZVE32F-NEXT: slli a2, a2, 2 9679; RV64ZVE32F-NEXT: add a2, a0, a2 9680; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 9681; RV64ZVE32F-NEXT: vse32.v v10, (a2) 9682; RV64ZVE32F-NEXT: andi a2, a1, 16 9683; RV64ZVE32F-NEXT: beqz a2, .LBB84_7 9684; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7 9685; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9686; RV64ZVE32F-NEXT: vmv.x.s a2, v12 9687; RV64ZVE32F-NEXT: slli a2, a2, 2 9688; RV64ZVE32F-NEXT: add a2, a0, a2 9689; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 9690; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9691; RV64ZVE32F-NEXT: vse32.v v10, (a2) 9692; RV64ZVE32F-NEXT: andi a2, a1, 32 9693; RV64ZVE32F-NEXT: bnez a2, .LBB84_8 9694; RV64ZVE32F-NEXT: j .LBB84_9 9695; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11 9696; RV64ZVE32F-NEXT: vmv.x.s a2, v10 9697; RV64ZVE32F-NEXT: slli a2, a2, 2 9698; RV64ZVE32F-NEXT: add a2, a0, a2 9699; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9700; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 9701; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9702; RV64ZVE32F-NEXT: vse32.v v12, (a2) 9703; RV64ZVE32F-NEXT: andi a1, a1, -128 9704; RV64ZVE32F-NEXT: beqz a1, .LBB84_11 9705; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13 9706; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9707; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 9708; RV64ZVE32F-NEXT: vmv.x.s a1, v10 9709; RV64ZVE32F-NEXT: slli a1, a1, 2 9710; RV64ZVE32F-NEXT: add a0, a0, a1 9711; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 9712; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 9713; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9714; RV64ZVE32F-NEXT: vse32.v v8, (a0) 9715; RV64ZVE32F-NEXT: ret 9716 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs 9717 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m) 9718 ret void 9719} 9720 9721declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>) 9722 9723define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) { 9724; RV32V-LABEL: mscatter_v1f64: 9725; RV32V: # %bb.0: 9726; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma 9727; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t 9728; RV32V-NEXT: ret 9729; 9730; RV64V-LABEL: mscatter_v1f64: 9731; RV64V: # %bb.0: 9732; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma 9733; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t 9734; RV64V-NEXT: ret 9735; 9736; RV32ZVE32F-LABEL: mscatter_v1f64: 9737; RV32ZVE32F: # %bb.0: 9738; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma 9739; RV32ZVE32F-NEXT: vfirst.m a0, v0 9740; RV32ZVE32F-NEXT: bnez a0, .LBB85_2 9741; RV32ZVE32F-NEXT: # %bb.1: # %cond.store 9742; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9743; RV32ZVE32F-NEXT: vmv.x.s a0, v8 9744; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 9745; 
RV32ZVE32F-NEXT: .LBB85_2: # %else 9746; RV32ZVE32F-NEXT: ret 9747; 9748; RV64ZVE32F-LABEL: mscatter_v1f64: 9749; RV64ZVE32F: # %bb.0: 9750; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 9751; RV64ZVE32F-NEXT: vfirst.m a1, v0 9752; RV64ZVE32F-NEXT: bnez a1, .LBB85_2 9753; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 9754; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 9755; RV64ZVE32F-NEXT: .LBB85_2: # %else 9756; RV64ZVE32F-NEXT: ret 9757 call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m) 9758 ret void 9759} 9760 9761declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>) 9762 9763define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) { 9764; RV32V-LABEL: mscatter_v2f64: 9765; RV32V: # %bb.0: 9766; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma 9767; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t 9768; RV32V-NEXT: ret 9769; 9770; RV64V-LABEL: mscatter_v2f64: 9771; RV64V: # %bb.0: 9772; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma 9773; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t 9774; RV64V-NEXT: ret 9775; 9776; RV32ZVE32F-LABEL: mscatter_v2f64: 9777; RV32ZVE32F: # %bb.0: 9778; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9779; RV32ZVE32F-NEXT: vmv.x.s a0, v0 9780; RV32ZVE32F-NEXT: andi a1, a0, 1 9781; RV32ZVE32F-NEXT: bnez a1, .LBB86_3 9782; RV32ZVE32F-NEXT: # %bb.1: # %else 9783; RV32ZVE32F-NEXT: andi a0, a0, 2 9784; RV32ZVE32F-NEXT: bnez a0, .LBB86_4 9785; RV32ZVE32F-NEXT: .LBB86_2: # %else2 9786; RV32ZVE32F-NEXT: ret 9787; RV32ZVE32F-NEXT: .LBB86_3: # %cond.store 9788; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 9789; RV32ZVE32F-NEXT: vmv.x.s a1, v8 9790; RV32ZVE32F-NEXT: fsd fa0, 0(a1) 9791; RV32ZVE32F-NEXT: andi a0, a0, 2 9792; RV32ZVE32F-NEXT: beqz a0, .LBB86_2 9793; RV32ZVE32F-NEXT: .LBB86_4: # %cond.store1 9794; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9795; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 9796; RV32ZVE32F-NEXT: vmv.x.s a0, v8 9797; RV32ZVE32F-NEXT: fsd fa1, 0(a0) 9798; RV32ZVE32F-NEXT: ret 9799; 9800; RV64ZVE32F-LABEL: mscatter_v2f64: 9801; RV64ZVE32F: # %bb.0: 9802; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9803; RV64ZVE32F-NEXT: vmv.x.s a2, v0 9804; RV64ZVE32F-NEXT: andi a3, a2, 1 9805; RV64ZVE32F-NEXT: bnez a3, .LBB86_3 9806; RV64ZVE32F-NEXT: # %bb.1: # %else 9807; RV64ZVE32F-NEXT: andi a2, a2, 2 9808; RV64ZVE32F-NEXT: bnez a2, .LBB86_4 9809; RV64ZVE32F-NEXT: .LBB86_2: # %else2 9810; RV64ZVE32F-NEXT: ret 9811; RV64ZVE32F-NEXT: .LBB86_3: # %cond.store 9812; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 9813; RV64ZVE32F-NEXT: andi a2, a2, 2 9814; RV64ZVE32F-NEXT: beqz a2, .LBB86_2 9815; RV64ZVE32F-NEXT: .LBB86_4: # %cond.store1 9816; RV64ZVE32F-NEXT: fsd fa1, 0(a1) 9817; RV64ZVE32F-NEXT: ret 9818 call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m) 9819 ret void 9820} 9821 9822declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>) 9823 9824define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) { 9825; RV32V-LABEL: mscatter_v4f64: 9826; RV32V: # %bb.0: 9827; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 9828; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t 9829; RV32V-NEXT: ret 9830; 9831; RV64V-LABEL: mscatter_v4f64: 9832; RV64V: # %bb.0: 9833; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 9834; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t 9835; RV64V-NEXT: ret 9836; 9837; RV32ZVE32F-LABEL: mscatter_v4f64: 9838; RV32ZVE32F: # %bb.0: 9839; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, 
ta, ma 9840; RV32ZVE32F-NEXT: vmv.x.s a0, v0 9841; RV32ZVE32F-NEXT: andi a1, a0, 1 9842; RV32ZVE32F-NEXT: bnez a1, .LBB87_5 9843; RV32ZVE32F-NEXT: # %bb.1: # %else 9844; RV32ZVE32F-NEXT: andi a1, a0, 2 9845; RV32ZVE32F-NEXT: bnez a1, .LBB87_6 9846; RV32ZVE32F-NEXT: .LBB87_2: # %else2 9847; RV32ZVE32F-NEXT: andi a1, a0, 4 9848; RV32ZVE32F-NEXT: bnez a1, .LBB87_7 9849; RV32ZVE32F-NEXT: .LBB87_3: # %else4 9850; RV32ZVE32F-NEXT: andi a0, a0, 8 9851; RV32ZVE32F-NEXT: bnez a0, .LBB87_8 9852; RV32ZVE32F-NEXT: .LBB87_4: # %else6 9853; RV32ZVE32F-NEXT: ret 9854; RV32ZVE32F-NEXT: .LBB87_5: # %cond.store 9855; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 9856; RV32ZVE32F-NEXT: vmv.x.s a1, v8 9857; RV32ZVE32F-NEXT: fsd fa0, 0(a1) 9858; RV32ZVE32F-NEXT: andi a1, a0, 2 9859; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 9860; RV32ZVE32F-NEXT: .LBB87_6: # %cond.store1 9861; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9862; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 9863; RV32ZVE32F-NEXT: vmv.x.s a1, v9 9864; RV32ZVE32F-NEXT: fsd fa1, 0(a1) 9865; RV32ZVE32F-NEXT: andi a1, a0, 4 9866; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 9867; RV32ZVE32F-NEXT: .LBB87_7: # %cond.store3 9868; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9869; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 9870; RV32ZVE32F-NEXT: vmv.x.s a1, v9 9871; RV32ZVE32F-NEXT: fsd fa2, 0(a1) 9872; RV32ZVE32F-NEXT: andi a0, a0, 8 9873; RV32ZVE32F-NEXT: beqz a0, .LBB87_4 9874; RV32ZVE32F-NEXT: .LBB87_8: # %cond.store5 9875; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9876; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 9877; RV32ZVE32F-NEXT: vmv.x.s a0, v8 9878; RV32ZVE32F-NEXT: fsd fa3, 0(a0) 9879; RV32ZVE32F-NEXT: ret 9880; 9881; RV64ZVE32F-LABEL: mscatter_v4f64: 9882; RV64ZVE32F: # %bb.0: 9883; RV64ZVE32F-NEXT: ld a4, 8(a0) 9884; RV64ZVE32F-NEXT: ld a2, 16(a0) 9885; RV64ZVE32F-NEXT: ld a1, 24(a0) 9886; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9887; RV64ZVE32F-NEXT: vmv.x.s a3, v0 9888; RV64ZVE32F-NEXT: andi a5, a3, 1 9889; RV64ZVE32F-NEXT: bnez a5, .LBB87_5 9890; RV64ZVE32F-NEXT: # %bb.1: # %else 9891; RV64ZVE32F-NEXT: andi a0, a3, 2 9892; RV64ZVE32F-NEXT: bnez a0, .LBB87_6 9893; RV64ZVE32F-NEXT: .LBB87_2: # %else2 9894; RV64ZVE32F-NEXT: andi a0, a3, 4 9895; RV64ZVE32F-NEXT: bnez a0, .LBB87_7 9896; RV64ZVE32F-NEXT: .LBB87_3: # %else4 9897; RV64ZVE32F-NEXT: andi a3, a3, 8 9898; RV64ZVE32F-NEXT: bnez a3, .LBB87_8 9899; RV64ZVE32F-NEXT: .LBB87_4: # %else6 9900; RV64ZVE32F-NEXT: ret 9901; RV64ZVE32F-NEXT: .LBB87_5: # %cond.store 9902; RV64ZVE32F-NEXT: ld a0, 0(a0) 9903; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 9904; RV64ZVE32F-NEXT: andi a0, a3, 2 9905; RV64ZVE32F-NEXT: beqz a0, .LBB87_2 9906; RV64ZVE32F-NEXT: .LBB87_6: # %cond.store1 9907; RV64ZVE32F-NEXT: fsd fa1, 0(a4) 9908; RV64ZVE32F-NEXT: andi a0, a3, 4 9909; RV64ZVE32F-NEXT: beqz a0, .LBB87_3 9910; RV64ZVE32F-NEXT: .LBB87_7: # %cond.store3 9911; RV64ZVE32F-NEXT: fsd fa2, 0(a2) 9912; RV64ZVE32F-NEXT: andi a3, a3, 8 9913; RV64ZVE32F-NEXT: beqz a3, .LBB87_4 9914; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store5 9915; RV64ZVE32F-NEXT: fsd fa3, 0(a1) 9916; RV64ZVE32F-NEXT: ret 9917 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m) 9918 ret void 9919} 9920 9921define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) { 9922; RV32V-LABEL: mscatter_truemask_v4f64: 9923; RV32V: # %bb.0: 9924; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 9925; RV32V-NEXT: vsoxei32.v v8, (zero), v10 9926; RV32V-NEXT: ret 9927; 9928; RV64V-LABEL: mscatter_truemask_v4f64: 
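; The all-true mask is dropped, so the RV64V scatter below is an unmasked
; vsoxei64 (no v0.t operand).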
define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
; RV32V-LABEL: mscatter_truemask_v4f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v10
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_truemask_v4f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v10
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_truemask_v4f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV32ZVE32F-NEXT:    vmv.x.s a0, v9
; RV32ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT:    fsd fa1, 0(a0)
; RV32ZVE32F-NEXT:    vmv.x.s a0, v9
; RV32ZVE32F-NEXT:    fsd fa2, 0(a0)
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 0(a0)
; RV64ZVE32F-NEXT:    ld a2, 8(a0)
; RV64ZVE32F-NEXT:    ld a3, 16(a0)
; RV64ZVE32F-NEXT:    ld a0, 24(a0)
; RV64ZVE32F-NEXT:    fsd fa0, 0(a1)
; RV64ZVE32F-NEXT:    fsd fa1, 0(a2)
; RV64ZVE32F-NEXT:    fsd fa2, 0(a3)
; RV64ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)

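; For v8f64 the scalarized ZVE32F lowerings walk all eight mask bits, testing
; bits 0-6 with andi 1..64 and bit 7 with andi -128 (the sign bit of the e8 mask).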
define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_v8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_v8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v8f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a0, v0
; RV32ZVE32F-NEXT:    andi a1, a0, 1
; RV32ZVE32F-NEXT:    bnez a1, .LBB90_9
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a1, a0, 2
; RV32ZVE32F-NEXT:    bnez a1, .LBB90_10
; RV32ZVE32F-NEXT:  .LBB90_2: # %else2
; RV32ZVE32F-NEXT:    andi a1, a0, 4
; RV32ZVE32F-NEXT:    bnez a1, .LBB90_11
; RV32ZVE32F-NEXT:  .LBB90_3: # %else4
; RV32ZVE32F-NEXT:    andi a1, a0, 8
; RV32ZVE32F-NEXT:    bnez a1, .LBB90_12
; RV32ZVE32F-NEXT:  .LBB90_4: # %else6
; RV32ZVE32F-NEXT:    andi a1, a0, 16
; RV32ZVE32F-NEXT:    bnez a1, .LBB90_13
; RV32ZVE32F-NEXT:  .LBB90_5: # %else8
; RV32ZVE32F-NEXT:    andi a1, a0, 32
; RV32ZVE32F-NEXT:    bnez a1, .LBB90_14
; RV32ZVE32F-NEXT:  .LBB90_6: # %else10
; RV32ZVE32F-NEXT:    andi a1, a0, 64
; RV32ZVE32F-NEXT:    bnez a1, .LBB90_15
; RV32ZVE32F-NEXT:  .LBB90_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a0, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB90_16
; RV32ZVE32F-NEXT:  .LBB90_8: # %else14
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB90_9: # %cond.store
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
; RV32ZVE32F-NEXT:    fsd fa0, 0(a1)
; RV32ZVE32F-NEXT:    andi a1, a0, 2
; RV32ZVE32F-NEXT:    beqz a1, .LBB90_2
; RV32ZVE32F-NEXT:  .LBB90_10: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
; RV32ZVE32F-NEXT:    fsd fa1, 0(a1)
; RV32ZVE32F-NEXT:    andi a1, a0, 4
; RV32ZVE32F-NEXT:    beqz a1, .LBB90_3
; RV32ZVE32F-NEXT:  .LBB90_11: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
; RV32ZVE32F-NEXT:    fsd fa2, 0(a1)
; RV32ZVE32F-NEXT:    andi a1, a0, 8
; RV32ZVE32F-NEXT:    beqz a1, .LBB90_4
; RV32ZVE32F-NEXT:  .LBB90_12: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
; RV32ZVE32F-NEXT:    fsd fa3, 0(a1)
; RV32ZVE32F-NEXT:    andi a1, a0, 16
; RV32ZVE32F-NEXT:    beqz a1, .LBB90_5
; RV32ZVE32F-NEXT:  .LBB90_13: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
; RV32ZVE32F-NEXT:    fsd fa4, 0(a1)
; RV32ZVE32F-NEXT:    andi a1, a0, 32
; RV32ZVE32F-NEXT:    beqz a1, .LBB90_6
; RV32ZVE32F-NEXT:  .LBB90_14: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
; RV32ZVE32F-NEXT:    fsd fa5, 0(a1)
; RV32ZVE32F-NEXT:    andi a1, a0, 64
; RV32ZVE32F-NEXT:    beqz a1, .LBB90_7
; RV32ZVE32F-NEXT:  .LBB90_15: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a1, v10
; RV32ZVE32F-NEXT:    fsd fa6, 0(a1)
; RV32ZVE32F-NEXT:    andi a0, a0, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB90_8
; RV32ZVE32F-NEXT:  .LBB90_16: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v8f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a3, 40(a0)
; RV64ZVE32F-NEXT:    ld a2, 48(a0)
; RV64ZVE32F-NEXT:    ld a1, 56(a0)
; RV64ZVE32F-NEXT:    ld t0, 8(a0)
; RV64ZVE32F-NEXT:    ld a7, 16(a0)
; RV64ZVE32F-NEXT:    ld a6, 24(a0)
; RV64ZVE32F-NEXT:    ld a5, 32(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
; RV64ZVE32F-NEXT:    andi t1, a4, 1
; RV64ZVE32F-NEXT:    bnez t1, .LBB90_9
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB90_10
; RV64ZVE32F-NEXT:  .LBB90_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB90_11
; RV64ZVE32F-NEXT:  .LBB90_3: # %else4
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    bnez a0, .LBB90_12
; RV64ZVE32F-NEXT:  .LBB90_4: # %else6
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    bnez a0, .LBB90_13
; RV64ZVE32F-NEXT:  .LBB90_5: # %else8
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    bnez a0, .LBB90_14
; RV64ZVE32F-NEXT:  .LBB90_6: # %else10
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    bnez a0, .LBB90_15
; RV64ZVE32F-NEXT:  .LBB90_7: # %else12
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    bnez a0, .LBB90_16
; RV64ZVE32F-NEXT:  .LBB90_8: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB90_9: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB90_2
; RV64ZVE32F-NEXT:  .LBB90_10: # %cond.store1
; RV64ZVE32F-NEXT:    fsd fa1, 0(t0)
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB90_3
; RV64ZVE32F-NEXT:  .LBB90_11: # %cond.store3
; RV64ZVE32F-NEXT:    fsd fa2, 0(a7)
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    beqz a0, .LBB90_4
; RV64ZVE32F-NEXT:  .LBB90_12: # %cond.store5
; RV64ZVE32F-NEXT:    fsd fa3, 0(a6)
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    beqz a0, .LBB90_5
; RV64ZVE32F-NEXT:  .LBB90_13: # %cond.store7
; RV64ZVE32F-NEXT:    fsd fa4, 0(a5)
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    beqz a0, .LBB90_6
; RV64ZVE32F-NEXT:  .LBB90_14: # %cond.store9
; RV64ZVE32F-NEXT:    fsd fa5, 0(a3)
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    beqz a0, .LBB90_7
; RV64ZVE32F-NEXT:  .LBB90_15: # %cond.store11
; RV64ZVE32F-NEXT:    fsd fa6, 0(a2)
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    beqz a0, .LBB90_8
; RV64ZVE32F-NEXT:  .LBB90_16: # %cond.store13
; RV64ZVE32F-NEXT:    fsd fa7, 0(a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

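; The i8 indices are sign-extended (vsext.vf4 on RV32V, vsext.vf8 on RV64V) and
; scaled by 8 with a shift by 3 before being added to the base pointer.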
define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT:    vsext.vf4 v14, v12
; RV32V-NEXT:    vsll.vi v12, v14, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf8 v16, v12
; RV64V-NEXT:    vsll.vi v12, v16, 3
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    andi a2, a1, 1
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT:    bnez a2, .LBB91_9
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB91_10
; RV32ZVE32F-NEXT:  .LBB91_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB91_11
; RV32ZVE32F-NEXT:  .LBB91_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB91_12
; RV32ZVE32F-NEXT:  .LBB91_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB91_13
; RV32ZVE32F-NEXT:  .LBB91_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB91_14
; RV32ZVE32F-NEXT:  .LBB91_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB91_15
; RV32ZVE32F-NEXT:  .LBB91_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB91_16
; RV32ZVE32F-NEXT:  .LBB91_8: # %else14
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB91_9: # %cond.store
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB91_2
; RV32ZVE32F-NEXT:  .LBB91_10: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa1, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB91_3
; RV32ZVE32F-NEXT:  .LBB91_11: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa2, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB91_4
; RV32ZVE32F-NEXT:  .LBB91_12: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB91_5
; RV32ZVE32F-NEXT:  .LBB91_13: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa4, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB91_6
; RV32ZVE32F-NEXT:  .LBB91_14: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa5, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB91_7
; RV32ZVE32F-NEXT:  .LBB91_15: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa6, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB91_8
; RV32ZVE32F-NEXT:  .LBB91_16: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB91_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa0, 0(a2)
; RV64ZVE32F-NEXT:  .LBB91_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB91_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa1, 0(a2)
; RV64ZVE32F-NEXT:  .LBB91_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB91_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB91_13
; RV64ZVE32F-NEXT:  .LBB91_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB91_14
; RV64ZVE32F-NEXT:  .LBB91_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB91_9
; RV64ZVE32F-NEXT:  .LBB91_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa5, 0(a2)
; RV64ZVE32F-NEXT:  .LBB91_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB91_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB91_16
; RV64ZVE32F-NEXT:  .LBB91_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB91_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa2, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB91_6
; RV64ZVE32F-NEXT:  .LBB91_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa3, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB91_7
; RV64ZVE32F-NEXT:  .LBB91_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa4, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB91_8
; RV64ZVE32F-NEXT:    j .LBB91_9
; RV64ZVE32F-NEXT:  .LBB91_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa6, 0(a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB91_11
; RV64ZVE32F-NEXT:  .LBB91_16: # %cond.store13
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    slli a1, a1, 3
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV64ZVE32F-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

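; An explicit sext of the i8 indices lowers identically to the plain v8i8 index
; case above.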
define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT:    vsext.vf4 v14, v12
; RV32V-NEXT:    vsll.vi v12, v14, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf8 v16, v12
; RV64V-NEXT:    vsll.vi v12, v16, 3
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    andi a2, a1, 1
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT:    bnez a2, .LBB92_9
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB92_10
; RV32ZVE32F-NEXT:  .LBB92_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB92_11
; RV32ZVE32F-NEXT:  .LBB92_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB92_12
; RV32ZVE32F-NEXT:  .LBB92_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB92_13
; RV32ZVE32F-NEXT:  .LBB92_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB92_14
; RV32ZVE32F-NEXT:  .LBB92_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB92_15
; RV32ZVE32F-NEXT:  .LBB92_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB92_16
; RV32ZVE32F-NEXT:  .LBB92_8: # %else14
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB92_9: # %cond.store
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB92_2
; RV32ZVE32F-NEXT:  .LBB92_10: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa1, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB92_3
; RV32ZVE32F-NEXT:  .LBB92_11: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa2, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB92_4
; RV32ZVE32F-NEXT:  .LBB92_12: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB92_5
; RV32ZVE32F-NEXT:  .LBB92_13: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa4, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB92_6
; RV32ZVE32F-NEXT:  .LBB92_14: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa5, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB92_7
; RV32ZVE32F-NEXT:  .LBB92_15: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa6, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB92_8
; RV32ZVE32F-NEXT:  .LBB92_16: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB92_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa0, 0(a2)
; RV64ZVE32F-NEXT:  .LBB92_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB92_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa1, 0(a2)
; RV64ZVE32F-NEXT:  .LBB92_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB92_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB92_13
; RV64ZVE32F-NEXT:  .LBB92_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB92_14
; RV64ZVE32F-NEXT:  .LBB92_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB92_9
; RV64ZVE32F-NEXT:  .LBB92_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa5, 0(a2)
; RV64ZVE32F-NEXT:  .LBB92_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB92_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB92_16
; RV64ZVE32F-NEXT:  .LBB92_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB92_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa2, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB92_6
; RV64ZVE32F-NEXT:  .LBB92_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa3, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB92_7
; RV64ZVE32F-NEXT:  .LBB92_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa4, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB92_8
; RV64ZVE32F-NEXT:    j .LBB92_9
; RV64ZVE32F-NEXT:  .LBB92_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa6, 0(a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB92_11
; RV64ZVE32F-NEXT:  .LBB92_16: # %cond.store13
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    slli a1, a1, 3
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV64ZVE32F-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

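; Zero-extended i8 indices fit in 16 bits, so RV32V and RV64V can shrink the
; index EEW to e16 and use vsoxei16; the RV64ZVE32F fallback masks each index
; with andi 255 before scaling.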
define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32V-NEXT:    vzext.vf2 v13, v12
; RV32V-NEXT:    vsll.vi v12, v13, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT:    vzext.vf2 v13, v12
; RV64V-NEXT:    vsll.vi v12, v13, 3
; RV64V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64V-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vzext.vf4 v10, v8
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    andi a2, a1, 1
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT:    bnez a2, .LBB93_9
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB93_10
; RV32ZVE32F-NEXT:  .LBB93_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB93_11
; RV32ZVE32F-NEXT:  .LBB93_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB93_12
; RV32ZVE32F-NEXT:  .LBB93_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB93_13
; RV32ZVE32F-NEXT:  .LBB93_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB93_14
; RV32ZVE32F-NEXT:  .LBB93_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB93_15
; RV32ZVE32F-NEXT:  .LBB93_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB93_16
; RV32ZVE32F-NEXT:  .LBB93_8: # %else14
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB93_9: # %cond.store
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB93_2
; RV32ZVE32F-NEXT:  .LBB93_10: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa1, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB93_3
; RV32ZVE32F-NEXT:  .LBB93_11: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa2, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB93_4
; RV32ZVE32F-NEXT:  .LBB93_12: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB93_5
; RV32ZVE32F-NEXT:  .LBB93_13: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa4, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB93_6
; RV32ZVE32F-NEXT:  .LBB93_14: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa5, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB93_7
; RV32ZVE32F-NEXT:  .LBB93_15: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa6, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB93_8
; RV32ZVE32F-NEXT:  .LBB93_16: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB93_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa0, 0(a2)
; RV64ZVE32F-NEXT:  .LBB93_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB93_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa1, 0(a2)
; RV64ZVE32F-NEXT:  .LBB93_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB93_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB93_13
; RV64ZVE32F-NEXT:  .LBB93_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB93_14
; RV64ZVE32F-NEXT:  .LBB93_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB93_9
; RV64ZVE32F-NEXT:  .LBB93_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa5, 0(a2)
; RV64ZVE32F-NEXT:  .LBB93_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB93_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB93_16
; RV64ZVE32F-NEXT:  .LBB93_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB93_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa2, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB93_6
; RV64ZVE32F-NEXT:  .LBB93_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa3, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB93_7
; RV64ZVE32F-NEXT:  .LBB93_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa4, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB93_8
; RV64ZVE32F-NEXT:    j .LBB93_9
; RV64ZVE32F-NEXT:  .LBB93_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    andi a2, a2, 255
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa6, 0(a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB93_11
; RV64ZVE32F-NEXT:  .LBB93_16: # %cond.store13
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    andi a1, a1, 255
; RV64ZVE32F-NEXT:    slli a1, a1, 3
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV64ZVE32F-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

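; i16 indices follow the same pattern, extended with vsext.vf2 (RV32V) or
; vsext.vf4 (RV64V) before the shift.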
define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT:    vsext.vf2 v14, v12
; RV32V-NEXT:    vsll.vi v12, v14, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf4 v16, v12
; RV64V-NEXT:    vsll.vi v12, v16, 3
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf2 v10, v8
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    andi a2, a1, 1
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT:    bnez a2, .LBB94_9
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB94_10
; RV32ZVE32F-NEXT:  .LBB94_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB94_11
; RV32ZVE32F-NEXT:  .LBB94_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB94_12
; RV32ZVE32F-NEXT:  .LBB94_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB94_13
; RV32ZVE32F-NEXT:  .LBB94_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB94_14
; RV32ZVE32F-NEXT:  .LBB94_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB94_15
; RV32ZVE32F-NEXT:  .LBB94_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB94_16
; RV32ZVE32F-NEXT:  .LBB94_8: # %else14
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB94_9: # %cond.store
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB94_2
; RV32ZVE32F-NEXT:  .LBB94_10: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa1, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB94_3
; RV32ZVE32F-NEXT:  .LBB94_11: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa2, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB94_4
; RV32ZVE32F-NEXT:  .LBB94_12: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB94_5
; RV32ZVE32F-NEXT:  .LBB94_13: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa4, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB94_6
; RV32ZVE32F-NEXT:  .LBB94_14: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa5, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB94_7
; RV32ZVE32F-NEXT:  .LBB94_15: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa6, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB94_8
; RV32ZVE32F-NEXT:  .LBB94_16: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB94_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa0, 0(a2)
; RV64ZVE32F-NEXT:  .LBB94_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB94_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa1, 0(a2)
; RV64ZVE32F-NEXT:  .LBB94_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB94_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB94_13
; RV64ZVE32F-NEXT:  .LBB94_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB94_14
; RV64ZVE32F-NEXT:  .LBB94_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB94_9
; RV64ZVE32F-NEXT:  .LBB94_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa5, 0(a2)
; RV64ZVE32F-NEXT:  .LBB94_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB94_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB94_16
; RV64ZVE32F-NEXT:  .LBB94_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB94_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa2, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB94_6
; RV64ZVE32F-NEXT:  .LBB94_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa3, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB94_7
; RV64ZVE32F-NEXT:  .LBB94_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa4, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
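; As with i8, an explicit sext of the i16 indices matches the plain v8i16
; lowering.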
; RV64ZVE32F-NEXT:    bnez a2, .LBB94_8
; RV64ZVE32F-NEXT:    j .LBB94_9
; RV64ZVE32F-NEXT:  .LBB94_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa6, 0(a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB94_11
; RV64ZVE32F-NEXT:  .LBB94_16: # %cond.store13
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    slli a1, a1, 3
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV64ZVE32F-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT:    vsext.vf2 v14, v12
; RV32V-NEXT:    vsll.vi v12, v14, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT:    vsext.vf4 v16, v12
; RV64V-NEXT:    vsll.vi v12, v16, 3
; RV64V-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf2 v10, v8
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    andi a2, a1, 1
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT:    bnez a2, .LBB95_9
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB95_10
; RV32ZVE32F-NEXT:  .LBB95_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB95_11
; RV32ZVE32F-NEXT:  .LBB95_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB95_12
; RV32ZVE32F-NEXT:  .LBB95_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB95_13
; RV32ZVE32F-NEXT:  .LBB95_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB95_14
; RV32ZVE32F-NEXT:  .LBB95_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB95_15
; RV32ZVE32F-NEXT:  .LBB95_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB95_16
; RV32ZVE32F-NEXT:  .LBB95_8: # %else14
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB95_9: # %cond.store
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB95_2
; RV32ZVE32F-NEXT:  .LBB95_10: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa1, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB95_3
; RV32ZVE32F-NEXT:  .LBB95_11: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa2, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB95_4
; RV32ZVE32F-NEXT:  .LBB95_12: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB95_5
; RV32ZVE32F-NEXT:  .LBB95_13: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa4, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB95_6
; RV32ZVE32F-NEXT:  .LBB95_14: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa5, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB95_7
; RV32ZVE32F-NEXT:  .LBB95_15: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa6, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB95_8
; RV32ZVE32F-NEXT:  .LBB95_16: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB95_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa0, 0(a2)
; RV64ZVE32F-NEXT:  .LBB95_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB95_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa1, 0(a2)
; RV64ZVE32F-NEXT:  .LBB95_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB95_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB95_13
; RV64ZVE32F-NEXT:  .LBB95_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB95_14
; RV64ZVE32F-NEXT:  .LBB95_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
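; Zero-extended i16 indices fit in 32 bits, so even RV64V can use vsoxei32; the
; scalar fallback zero-extends and scales by 8 in one slli 48 / srli 45 pair.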
; RV64ZVE32F-NEXT:    beqz a2, .LBB95_9
; RV64ZVE32F-NEXT:  .LBB95_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa5, 0(a2)
; RV64ZVE32F-NEXT:  .LBB95_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB95_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB95_16
; RV64ZVE32F-NEXT:  .LBB95_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB95_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa2, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB95_6
; RV64ZVE32F-NEXT:  .LBB95_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa3, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB95_7
; RV64ZVE32F-NEXT:  .LBB95_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa4, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB95_8
; RV64ZVE32F-NEXT:    j .LBB95_9
; RV64ZVE32F-NEXT:  .LBB95_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 3
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa6, 0(a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB95_11
; RV64ZVE32F-NEXT:  .LBB95_16: # %cond.store13
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
; RV64ZVE32F-NEXT:    slli a1, a1, 3
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV64ZVE32F-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT:    vzext.vf2 v14, v12
; RV32V-NEXT:    vsll.vi v12, v14, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT:    ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV64V:       # %bb.0:
; RV64V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT:    vzext.vf2 v14, v12
; RV64V-NEXT:    vsll.vi v12, v14, 3
; RV64V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV64V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64V-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vzext.vf2 v10, v8
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a1, v0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    andi a2, a1, 1
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT:    bnez a2, .LBB96_9
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB96_10
; RV32ZVE32F-NEXT:  .LBB96_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB96_11
; RV32ZVE32F-NEXT:  .LBB96_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB96_12
; RV32ZVE32F-NEXT:  .LBB96_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB96_13
; RV32ZVE32F-NEXT:  .LBB96_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB96_14
; RV32ZVE32F-NEXT:  .LBB96_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB96_15
; RV32ZVE32F-NEXT:  .LBB96_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB96_16
; RV32ZVE32F-NEXT:  .LBB96_8: # %else14
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB96_9: # %cond.store
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB96_2
; RV32ZVE32F-NEXT:  .LBB96_10: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa1, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB96_3
; RV32ZVE32F-NEXT:  .LBB96_11: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa2, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB96_4
; RV32ZVE32F-NEXT:  .LBB96_12: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa3, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB96_5
; RV32ZVE32F-NEXT:  .LBB96_13: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa4, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB96_6
; RV32ZVE32F-NEXT:  .LBB96_14: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa5, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB96_7
; RV32ZVE32F-NEXT:  .LBB96_15: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    fsd fa6, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a1, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB96_8
; RV32ZVE32F-NEXT:  .LBB96_16: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    fsd fa7, 0(a0)
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB96_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 48
; RV64ZVE32F-NEXT:    srli a2, a2, 45
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa0, 0(a2)
; RV64ZVE32F-NEXT:  .LBB96_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB96_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 48
; RV64ZVE32F-NEXT:    srli a2, a2, 45
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa1, 0(a2)
; RV64ZVE32F-NEXT:  .LBB96_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB96_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB96_13
; RV64ZVE32F-NEXT:  .LBB96_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB96_14
; RV64ZVE32F-NEXT:  .LBB96_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB96_9
; RV64ZVE32F-NEXT:  .LBB96_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 48
; RV64ZVE32F-NEXT:    srli a2, a2, 45
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa5, 0(a2)
; RV64ZVE32F-NEXT:  .LBB96_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB96_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB96_16
; RV64ZVE32F-NEXT:  .LBB96_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB96_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 48
; RV64ZVE32F-NEXT:    srli a2, a2, 45
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa2, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB96_6
; RV64ZVE32F-NEXT:  .LBB96_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
; RV64ZVE32F-NEXT:    slli a2, a2, 48
; RV64ZVE32F-NEXT:    srli a2, a2, 45
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa3, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB96_7
; RV64ZVE32F-NEXT:  .LBB96_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    slli a2, a2, 48
; RV64ZVE32F-NEXT:    srli a2, a2, 45
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    fsd fa4, 0(a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB96_8
; RV64ZVE32F-NEXT:    j .LBB96_9
; RV64ZVE32F-NEXT:  .LBB96_15: # %cond.store11
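; i32 indices are shifted in place for vsoxei32 on RV32V and sign-extended to
; i64 on RV64V.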
RV64ZVE32F-NEXT: vmv.x.s a2, v8 11349; RV64ZVE32F-NEXT: slli a2, a2, 48 11350; RV64ZVE32F-NEXT: srli a2, a2, 45 11351; RV64ZVE32F-NEXT: add a2, a0, a2 11352; RV64ZVE32F-NEXT: fsd fa6, 0(a2) 11353; RV64ZVE32F-NEXT: andi a1, a1, -128 11354; RV64ZVE32F-NEXT: beqz a1, .LBB96_11 11355; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13 11356; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11357; RV64ZVE32F-NEXT: vmv.x.s a1, v8 11358; RV64ZVE32F-NEXT: slli a1, a1, 48 11359; RV64ZVE32F-NEXT: srli a1, a1, 45 11360; RV64ZVE32F-NEXT: add a0, a0, a1 11361; RV64ZVE32F-NEXT: fsd fa7, 0(a0) 11362; RV64ZVE32F-NEXT: ret 11363 %eidxs = zext <8 x i16> %idxs to <8 x i64> 11364 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 11365 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 11366 ret void 11367} 11368 11369define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) { 11370; RV32V-LABEL: mscatter_baseidx_v8i32_v8f64: 11371; RV32V: # %bb.0: 11372; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11373; RV32V-NEXT: vsll.vi v12, v12, 3 11374; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 11375; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t 11376; RV32V-NEXT: ret 11377; 11378; RV64V-LABEL: mscatter_baseidx_v8i32_v8f64: 11379; RV64V: # %bb.0: 11380; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 11381; RV64V-NEXT: vsext.vf2 v16, v12 11382; RV64V-NEXT: vsll.vi v12, v16, 3 11383; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 11384; RV64V-NEXT: ret 11385; 11386; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64: 11387; RV32ZVE32F: # %bb.0: 11388; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11389; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 11390; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 11391; RV32ZVE32F-NEXT: vmv.x.s a1, v0 11392; RV32ZVE32F-NEXT: andi a2, a1, 1 11393; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 11394; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 11395; RV32ZVE32F-NEXT: bnez a2, .LBB97_9 11396; RV32ZVE32F-NEXT: # %bb.1: # %else 11397; RV32ZVE32F-NEXT: andi a0, a1, 2 11398; RV32ZVE32F-NEXT: bnez a0, .LBB97_10 11399; RV32ZVE32F-NEXT: .LBB97_2: # %else2 11400; RV32ZVE32F-NEXT: andi a0, a1, 4 11401; RV32ZVE32F-NEXT: bnez a0, .LBB97_11 11402; RV32ZVE32F-NEXT: .LBB97_3: # %else4 11403; RV32ZVE32F-NEXT: andi a0, a1, 8 11404; RV32ZVE32F-NEXT: bnez a0, .LBB97_12 11405; RV32ZVE32F-NEXT: .LBB97_4: # %else6 11406; RV32ZVE32F-NEXT: andi a0, a1, 16 11407; RV32ZVE32F-NEXT: bnez a0, .LBB97_13 11408; RV32ZVE32F-NEXT: .LBB97_5: # %else8 11409; RV32ZVE32F-NEXT: andi a0, a1, 32 11410; RV32ZVE32F-NEXT: bnez a0, .LBB97_14 11411; RV32ZVE32F-NEXT: .LBB97_6: # %else10 11412; RV32ZVE32F-NEXT: andi a0, a1, 64 11413; RV32ZVE32F-NEXT: bnez a0, .LBB97_15 11414; RV32ZVE32F-NEXT: .LBB97_7: # %else12 11415; RV32ZVE32F-NEXT: andi a0, a1, -128 11416; RV32ZVE32F-NEXT: bnez a0, .LBB97_16 11417; RV32ZVE32F-NEXT: .LBB97_8: # %else14 11418; RV32ZVE32F-NEXT: ret 11419; RV32ZVE32F-NEXT: .LBB97_9: # %cond.store 11420; RV32ZVE32F-NEXT: vmv.x.s a0, v8 11421; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11422; RV32ZVE32F-NEXT: andi a0, a1, 2 11423; RV32ZVE32F-NEXT: beqz a0, .LBB97_2 11424; RV32ZVE32F-NEXT: .LBB97_10: # %cond.store1 11425; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11426; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11427; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11428; RV32ZVE32F-NEXT: fsd fa1, 0(a0) 11429; RV32ZVE32F-NEXT: andi a0, a1, 4 11430; RV32ZVE32F-NEXT: beqz a0, .LBB97_3 11431; RV32ZVE32F-NEXT: .LBB97_11: # %cond.store3 11432; 
RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11433; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 11434; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11435; RV32ZVE32F-NEXT: fsd fa2, 0(a0) 11436; RV32ZVE32F-NEXT: andi a0, a1, 8 11437; RV32ZVE32F-NEXT: beqz a0, .LBB97_4 11438; RV32ZVE32F-NEXT: .LBB97_12: # %cond.store5 11439; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11440; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 11441; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11442; RV32ZVE32F-NEXT: fsd fa3, 0(a0) 11443; RV32ZVE32F-NEXT: andi a0, a1, 16 11444; RV32ZVE32F-NEXT: beqz a0, .LBB97_5 11445; RV32ZVE32F-NEXT: .LBB97_13: # %cond.store7 11446; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11447; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11448; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11449; RV32ZVE32F-NEXT: fsd fa4, 0(a0) 11450; RV32ZVE32F-NEXT: andi a0, a1, 32 11451; RV32ZVE32F-NEXT: beqz a0, .LBB97_6 11452; RV32ZVE32F-NEXT: .LBB97_14: # %cond.store9 11453; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11454; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 11455; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11456; RV32ZVE32F-NEXT: fsd fa5, 0(a0) 11457; RV32ZVE32F-NEXT: andi a0, a1, 64 11458; RV32ZVE32F-NEXT: beqz a0, .LBB97_7 11459; RV32ZVE32F-NEXT: .LBB97_15: # %cond.store11 11460; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11461; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 11462; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11463; RV32ZVE32F-NEXT: fsd fa6, 0(a0) 11464; RV32ZVE32F-NEXT: andi a0, a1, -128 11465; RV32ZVE32F-NEXT: beqz a0, .LBB97_8 11466; RV32ZVE32F-NEXT: .LBB97_16: # %cond.store13 11467; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11468; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11469; RV32ZVE32F-NEXT: vmv.x.s a0, v8 11470; RV32ZVE32F-NEXT: fsd fa7, 0(a0) 11471; RV32ZVE32F-NEXT: ret 11472; 11473; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64: 11474; RV64ZVE32F: # %bb.0: 11475; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11476; RV64ZVE32F-NEXT: vmv.x.s a1, v0 11477; RV64ZVE32F-NEXT: andi a2, a1, 1 11478; RV64ZVE32F-NEXT: beqz a2, .LBB97_2 11479; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 11480; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 11481; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11482; RV64ZVE32F-NEXT: slli a2, a2, 3 11483; RV64ZVE32F-NEXT: add a2, a0, a2 11484; RV64ZVE32F-NEXT: fsd fa0, 0(a2) 11485; RV64ZVE32F-NEXT: .LBB97_2: # %else 11486; RV64ZVE32F-NEXT: andi a2, a1, 2 11487; RV64ZVE32F-NEXT: beqz a2, .LBB97_4 11488; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 11489; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11490; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11491; RV64ZVE32F-NEXT: vmv.x.s a2, v10 11492; RV64ZVE32F-NEXT: slli a2, a2, 3 11493; RV64ZVE32F-NEXT: add a2, a0, a2 11494; RV64ZVE32F-NEXT: fsd fa1, 0(a2) 11495; RV64ZVE32F-NEXT: .LBB97_4: # %else2 11496; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 11497; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11498; RV64ZVE32F-NEXT: andi a2, a1, 4 11499; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 11500; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 11501; RV64ZVE32F-NEXT: bnez a2, .LBB97_12 11502; RV64ZVE32F-NEXT: # %bb.5: # %else4 11503; RV64ZVE32F-NEXT: andi a2, a1, 8 11504; RV64ZVE32F-NEXT: bnez a2, .LBB97_13 11505; RV64ZVE32F-NEXT: .LBB97_6: # %else6 11506; RV64ZVE32F-NEXT: andi a2, a1, 16 11507; RV64ZVE32F-NEXT: bnez a2, .LBB97_14 11508; RV64ZVE32F-NEXT: .LBB97_7: # %else8 11509; RV64ZVE32F-NEXT: andi a2, a1, 32 11510; RV64ZVE32F-NEXT: beqz a2, .LBB97_9 11511; RV64ZVE32F-NEXT: .LBB97_8: # %cond.store9 11512; RV64ZVE32F-NEXT: 
vslidedown.vi v8, v10, 1 11513; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11514; RV64ZVE32F-NEXT: slli a2, a2, 3 11515; RV64ZVE32F-NEXT: add a2, a0, a2 11516; RV64ZVE32F-NEXT: fsd fa5, 0(a2) 11517; RV64ZVE32F-NEXT: .LBB97_9: # %else10 11518; RV64ZVE32F-NEXT: andi a2, a1, 64 11519; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 11520; RV64ZVE32F-NEXT: bnez a2, .LBB97_15 11521; RV64ZVE32F-NEXT: # %bb.10: # %else12 11522; RV64ZVE32F-NEXT: andi a1, a1, -128 11523; RV64ZVE32F-NEXT: bnez a1, .LBB97_16 11524; RV64ZVE32F-NEXT: .LBB97_11: # %else14 11525; RV64ZVE32F-NEXT: ret 11526; RV64ZVE32F-NEXT: .LBB97_12: # %cond.store3 11527; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11528; RV64ZVE32F-NEXT: slli a2, a2, 3 11529; RV64ZVE32F-NEXT: add a2, a0, a2 11530; RV64ZVE32F-NEXT: fsd fa2, 0(a2) 11531; RV64ZVE32F-NEXT: andi a2, a1, 8 11532; RV64ZVE32F-NEXT: beqz a2, .LBB97_6 11533; RV64ZVE32F-NEXT: .LBB97_13: # %cond.store5 11534; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11535; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11536; RV64ZVE32F-NEXT: slli a2, a2, 3 11537; RV64ZVE32F-NEXT: add a2, a0, a2 11538; RV64ZVE32F-NEXT: fsd fa3, 0(a2) 11539; RV64ZVE32F-NEXT: andi a2, a1, 16 11540; RV64ZVE32F-NEXT: beqz a2, .LBB97_7 11541; RV64ZVE32F-NEXT: .LBB97_14: # %cond.store7 11542; RV64ZVE32F-NEXT: vmv.x.s a2, v10 11543; RV64ZVE32F-NEXT: slli a2, a2, 3 11544; RV64ZVE32F-NEXT: add a2, a0, a2 11545; RV64ZVE32F-NEXT: fsd fa4, 0(a2) 11546; RV64ZVE32F-NEXT: andi a2, a1, 32 11547; RV64ZVE32F-NEXT: bnez a2, .LBB97_8 11548; RV64ZVE32F-NEXT: j .LBB97_9 11549; RV64ZVE32F-NEXT: .LBB97_15: # %cond.store11 11550; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11551; RV64ZVE32F-NEXT: slli a2, a2, 3 11552; RV64ZVE32F-NEXT: add a2, a0, a2 11553; RV64ZVE32F-NEXT: fsd fa6, 0(a2) 11554; RV64ZVE32F-NEXT: andi a1, a1, -128 11555; RV64ZVE32F-NEXT: beqz a1, .LBB97_11 11556; RV64ZVE32F-NEXT: .LBB97_16: # %cond.store13 11557; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11558; RV64ZVE32F-NEXT: vmv.x.s a1, v8 11559; RV64ZVE32F-NEXT: slli a1, a1, 3 11560; RV64ZVE32F-NEXT: add a0, a0, a1 11561; RV64ZVE32F-NEXT: fsd fa7, 0(a0) 11562; RV64ZVE32F-NEXT: ret 11563 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs 11564 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 11565 ret void 11566} 11567 11568define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) { 11569; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8f64: 11570; RV32V: # %bb.0: 11571; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11572; RV32V-NEXT: vsll.vi v12, v12, 3 11573; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 11574; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t 11575; RV32V-NEXT: ret 11576; 11577; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8f64: 11578; RV64V: # %bb.0: 11579; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 11580; RV64V-NEXT: vsext.vf2 v16, v12 11581; RV64V-NEXT: vsll.vi v12, v16, 3 11582; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 11583; RV64V-NEXT: ret 11584; 11585; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64: 11586; RV32ZVE32F: # %bb.0: 11587; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11588; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 11589; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 11590; RV32ZVE32F-NEXT: vmv.x.s a1, v0 11591; RV32ZVE32F-NEXT: andi a2, a1, 1 11592; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 11593; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 11594; RV32ZVE32F-NEXT: bnez a2, .LBB98_9 11595; RV32ZVE32F-NEXT: # %bb.1: # %else 11596; RV32ZVE32F-NEXT: andi a0, a1, 2 11597; 
RV32ZVE32F-NEXT: bnez a0, .LBB98_10 11598; RV32ZVE32F-NEXT: .LBB98_2: # %else2 11599; RV32ZVE32F-NEXT: andi a0, a1, 4 11600; RV32ZVE32F-NEXT: bnez a0, .LBB98_11 11601; RV32ZVE32F-NEXT: .LBB98_3: # %else4 11602; RV32ZVE32F-NEXT: andi a0, a1, 8 11603; RV32ZVE32F-NEXT: bnez a0, .LBB98_12 11604; RV32ZVE32F-NEXT: .LBB98_4: # %else6 11605; RV32ZVE32F-NEXT: andi a0, a1, 16 11606; RV32ZVE32F-NEXT: bnez a0, .LBB98_13 11607; RV32ZVE32F-NEXT: .LBB98_5: # %else8 11608; RV32ZVE32F-NEXT: andi a0, a1, 32 11609; RV32ZVE32F-NEXT: bnez a0, .LBB98_14 11610; RV32ZVE32F-NEXT: .LBB98_6: # %else10 11611; RV32ZVE32F-NEXT: andi a0, a1, 64 11612; RV32ZVE32F-NEXT: bnez a0, .LBB98_15 11613; RV32ZVE32F-NEXT: .LBB98_7: # %else12 11614; RV32ZVE32F-NEXT: andi a0, a1, -128 11615; RV32ZVE32F-NEXT: bnez a0, .LBB98_16 11616; RV32ZVE32F-NEXT: .LBB98_8: # %else14 11617; RV32ZVE32F-NEXT: ret 11618; RV32ZVE32F-NEXT: .LBB98_9: # %cond.store 11619; RV32ZVE32F-NEXT: vmv.x.s a0, v8 11620; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11621; RV32ZVE32F-NEXT: andi a0, a1, 2 11622; RV32ZVE32F-NEXT: beqz a0, .LBB98_2 11623; RV32ZVE32F-NEXT: .LBB98_10: # %cond.store1 11624; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11625; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11626; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11627; RV32ZVE32F-NEXT: fsd fa1, 0(a0) 11628; RV32ZVE32F-NEXT: andi a0, a1, 4 11629; RV32ZVE32F-NEXT: beqz a0, .LBB98_3 11630; RV32ZVE32F-NEXT: .LBB98_11: # %cond.store3 11631; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11632; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 11633; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11634; RV32ZVE32F-NEXT: fsd fa2, 0(a0) 11635; RV32ZVE32F-NEXT: andi a0, a1, 8 11636; RV32ZVE32F-NEXT: beqz a0, .LBB98_4 11637; RV32ZVE32F-NEXT: .LBB98_12: # %cond.store5 11638; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11639; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 11640; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11641; RV32ZVE32F-NEXT: fsd fa3, 0(a0) 11642; RV32ZVE32F-NEXT: andi a0, a1, 16 11643; RV32ZVE32F-NEXT: beqz a0, .LBB98_5 11644; RV32ZVE32F-NEXT: .LBB98_13: # %cond.store7 11645; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11646; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11647; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11648; RV32ZVE32F-NEXT: fsd fa4, 0(a0) 11649; RV32ZVE32F-NEXT: andi a0, a1, 32 11650; RV32ZVE32F-NEXT: beqz a0, .LBB98_6 11651; RV32ZVE32F-NEXT: .LBB98_14: # %cond.store9 11652; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11653; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 11654; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11655; RV32ZVE32F-NEXT: fsd fa5, 0(a0) 11656; RV32ZVE32F-NEXT: andi a0, a1, 64 11657; RV32ZVE32F-NEXT: beqz a0, .LBB98_7 11658; RV32ZVE32F-NEXT: .LBB98_15: # %cond.store11 11659; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11660; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 11661; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11662; RV32ZVE32F-NEXT: fsd fa6, 0(a0) 11663; RV32ZVE32F-NEXT: andi a0, a1, -128 11664; RV32ZVE32F-NEXT: beqz a0, .LBB98_8 11665; RV32ZVE32F-NEXT: .LBB98_16: # %cond.store13 11666; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11667; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11668; RV32ZVE32F-NEXT: vmv.x.s a0, v8 11669; RV32ZVE32F-NEXT: fsd fa7, 0(a0) 11670; RV32ZVE32F-NEXT: ret 11671; 11672; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64: 11673; RV64ZVE32F: # %bb.0: 11674; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11675; RV64ZVE32F-NEXT: vmv.x.s a1, v0 11676; RV64ZVE32F-NEXT: andi a2, a1, 1 11677; RV64ZVE32F-NEXT: beqz a2, .LBB98_2 11678; RV64ZVE32F-NEXT: # %bb.1: # 
%cond.store 11679; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 11680; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11681; RV64ZVE32F-NEXT: slli a2, a2, 3 11682; RV64ZVE32F-NEXT: add a2, a0, a2 11683; RV64ZVE32F-NEXT: fsd fa0, 0(a2) 11684; RV64ZVE32F-NEXT: .LBB98_2: # %else 11685; RV64ZVE32F-NEXT: andi a2, a1, 2 11686; RV64ZVE32F-NEXT: beqz a2, .LBB98_4 11687; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 11688; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11689; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11690; RV64ZVE32F-NEXT: vmv.x.s a2, v10 11691; RV64ZVE32F-NEXT: slli a2, a2, 3 11692; RV64ZVE32F-NEXT: add a2, a0, a2 11693; RV64ZVE32F-NEXT: fsd fa1, 0(a2) 11694; RV64ZVE32F-NEXT: .LBB98_4: # %else2 11695; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 11696; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11697; RV64ZVE32F-NEXT: andi a2, a1, 4 11698; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 11699; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 11700; RV64ZVE32F-NEXT: bnez a2, .LBB98_12 11701; RV64ZVE32F-NEXT: # %bb.5: # %else4 11702; RV64ZVE32F-NEXT: andi a2, a1, 8 11703; RV64ZVE32F-NEXT: bnez a2, .LBB98_13 11704; RV64ZVE32F-NEXT: .LBB98_6: # %else6 11705; RV64ZVE32F-NEXT: andi a2, a1, 16 11706; RV64ZVE32F-NEXT: bnez a2, .LBB98_14 11707; RV64ZVE32F-NEXT: .LBB98_7: # %else8 11708; RV64ZVE32F-NEXT: andi a2, a1, 32 11709; RV64ZVE32F-NEXT: beqz a2, .LBB98_9 11710; RV64ZVE32F-NEXT: .LBB98_8: # %cond.store9 11711; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 11712; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11713; RV64ZVE32F-NEXT: slli a2, a2, 3 11714; RV64ZVE32F-NEXT: add a2, a0, a2 11715; RV64ZVE32F-NEXT: fsd fa5, 0(a2) 11716; RV64ZVE32F-NEXT: .LBB98_9: # %else10 11717; RV64ZVE32F-NEXT: andi a2, a1, 64 11718; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 11719; RV64ZVE32F-NEXT: bnez a2, .LBB98_15 11720; RV64ZVE32F-NEXT: # %bb.10: # %else12 11721; RV64ZVE32F-NEXT: andi a1, a1, -128 11722; RV64ZVE32F-NEXT: bnez a1, .LBB98_16 11723; RV64ZVE32F-NEXT: .LBB98_11: # %else14 11724; RV64ZVE32F-NEXT: ret 11725; RV64ZVE32F-NEXT: .LBB98_12: # %cond.store3 11726; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11727; RV64ZVE32F-NEXT: slli a2, a2, 3 11728; RV64ZVE32F-NEXT: add a2, a0, a2 11729; RV64ZVE32F-NEXT: fsd fa2, 0(a2) 11730; RV64ZVE32F-NEXT: andi a2, a1, 8 11731; RV64ZVE32F-NEXT: beqz a2, .LBB98_6 11732; RV64ZVE32F-NEXT: .LBB98_13: # %cond.store5 11733; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11734; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11735; RV64ZVE32F-NEXT: slli a2, a2, 3 11736; RV64ZVE32F-NEXT: add a2, a0, a2 11737; RV64ZVE32F-NEXT: fsd fa3, 0(a2) 11738; RV64ZVE32F-NEXT: andi a2, a1, 16 11739; RV64ZVE32F-NEXT: beqz a2, .LBB98_7 11740; RV64ZVE32F-NEXT: .LBB98_14: # %cond.store7 11741; RV64ZVE32F-NEXT: vmv.x.s a2, v10 11742; RV64ZVE32F-NEXT: slli a2, a2, 3 11743; RV64ZVE32F-NEXT: add a2, a0, a2 11744; RV64ZVE32F-NEXT: fsd fa4, 0(a2) 11745; RV64ZVE32F-NEXT: andi a2, a1, 32 11746; RV64ZVE32F-NEXT: bnez a2, .LBB98_8 11747; RV64ZVE32F-NEXT: j .LBB98_9 11748; RV64ZVE32F-NEXT: .LBB98_15: # %cond.store11 11749; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11750; RV64ZVE32F-NEXT: slli a2, a2, 3 11751; RV64ZVE32F-NEXT: add a2, a0, a2 11752; RV64ZVE32F-NEXT: fsd fa6, 0(a2) 11753; RV64ZVE32F-NEXT: andi a1, a1, -128 11754; RV64ZVE32F-NEXT: beqz a1, .LBB98_11 11755; RV64ZVE32F-NEXT: .LBB98_16: # %cond.store13 11756; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11757; RV64ZVE32F-NEXT: vmv.x.s a1, v8 11758; RV64ZVE32F-NEXT: slli a1, a1, 3 11759; RV64ZVE32F-NEXT: add a0, a0, a1 11760; RV64ZVE32F-NEXT: fsd fa7, 0(a0) 11761; RV64ZVE32F-NEXT: ret 11762 %eidxs = sext <8 x 
i32> %idxs to <8 x i64> 11763 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 11764 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 11765 ret void 11766} 11767 11768define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) { 11769; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8f64: 11770; RV32V: # %bb.0: 11771; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11772; RV32V-NEXT: vsll.vi v12, v12, 3 11773; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 11774; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t 11775; RV32V-NEXT: ret 11776; 11777; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8f64: 11778; RV64V: # %bb.0: 11779; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 11780; RV64V-NEXT: vzext.vf2 v16, v12 11781; RV64V-NEXT: vsll.vi v12, v16, 3 11782; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 11783; RV64V-NEXT: ret 11784; 11785; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64: 11786; RV32ZVE32F: # %bb.0: 11787; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11788; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 11789; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 11790; RV32ZVE32F-NEXT: vmv.x.s a1, v0 11791; RV32ZVE32F-NEXT: andi a2, a1, 1 11792; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 11793; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 11794; RV32ZVE32F-NEXT: bnez a2, .LBB99_9 11795; RV32ZVE32F-NEXT: # %bb.1: # %else 11796; RV32ZVE32F-NEXT: andi a0, a1, 2 11797; RV32ZVE32F-NEXT: bnez a0, .LBB99_10 11798; RV32ZVE32F-NEXT: .LBB99_2: # %else2 11799; RV32ZVE32F-NEXT: andi a0, a1, 4 11800; RV32ZVE32F-NEXT: bnez a0, .LBB99_11 11801; RV32ZVE32F-NEXT: .LBB99_3: # %else4 11802; RV32ZVE32F-NEXT: andi a0, a1, 8 11803; RV32ZVE32F-NEXT: bnez a0, .LBB99_12 11804; RV32ZVE32F-NEXT: .LBB99_4: # %else6 11805; RV32ZVE32F-NEXT: andi a0, a1, 16 11806; RV32ZVE32F-NEXT: bnez a0, .LBB99_13 11807; RV32ZVE32F-NEXT: .LBB99_5: # %else8 11808; RV32ZVE32F-NEXT: andi a0, a1, 32 11809; RV32ZVE32F-NEXT: bnez a0, .LBB99_14 11810; RV32ZVE32F-NEXT: .LBB99_6: # %else10 11811; RV32ZVE32F-NEXT: andi a0, a1, 64 11812; RV32ZVE32F-NEXT: bnez a0, .LBB99_15 11813; RV32ZVE32F-NEXT: .LBB99_7: # %else12 11814; RV32ZVE32F-NEXT: andi a0, a1, -128 11815; RV32ZVE32F-NEXT: bnez a0, .LBB99_16 11816; RV32ZVE32F-NEXT: .LBB99_8: # %else14 11817; RV32ZVE32F-NEXT: ret 11818; RV32ZVE32F-NEXT: .LBB99_9: # %cond.store 11819; RV32ZVE32F-NEXT: vmv.x.s a0, v8 11820; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11821; RV32ZVE32F-NEXT: andi a0, a1, 2 11822; RV32ZVE32F-NEXT: beqz a0, .LBB99_2 11823; RV32ZVE32F-NEXT: .LBB99_10: # %cond.store1 11824; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11825; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11826; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11827; RV32ZVE32F-NEXT: fsd fa1, 0(a0) 11828; RV32ZVE32F-NEXT: andi a0, a1, 4 11829; RV32ZVE32F-NEXT: beqz a0, .LBB99_3 11830; RV32ZVE32F-NEXT: .LBB99_11: # %cond.store3 11831; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11832; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 11833; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11834; RV32ZVE32F-NEXT: fsd fa2, 0(a0) 11835; RV32ZVE32F-NEXT: andi a0, a1, 8 11836; RV32ZVE32F-NEXT: beqz a0, .LBB99_4 11837; RV32ZVE32F-NEXT: .LBB99_12: # %cond.store5 11838; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11839; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 11840; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11841; RV32ZVE32F-NEXT: fsd fa3, 0(a0) 11842; RV32ZVE32F-NEXT: andi a0, a1, 16 11843; RV32ZVE32F-NEXT: beqz a0, .LBB99_5 11844; RV32ZVE32F-NEXT: .LBB99_13: 
# %cond.store7 11845; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11846; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11847; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11848; RV32ZVE32F-NEXT: fsd fa4, 0(a0) 11849; RV32ZVE32F-NEXT: andi a0, a1, 32 11850; RV32ZVE32F-NEXT: beqz a0, .LBB99_6 11851; RV32ZVE32F-NEXT: .LBB99_14: # %cond.store9 11852; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11853; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 11854; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11855; RV32ZVE32F-NEXT: fsd fa5, 0(a0) 11856; RV32ZVE32F-NEXT: andi a0, a1, 64 11857; RV32ZVE32F-NEXT: beqz a0, .LBB99_7 11858; RV32ZVE32F-NEXT: .LBB99_15: # %cond.store11 11859; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11860; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 11861; RV32ZVE32F-NEXT: vmv.x.s a0, v10 11862; RV32ZVE32F-NEXT: fsd fa6, 0(a0) 11863; RV32ZVE32F-NEXT: andi a0, a1, -128 11864; RV32ZVE32F-NEXT: beqz a0, .LBB99_8 11865; RV32ZVE32F-NEXT: .LBB99_16: # %cond.store13 11866; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11867; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11868; RV32ZVE32F-NEXT: vmv.x.s a0, v8 11869; RV32ZVE32F-NEXT: fsd fa7, 0(a0) 11870; RV32ZVE32F-NEXT: ret 11871; 11872; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64: 11873; RV64ZVE32F: # %bb.0: 11874; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11875; RV64ZVE32F-NEXT: vmv.x.s a1, v0 11876; RV64ZVE32F-NEXT: andi a2, a1, 1 11877; RV64ZVE32F-NEXT: beqz a2, .LBB99_2 11878; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 11879; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 11880; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11881; RV64ZVE32F-NEXT: slli a2, a2, 32 11882; RV64ZVE32F-NEXT: srli a2, a2, 29 11883; RV64ZVE32F-NEXT: add a2, a0, a2 11884; RV64ZVE32F-NEXT: fsd fa0, 0(a2) 11885; RV64ZVE32F-NEXT: .LBB99_2: # %else 11886; RV64ZVE32F-NEXT: andi a2, a1, 2 11887; RV64ZVE32F-NEXT: beqz a2, .LBB99_4 11888; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 11889; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11890; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11891; RV64ZVE32F-NEXT: vmv.x.s a2, v10 11892; RV64ZVE32F-NEXT: slli a2, a2, 32 11893; RV64ZVE32F-NEXT: srli a2, a2, 29 11894; RV64ZVE32F-NEXT: add a2, a0, a2 11895; RV64ZVE32F-NEXT: fsd fa1, 0(a2) 11896; RV64ZVE32F-NEXT: .LBB99_4: # %else2 11897; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 11898; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11899; RV64ZVE32F-NEXT: andi a2, a1, 4 11900; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 11901; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 11902; RV64ZVE32F-NEXT: bnez a2, .LBB99_12 11903; RV64ZVE32F-NEXT: # %bb.5: # %else4 11904; RV64ZVE32F-NEXT: andi a2, a1, 8 11905; RV64ZVE32F-NEXT: bnez a2, .LBB99_13 11906; RV64ZVE32F-NEXT: .LBB99_6: # %else6 11907; RV64ZVE32F-NEXT: andi a2, a1, 16 11908; RV64ZVE32F-NEXT: bnez a2, .LBB99_14 11909; RV64ZVE32F-NEXT: .LBB99_7: # %else8 11910; RV64ZVE32F-NEXT: andi a2, a1, 32 11911; RV64ZVE32F-NEXT: beqz a2, .LBB99_9 11912; RV64ZVE32F-NEXT: .LBB99_8: # %cond.store9 11913; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 11914; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11915; RV64ZVE32F-NEXT: slli a2, a2, 32 11916; RV64ZVE32F-NEXT: srli a2, a2, 29 11917; RV64ZVE32F-NEXT: add a2, a0, a2 11918; RV64ZVE32F-NEXT: fsd fa5, 0(a2) 11919; RV64ZVE32F-NEXT: .LBB99_9: # %else10 11920; RV64ZVE32F-NEXT: andi a2, a1, 64 11921; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 11922; RV64ZVE32F-NEXT: bnez a2, .LBB99_15 11923; RV64ZVE32F-NEXT: # %bb.10: # %else12 11924; RV64ZVE32F-NEXT: andi a1, a1, -128 11925; RV64ZVE32F-NEXT: bnez a1, .LBB99_16 
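; Note: the slli 32 / srli 29 pairs in this block fold the i32->i64
; zero-extension together with the *8 scaling for the f64 element size.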
11926; RV64ZVE32F-NEXT: .LBB99_11: # %else14 11927; RV64ZVE32F-NEXT: ret 11928; RV64ZVE32F-NEXT: .LBB99_12: # %cond.store3 11929; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11930; RV64ZVE32F-NEXT: slli a2, a2, 32 11931; RV64ZVE32F-NEXT: srli a2, a2, 29 11932; RV64ZVE32F-NEXT: add a2, a0, a2 11933; RV64ZVE32F-NEXT: fsd fa2, 0(a2) 11934; RV64ZVE32F-NEXT: andi a2, a1, 8 11935; RV64ZVE32F-NEXT: beqz a2, .LBB99_6 11936; RV64ZVE32F-NEXT: .LBB99_13: # %cond.store5 11937; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11938; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11939; RV64ZVE32F-NEXT: slli a2, a2, 32 11940; RV64ZVE32F-NEXT: srli a2, a2, 29 11941; RV64ZVE32F-NEXT: add a2, a0, a2 11942; RV64ZVE32F-NEXT: fsd fa3, 0(a2) 11943; RV64ZVE32F-NEXT: andi a2, a1, 16 11944; RV64ZVE32F-NEXT: beqz a2, .LBB99_7 11945; RV64ZVE32F-NEXT: .LBB99_14: # %cond.store7 11946; RV64ZVE32F-NEXT: vmv.x.s a2, v10 11947; RV64ZVE32F-NEXT: slli a2, a2, 32 11948; RV64ZVE32F-NEXT: srli a2, a2, 29 11949; RV64ZVE32F-NEXT: add a2, a0, a2 11950; RV64ZVE32F-NEXT: fsd fa4, 0(a2) 11951; RV64ZVE32F-NEXT: andi a2, a1, 32 11952; RV64ZVE32F-NEXT: bnez a2, .LBB99_8 11953; RV64ZVE32F-NEXT: j .LBB99_9 11954; RV64ZVE32F-NEXT: .LBB99_15: # %cond.store11 11955; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11956; RV64ZVE32F-NEXT: slli a2, a2, 32 11957; RV64ZVE32F-NEXT: srli a2, a2, 29 11958; RV64ZVE32F-NEXT: add a2, a0, a2 11959; RV64ZVE32F-NEXT: fsd fa6, 0(a2) 11960; RV64ZVE32F-NEXT: andi a1, a1, -128 11961; RV64ZVE32F-NEXT: beqz a1, .LBB99_11 11962; RV64ZVE32F-NEXT: .LBB99_16: # %cond.store13 11963; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11964; RV64ZVE32F-NEXT: vmv.x.s a1, v8 11965; RV64ZVE32F-NEXT: slli a1, a1, 32 11966; RV64ZVE32F-NEXT: srli a1, a1, 29 11967; RV64ZVE32F-NEXT: add a0, a0, a1 11968; RV64ZVE32F-NEXT: fsd fa7, 0(a0) 11969; RV64ZVE32F-NEXT: ret 11970 %eidxs = zext <8 x i32> %idxs to <8 x i64> 11971 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 11972 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 11973 ret void 11974} 11975 11976define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) { 11977; RV32V-LABEL: mscatter_baseidx_v8f64: 11978; RV32V: # %bb.0: 11979; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11980; RV32V-NEXT: vnsrl.wi v16, v12, 0 11981; RV32V-NEXT: vsll.vi v12, v16, 3 11982; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma 11983; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t 11984; RV32V-NEXT: ret 11985; 11986; RV64V-LABEL: mscatter_baseidx_v8f64: 11987; RV64V: # %bb.0: 11988; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 11989; RV64V-NEXT: vsll.vi v12, v12, 3 11990; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t 11991; RV64V-NEXT: ret 11992; 11993; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64: 11994; RV32ZVE32F: # %bb.0: 11995; RV32ZVE32F-NEXT: lw a2, 32(a1) 11996; RV32ZVE32F-NEXT: lw a3, 40(a1) 11997; RV32ZVE32F-NEXT: lw a4, 48(a1) 11998; RV32ZVE32F-NEXT: lw a5, 56(a1) 11999; RV32ZVE32F-NEXT: lw a6, 0(a1) 12000; RV32ZVE32F-NEXT: lw a7, 8(a1) 12001; RV32ZVE32F-NEXT: lw t0, 16(a1) 12002; RV32ZVE32F-NEXT: lw t1, 24(a1) 12003; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12004; RV32ZVE32F-NEXT: vmv.v.x v8, a6 12005; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 12006; RV32ZVE32F-NEXT: vmv.x.s a1, v0 12007; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 12008; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 12009; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 12010; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 12011; RV32ZVE32F-NEXT: vslide1down.vx 
v8, v8, a2 12012; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 12013; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 12014; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 12015; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 12016; RV32ZVE32F-NEXT: andi a2, a1, 1 12017; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 12018; RV32ZVE32F-NEXT: bnez a2, .LBB100_9 12019; RV32ZVE32F-NEXT: # %bb.1: # %else 12020; RV32ZVE32F-NEXT: andi a0, a1, 2 12021; RV32ZVE32F-NEXT: bnez a0, .LBB100_10 12022; RV32ZVE32F-NEXT: .LBB100_2: # %else2 12023; RV32ZVE32F-NEXT: andi a0, a1, 4 12024; RV32ZVE32F-NEXT: bnez a0, .LBB100_11 12025; RV32ZVE32F-NEXT: .LBB100_3: # %else4 12026; RV32ZVE32F-NEXT: andi a0, a1, 8 12027; RV32ZVE32F-NEXT: bnez a0, .LBB100_12 12028; RV32ZVE32F-NEXT: .LBB100_4: # %else6 12029; RV32ZVE32F-NEXT: andi a0, a1, 16 12030; RV32ZVE32F-NEXT: bnez a0, .LBB100_13 12031; RV32ZVE32F-NEXT: .LBB100_5: # %else8 12032; RV32ZVE32F-NEXT: andi a0, a1, 32 12033; RV32ZVE32F-NEXT: bnez a0, .LBB100_14 12034; RV32ZVE32F-NEXT: .LBB100_6: # %else10 12035; RV32ZVE32F-NEXT: andi a0, a1, 64 12036; RV32ZVE32F-NEXT: bnez a0, .LBB100_15 12037; RV32ZVE32F-NEXT: .LBB100_7: # %else12 12038; RV32ZVE32F-NEXT: andi a0, a1, -128 12039; RV32ZVE32F-NEXT: bnez a0, .LBB100_16 12040; RV32ZVE32F-NEXT: .LBB100_8: # %else14 12041; RV32ZVE32F-NEXT: ret 12042; RV32ZVE32F-NEXT: .LBB100_9: # %cond.store 12043; RV32ZVE32F-NEXT: vmv.x.s a0, v8 12044; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 12045; RV32ZVE32F-NEXT: andi a0, a1, 2 12046; RV32ZVE32F-NEXT: beqz a0, .LBB100_2 12047; RV32ZVE32F-NEXT: .LBB100_10: # %cond.store1 12048; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12049; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12050; RV32ZVE32F-NEXT: vmv.x.s a0, v10 12051; RV32ZVE32F-NEXT: fsd fa1, 0(a0) 12052; RV32ZVE32F-NEXT: andi a0, a1, 4 12053; RV32ZVE32F-NEXT: beqz a0, .LBB100_3 12054; RV32ZVE32F-NEXT: .LBB100_11: # %cond.store3 12055; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12056; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 12057; RV32ZVE32F-NEXT: vmv.x.s a0, v10 12058; RV32ZVE32F-NEXT: fsd fa2, 0(a0) 12059; RV32ZVE32F-NEXT: andi a0, a1, 8 12060; RV32ZVE32F-NEXT: beqz a0, .LBB100_4 12061; RV32ZVE32F-NEXT: .LBB100_12: # %cond.store5 12062; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12063; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 12064; RV32ZVE32F-NEXT: vmv.x.s a0, v10 12065; RV32ZVE32F-NEXT: fsd fa3, 0(a0) 12066; RV32ZVE32F-NEXT: andi a0, a1, 16 12067; RV32ZVE32F-NEXT: beqz a0, .LBB100_5 12068; RV32ZVE32F-NEXT: .LBB100_13: # %cond.store7 12069; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12070; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12071; RV32ZVE32F-NEXT: vmv.x.s a0, v10 12072; RV32ZVE32F-NEXT: fsd fa4, 0(a0) 12073; RV32ZVE32F-NEXT: andi a0, a1, 32 12074; RV32ZVE32F-NEXT: beqz a0, .LBB100_6 12075; RV32ZVE32F-NEXT: .LBB100_14: # %cond.store9 12076; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12077; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 12078; RV32ZVE32F-NEXT: vmv.x.s a0, v10 12079; RV32ZVE32F-NEXT: fsd fa5, 0(a0) 12080; RV32ZVE32F-NEXT: andi a0, a1, 64 12081; RV32ZVE32F-NEXT: beqz a0, .LBB100_7 12082; RV32ZVE32F-NEXT: .LBB100_15: # %cond.store11 12083; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12084; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 12085; RV32ZVE32F-NEXT: vmv.x.s a0, v10 12086; RV32ZVE32F-NEXT: fsd fa6, 0(a0) 12087; RV32ZVE32F-NEXT: andi a0, a1, -128 12088; RV32ZVE32F-NEXT: beqz a0, .LBB100_8 12089; RV32ZVE32F-NEXT: .LBB100_16: # %cond.store13 12090; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12091; 
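; Note: with i64 indices on RV32 only the low word of each index is loaded
; (the lw sequence from 0(a1) through 56(a1) above); the high words can be
; ignored because ilp32 pointers are 32 bits wide.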
RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 12092; RV32ZVE32F-NEXT: vmv.x.s a0, v8 12093; RV32ZVE32F-NEXT: fsd fa7, 0(a0) 12094; RV32ZVE32F-NEXT: ret 12095; 12096; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64: 12097; RV64ZVE32F: # %bb.0: 12098; RV64ZVE32F-NEXT: ld t1, 8(a1) 12099; RV64ZVE32F-NEXT: ld t0, 16(a1) 12100; RV64ZVE32F-NEXT: ld a7, 24(a1) 12101; RV64ZVE32F-NEXT: ld a6, 32(a1) 12102; RV64ZVE32F-NEXT: ld a5, 40(a1) 12103; RV64ZVE32F-NEXT: ld a4, 48(a1) 12104; RV64ZVE32F-NEXT: ld a2, 56(a1) 12105; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12106; RV64ZVE32F-NEXT: vmv.x.s a3, v0 12107; RV64ZVE32F-NEXT: andi t2, a3, 1 12108; RV64ZVE32F-NEXT: bnez t2, .LBB100_9 12109; RV64ZVE32F-NEXT: # %bb.1: # %else 12110; RV64ZVE32F-NEXT: andi a1, a3, 2 12111; RV64ZVE32F-NEXT: bnez a1, .LBB100_10 12112; RV64ZVE32F-NEXT: .LBB100_2: # %else2 12113; RV64ZVE32F-NEXT: andi a1, a3, 4 12114; RV64ZVE32F-NEXT: bnez a1, .LBB100_11 12115; RV64ZVE32F-NEXT: .LBB100_3: # %else4 12116; RV64ZVE32F-NEXT: andi a1, a3, 8 12117; RV64ZVE32F-NEXT: bnez a1, .LBB100_12 12118; RV64ZVE32F-NEXT: .LBB100_4: # %else6 12119; RV64ZVE32F-NEXT: andi a1, a3, 16 12120; RV64ZVE32F-NEXT: bnez a1, .LBB100_13 12121; RV64ZVE32F-NEXT: .LBB100_5: # %else8 12122; RV64ZVE32F-NEXT: andi a1, a3, 32 12123; RV64ZVE32F-NEXT: bnez a1, .LBB100_14 12124; RV64ZVE32F-NEXT: .LBB100_6: # %else10 12125; RV64ZVE32F-NEXT: andi a1, a3, 64 12126; RV64ZVE32F-NEXT: bnez a1, .LBB100_15 12127; RV64ZVE32F-NEXT: .LBB100_7: # %else12 12128; RV64ZVE32F-NEXT: andi a1, a3, -128 12129; RV64ZVE32F-NEXT: bnez a1, .LBB100_16 12130; RV64ZVE32F-NEXT: .LBB100_8: # %else14 12131; RV64ZVE32F-NEXT: ret 12132; RV64ZVE32F-NEXT: .LBB100_9: # %cond.store 12133; RV64ZVE32F-NEXT: ld a1, 0(a1) 12134; RV64ZVE32F-NEXT: slli a1, a1, 3 12135; RV64ZVE32F-NEXT: add a1, a0, a1 12136; RV64ZVE32F-NEXT: fsd fa0, 0(a1) 12137; RV64ZVE32F-NEXT: andi a1, a3, 2 12138; RV64ZVE32F-NEXT: beqz a1, .LBB100_2 12139; RV64ZVE32F-NEXT: .LBB100_10: # %cond.store1 12140; RV64ZVE32F-NEXT: slli t1, t1, 3 12141; RV64ZVE32F-NEXT: add t1, a0, t1 12142; RV64ZVE32F-NEXT: fsd fa1, 0(t1) 12143; RV64ZVE32F-NEXT: andi a1, a3, 4 12144; RV64ZVE32F-NEXT: beqz a1, .LBB100_3 12145; RV64ZVE32F-NEXT: .LBB100_11: # %cond.store3 12146; RV64ZVE32F-NEXT: slli t0, t0, 3 12147; RV64ZVE32F-NEXT: add t0, a0, t0 12148; RV64ZVE32F-NEXT: fsd fa2, 0(t0) 12149; RV64ZVE32F-NEXT: andi a1, a3, 8 12150; RV64ZVE32F-NEXT: beqz a1, .LBB100_4 12151; RV64ZVE32F-NEXT: .LBB100_12: # %cond.store5 12152; RV64ZVE32F-NEXT: slli a7, a7, 3 12153; RV64ZVE32F-NEXT: add a7, a0, a7 12154; RV64ZVE32F-NEXT: fsd fa3, 0(a7) 12155; RV64ZVE32F-NEXT: andi a1, a3, 16 12156; RV64ZVE32F-NEXT: beqz a1, .LBB100_5 12157; RV64ZVE32F-NEXT: .LBB100_13: # %cond.store7 12158; RV64ZVE32F-NEXT: slli a6, a6, 3 12159; RV64ZVE32F-NEXT: add a6, a0, a6 12160; RV64ZVE32F-NEXT: fsd fa4, 0(a6) 12161; RV64ZVE32F-NEXT: andi a1, a3, 32 12162; RV64ZVE32F-NEXT: beqz a1, .LBB100_6 12163; RV64ZVE32F-NEXT: .LBB100_14: # %cond.store9 12164; RV64ZVE32F-NEXT: slli a5, a5, 3 12165; RV64ZVE32F-NEXT: add a5, a0, a5 12166; RV64ZVE32F-NEXT: fsd fa5, 0(a5) 12167; RV64ZVE32F-NEXT: andi a1, a3, 64 12168; RV64ZVE32F-NEXT: beqz a1, .LBB100_7 12169; RV64ZVE32F-NEXT: .LBB100_15: # %cond.store11 12170; RV64ZVE32F-NEXT: slli a4, a4, 3 12171; RV64ZVE32F-NEXT: add a4, a0, a4 12172; RV64ZVE32F-NEXT: fsd fa6, 0(a4) 12173; RV64ZVE32F-NEXT: andi a1, a3, -128 12174; RV64ZVE32F-NEXT: beqz a1, .LBB100_8 12175; RV64ZVE32F-NEXT: .LBB100_16: # %cond.store13 12176; RV64ZVE32F-NEXT: slli a2, a2, 3 12177; RV64ZVE32F-NEXT: add 
a0, a0, a2 12178; RV64ZVE32F-NEXT: fsd fa7, 0(a0) 12179; RV64ZVE32F-NEXT: ret 12180 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs 12181 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m) 12182 ret void 12183} 12184 12185declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>) 12186 12187define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, <16 x i1> %m) { 12188; RV32-LABEL: mscatter_baseidx_v16i8: 12189; RV32: # %bb.0: 12190; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 12191; RV32-NEXT: vsext.vf4 v12, v9 12192; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma 12193; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 12194; RV32-NEXT: ret 12195; 12196; RV64V-LABEL: mscatter_baseidx_v16i8: 12197; RV64V: # %bb.0: 12198; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma 12199; RV64V-NEXT: vsext.vf8 v16, v9 12200; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma 12201; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t 12202; RV64V-NEXT: ret 12203; 12204; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8: 12205; RV64ZVE32F: # %bb.0: 12206; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 12207; RV64ZVE32F-NEXT: vmv.x.s a1, v0 12208; RV64ZVE32F-NEXT: andi a2, a1, 1 12209; RV64ZVE32F-NEXT: beqz a2, .LBB101_2 12210; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 12211; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12212; RV64ZVE32F-NEXT: vmv.x.s a2, v9 12213; RV64ZVE32F-NEXT: add a2, a0, a2 12214; RV64ZVE32F-NEXT: vse8.v v8, (a2) 12215; RV64ZVE32F-NEXT: .LBB101_2: # %else 12216; RV64ZVE32F-NEXT: andi a2, a1, 2 12217; RV64ZVE32F-NEXT: beqz a2, .LBB101_4 12218; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 12219; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12220; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 12221; RV64ZVE32F-NEXT: vmv.x.s a2, v10 12222; RV64ZVE32F-NEXT: add a2, a0, a2 12223; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12224; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12225; RV64ZVE32F-NEXT: vse8.v v10, (a2) 12226; RV64ZVE32F-NEXT: .LBB101_4: # %else2 12227; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 12228; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 12229; RV64ZVE32F-NEXT: andi a2, a1, 4 12230; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12231; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 12232; RV64ZVE32F-NEXT: bnez a2, .LBB101_25 12233; RV64ZVE32F-NEXT: # %bb.5: # %else4 12234; RV64ZVE32F-NEXT: andi a2, a1, 8 12235; RV64ZVE32F-NEXT: bnez a2, .LBB101_26 12236; RV64ZVE32F-NEXT: .LBB101_6: # %else6 12237; RV64ZVE32F-NEXT: andi a2, a1, 16 12238; RV64ZVE32F-NEXT: beqz a2, .LBB101_8 12239; RV64ZVE32F-NEXT: .LBB101_7: # %cond.store7 12240; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12241; RV64ZVE32F-NEXT: vmv.x.s a2, v10 12242; RV64ZVE32F-NEXT: add a2, a0, a2 12243; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4 12244; RV64ZVE32F-NEXT: vse8.v v11, (a2) 12245; RV64ZVE32F-NEXT: .LBB101_8: # %else8 12246; RV64ZVE32F-NEXT: andi a2, a1, 32 12247; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma 12248; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8 12249; RV64ZVE32F-NEXT: beqz a2, .LBB101_10 12250; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 12251; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12252; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 12253; RV64ZVE32F-NEXT: vmv.x.s a2, v11 12254; RV64ZVE32F-NEXT: add a2, a0, a2 12255; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12256; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 12257; RV64ZVE32F-NEXT: vse8.v v11, (a2) 12258; RV64ZVE32F-NEXT: 
.LBB101_10: # %else10 12259; RV64ZVE32F-NEXT: andi a2, a1, 64 12260; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12261; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 12262; RV64ZVE32F-NEXT: bnez a2, .LBB101_27 12263; RV64ZVE32F-NEXT: # %bb.11: # %else12 12264; RV64ZVE32F-NEXT: andi a2, a1, 128 12265; RV64ZVE32F-NEXT: bnez a2, .LBB101_28 12266; RV64ZVE32F-NEXT: .LBB101_12: # %else14 12267; RV64ZVE32F-NEXT: andi a2, a1, 256 12268; RV64ZVE32F-NEXT: bnez a2, .LBB101_29 12269; RV64ZVE32F-NEXT: .LBB101_13: # %else16 12270; RV64ZVE32F-NEXT: andi a2, a1, 512 12271; RV64ZVE32F-NEXT: beqz a2, .LBB101_15 12272; RV64ZVE32F-NEXT: .LBB101_14: # %cond.store17 12273; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12274; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 12275; RV64ZVE32F-NEXT: vmv.x.s a2, v10 12276; RV64ZVE32F-NEXT: add a2, a0, a2 12277; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12278; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9 12279; RV64ZVE32F-NEXT: vse8.v v10, (a2) 12280; RV64ZVE32F-NEXT: .LBB101_15: # %else18 12281; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 12282; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 12283; RV64ZVE32F-NEXT: andi a2, a1, 1024 12284; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12285; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 12286; RV64ZVE32F-NEXT: bnez a2, .LBB101_30 12287; RV64ZVE32F-NEXT: # %bb.16: # %else20 12288; RV64ZVE32F-NEXT: slli a2, a1, 52 12289; RV64ZVE32F-NEXT: bltz a2, .LBB101_31 12290; RV64ZVE32F-NEXT: .LBB101_17: # %else22 12291; RV64ZVE32F-NEXT: slli a2, a1, 51 12292; RV64ZVE32F-NEXT: bltz a2, .LBB101_32 12293; RV64ZVE32F-NEXT: .LBB101_18: # %else24 12294; RV64ZVE32F-NEXT: slli a2, a1, 50 12295; RV64ZVE32F-NEXT: bgez a2, .LBB101_20 12296; RV64ZVE32F-NEXT: .LBB101_19: # %cond.store25 12297; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12298; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1 12299; RV64ZVE32F-NEXT: vmv.x.s a2, v9 12300; RV64ZVE32F-NEXT: add a2, a0, a2 12301; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12302; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13 12303; RV64ZVE32F-NEXT: vse8.v v9, (a2) 12304; RV64ZVE32F-NEXT: .LBB101_20: # %else26 12305; RV64ZVE32F-NEXT: slli a2, a1, 49 12306; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12307; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 12308; RV64ZVE32F-NEXT: bgez a2, .LBB101_22 12309; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27 12310; RV64ZVE32F-NEXT: vmv.x.s a2, v9 12311; RV64ZVE32F-NEXT: add a2, a0, a2 12312; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12313; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14 12314; RV64ZVE32F-NEXT: vse8.v v10, (a2) 12315; RV64ZVE32F-NEXT: .LBB101_22: # %else28 12316; RV64ZVE32F-NEXT: lui a2, 1048568 12317; RV64ZVE32F-NEXT: and a1, a1, a2 12318; RV64ZVE32F-NEXT: beqz a1, .LBB101_24 12319; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29 12320; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12321; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 12322; RV64ZVE32F-NEXT: vmv.x.s a1, v9 12323; RV64ZVE32F-NEXT: add a0, a0, a1 12324; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12325; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15 12326; RV64ZVE32F-NEXT: vse8.v v8, (a0) 12327; RV64ZVE32F-NEXT: .LBB101_24: # %else30 12328; RV64ZVE32F-NEXT: ret 12329; RV64ZVE32F-NEXT: .LBB101_25: # %cond.store3 12330; RV64ZVE32F-NEXT: vmv.x.s a2, v11 12331; RV64ZVE32F-NEXT: add a2, a0, a2 12332; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12333; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 12334; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12335; RV64ZVE32F-NEXT: andi a2, 
a1, 8 12336; RV64ZVE32F-NEXT: beqz a2, .LBB101_6 12337; RV64ZVE32F-NEXT: .LBB101_26: # %cond.store5 12338; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12339; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 12340; RV64ZVE32F-NEXT: vmv.x.s a2, v11 12341; RV64ZVE32F-NEXT: add a2, a0, a2 12342; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12343; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3 12344; RV64ZVE32F-NEXT: vse8.v v11, (a2) 12345; RV64ZVE32F-NEXT: andi a2, a1, 16 12346; RV64ZVE32F-NEXT: bnez a2, .LBB101_7 12347; RV64ZVE32F-NEXT: j .LBB101_8 12348; RV64ZVE32F-NEXT: .LBB101_27: # %cond.store11 12349; RV64ZVE32F-NEXT: vmv.x.s a2, v10 12350; RV64ZVE32F-NEXT: add a2, a0, a2 12351; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12352; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6 12353; RV64ZVE32F-NEXT: vse8.v v11, (a2) 12354; RV64ZVE32F-NEXT: andi a2, a1, 128 12355; RV64ZVE32F-NEXT: beqz a2, .LBB101_12 12356; RV64ZVE32F-NEXT: .LBB101_28: # %cond.store13 12357; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12358; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 12359; RV64ZVE32F-NEXT: vmv.x.s a2, v10 12360; RV64ZVE32F-NEXT: add a2, a0, a2 12361; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12362; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7 12363; RV64ZVE32F-NEXT: vse8.v v10, (a2) 12364; RV64ZVE32F-NEXT: andi a2, a1, 256 12365; RV64ZVE32F-NEXT: beqz a2, .LBB101_13 12366; RV64ZVE32F-NEXT: .LBB101_29: # %cond.store15 12367; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12368; RV64ZVE32F-NEXT: vmv.x.s a2, v9 12369; RV64ZVE32F-NEXT: add a2, a0, a2 12370; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8 12371; RV64ZVE32F-NEXT: vse8.v v10, (a2) 12372; RV64ZVE32F-NEXT: andi a2, a1, 512 12373; RV64ZVE32F-NEXT: bnez a2, .LBB101_14 12374; RV64ZVE32F-NEXT: j .LBB101_15 12375; RV64ZVE32F-NEXT: .LBB101_30: # %cond.store19 12376; RV64ZVE32F-NEXT: vmv.x.s a2, v9 12377; RV64ZVE32F-NEXT: add a2, a0, a2 12378; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12379; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10 12380; RV64ZVE32F-NEXT: vse8.v v11, (a2) 12381; RV64ZVE32F-NEXT: slli a2, a1, 52 12382; RV64ZVE32F-NEXT: bgez a2, .LBB101_17 12383; RV64ZVE32F-NEXT: .LBB101_31: # %cond.store21 12384; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12385; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 12386; RV64ZVE32F-NEXT: vmv.x.s a2, v9 12387; RV64ZVE32F-NEXT: add a2, a0, a2 12388; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12389; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11 12390; RV64ZVE32F-NEXT: vse8.v v9, (a2) 12391; RV64ZVE32F-NEXT: slli a2, a1, 51 12392; RV64ZVE32F-NEXT: bgez a2, .LBB101_18 12393; RV64ZVE32F-NEXT: .LBB101_32: # %cond.store23 12394; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12395; RV64ZVE32F-NEXT: vmv.x.s a2, v10 12396; RV64ZVE32F-NEXT: add a2, a0, a2 12397; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12 12398; RV64ZVE32F-NEXT: vse8.v v9, (a2) 12399; RV64ZVE32F-NEXT: slli a2, a1, 50 12400; RV64ZVE32F-NEXT: bltz a2, .LBB101_19 12401; RV64ZVE32F-NEXT: j .LBB101_20 12402 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs 12403 call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m) 12404 ret void 12405} 12406 12407declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>) 12408 12409define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, <32 x i1> %m) { 12410; RV32-LABEL: mscatter_baseidx_v32i8: 12411; RV32: # %bb.0: 12412; RV32-NEXT: li a1, 32 12413; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma 
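; Note: for 32 elements RV32 still emits a single indexed store: the i8
; indices are sign-extended to i32 in one vsext.vf4 under e32/m8, and one
; masked vsoxei32.v covers all lanes. The RV64V lowering below instead
; processes two halves of 16, since sixteen e64 offsets already fill an m8
; register group at the minimum VLEN of 128.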
12414; RV32-NEXT: vsext.vf4 v16, v10 12415; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma 12416; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 12417; RV32-NEXT: ret 12418; 12419; RV64V-LABEL: mscatter_baseidx_v32i8: 12420; RV64V: # %bb.0: 12421; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma 12422; RV64V-NEXT: vsext.vf8 v16, v10 12423; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma 12424; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t 12425; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma 12426; RV64V-NEXT: vslidedown.vi v8, v8, 16 12427; RV64V-NEXT: vslidedown.vi v10, v10, 16 12428; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12429; RV64V-NEXT: vslidedown.vi v0, v0, 2 12430; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma 12431; RV64V-NEXT: vsext.vf8 v16, v10 12432; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma 12433; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t 12434; RV64V-NEXT: ret 12435; 12436; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8: 12437; RV64ZVE32F: # %bb.0: 12438; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12439; RV64ZVE32F-NEXT: vmv.x.s a1, v0 12440; RV64ZVE32F-NEXT: andi a2, a1, 1 12441; RV64ZVE32F-NEXT: beqz a2, .LBB102_2 12442; RV64ZVE32F-NEXT: # %bb.1: # %cond.store 12443; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12444; RV64ZVE32F-NEXT: vmv.x.s a2, v10 12445; RV64ZVE32F-NEXT: add a2, a0, a2 12446; RV64ZVE32F-NEXT: vse8.v v8, (a2) 12447; RV64ZVE32F-NEXT: .LBB102_2: # %else 12448; RV64ZVE32F-NEXT: andi a2, a1, 2 12449; RV64ZVE32F-NEXT: beqz a2, .LBB102_4 12450; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 12451; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12452; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 12453; RV64ZVE32F-NEXT: vmv.x.s a2, v12 12454; RV64ZVE32F-NEXT: add a2, a0, a2 12455; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12456; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 12457; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12458; RV64ZVE32F-NEXT: .LBB102_4: # %else2 12459; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 12460; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4 12461; RV64ZVE32F-NEXT: andi a2, a1, 4 12462; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12463; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 12464; RV64ZVE32F-NEXT: bnez a2, .LBB102_49 12465; RV64ZVE32F-NEXT: # %bb.5: # %else4 12466; RV64ZVE32F-NEXT: andi a2, a1, 8 12467; RV64ZVE32F-NEXT: bnez a2, .LBB102_50 12468; RV64ZVE32F-NEXT: .LBB102_6: # %else6 12469; RV64ZVE32F-NEXT: andi a2, a1, 16 12470; RV64ZVE32F-NEXT: beqz a2, .LBB102_8 12471; RV64ZVE32F-NEXT: .LBB102_7: # %cond.store7 12472; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12473; RV64ZVE32F-NEXT: vmv.x.s a2, v13 12474; RV64ZVE32F-NEXT: add a2, a0, a2 12475; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 12476; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12477; RV64ZVE32F-NEXT: .LBB102_8: # %else8 12478; RV64ZVE32F-NEXT: andi a2, a1, 32 12479; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma 12480; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8 12481; RV64ZVE32F-NEXT: beqz a2, .LBB102_10 12482; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 12483; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12484; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1 12485; RV64ZVE32F-NEXT: vmv.x.s a2, v14 12486; RV64ZVE32F-NEXT: add a2, a0, a2 12487; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12488; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 12489; RV64ZVE32F-NEXT: vse8.v v14, (a2) 12490; RV64ZVE32F-NEXT: .LBB102_10: # %else10 12491; RV64ZVE32F-NEXT: andi a2, a1, 64 12492; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12493; RV64ZVE32F-NEXT: vslidedown.vi 
v13, v13, 2 12494; RV64ZVE32F-NEXT: bnez a2, .LBB102_51 12495; RV64ZVE32F-NEXT: # %bb.11: # %else12 12496; RV64ZVE32F-NEXT: andi a2, a1, 128 12497; RV64ZVE32F-NEXT: bnez a2, .LBB102_52 12498; RV64ZVE32F-NEXT: .LBB102_12: # %else14 12499; RV64ZVE32F-NEXT: andi a2, a1, 256 12500; RV64ZVE32F-NEXT: bnez a2, .LBB102_53 12501; RV64ZVE32F-NEXT: .LBB102_13: # %else16 12502; RV64ZVE32F-NEXT: andi a2, a1, 512 12503; RV64ZVE32F-NEXT: beqz a2, .LBB102_15 12504; RV64ZVE32F-NEXT: .LBB102_14: # %cond.store17 12505; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12506; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 12507; RV64ZVE32F-NEXT: vmv.x.s a2, v13 12508; RV64ZVE32F-NEXT: add a2, a0, a2 12509; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12510; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 9 12511; RV64ZVE32F-NEXT: vse8.v v13, (a2) 12512; RV64ZVE32F-NEXT: .LBB102_15: # %else18 12513; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 12514; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 12515; RV64ZVE32F-NEXT: andi a2, a1, 1024 12516; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12517; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 12518; RV64ZVE32F-NEXT: beqz a2, .LBB102_17 12519; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 12520; RV64ZVE32F-NEXT: vmv.x.s a2, v12 12521; RV64ZVE32F-NEXT: add a2, a0, a2 12522; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12523; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10 12524; RV64ZVE32F-NEXT: vse8.v v14, (a2) 12525; RV64ZVE32F-NEXT: .LBB102_17: # %else20 12526; RV64ZVE32F-NEXT: slli a2, a1, 52 12527; RV64ZVE32F-NEXT: bgez a2, .LBB102_19 12528; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21 12529; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12530; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 12531; RV64ZVE32F-NEXT: vmv.x.s a2, v12 12532; RV64ZVE32F-NEXT: add a2, a0, a2 12533; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12534; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11 12535; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12536; RV64ZVE32F-NEXT: .LBB102_19: # %else22 12537; RV64ZVE32F-NEXT: slli a2, a1, 51 12538; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma 12539; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16 12540; RV64ZVE32F-NEXT: bgez a2, .LBB102_21 12541; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23 12542; RV64ZVE32F-NEXT: vmv.x.s a2, v13 12543; RV64ZVE32F-NEXT: add a2, a0, a2 12544; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12545; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 12 12546; RV64ZVE32F-NEXT: vse8.v v11, (a2) 12547; RV64ZVE32F-NEXT: .LBB102_21: # %else24 12548; RV64ZVE32F-NEXT: slli a2, a1, 50 12549; RV64ZVE32F-NEXT: bgez a2, .LBB102_23 12550; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25 12551; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12552; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 1 12553; RV64ZVE32F-NEXT: vmv.x.s a2, v11 12554; RV64ZVE32F-NEXT: add a2, a0, a2 12555; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12556; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13 12557; RV64ZVE32F-NEXT: vse8.v v11, (a2) 12558; RV64ZVE32F-NEXT: .LBB102_23: # %else26 12559; RV64ZVE32F-NEXT: slli a2, a1, 49 12560; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12561; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2 12562; RV64ZVE32F-NEXT: bltz a2, .LBB102_54 12563; RV64ZVE32F-NEXT: # %bb.24: # %else28 12564; RV64ZVE32F-NEXT: slli a2, a1, 48 12565; RV64ZVE32F-NEXT: bltz a2, .LBB102_55 12566; RV64ZVE32F-NEXT: .LBB102_25: # %else30 12567; RV64ZVE32F-NEXT: slli a2, a1, 47 12568; RV64ZVE32F-NEXT: bltz a2, .LBB102_56 12569; RV64ZVE32F-NEXT: .LBB102_26: # %else32 12570; 
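; Note: from mask bit 11 onward the lowering switches from andi to
; slli/bltz (or bgez): shifting mask bit n into the sign bit sidesteps
; andi's signed 12-bit immediate limit.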
RV64ZVE32F-NEXT: slli a2, a1, 46 12571; RV64ZVE32F-NEXT: bgez a2, .LBB102_28 12572; RV64ZVE32F-NEXT: .LBB102_27: # %cond.store33 12573; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12574; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 12575; RV64ZVE32F-NEXT: vmv.x.s a2, v11 12576; RV64ZVE32F-NEXT: add a2, a0, a2 12577; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma 12578; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17 12579; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12580; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12581; RV64ZVE32F-NEXT: .LBB102_28: # %else34 12582; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 12583; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 12584; RV64ZVE32F-NEXT: slli a2, a1, 45 12585; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12586; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 12587; RV64ZVE32F-NEXT: bltz a2, .LBB102_57 12588; RV64ZVE32F-NEXT: # %bb.29: # %else36 12589; RV64ZVE32F-NEXT: slli a2, a1, 44 12590; RV64ZVE32F-NEXT: bltz a2, .LBB102_58 12591; RV64ZVE32F-NEXT: .LBB102_30: # %else38 12592; RV64ZVE32F-NEXT: slli a2, a1, 43 12593; RV64ZVE32F-NEXT: bgez a2, .LBB102_32 12594; RV64ZVE32F-NEXT: .LBB102_31: # %cond.store39 12595; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma 12596; RV64ZVE32F-NEXT: vmv.x.s a2, v11 12597; RV64ZVE32F-NEXT: add a2, a0, a2 12598; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20 12599; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12600; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12601; RV64ZVE32F-NEXT: .LBB102_32: # %else40 12602; RV64ZVE32F-NEXT: slli a2, a1, 42 12603; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma 12604; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8 12605; RV64ZVE32F-NEXT: bgez a2, .LBB102_34 12606; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41 12607; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12608; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1 12609; RV64ZVE32F-NEXT: vmv.x.s a2, v12 12610; RV64ZVE32F-NEXT: add a2, a0, a2 12611; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma 12612; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21 12613; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12614; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12615; RV64ZVE32F-NEXT: .LBB102_34: # %else42 12616; RV64ZVE32F-NEXT: slli a2, a1, 41 12617; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12618; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2 12619; RV64ZVE32F-NEXT: bltz a2, .LBB102_59 12620; RV64ZVE32F-NEXT: # %bb.35: # %else44 12621; RV64ZVE32F-NEXT: slli a2, a1, 40 12622; RV64ZVE32F-NEXT: bltz a2, .LBB102_60 12623; RV64ZVE32F-NEXT: .LBB102_36: # %else46 12624; RV64ZVE32F-NEXT: slli a2, a1, 39 12625; RV64ZVE32F-NEXT: bltz a2, .LBB102_61 12626; RV64ZVE32F-NEXT: .LBB102_37: # %else48 12627; RV64ZVE32F-NEXT: slli a2, a1, 38 12628; RV64ZVE32F-NEXT: bgez a2, .LBB102_39 12629; RV64ZVE32F-NEXT: .LBB102_38: # %cond.store49 12630; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 12631; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 12632; RV64ZVE32F-NEXT: vmv.x.s a2, v11 12633; RV64ZVE32F-NEXT: add a2, a0, a2 12634; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma 12635; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25 12636; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12637; RV64ZVE32F-NEXT: vse8.v v12, (a2) 12638; RV64ZVE32F-NEXT: .LBB102_39: # %else50 12639; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 12640; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 12641; RV64ZVE32F-NEXT: slli a2, a1, 37 12642; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 12643; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 12644; RV64ZVE32F-NEXT: bltz a2, 
.LBB102_62
; RV64ZVE32F-NEXT: # %bb.40: # %else52
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bltz a2, .LBB102_63
; RV64ZVE32F-NEXT: .LBB102_41: # %else54
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB102_64
; RV64ZVE32F-NEXT: .LBB102_42: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB102_44
; RV64ZVE32F-NEXT: .LBB102_43: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_44: # %else58
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB102_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_46: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB102_48
; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB102_48: # %else62
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB102_49: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB102_6
; RV64ZVE32F-NEXT: .LBB102_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB102_7
; RV64ZVE32F-NEXT: j .LBB102_8
; RV64ZVE32F-NEXT: .LBB102_51: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB102_12
; RV64ZVE32F-NEXT: .LBB102_52: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB102_13
; RV64ZVE32F-NEXT: .LBB102_53: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB102_14
; RV64ZVE32F-NEXT: j .LBB102_15
; RV64ZVE32F-NEXT: .LBB102_54: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB102_25
; RV64ZVE32F-NEXT: .LBB102_55: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB102_26
; RV64ZVE32F-NEXT: .LBB102_56: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB102_27
; RV64ZVE32F-NEXT: j .LBB102_28
; RV64ZVE32F-NEXT: .LBB102_57: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bgez a2, .LBB102_30
; RV64ZVE32F-NEXT: .LBB102_58: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB102_31
; RV64ZVE32F-NEXT: j .LBB102_32
; RV64ZVE32F-NEXT: .LBB102_59: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB102_36
; RV64ZVE32F-NEXT: .LBB102_60: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB102_37
; RV64ZVE32F-NEXT: .LBB102_61: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB102_38
; RV64ZVE32F-NEXT: j .LBB102_39
; RV64ZVE32F-NEXT: .LBB102_62: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bgez a2, .LBB102_41
; RV64ZVE32F-NEXT: .LBB102_63: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB102_42
; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB102_43
; RV64ZVE32F-NEXT: j .LBB102_44
  %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
  call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
  ret void
}

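; The two tests below use GEP indices that form the consecutive sequence
; <0..7> (optionally shifted by a constant), so the scatter is really a
; unit-stride store: it should lower to a single vse16.v, with any constant
; byte offset (5 * 2 = 10 in the second test) folded into the base pointer.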
define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}

define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 10
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}

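; Reversed indices <7..0> describe a negative-stride access: the scatter
; should lower to a strided store (vsse16.v) with stride -2, starting at
; base + 14, the address of element 7.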
define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_shuffle_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 14
; CHECK-NEXT: li a1, -2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vsse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}

define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
; RV32-LABEL: mscatter_shuffle_rotate:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 4
; RV32-NEXT: vslideup.vi v9, v8, 4
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_shuffle_rotate:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vslidedown.vi v9, v8, 4
; RV64V-NEXT: vslideup.vi v9, v8, 4
; RV64V-NEXT: vse16.v v9, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_shuffle_rotate:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 6
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: addi a3, a0, 2
; RV64ZVE32F-NEXT: addi a4, a0, 14
; RV64ZVE32F-NEXT: addi a5, a0, 12
; RV64ZVE32F-NEXT: addi a6, a0, 10
; RV64ZVE32F-NEXT: addi a7, a0, 8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v8, (a7)
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a4)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a0)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32V-ZVFH: {{.*}}
; RV32V-ZVFHMIN: {{.*}}
; RV32ZVE32F-ZVFH: {{.*}}
; RV32ZVE32F-ZVFHMIN: {{.*}}
; RV64: {{.*}}
; RV64V-ZVFH: {{.*}}
; RV64V-ZVFHMIN: {{.*}}