; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN

; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN

declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)

define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru) {
; RV32V-LABEL: mgather_v1i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vle8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
  ret <1 x i8> %v
}

declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)

define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
; RV64ZVE32F-NEXT: .LBB1_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  ret <2 x i8> %v
}

define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB2_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB2_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB2_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB2_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB3_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB3_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB3_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB3_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB4_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB4_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB4_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB4_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB5_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB5_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB5_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB5_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf8 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf8 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: srai a3, a1, 31
; RV32ZVE32F-NEXT: srai a4, a2, 31
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB6_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB6_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB6_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB6_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf8 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf8 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: andi a1, a1, 255
; RV32ZVE32F-NEXT: andi a2, a2, 255
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 8(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB7_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB7_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB7_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB7_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)

define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) {
; RV32-LABEL: mgather_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB8_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB8_8
; RV64ZVE32F-NEXT: .LBB8_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %m, <4 x i8> %passthru)
  ret <4 x i8> %v
}
define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a1, 0(a1)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: lbu a3, 0(a3)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.v.x v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru)
  ret <4 x i8> %v
}

define <4 x i8> @mgather_falsemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer, <4 x i8> %passthru)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)

define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB11_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_10
; RV64ZVE32F-NEXT: .LBB11_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB11_11
; RV64ZVE32F-NEXT: .LBB11_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB11_12
; RV64ZVE32F-NEXT: .LBB11_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB11_13
; RV64ZVE32F-NEXT: .LBB11_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB11_14
; RV64ZVE32F-NEXT: .LBB11_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB11_15
; RV64ZVE32F-NEXT: .LBB11_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB11_16
; RV64ZVE32F-NEXT: .LBB11_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB11_3
; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB11_4
; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB11_5
; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB11_6
; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB11_7
; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB11_8
; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
  %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
  ret <8 x i8> %v
}

define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB12_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB12_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB12_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB12_15
; RV64ZVE32F-NEXT: .LBB12_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB12_16
; RV64ZVE32F-NEXT: .LBB12_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB12_9
; RV64ZVE32F-NEXT: .LBB12_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB12_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB12_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: .LBB12_11: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB12_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: .LBB12_13: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB12_6
; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB12_7
; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB12_8
; RV64ZVE32F-NEXT: j .LBB12_9
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
  ret <8 x i8> %v
}

declare <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i16>)

define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthru) {
; RV32V-LABEL: mgather_v1i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB13_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
  ret <1 x i16> %v
}

declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)

define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB14_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB14_4
; RV64ZVE32F-NEXT: .LBB14_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  ret <2 x i16> %v
}

define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB15_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB15_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB15_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB15_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = sext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB16_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB16_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB16_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB16_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = zext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: srai a3, a1, 31
; RV32ZVE32F-NEXT: srai a4, a2, 31
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB17_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB17_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB17_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB17_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = sext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lui a1, 16
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: addi a1, a1, -1
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: and a2, a2, a1
; RV32ZVE32F-NEXT: and a1, a3, a1
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB18_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB18_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB18_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a0, a0, a1
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = zext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)

define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthru) {
; RV32-LABEL: mgather_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB19_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB19_7
; RV64ZVE32F-NEXT: .LBB19_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB19_8
; RV64ZVE32F-NEXT: .LBB19_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB19_3
; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB19_4
; RV64ZVE32F-NEXT: .LBB19_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x i16> %passthru)
  ret <4 x i16> %v
}

define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a1, 0(a1)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: lh a3, 0(a3)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.v.x v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru)
  ret <4 x i16> %v
}

define <4 x i16> @mgather_falsemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x i16> %passthru)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)

define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB22_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB22_10
; RV64ZVE32F-NEXT: .LBB22_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB22_11
; RV64ZVE32F-NEXT: .LBB22_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB22_12
; RV64ZVE32F-NEXT: .LBB22_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB22_13
; RV64ZVE32F-NEXT: .LBB22_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB22_14
; RV64ZVE32F-NEXT: .LBB22_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB22_15
; RV64ZVE32F-NEXT: .LBB22_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB22_16
; RV64ZVE32F-NEXT: .LBB22_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB22_3
; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB22_4
; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB22_5
; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB22_6
; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB22_7
; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB22_8
; RV64ZVE32F-NEXT: .LBB22_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
  ret <8 x i16> %v
}

define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB23_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB23_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB23_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB23_15
; RV64ZVE32F-NEXT: .LBB23_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB23_16
; RV64ZVE32F-NEXT: .LBB23_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB23_9
; RV64ZVE32F-NEXT: .LBB23_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB23_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB23_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: .LBB23_11: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB23_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: .LBB23_13: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
RV64ZVE32F-NEXT: slli a2, a2, 1 1495; RV64ZVE32F-NEXT: add a2, a0, a2 1496; RV64ZVE32F-NEXT: lh a2, 0(a2) 1497; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1498; RV64ZVE32F-NEXT: vmv.s.x v11, a2 1499; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 1500; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 1501; RV64ZVE32F-NEXT: andi a2, a1, 8 1502; RV64ZVE32F-NEXT: beqz a2, .LBB23_6 1503; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load7 1504; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1505; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1506; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1507; RV64ZVE32F-NEXT: slli a2, a2, 1 1508; RV64ZVE32F-NEXT: add a2, a0, a2 1509; RV64ZVE32F-NEXT: lh a2, 0(a2) 1510; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1511; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1512; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 1513; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 1514; RV64ZVE32F-NEXT: andi a2, a1, 16 1515; RV64ZVE32F-NEXT: beqz a2, .LBB23_7 1516; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load10 1517; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1518; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1519; RV64ZVE32F-NEXT: slli a2, a2, 1 1520; RV64ZVE32F-NEXT: add a2, a0, a2 1521; RV64ZVE32F-NEXT: lh a2, 0(a2) 1522; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1523; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1524; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 1525; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 1526; RV64ZVE32F-NEXT: andi a2, a1, 32 1527; RV64ZVE32F-NEXT: bnez a2, .LBB23_8 1528; RV64ZVE32F-NEXT: j .LBB23_9 1529 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs 1530 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) 1531 ret <8 x i16> %v 1532} 1533 1534define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) { 1535; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i16: 1536; RV32: # %bb.0: 1537; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1538; RV32-NEXT: vsext.vf4 v10, v8 1539; RV32-NEXT: vadd.vv v10, v10, v10 1540; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 1541; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 1542; RV32-NEXT: vmv.v.v v8, v9 1543; RV32-NEXT: ret 1544; 1545; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i16: 1546; RV64V: # %bb.0: 1547; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1548; RV64V-NEXT: vsext.vf8 v12, v8 1549; RV64V-NEXT: vadd.vv v12, v12, v12 1550; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 1551; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t 1552; RV64V-NEXT: vmv.v.v v8, v9 1553; RV64V-NEXT: ret 1554; 1555; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i16: 1556; RV64ZVE32F: # %bb.0: 1557; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1558; RV64ZVE32F-NEXT: vmv.x.s a1, v0 1559; RV64ZVE32F-NEXT: andi a2, a1, 1 1560; RV64ZVE32F-NEXT: beqz a2, .LBB24_2 1561; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 1562; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1563; RV64ZVE32F-NEXT: slli a2, a2, 1 1564; RV64ZVE32F-NEXT: add a2, a0, a2 1565; RV64ZVE32F-NEXT: lh a2, 0(a2) 1566; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 1567; RV64ZVE32F-NEXT: vmv.s.x v9, a2 1568; RV64ZVE32F-NEXT: .LBB24_2: # %else 1569; RV64ZVE32F-NEXT: andi a2, a1, 2 1570; RV64ZVE32F-NEXT: beqz a2, .LBB24_4 1571; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 1572; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1573; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 1574; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1575; RV64ZVE32F-NEXT: slli a2, a2, 1 1576; RV64ZVE32F-NEXT: add a2, a0, a2 1577; RV64ZVE32F-NEXT: lh 
a2, 0(a2) 1578; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1579; RV64ZVE32F-NEXT: vmv.s.x v10, a2 1580; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 1581; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 1582; RV64ZVE32F-NEXT: .LBB24_4: # %else2 1583; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 1584; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 1585; RV64ZVE32F-NEXT: andi a2, a1, 4 1586; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1587; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 1588; RV64ZVE32F-NEXT: bnez a2, .LBB24_14 1589; RV64ZVE32F-NEXT: # %bb.5: # %else5 1590; RV64ZVE32F-NEXT: andi a2, a1, 8 1591; RV64ZVE32F-NEXT: bnez a2, .LBB24_15 1592; RV64ZVE32F-NEXT: .LBB24_6: # %else8 1593; RV64ZVE32F-NEXT: andi a2, a1, 16 1594; RV64ZVE32F-NEXT: bnez a2, .LBB24_16 1595; RV64ZVE32F-NEXT: .LBB24_7: # %else11 1596; RV64ZVE32F-NEXT: andi a2, a1, 32 1597; RV64ZVE32F-NEXT: beqz a2, .LBB24_9 1598; RV64ZVE32F-NEXT: .LBB24_8: # %cond.load13 1599; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1600; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 1601; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1602; RV64ZVE32F-NEXT: slli a2, a2, 1 1603; RV64ZVE32F-NEXT: add a2, a0, a2 1604; RV64ZVE32F-NEXT: lh a2, 0(a2) 1605; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1606; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1607; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 1608; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 1609; RV64ZVE32F-NEXT: .LBB24_9: # %else14 1610; RV64ZVE32F-NEXT: andi a2, a1, 64 1611; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1612; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 1613; RV64ZVE32F-NEXT: beqz a2, .LBB24_11 1614; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 1615; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1616; RV64ZVE32F-NEXT: slli a2, a2, 1 1617; RV64ZVE32F-NEXT: add a2, a0, a2 1618; RV64ZVE32F-NEXT: lh a2, 0(a2) 1619; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1620; RV64ZVE32F-NEXT: vmv.s.x v10, a2 1621; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 1622; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 1623; RV64ZVE32F-NEXT: .LBB24_11: # %else17 1624; RV64ZVE32F-NEXT: andi a1, a1, -128 1625; RV64ZVE32F-NEXT: beqz a1, .LBB24_13 1626; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 1627; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1628; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1629; RV64ZVE32F-NEXT: vmv.x.s a1, v8 1630; RV64ZVE32F-NEXT: slli a1, a1, 1 1631; RV64ZVE32F-NEXT: add a0, a0, a1 1632; RV64ZVE32F-NEXT: lh a0, 0(a0) 1633; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1634; RV64ZVE32F-NEXT: vmv.s.x v8, a0 1635; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1636; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 1637; RV64ZVE32F-NEXT: .LBB24_13: # %else20 1638; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1639; RV64ZVE32F-NEXT: vmv1r.v v8, v9 1640; RV64ZVE32F-NEXT: ret 1641; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load4 1642; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1643; RV64ZVE32F-NEXT: slli a2, a2, 1 1644; RV64ZVE32F-NEXT: add a2, a0, a2 1645; RV64ZVE32F-NEXT: lh a2, 0(a2) 1646; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1647; RV64ZVE32F-NEXT: vmv.s.x v11, a2 1648; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 1649; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 1650; RV64ZVE32F-NEXT: andi a2, a1, 8 1651; RV64ZVE32F-NEXT: beqz a2, .LBB24_6 1652; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load7 1653; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1654; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1655; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1656; RV64ZVE32F-NEXT: slli a2, a2, 1 1657; 
RV64ZVE32F-NEXT: add a2, a0, a2 1658; RV64ZVE32F-NEXT: lh a2, 0(a2) 1659; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1660; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1661; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 1662; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 1663; RV64ZVE32F-NEXT: andi a2, a1, 16 1664; RV64ZVE32F-NEXT: beqz a2, .LBB24_7 1665; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load10 1666; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1667; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1668; RV64ZVE32F-NEXT: slli a2, a2, 1 1669; RV64ZVE32F-NEXT: add a2, a0, a2 1670; RV64ZVE32F-NEXT: lh a2, 0(a2) 1671; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1672; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1673; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 1674; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 1675; RV64ZVE32F-NEXT: andi a2, a1, 32 1676; RV64ZVE32F-NEXT: bnez a2, .LBB24_8 1677; RV64ZVE32F-NEXT: j .LBB24_9 1678 %eidxs = sext <8 x i8> %idxs to <8 x i16> 1679 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs 1680 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) 1681 ret <8 x i16> %v 1682} 1683 1684define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) { 1685; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16: 1686; RV32: # %bb.0: 1687; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma 1688; RV32-NEXT: vwaddu.vv v10, v8, v8 1689; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 1690; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t 1691; RV32-NEXT: vmv.v.v v8, v9 1692; RV32-NEXT: ret 1693; 1694; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16: 1695; RV64V: # %bb.0: 1696; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma 1697; RV64V-NEXT: vwaddu.vv v10, v8, v8 1698; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 1699; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t 1700; RV64V-NEXT: vmv.v.v v8, v9 1701; RV64V-NEXT: ret 1702; 1703; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i16: 1704; RV64ZVE32F: # %bb.0: 1705; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1706; RV64ZVE32F-NEXT: vmv.x.s a1, v0 1707; RV64ZVE32F-NEXT: andi a2, a1, 1 1708; RV64ZVE32F-NEXT: beqz a2, .LBB25_2 1709; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 1710; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1711; RV64ZVE32F-NEXT: andi a2, a2, 255 1712; RV64ZVE32F-NEXT: slli a2, a2, 1 1713; RV64ZVE32F-NEXT: add a2, a0, a2 1714; RV64ZVE32F-NEXT: lh a2, 0(a2) 1715; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 1716; RV64ZVE32F-NEXT: vmv.s.x v9, a2 1717; RV64ZVE32F-NEXT: .LBB25_2: # %else 1718; RV64ZVE32F-NEXT: andi a2, a1, 2 1719; RV64ZVE32F-NEXT: beqz a2, .LBB25_4 1720; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 1721; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1722; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 1723; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1724; RV64ZVE32F-NEXT: andi a2, a2, 255 1725; RV64ZVE32F-NEXT: slli a2, a2, 1 1726; RV64ZVE32F-NEXT: add a2, a0, a2 1727; RV64ZVE32F-NEXT: lh a2, 0(a2) 1728; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1729; RV64ZVE32F-NEXT: vmv.s.x v10, a2 1730; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 1731; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 1732; RV64ZVE32F-NEXT: .LBB25_4: # %else2 1733; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 1734; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 1735; RV64ZVE32F-NEXT: andi a2, a1, 4 1736; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1737; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 1738; RV64ZVE32F-NEXT: bnez a2, .LBB25_14 1739; 
RV64ZVE32F-NEXT: # %bb.5: # %else5 1740; RV64ZVE32F-NEXT: andi a2, a1, 8 1741; RV64ZVE32F-NEXT: bnez a2, .LBB25_15 1742; RV64ZVE32F-NEXT: .LBB25_6: # %else8 1743; RV64ZVE32F-NEXT: andi a2, a1, 16 1744; RV64ZVE32F-NEXT: bnez a2, .LBB25_16 1745; RV64ZVE32F-NEXT: .LBB25_7: # %else11 1746; RV64ZVE32F-NEXT: andi a2, a1, 32 1747; RV64ZVE32F-NEXT: beqz a2, .LBB25_9 1748; RV64ZVE32F-NEXT: .LBB25_8: # %cond.load13 1749; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1750; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 1751; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1752; RV64ZVE32F-NEXT: andi a2, a2, 255 1753; RV64ZVE32F-NEXT: slli a2, a2, 1 1754; RV64ZVE32F-NEXT: add a2, a0, a2 1755; RV64ZVE32F-NEXT: lh a2, 0(a2) 1756; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1757; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1758; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 1759; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 1760; RV64ZVE32F-NEXT: .LBB25_9: # %else14 1761; RV64ZVE32F-NEXT: andi a2, a1, 64 1762; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1763; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 1764; RV64ZVE32F-NEXT: beqz a2, .LBB25_11 1765; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 1766; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1767; RV64ZVE32F-NEXT: andi a2, a2, 255 1768; RV64ZVE32F-NEXT: slli a2, a2, 1 1769; RV64ZVE32F-NEXT: add a2, a0, a2 1770; RV64ZVE32F-NEXT: lh a2, 0(a2) 1771; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1772; RV64ZVE32F-NEXT: vmv.s.x v10, a2 1773; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 1774; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 1775; RV64ZVE32F-NEXT: .LBB25_11: # %else17 1776; RV64ZVE32F-NEXT: andi a1, a1, -128 1777; RV64ZVE32F-NEXT: beqz a1, .LBB25_13 1778; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 1779; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1780; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1781; RV64ZVE32F-NEXT: vmv.x.s a1, v8 1782; RV64ZVE32F-NEXT: andi a1, a1, 255 1783; RV64ZVE32F-NEXT: slli a1, a1, 1 1784; RV64ZVE32F-NEXT: add a0, a0, a1 1785; RV64ZVE32F-NEXT: lh a0, 0(a0) 1786; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1787; RV64ZVE32F-NEXT: vmv.s.x v8, a0 1788; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1789; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 1790; RV64ZVE32F-NEXT: .LBB25_13: # %else20 1791; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1792; RV64ZVE32F-NEXT: vmv1r.v v8, v9 1793; RV64ZVE32F-NEXT: ret 1794; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load4 1795; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1796; RV64ZVE32F-NEXT: andi a2, a2, 255 1797; RV64ZVE32F-NEXT: slli a2, a2, 1 1798; RV64ZVE32F-NEXT: add a2, a0, a2 1799; RV64ZVE32F-NEXT: lh a2, 0(a2) 1800; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1801; RV64ZVE32F-NEXT: vmv.s.x v11, a2 1802; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 1803; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 1804; RV64ZVE32F-NEXT: andi a2, a1, 8 1805; RV64ZVE32F-NEXT: beqz a2, .LBB25_6 1806; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load7 1807; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 1808; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1809; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1810; RV64ZVE32F-NEXT: andi a2, a2, 255 1811; RV64ZVE32F-NEXT: slli a2, a2, 1 1812; RV64ZVE32F-NEXT: add a2, a0, a2 1813; RV64ZVE32F-NEXT: lh a2, 0(a2) 1814; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 1815; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1816; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 1817; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 1818; RV64ZVE32F-NEXT: andi a2, a1, 16 1819; RV64ZVE32F-NEXT: beqz a2, .LBB25_7 1820; 
RV64ZVE32F-NEXT: .LBB25_16: # %cond.load10 1821; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1822; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1823; RV64ZVE32F-NEXT: andi a2, a2, 255 1824; RV64ZVE32F-NEXT: slli a2, a2, 1 1825; RV64ZVE32F-NEXT: add a2, a0, a2 1826; RV64ZVE32F-NEXT: lh a2, 0(a2) 1827; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1828; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1829; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 1830; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 1831; RV64ZVE32F-NEXT: andi a2, a1, 32 1832; RV64ZVE32F-NEXT: bnez a2, .LBB25_8 1833; RV64ZVE32F-NEXT: j .LBB25_9 1834 %eidxs = zext <8 x i8> %idxs to <8 x i16> 1835 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs 1836 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) 1837 ret <8 x i16> %v 1838} 1839 1840define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) { 1841; RV32-LABEL: mgather_baseidx_v8i16: 1842; RV32: # %bb.0: 1843; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu 1844; RV32-NEXT: vwadd.vv v10, v8, v8 1845; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 1846; RV32-NEXT: vmv.v.v v8, v9 1847; RV32-NEXT: ret 1848; 1849; RV64V-LABEL: mgather_baseidx_v8i16: 1850; RV64V: # %bb.0: 1851; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 1852; RV64V-NEXT: vsext.vf4 v12, v8 1853; RV64V-NEXT: vadd.vv v12, v12, v12 1854; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 1855; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t 1856; RV64V-NEXT: vmv.v.v v8, v9 1857; RV64V-NEXT: ret 1858; 1859; RV64ZVE32F-LABEL: mgather_baseidx_v8i16: 1860; RV64ZVE32F: # %bb.0: 1861; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1862; RV64ZVE32F-NEXT: vmv.x.s a1, v0 1863; RV64ZVE32F-NEXT: andi a2, a1, 1 1864; RV64ZVE32F-NEXT: beqz a2, .LBB26_2 1865; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 1866; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 1867; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1868; RV64ZVE32F-NEXT: slli a2, a2, 1 1869; RV64ZVE32F-NEXT: add a2, a0, a2 1870; RV64ZVE32F-NEXT: lh a2, 0(a2) 1871; RV64ZVE32F-NEXT: vmv.s.x v9, a2 1872; RV64ZVE32F-NEXT: .LBB26_2: # %else 1873; RV64ZVE32F-NEXT: andi a2, a1, 2 1874; RV64ZVE32F-NEXT: beqz a2, .LBB26_4 1875; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 1876; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 1877; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 1878; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1879; RV64ZVE32F-NEXT: slli a2, a2, 1 1880; RV64ZVE32F-NEXT: add a2, a0, a2 1881; RV64ZVE32F-NEXT: lh a2, 0(a2) 1882; RV64ZVE32F-NEXT: vmv.s.x v10, a2 1883; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 1884; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 1885; RV64ZVE32F-NEXT: .LBB26_4: # %else2 1886; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 1887; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 1888; RV64ZVE32F-NEXT: andi a2, a1, 4 1889; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 1890; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 1891; RV64ZVE32F-NEXT: bnez a2, .LBB26_14 1892; RV64ZVE32F-NEXT: # %bb.5: # %else5 1893; RV64ZVE32F-NEXT: andi a2, a1, 8 1894; RV64ZVE32F-NEXT: bnez a2, .LBB26_15 1895; RV64ZVE32F-NEXT: .LBB26_6: # %else8 1896; RV64ZVE32F-NEXT: andi a2, a1, 16 1897; RV64ZVE32F-NEXT: bnez a2, .LBB26_16 1898; RV64ZVE32F-NEXT: .LBB26_7: # %else11 1899; RV64ZVE32F-NEXT: andi a2, a1, 32 1900; RV64ZVE32F-NEXT: beqz a2, .LBB26_9 1901; RV64ZVE32F-NEXT: .LBB26_8: # %cond.load13 1902; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 1903; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 1904; 
RV64ZVE32F-NEXT: vmv.x.s a2, v8 1905; RV64ZVE32F-NEXT: slli a2, a2, 1 1906; RV64ZVE32F-NEXT: add a2, a0, a2 1907; RV64ZVE32F-NEXT: lh a2, 0(a2) 1908; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1909; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 1910; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 1911; RV64ZVE32F-NEXT: .LBB26_9: # %else14 1912; RV64ZVE32F-NEXT: andi a2, a1, 64 1913; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 1914; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 1915; RV64ZVE32F-NEXT: beqz a2, .LBB26_11 1916; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 1917; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1918; RV64ZVE32F-NEXT: slli a2, a2, 1 1919; RV64ZVE32F-NEXT: add a2, a0, a2 1920; RV64ZVE32F-NEXT: lh a2, 0(a2) 1921; RV64ZVE32F-NEXT: vmv.s.x v10, a2 1922; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 1923; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 1924; RV64ZVE32F-NEXT: .LBB26_11: # %else17 1925; RV64ZVE32F-NEXT: andi a1, a1, -128 1926; RV64ZVE32F-NEXT: beqz a1, .LBB26_13 1927; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 1928; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 1929; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1930; RV64ZVE32F-NEXT: vmv.x.s a1, v8 1931; RV64ZVE32F-NEXT: slli a1, a1, 1 1932; RV64ZVE32F-NEXT: add a0, a0, a1 1933; RV64ZVE32F-NEXT: lh a0, 0(a0) 1934; RV64ZVE32F-NEXT: vmv.s.x v8, a0 1935; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1936; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 1937; RV64ZVE32F-NEXT: .LBB26_13: # %else20 1938; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 1939; RV64ZVE32F-NEXT: vmv1r.v v8, v9 1940; RV64ZVE32F-NEXT: ret 1941; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load4 1942; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1943; RV64ZVE32F-NEXT: slli a2, a2, 1 1944; RV64ZVE32F-NEXT: add a2, a0, a2 1945; RV64ZVE32F-NEXT: lh a2, 0(a2) 1946; RV64ZVE32F-NEXT: vmv.s.x v11, a2 1947; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 1948; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 1949; RV64ZVE32F-NEXT: andi a2, a1, 8 1950; RV64ZVE32F-NEXT: beqz a2, .LBB26_6 1951; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load7 1952; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 1953; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 1954; RV64ZVE32F-NEXT: vmv.x.s a2, v8 1955; RV64ZVE32F-NEXT: slli a2, a2, 1 1956; RV64ZVE32F-NEXT: add a2, a0, a2 1957; RV64ZVE32F-NEXT: lh a2, 0(a2) 1958; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1959; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 1960; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 1961; RV64ZVE32F-NEXT: andi a2, a1, 16 1962; RV64ZVE32F-NEXT: beqz a2, .LBB26_7 1963; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load10 1964; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 1965; RV64ZVE32F-NEXT: vmv.x.s a2, v10 1966; RV64ZVE32F-NEXT: slli a2, a2, 1 1967; RV64ZVE32F-NEXT: add a2, a0, a2 1968; RV64ZVE32F-NEXT: lh a2, 0(a2) 1969; RV64ZVE32F-NEXT: vmv.s.x v8, a2 1970; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 1971; RV64ZVE32F-NEXT: andi a2, a1, 32 1972; RV64ZVE32F-NEXT: bnez a2, .LBB26_8 1973; RV64ZVE32F-NEXT: j .LBB26_9 1974 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs 1975 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) 1976 ret <8 x i16> %v 1977} 1978 1979declare <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i32>) 1980 1981define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthru) { 1982; RV32V-LABEL: mgather_v1i32: 1983; RV32V: # %bb.0: 1984; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu 1985; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 1986; 
RV32V-NEXT: vmv1r.v v8, v9 1987; RV32V-NEXT: ret 1988; 1989; RV64V-LABEL: mgather_v1i32: 1990; RV64V: # %bb.0: 1991; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu 1992; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 1993; RV64V-NEXT: vmv1r.v v8, v9 1994; RV64V-NEXT: ret 1995; 1996; RV32ZVE32F-LABEL: mgather_v1i32: 1997; RV32ZVE32F: # %bb.0: 1998; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu 1999; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 2000; RV32ZVE32F-NEXT: vmv.v.v v8, v9 2001; RV32ZVE32F-NEXT: ret 2002; 2003; RV64ZVE32F-LABEL: mgather_v1i32: 2004; RV64ZVE32F: # %bb.0: 2005; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 2006; RV64ZVE32F-NEXT: vfirst.m a1, v0 2007; RV64ZVE32F-NEXT: bnez a1, .LBB27_2 2008; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 2009; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2010; RV64ZVE32F-NEXT: vle32.v v8, (a0) 2011; RV64ZVE32F-NEXT: .LBB27_2: # %else 2012; RV64ZVE32F-NEXT: ret 2013 %v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru) 2014 ret <1 x i32> %v 2015} 2016 2017declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>) 2018 2019define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) { 2020; RV32V-LABEL: mgather_v2i32: 2021; RV32V: # %bb.0: 2022; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 2023; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 2024; RV32V-NEXT: vmv1r.v v8, v9 2025; RV32V-NEXT: ret 2026; 2027; RV64V-LABEL: mgather_v2i32: 2028; RV64V: # %bb.0: 2029; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 2030; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 2031; RV64V-NEXT: vmv1r.v v8, v9 2032; RV64V-NEXT: ret 2033; 2034; RV32ZVE32F-LABEL: mgather_v2i32: 2035; RV32ZVE32F: # %bb.0: 2036; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu 2037; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 2038; RV32ZVE32F-NEXT: vmv.v.v v8, v9 2039; RV32ZVE32F-NEXT: ret 2040; 2041; RV64ZVE32F-LABEL: mgather_v2i32: 2042; RV64ZVE32F: # %bb.0: 2043; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2044; RV64ZVE32F-NEXT: vmv.x.s a2, v0 2045; RV64ZVE32F-NEXT: andi a3, a2, 1 2046; RV64ZVE32F-NEXT: bnez a3, .LBB28_3 2047; RV64ZVE32F-NEXT: # %bb.1: # %else 2048; RV64ZVE32F-NEXT: andi a2, a2, 2 2049; RV64ZVE32F-NEXT: bnez a2, .LBB28_4 2050; RV64ZVE32F-NEXT: .LBB28_2: # %else2 2051; RV64ZVE32F-NEXT: ret 2052; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load 2053; RV64ZVE32F-NEXT: lw a0, 0(a0) 2054; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2055; RV64ZVE32F-NEXT: vmv.s.x v8, a0 2056; RV64ZVE32F-NEXT: andi a2, a2, 2 2057; RV64ZVE32F-NEXT: beqz a2, .LBB28_2 2058; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1 2059; RV64ZVE32F-NEXT: lw a0, 0(a1) 2060; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2061; RV64ZVE32F-NEXT: vmv.s.x v9, a0 2062; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 2063; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 2064; RV64ZVE32F-NEXT: ret 2065 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) 2066 ret <2 x i32> %v 2067} 2068 2069define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) { 2070; RV32V-LABEL: mgather_v2i32_sextload_v2i64: 2071; RV32V: # %bb.0: 2072; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 2073; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 2074; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma 2075; RV32V-NEXT: vsext.vf2 v8, v9 2076; RV32V-NEXT: ret 2077; 2078; RV64V-LABEL: mgather_v2i32_sextload_v2i64: 2079; RV64V: 
# %bb.0: 2080; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 2081; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 2082; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma 2083; RV64V-NEXT: vsext.vf2 v8, v9 2084; RV64V-NEXT: ret 2085; 2086; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64: 2087; RV32ZVE32F: # %bb.0: 2088; RV32ZVE32F-NEXT: addi a1, a0, 8 2089; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu 2090; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 2091; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 2092; RV32ZVE32F-NEXT: vmv.x.s a2, v9 2093; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2094; RV32ZVE32F-NEXT: vse32.v v9, (a0) 2095; RV32ZVE32F-NEXT: vmv.x.s a3, v8 2096; RV32ZVE32F-NEXT: srai a2, a2, 31 2097; RV32ZVE32F-NEXT: vse32.v v8, (a1) 2098; RV32ZVE32F-NEXT: srai a3, a3, 31 2099; RV32ZVE32F-NEXT: sw a2, 4(a0) 2100; RV32ZVE32F-NEXT: sw a3, 12(a0) 2101; RV32ZVE32F-NEXT: ret 2102; 2103; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64: 2104; RV64ZVE32F: # %bb.0: 2105; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2106; RV64ZVE32F-NEXT: vmv.x.s a2, v0 2107; RV64ZVE32F-NEXT: andi a3, a2, 1 2108; RV64ZVE32F-NEXT: beqz a3, .LBB29_2 2109; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 2110; RV64ZVE32F-NEXT: lw a0, 0(a0) 2111; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2112; RV64ZVE32F-NEXT: vmv.s.x v8, a0 2113; RV64ZVE32F-NEXT: .LBB29_2: # %else 2114; RV64ZVE32F-NEXT: andi a2, a2, 2 2115; RV64ZVE32F-NEXT: beqz a2, .LBB29_4 2116; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 2117; RV64ZVE32F-NEXT: lw a0, 0(a1) 2118; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2119; RV64ZVE32F-NEXT: vmv.s.x v9, a0 2120; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 2121; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 2122; RV64ZVE32F-NEXT: .LBB29_4: # %else2 2123; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2124; RV64ZVE32F-NEXT: vmv.x.s a0, v8 2125; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2126; RV64ZVE32F-NEXT: vmv.x.s a1, v8 2127; RV64ZVE32F-NEXT: ret 2128 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) 2129 %ev = sext <2 x i32> %v to <2 x i64> 2130 ret <2 x i64> %ev 2131} 2132 2133define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) { 2134; RV32V-LABEL: mgather_v2i32_zextload_v2i64: 2135; RV32V: # %bb.0: 2136; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 2137; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 2138; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma 2139; RV32V-NEXT: vzext.vf2 v8, v9 2140; RV32V-NEXT: ret 2141; 2142; RV64V-LABEL: mgather_v2i32_zextload_v2i64: 2143; RV64V: # %bb.0: 2144; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 2145; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 2146; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma 2147; RV64V-NEXT: vzext.vf2 v8, v9 2148; RV64V-NEXT: ret 2149; 2150; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64: 2151; RV32ZVE32F: # %bb.0: 2152; RV32ZVE32F-NEXT: addi a1, a0, 8 2153; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu 2154; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 2155; RV32ZVE32F-NEXT: sw zero, 4(a0) 2156; RV32ZVE32F-NEXT: sw zero, 12(a0) 2157; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2158; RV32ZVE32F-NEXT: vse32.v v9, (a0) 2159; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 2160; RV32ZVE32F-NEXT: vse32.v v8, (a1) 2161; RV32ZVE32F-NEXT: ret 2162; 2163; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64: 2164; RV64ZVE32F: # %bb.0: 2165; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2166; RV64ZVE32F-NEXT: vmv.x.s 
a2, v0 2167; RV64ZVE32F-NEXT: andi a3, a2, 1 2168; RV64ZVE32F-NEXT: beqz a3, .LBB30_2 2169; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 2170; RV64ZVE32F-NEXT: lw a0, 0(a0) 2171; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2172; RV64ZVE32F-NEXT: vmv.s.x v8, a0 2173; RV64ZVE32F-NEXT: .LBB30_2: # %else 2174; RV64ZVE32F-NEXT: andi a2, a2, 2 2175; RV64ZVE32F-NEXT: beqz a2, .LBB30_4 2176; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 2177; RV64ZVE32F-NEXT: lw a0, 0(a1) 2178; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2179; RV64ZVE32F-NEXT: vmv.s.x v9, a0 2180; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 2181; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 2182; RV64ZVE32F-NEXT: .LBB30_4: # %else2 2183; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 2184; RV64ZVE32F-NEXT: vmv.x.s a0, v8 2185; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2186; RV64ZVE32F-NEXT: slli a0, a0, 32 2187; RV64ZVE32F-NEXT: vmv.x.s a1, v8 2188; RV64ZVE32F-NEXT: srli a0, a0, 32 2189; RV64ZVE32F-NEXT: slli a1, a1, 32 2190; RV64ZVE32F-NEXT: srli a1, a1, 32 2191; RV64ZVE32F-NEXT: ret 2192 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) 2193 %ev = zext <2 x i32> %v to <2 x i64> 2194 ret <2 x i64> %ev 2195} 2196 2197declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) 2198 2199define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthru) { 2200; RV32-LABEL: mgather_v4i32: 2201; RV32: # %bb.0: 2202; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu 2203; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 2204; RV32-NEXT: vmv.v.v v8, v9 2205; RV32-NEXT: ret 2206; 2207; RV64V-LABEL: mgather_v4i32: 2208; RV64V: # %bb.0: 2209; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu 2210; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t 2211; RV64V-NEXT: vmv.v.v v8, v10 2212; RV64V-NEXT: ret 2213; 2214; RV64ZVE32F-LABEL: mgather_v4i32: 2215; RV64ZVE32F: # %bb.0: 2216; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2217; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2218; RV64ZVE32F-NEXT: andi a2, a1, 1 2219; RV64ZVE32F-NEXT: bnez a2, .LBB31_5 2220; RV64ZVE32F-NEXT: # %bb.1: # %else 2221; RV64ZVE32F-NEXT: andi a2, a1, 2 2222; RV64ZVE32F-NEXT: bnez a2, .LBB31_6 2223; RV64ZVE32F-NEXT: .LBB31_2: # %else2 2224; RV64ZVE32F-NEXT: andi a2, a1, 4 2225; RV64ZVE32F-NEXT: bnez a2, .LBB31_7 2226; RV64ZVE32F-NEXT: .LBB31_3: # %else5 2227; RV64ZVE32F-NEXT: andi a1, a1, 8 2228; RV64ZVE32F-NEXT: bnez a1, .LBB31_8 2229; RV64ZVE32F-NEXT: .LBB31_4: # %else8 2230; RV64ZVE32F-NEXT: ret 2231; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load 2232; RV64ZVE32F-NEXT: ld a2, 0(a0) 2233; RV64ZVE32F-NEXT: lw a2, 0(a2) 2234; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2235; RV64ZVE32F-NEXT: vmv.s.x v8, a2 2236; RV64ZVE32F-NEXT: andi a2, a1, 2 2237; RV64ZVE32F-NEXT: beqz a2, .LBB31_2 2238; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1 2239; RV64ZVE32F-NEXT: ld a2, 8(a0) 2240; RV64ZVE32F-NEXT: lw a2, 0(a2) 2241; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2242; RV64ZVE32F-NEXT: vmv.s.x v9, a2 2243; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 2244; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 2245; RV64ZVE32F-NEXT: andi a2, a1, 4 2246; RV64ZVE32F-NEXT: beqz a2, .LBB31_3 2247; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4 2248; RV64ZVE32F-NEXT: ld a2, 16(a0) 2249; RV64ZVE32F-NEXT: lw a2, 0(a2) 2250; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 2251; RV64ZVE32F-NEXT: vmv.s.x v9, a2 2252; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 2253; RV64ZVE32F-NEXT: andi a1, a1, 8 2254; 
RV64ZVE32F-NEXT: beqz a1, .LBB31_4 2255; RV64ZVE32F-NEXT: .LBB31_8: # %cond.load7 2256; RV64ZVE32F-NEXT: ld a0, 24(a0) 2257; RV64ZVE32F-NEXT: lw a0, 0(a0) 2258; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2259; RV64ZVE32F-NEXT: vmv.s.x v9, a0 2260; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 2261; RV64ZVE32F-NEXT: ret 2262 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> %passthru) 2263 ret <4 x i32> %v 2264} 2265 2266define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { 2267; RV32-LABEL: mgather_truemask_v4i32: 2268; RV32: # %bb.0: 2269; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2270; RV32-NEXT: vluxei32.v v8, (zero), v8 2271; RV32-NEXT: ret 2272; 2273; RV64V-LABEL: mgather_truemask_v4i32: 2274; RV64V: # %bb.0: 2275; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2276; RV64V-NEXT: vluxei64.v v10, (zero), v8 2277; RV64V-NEXT: vmv.v.v v8, v10 2278; RV64V-NEXT: ret 2279; 2280; RV64ZVE32F-LABEL: mgather_truemask_v4i32: 2281; RV64ZVE32F: # %bb.0: 2282; RV64ZVE32F-NEXT: ld a1, 0(a0) 2283; RV64ZVE32F-NEXT: ld a2, 8(a0) 2284; RV64ZVE32F-NEXT: ld a3, 16(a0) 2285; RV64ZVE32F-NEXT: ld a0, 24(a0) 2286; RV64ZVE32F-NEXT: lw a1, 0(a1) 2287; RV64ZVE32F-NEXT: lw a2, 0(a2) 2288; RV64ZVE32F-NEXT: lw a3, 0(a3) 2289; RV64ZVE32F-NEXT: lw a0, 0(a0) 2290; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2291; RV64ZVE32F-NEXT: vmv.v.x v8, a1 2292; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 2293; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 2294; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 2295; RV64ZVE32F-NEXT: ret 2296 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru) 2297 ret <4 x i32> %v 2298} 2299 2300define <4 x i32> @mgather_falsemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { 2301; RV32-LABEL: mgather_falsemask_v4i32: 2302; RV32: # %bb.0: 2303; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2304; RV32-NEXT: vmv1r.v v8, v9 2305; RV32-NEXT: ret 2306; 2307; RV64V-LABEL: mgather_falsemask_v4i32: 2308; RV64V: # %bb.0: 2309; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2310; RV64V-NEXT: vmv1r.v v8, v10 2311; RV64V-NEXT: ret 2312; 2313; RV64ZVE32F-LABEL: mgather_falsemask_v4i32: 2314; RV64ZVE32F: # %bb.0: 2315; RV64ZVE32F-NEXT: ret 2316 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x i32> %passthru) 2317 ret <4 x i32> %v 2318} 2319 2320declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>) 2321 2322define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthru) { 2323; RV32-LABEL: mgather_v8i32: 2324; RV32: # %bb.0: 2325; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 2326; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 2327; RV32-NEXT: vmv.v.v v8, v10 2328; RV32-NEXT: ret 2329; 2330; RV64V-LABEL: mgather_v8i32: 2331; RV64V: # %bb.0: 2332; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu 2333; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t 2334; RV64V-NEXT: vmv.v.v v8, v12 2335; RV64V-NEXT: ret 2336; 2337; RV64ZVE32F-LABEL: mgather_v8i32: 2338; RV64ZVE32F: # %bb.0: 2339; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2340; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2341; RV64ZVE32F-NEXT: andi a2, a1, 1 2342; RV64ZVE32F-NEXT: bnez a2, .LBB34_9 2343; RV64ZVE32F-NEXT: # %bb.1: # %else 2344; RV64ZVE32F-NEXT: andi a2, a1, 2 2345; RV64ZVE32F-NEXT: bnez a2, .LBB34_10 2346; RV64ZVE32F-NEXT: .LBB34_2: # %else2 2347; RV64ZVE32F-NEXT: andi a2, a1, 4 2348; RV64ZVE32F-NEXT: bnez a2, .LBB34_11 
2349; RV64ZVE32F-NEXT: .LBB34_3: # %else5 2350; RV64ZVE32F-NEXT: andi a2, a1, 8 2351; RV64ZVE32F-NEXT: bnez a2, .LBB34_12 2352; RV64ZVE32F-NEXT: .LBB34_4: # %else8 2353; RV64ZVE32F-NEXT: andi a2, a1, 16 2354; RV64ZVE32F-NEXT: bnez a2, .LBB34_13 2355; RV64ZVE32F-NEXT: .LBB34_5: # %else11 2356; RV64ZVE32F-NEXT: andi a2, a1, 32 2357; RV64ZVE32F-NEXT: bnez a2, .LBB34_14 2358; RV64ZVE32F-NEXT: .LBB34_6: # %else14 2359; RV64ZVE32F-NEXT: andi a2, a1, 64 2360; RV64ZVE32F-NEXT: bnez a2, .LBB34_15 2361; RV64ZVE32F-NEXT: .LBB34_7: # %else17 2362; RV64ZVE32F-NEXT: andi a1, a1, -128 2363; RV64ZVE32F-NEXT: bnez a1, .LBB34_16 2364; RV64ZVE32F-NEXT: .LBB34_8: # %else20 2365; RV64ZVE32F-NEXT: ret 2366; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load 2367; RV64ZVE32F-NEXT: ld a2, 0(a0) 2368; RV64ZVE32F-NEXT: lw a2, 0(a2) 2369; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2370; RV64ZVE32F-NEXT: vmv.s.x v8, a2 2371; RV64ZVE32F-NEXT: andi a2, a1, 2 2372; RV64ZVE32F-NEXT: beqz a2, .LBB34_2 2373; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 2374; RV64ZVE32F-NEXT: ld a2, 8(a0) 2375; RV64ZVE32F-NEXT: lw a2, 0(a2) 2376; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2377; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2378; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 2379; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 2380; RV64ZVE32F-NEXT: andi a2, a1, 4 2381; RV64ZVE32F-NEXT: beqz a2, .LBB34_3 2382; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4 2383; RV64ZVE32F-NEXT: ld a2, 16(a0) 2384; RV64ZVE32F-NEXT: lw a2, 0(a2) 2385; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 2386; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2387; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 2388; RV64ZVE32F-NEXT: andi a2, a1, 8 2389; RV64ZVE32F-NEXT: beqz a2, .LBB34_4 2390; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7 2391; RV64ZVE32F-NEXT: ld a2, 24(a0) 2392; RV64ZVE32F-NEXT: lw a2, 0(a2) 2393; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 2394; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2395; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 2396; RV64ZVE32F-NEXT: andi a2, a1, 16 2397; RV64ZVE32F-NEXT: beqz a2, .LBB34_5 2398; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10 2399; RV64ZVE32F-NEXT: ld a2, 32(a0) 2400; RV64ZVE32F-NEXT: lw a2, 0(a2) 2401; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 2402; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2403; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4 2404; RV64ZVE32F-NEXT: andi a2, a1, 32 2405; RV64ZVE32F-NEXT: beqz a2, .LBB34_6 2406; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13 2407; RV64ZVE32F-NEXT: ld a2, 40(a0) 2408; RV64ZVE32F-NEXT: lw a2, 0(a2) 2409; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 2410; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2411; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5 2412; RV64ZVE32F-NEXT: andi a2, a1, 64 2413; RV64ZVE32F-NEXT: beqz a2, .LBB34_7 2414; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16 2415; RV64ZVE32F-NEXT: ld a2, 48(a0) 2416; RV64ZVE32F-NEXT: lw a2, 0(a2) 2417; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 2418; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2419; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6 2420; RV64ZVE32F-NEXT: andi a1, a1, -128 2421; RV64ZVE32F-NEXT: beqz a1, .LBB34_8 2422; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19 2423; RV64ZVE32F-NEXT: ld a0, 56(a0) 2424; RV64ZVE32F-NEXT: lw a0, 0(a0) 2425; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2426; RV64ZVE32F-NEXT: vmv.s.x v10, a0 2427; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7 2428; RV64ZVE32F-NEXT: ret 2429 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 2430 ret <8 x i32> %v 2431} 2432 2433define <8 
x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) { 2434; RV32-LABEL: mgather_baseidx_v8i8_v8i32: 2435; RV32: # %bb.0: 2436; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 2437; RV32-NEXT: vsext.vf4 v12, v8 2438; RV32-NEXT: vsll.vi v8, v12, 2 2439; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 2440; RV32-NEXT: vmv.v.v v8, v10 2441; RV32-NEXT: ret 2442; 2443; RV64V-LABEL: mgather_baseidx_v8i8_v8i32: 2444; RV64V: # %bb.0: 2445; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2446; RV64V-NEXT: vsext.vf8 v12, v8 2447; RV64V-NEXT: vsll.vi v12, v12, 2 2448; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 2449; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 2450; RV64V-NEXT: vmv.v.v v8, v10 2451; RV64V-NEXT: ret 2452; 2453; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i32: 2454; RV64ZVE32F: # %bb.0: 2455; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2456; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2457; RV64ZVE32F-NEXT: andi a2, a1, 1 2458; RV64ZVE32F-NEXT: beqz a2, .LBB35_2 2459; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 2460; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2461; RV64ZVE32F-NEXT: slli a2, a2, 2 2462; RV64ZVE32F-NEXT: add a2, a0, a2 2463; RV64ZVE32F-NEXT: lw a2, 0(a2) 2464; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2465; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2466; RV64ZVE32F-NEXT: .LBB35_2: # %else 2467; RV64ZVE32F-NEXT: andi a2, a1, 2 2468; RV64ZVE32F-NEXT: beqz a2, .LBB35_4 2469; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 2470; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2471; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 2472; RV64ZVE32F-NEXT: vmv.x.s a2, v9 2473; RV64ZVE32F-NEXT: slli a2, a2, 2 2474; RV64ZVE32F-NEXT: add a2, a0, a2 2475; RV64ZVE32F-NEXT: lw a2, 0(a2) 2476; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2477; RV64ZVE32F-NEXT: vmv.s.x v9, a2 2478; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 2479; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 2480; RV64ZVE32F-NEXT: .LBB35_4: # %else2 2481; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 2482; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 2483; RV64ZVE32F-NEXT: andi a2, a1, 4 2484; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2485; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 2486; RV64ZVE32F-NEXT: bnez a2, .LBB35_14 2487; RV64ZVE32F-NEXT: # %bb.5: # %else5 2488; RV64ZVE32F-NEXT: andi a2, a1, 8 2489; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 2490; RV64ZVE32F-NEXT: .LBB35_6: # %else8 2491; RV64ZVE32F-NEXT: andi a2, a1, 16 2492; RV64ZVE32F-NEXT: bnez a2, .LBB35_16 2493; RV64ZVE32F-NEXT: .LBB35_7: # %else11 2494; RV64ZVE32F-NEXT: andi a2, a1, 32 2495; RV64ZVE32F-NEXT: beqz a2, .LBB35_9 2496; RV64ZVE32F-NEXT: .LBB35_8: # %cond.load13 2497; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2498; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 2499; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2500; RV64ZVE32F-NEXT: slli a2, a2, 2 2501; RV64ZVE32F-NEXT: add a2, a0, a2 2502; RV64ZVE32F-NEXT: lw a2, 0(a2) 2503; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2504; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2505; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 2506; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 2507; RV64ZVE32F-NEXT: .LBB35_9: # %else14 2508; RV64ZVE32F-NEXT: andi a2, a1, 64 2509; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2510; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 2511; RV64ZVE32F-NEXT: beqz a2, .LBB35_11 2512; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 2513; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2514; RV64ZVE32F-NEXT: slli a2, a2, 2 2515; RV64ZVE32F-NEXT: add a2, a0, a2 2516; RV64ZVE32F-NEXT: lw a2, 0(a2) 2517; 
RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2518; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2519; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 2520; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 2521; RV64ZVE32F-NEXT: .LBB35_11: # %else17 2522; RV64ZVE32F-NEXT: andi a1, a1, -128 2523; RV64ZVE32F-NEXT: beqz a1, .LBB35_13 2524; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 2525; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2526; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2527; RV64ZVE32F-NEXT: vmv.x.s a1, v8 2528; RV64ZVE32F-NEXT: slli a1, a1, 2 2529; RV64ZVE32F-NEXT: add a0, a0, a1 2530; RV64ZVE32F-NEXT: lw a0, 0(a0) 2531; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2532; RV64ZVE32F-NEXT: vmv.s.x v8, a0 2533; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2534; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 2535; RV64ZVE32F-NEXT: .LBB35_13: # %else20 2536; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2537; RV64ZVE32F-NEXT: vmv2r.v v8, v10 2538; RV64ZVE32F-NEXT: ret 2539; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load4 2540; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2541; RV64ZVE32F-NEXT: slli a2, a2, 2 2542; RV64ZVE32F-NEXT: add a2, a0, a2 2543; RV64ZVE32F-NEXT: lw a2, 0(a2) 2544; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2545; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2546; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 2547; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 2548; RV64ZVE32F-NEXT: andi a2, a1, 8 2549; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 2550; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load7 2551; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2552; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2553; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2554; RV64ZVE32F-NEXT: slli a2, a2, 2 2555; RV64ZVE32F-NEXT: add a2, a0, a2 2556; RV64ZVE32F-NEXT: lw a2, 0(a2) 2557; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2558; RV64ZVE32F-NEXT: vmv.s.x v8, a2 2559; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 2560; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 2561; RV64ZVE32F-NEXT: andi a2, a1, 16 2562; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 2563; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load10 2564; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2565; RV64ZVE32F-NEXT: vmv.x.s a2, v9 2566; RV64ZVE32F-NEXT: slli a2, a2, 2 2567; RV64ZVE32F-NEXT: add a2, a0, a2 2568; RV64ZVE32F-NEXT: lw a2, 0(a2) 2569; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2570; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2571; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 2572; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 2573; RV64ZVE32F-NEXT: andi a2, a1, 32 2574; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 2575; RV64ZVE32F-NEXT: j .LBB35_9 2576 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs 2577 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 2578 ret <8 x i32> %v 2579} 2580 2581define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) { 2582; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i32: 2583; RV32: # %bb.0: 2584; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 2585; RV32-NEXT: vsext.vf4 v12, v8 2586; RV32-NEXT: vsll.vi v8, v12, 2 2587; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 2588; RV32-NEXT: vmv.v.v v8, v10 2589; RV32-NEXT: ret 2590; 2591; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i32: 2592; RV64V: # %bb.0: 2593; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2594; RV64V-NEXT: vsext.vf8 v12, v8 2595; RV64V-NEXT: vsll.vi v12, v12, 2 2596; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 2597; RV64V-NEXT: vluxei64.v v10, (a0), v12, 
v0.t 2598; RV64V-NEXT: vmv.v.v v8, v10 2599; RV64V-NEXT: ret 2600; 2601; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i32: 2602; RV64ZVE32F: # %bb.0: 2603; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2604; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2605; RV64ZVE32F-NEXT: andi a2, a1, 1 2606; RV64ZVE32F-NEXT: beqz a2, .LBB36_2 2607; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 2608; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2609; RV64ZVE32F-NEXT: slli a2, a2, 2 2610; RV64ZVE32F-NEXT: add a2, a0, a2 2611; RV64ZVE32F-NEXT: lw a2, 0(a2) 2612; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2613; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2614; RV64ZVE32F-NEXT: .LBB36_2: # %else 2615; RV64ZVE32F-NEXT: andi a2, a1, 2 2616; RV64ZVE32F-NEXT: beqz a2, .LBB36_4 2617; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 2618; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2619; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 2620; RV64ZVE32F-NEXT: vmv.x.s a2, v9 2621; RV64ZVE32F-NEXT: slli a2, a2, 2 2622; RV64ZVE32F-NEXT: add a2, a0, a2 2623; RV64ZVE32F-NEXT: lw a2, 0(a2) 2624; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2625; RV64ZVE32F-NEXT: vmv.s.x v9, a2 2626; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 2627; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 2628; RV64ZVE32F-NEXT: .LBB36_4: # %else2 2629; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 2630; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 2631; RV64ZVE32F-NEXT: andi a2, a1, 4 2632; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2633; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 2634; RV64ZVE32F-NEXT: bnez a2, .LBB36_14 2635; RV64ZVE32F-NEXT: # %bb.5: # %else5 2636; RV64ZVE32F-NEXT: andi a2, a1, 8 2637; RV64ZVE32F-NEXT: bnez a2, .LBB36_15 2638; RV64ZVE32F-NEXT: .LBB36_6: # %else8 2639; RV64ZVE32F-NEXT: andi a2, a1, 16 2640; RV64ZVE32F-NEXT: bnez a2, .LBB36_16 2641; RV64ZVE32F-NEXT: .LBB36_7: # %else11 2642; RV64ZVE32F-NEXT: andi a2, a1, 32 2643; RV64ZVE32F-NEXT: beqz a2, .LBB36_9 2644; RV64ZVE32F-NEXT: .LBB36_8: # %cond.load13 2645; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2646; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 2647; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2648; RV64ZVE32F-NEXT: slli a2, a2, 2 2649; RV64ZVE32F-NEXT: add a2, a0, a2 2650; RV64ZVE32F-NEXT: lw a2, 0(a2) 2651; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2652; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2653; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 2654; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 2655; RV64ZVE32F-NEXT: .LBB36_9: # %else14 2656; RV64ZVE32F-NEXT: andi a2, a1, 64 2657; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2658; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 2659; RV64ZVE32F-NEXT: beqz a2, .LBB36_11 2660; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 2661; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2662; RV64ZVE32F-NEXT: slli a2, a2, 2 2663; RV64ZVE32F-NEXT: add a2, a0, a2 2664; RV64ZVE32F-NEXT: lw a2, 0(a2) 2665; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2666; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2667; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 2668; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 2669; RV64ZVE32F-NEXT: .LBB36_11: # %else17 2670; RV64ZVE32F-NEXT: andi a1, a1, -128 2671; RV64ZVE32F-NEXT: beqz a1, .LBB36_13 2672; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 2673; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2674; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2675; RV64ZVE32F-NEXT: vmv.x.s a1, v8 2676; RV64ZVE32F-NEXT: slli a1, a1, 2 2677; RV64ZVE32F-NEXT: add a0, a0, a1 2678; RV64ZVE32F-NEXT: lw a0, 0(a0) 2679; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 
2680; RV64ZVE32F-NEXT: vmv.s.x v8, a0 2681; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2682; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 2683; RV64ZVE32F-NEXT: .LBB36_13: # %else20 2684; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2685; RV64ZVE32F-NEXT: vmv2r.v v8, v10 2686; RV64ZVE32F-NEXT: ret 2687; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load4 2688; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2689; RV64ZVE32F-NEXT: slli a2, a2, 2 2690; RV64ZVE32F-NEXT: add a2, a0, a2 2691; RV64ZVE32F-NEXT: lw a2, 0(a2) 2692; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2693; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2694; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 2695; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 2696; RV64ZVE32F-NEXT: andi a2, a1, 8 2697; RV64ZVE32F-NEXT: beqz a2, .LBB36_6 2698; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load7 2699; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2700; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2701; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2702; RV64ZVE32F-NEXT: slli a2, a2, 2 2703; RV64ZVE32F-NEXT: add a2, a0, a2 2704; RV64ZVE32F-NEXT: lw a2, 0(a2) 2705; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2706; RV64ZVE32F-NEXT: vmv.s.x v8, a2 2707; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 2708; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 2709; RV64ZVE32F-NEXT: andi a2, a1, 16 2710; RV64ZVE32F-NEXT: beqz a2, .LBB36_7 2711; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load10 2712; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2713; RV64ZVE32F-NEXT: vmv.x.s a2, v9 2714; RV64ZVE32F-NEXT: slli a2, a2, 2 2715; RV64ZVE32F-NEXT: add a2, a0, a2 2716; RV64ZVE32F-NEXT: lw a2, 0(a2) 2717; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2718; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2719; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 2720; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 2721; RV64ZVE32F-NEXT: andi a2, a1, 32 2722; RV64ZVE32F-NEXT: bnez a2, .LBB36_8 2723; RV64ZVE32F-NEXT: j .LBB36_9 2724 %eidxs = sext <8 x i8> %idxs to <8 x i32> 2725 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 2726 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 2727 ret <8 x i32> %v 2728} 2729 2730define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) { 2731; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32: 2732; RV32: # %bb.0: 2733; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2734; RV32-NEXT: vzext.vf2 v9, v8 2735; RV32-NEXT: vsll.vi v8, v9, 2 2736; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu 2737; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t 2738; RV32-NEXT: vmv.v.v v8, v10 2739; RV32-NEXT: ret 2740; 2741; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32: 2742; RV64V: # %bb.0: 2743; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2744; RV64V-NEXT: vzext.vf2 v9, v8 2745; RV64V-NEXT: vsll.vi v8, v9, 2 2746; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 2747; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t 2748; RV64V-NEXT: vmv.v.v v8, v10 2749; RV64V-NEXT: ret 2750; 2751; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i32: 2752; RV64ZVE32F: # %bb.0: 2753; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2754; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2755; RV64ZVE32F-NEXT: andi a2, a1, 1 2756; RV64ZVE32F-NEXT: beqz a2, .LBB37_2 2757; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 2758; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2759; RV64ZVE32F-NEXT: andi a2, a2, 255 2760; RV64ZVE32F-NEXT: slli a2, a2, 2 2761; RV64ZVE32F-NEXT: add a2, a0, a2 2762; RV64ZVE32F-NEXT: lw a2, 0(a2) 2763; RV64ZVE32F-NEXT: 
vsetvli zero, zero, e32, m4, tu, ma 2764; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2765; RV64ZVE32F-NEXT: .LBB37_2: # %else 2766; RV64ZVE32F-NEXT: andi a2, a1, 2 2767; RV64ZVE32F-NEXT: beqz a2, .LBB37_4 2768; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 2769; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2770; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 2771; RV64ZVE32F-NEXT: vmv.x.s a2, v9 2772; RV64ZVE32F-NEXT: andi a2, a2, 255 2773; RV64ZVE32F-NEXT: slli a2, a2, 2 2774; RV64ZVE32F-NEXT: add a2, a0, a2 2775; RV64ZVE32F-NEXT: lw a2, 0(a2) 2776; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2777; RV64ZVE32F-NEXT: vmv.s.x v9, a2 2778; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 2779; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 2780; RV64ZVE32F-NEXT: .LBB37_4: # %else2 2781; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 2782; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 2783; RV64ZVE32F-NEXT: andi a2, a1, 4 2784; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2785; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 2786; RV64ZVE32F-NEXT: bnez a2, .LBB37_14 2787; RV64ZVE32F-NEXT: # %bb.5: # %else5 2788; RV64ZVE32F-NEXT: andi a2, a1, 8 2789; RV64ZVE32F-NEXT: bnez a2, .LBB37_15 2790; RV64ZVE32F-NEXT: .LBB37_6: # %else8 2791; RV64ZVE32F-NEXT: andi a2, a1, 16 2792; RV64ZVE32F-NEXT: bnez a2, .LBB37_16 2793; RV64ZVE32F-NEXT: .LBB37_7: # %else11 2794; RV64ZVE32F-NEXT: andi a2, a1, 32 2795; RV64ZVE32F-NEXT: beqz a2, .LBB37_9 2796; RV64ZVE32F-NEXT: .LBB37_8: # %cond.load13 2797; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2798; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 2799; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2800; RV64ZVE32F-NEXT: andi a2, a2, 255 2801; RV64ZVE32F-NEXT: slli a2, a2, 2 2802; RV64ZVE32F-NEXT: add a2, a0, a2 2803; RV64ZVE32F-NEXT: lw a2, 0(a2) 2804; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2805; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2806; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 2807; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 2808; RV64ZVE32F-NEXT: .LBB37_9: # %else14 2809; RV64ZVE32F-NEXT: andi a2, a1, 64 2810; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 2811; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 2812; RV64ZVE32F-NEXT: beqz a2, .LBB37_11 2813; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 2814; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2815; RV64ZVE32F-NEXT: andi a2, a2, 255 2816; RV64ZVE32F-NEXT: slli a2, a2, 2 2817; RV64ZVE32F-NEXT: add a2, a0, a2 2818; RV64ZVE32F-NEXT: lw a2, 0(a2) 2819; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2820; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2821; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 2822; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 2823; RV64ZVE32F-NEXT: .LBB37_11: # %else17 2824; RV64ZVE32F-NEXT: andi a1, a1, -128 2825; RV64ZVE32F-NEXT: beqz a1, .LBB37_13 2826; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 2827; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2828; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2829; RV64ZVE32F-NEXT: vmv.x.s a1, v8 2830; RV64ZVE32F-NEXT: andi a1, a1, 255 2831; RV64ZVE32F-NEXT: slli a1, a1, 2 2832; RV64ZVE32F-NEXT: add a0, a0, a1 2833; RV64ZVE32F-NEXT: lw a0, 0(a0) 2834; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2835; RV64ZVE32F-NEXT: vmv.s.x v8, a0 2836; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2837; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 2838; RV64ZVE32F-NEXT: .LBB37_13: # %else20 2839; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2840; RV64ZVE32F-NEXT: vmv2r.v v8, v10 2841; RV64ZVE32F-NEXT: ret 2842; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load4 2843; RV64ZVE32F-NEXT: 
vmv.x.s a2, v8 2844; RV64ZVE32F-NEXT: andi a2, a2, 255 2845; RV64ZVE32F-NEXT: slli a2, a2, 2 2846; RV64ZVE32F-NEXT: add a2, a0, a2 2847; RV64ZVE32F-NEXT: lw a2, 0(a2) 2848; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2849; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2850; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 2851; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 2852; RV64ZVE32F-NEXT: andi a2, a1, 8 2853; RV64ZVE32F-NEXT: beqz a2, .LBB37_6 2854; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load7 2855; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 2856; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2857; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2858; RV64ZVE32F-NEXT: andi a2, a2, 255 2859; RV64ZVE32F-NEXT: slli a2, a2, 2 2860; RV64ZVE32F-NEXT: add a2, a0, a2 2861; RV64ZVE32F-NEXT: lw a2, 0(a2) 2862; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2863; RV64ZVE32F-NEXT: vmv.s.x v8, a2 2864; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 2865; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 2866; RV64ZVE32F-NEXT: andi a2, a1, 16 2867; RV64ZVE32F-NEXT: beqz a2, .LBB37_7 2868; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load10 2869; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2870; RV64ZVE32F-NEXT: vmv.x.s a2, v9 2871; RV64ZVE32F-NEXT: andi a2, a2, 255 2872; RV64ZVE32F-NEXT: slli a2, a2, 2 2873; RV64ZVE32F-NEXT: add a2, a0, a2 2874; RV64ZVE32F-NEXT: lw a2, 0(a2) 2875; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2876; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2877; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 2878; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 2879; RV64ZVE32F-NEXT: andi a2, a1, 32 2880; RV64ZVE32F-NEXT: bnez a2, .LBB37_8 2881; RV64ZVE32F-NEXT: j .LBB37_9 2882 %eidxs = zext <8 x i8> %idxs to <8 x i32> 2883 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 2884 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 2885 ret <8 x i32> %v 2886} 2887 2888define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) { 2889; RV32-LABEL: mgather_baseidx_v8i16_v8i32: 2890; RV32: # %bb.0: 2891; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 2892; RV32-NEXT: vsext.vf2 v12, v8 2893; RV32-NEXT: vsll.vi v8, v12, 2 2894; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 2895; RV32-NEXT: vmv.v.v v8, v10 2896; RV32-NEXT: ret 2897; 2898; RV64V-LABEL: mgather_baseidx_v8i16_v8i32: 2899; RV64V: # %bb.0: 2900; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 2901; RV64V-NEXT: vsext.vf4 v12, v8 2902; RV64V-NEXT: vsll.vi v12, v12, 2 2903; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 2904; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 2905; RV64V-NEXT: vmv.v.v v8, v10 2906; RV64V-NEXT: ret 2907; 2908; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i32: 2909; RV64ZVE32F: # %bb.0: 2910; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2911; RV64ZVE32F-NEXT: vmv.x.s a1, v0 2912; RV64ZVE32F-NEXT: andi a2, a1, 1 2913; RV64ZVE32F-NEXT: beqz a2, .LBB38_2 2914; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 2915; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2916; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2917; RV64ZVE32F-NEXT: slli a2, a2, 2 2918; RV64ZVE32F-NEXT: add a2, a0, a2 2919; RV64ZVE32F-NEXT: lw a2, 0(a2) 2920; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 2921; RV64ZVE32F-NEXT: vmv.s.x v10, a2 2922; RV64ZVE32F-NEXT: .LBB38_2: # %else 2923; RV64ZVE32F-NEXT: andi a2, a1, 2 2924; RV64ZVE32F-NEXT: beqz a2, .LBB38_4 2925; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 2926; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2927; 
RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 2928; RV64ZVE32F-NEXT: vmv.x.s a2, v9 2929; RV64ZVE32F-NEXT: slli a2, a2, 2 2930; RV64ZVE32F-NEXT: add a2, a0, a2 2931; RV64ZVE32F-NEXT: lw a2, 0(a2) 2932; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2933; RV64ZVE32F-NEXT: vmv.s.x v9, a2 2934; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 2935; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 2936; RV64ZVE32F-NEXT: .LBB38_4: # %else2 2937; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 2938; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 2939; RV64ZVE32F-NEXT: andi a2, a1, 4 2940; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 2941; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 2942; RV64ZVE32F-NEXT: bnez a2, .LBB38_14 2943; RV64ZVE32F-NEXT: # %bb.5: # %else5 2944; RV64ZVE32F-NEXT: andi a2, a1, 8 2945; RV64ZVE32F-NEXT: bnez a2, .LBB38_15 2946; RV64ZVE32F-NEXT: .LBB38_6: # %else8 2947; RV64ZVE32F-NEXT: andi a2, a1, 16 2948; RV64ZVE32F-NEXT: bnez a2, .LBB38_16 2949; RV64ZVE32F-NEXT: .LBB38_7: # %else11 2950; RV64ZVE32F-NEXT: andi a2, a1, 32 2951; RV64ZVE32F-NEXT: beqz a2, .LBB38_9 2952; RV64ZVE32F-NEXT: .LBB38_8: # %cond.load13 2953; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2954; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 2955; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2956; RV64ZVE32F-NEXT: slli a2, a2, 2 2957; RV64ZVE32F-NEXT: add a2, a0, a2 2958; RV64ZVE32F-NEXT: lw a2, 0(a2) 2959; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2960; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2961; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 2962; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 2963; RV64ZVE32F-NEXT: .LBB38_9: # %else14 2964; RV64ZVE32F-NEXT: andi a2, a1, 64 2965; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 2966; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 2967; RV64ZVE32F-NEXT: beqz a2, .LBB38_11 2968; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 2969; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2970; RV64ZVE32F-NEXT: slli a2, a2, 2 2971; RV64ZVE32F-NEXT: add a2, a0, a2 2972; RV64ZVE32F-NEXT: lw a2, 0(a2) 2973; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2974; RV64ZVE32F-NEXT: vmv.s.x v12, a2 2975; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 2976; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 2977; RV64ZVE32F-NEXT: .LBB38_11: # %else17 2978; RV64ZVE32F-NEXT: andi a1, a1, -128 2979; RV64ZVE32F-NEXT: beqz a1, .LBB38_13 2980; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 2981; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 2982; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 2983; RV64ZVE32F-NEXT: vmv.x.s a1, v8 2984; RV64ZVE32F-NEXT: slli a1, a1, 2 2985; RV64ZVE32F-NEXT: add a0, a0, a1 2986; RV64ZVE32F-NEXT: lw a0, 0(a0) 2987; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2988; RV64ZVE32F-NEXT: vmv.s.x v8, a0 2989; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2990; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 2991; RV64ZVE32F-NEXT: .LBB38_13: # %else20 2992; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 2993; RV64ZVE32F-NEXT: vmv2r.v v8, v10 2994; RV64ZVE32F-NEXT: ret 2995; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load4 2996; RV64ZVE32F-NEXT: vmv.x.s a2, v8 2997; RV64ZVE32F-NEXT: slli a2, a2, 2 2998; RV64ZVE32F-NEXT: add a2, a0, a2 2999; RV64ZVE32F-NEXT: lw a2, 0(a2) 3000; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3001; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3002; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 3003; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 3004; RV64ZVE32F-NEXT: andi a2, a1, 8 3005; RV64ZVE32F-NEXT: beqz a2, .LBB38_6 3006; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load7 3007; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3008; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3009; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3010; RV64ZVE32F-NEXT: slli a2, a2, 2 3011; RV64ZVE32F-NEXT: add a2, a0, a2 3012; RV64ZVE32F-NEXT: lw a2, 0(a2) 3013; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3014; RV64ZVE32F-NEXT: vmv.s.x v8, a2 3015; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 3016; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 3017; RV64ZVE32F-NEXT: andi a2, a1, 16 3018; RV64ZVE32F-NEXT: beqz a2, .LBB38_7 3019; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load10 3020; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 3021; RV64ZVE32F-NEXT: vmv.x.s a2, v9 3022; RV64ZVE32F-NEXT: slli a2, a2, 2 3023; RV64ZVE32F-NEXT: add a2, a0, a2 3024; RV64ZVE32F-NEXT: lw a2, 0(a2) 3025; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3026; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3027; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 3028; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 3029; RV64ZVE32F-NEXT: andi a2, a1, 32 3030; RV64ZVE32F-NEXT: bnez a2, .LBB38_8 3031; RV64ZVE32F-NEXT: j .LBB38_9 3032 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs 3033 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 3034 ret <8 x i32> %v 3035} 3036 3037define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) { 3038; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i32: 3039; RV32: # %bb.0: 3040; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 3041; RV32-NEXT: vsext.vf2 v12, v8 3042; RV32-NEXT: vsll.vi v8, v12, 2 3043; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 3044; RV32-NEXT: vmv.v.v v8, v10 3045; RV32-NEXT: ret 3046; 3047; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i32: 3048; RV64V: # %bb.0: 3049; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 3050; RV64V-NEXT: vsext.vf4 v12, v8 3051; RV64V-NEXT: vsll.vi v12, v12, 2 3052; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 3053; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 3054; RV64V-NEXT: vmv.v.v v8, v10 3055; RV64V-NEXT: ret 3056; 3057; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i32: 3058; RV64ZVE32F: # %bb.0: 3059; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3060; RV64ZVE32F-NEXT: vmv.x.s a1, v0 3061; RV64ZVE32F-NEXT: andi a2, a1, 1 3062; RV64ZVE32F-NEXT: beqz a2, .LBB39_2 3063; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 3064; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 3065; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3066; RV64ZVE32F-NEXT: slli a2, a2, 2 3067; RV64ZVE32F-NEXT: add a2, a0, a2 3068; RV64ZVE32F-NEXT: lw a2, 0(a2) 3069; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 3070; RV64ZVE32F-NEXT: vmv.s.x v10, a2 3071; RV64ZVE32F-NEXT: .LBB39_2: # %else 3072; RV64ZVE32F-NEXT: andi a2, a1, 2 3073; RV64ZVE32F-NEXT: beqz a2, .LBB39_4 3074; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 3075; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3076; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3077; RV64ZVE32F-NEXT: vmv.x.s a2, v9 3078; RV64ZVE32F-NEXT: slli a2, a2, 2 3079; RV64ZVE32F-NEXT: add a2, a0, a2 3080; RV64ZVE32F-NEXT: lw a2, 0(a2) 3081; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3082; RV64ZVE32F-NEXT: vmv.s.x v9, a2 3083; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 3084; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 3085; RV64ZVE32F-NEXT: .LBB39_4: # %else2 3086; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 3087; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 3088; RV64ZVE32F-NEXT: andi a2, a1, 4 3089; RV64ZVE32F-NEXT: vsetivli 
zero, 2, e16, mf2, ta, ma 3090; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 3091; RV64ZVE32F-NEXT: bnez a2, .LBB39_14 3092; RV64ZVE32F-NEXT: # %bb.5: # %else5 3093; RV64ZVE32F-NEXT: andi a2, a1, 8 3094; RV64ZVE32F-NEXT: bnez a2, .LBB39_15 3095; RV64ZVE32F-NEXT: .LBB39_6: # %else8 3096; RV64ZVE32F-NEXT: andi a2, a1, 16 3097; RV64ZVE32F-NEXT: bnez a2, .LBB39_16 3098; RV64ZVE32F-NEXT: .LBB39_7: # %else11 3099; RV64ZVE32F-NEXT: andi a2, a1, 32 3100; RV64ZVE32F-NEXT: beqz a2, .LBB39_9 3101; RV64ZVE32F-NEXT: .LBB39_8: # %cond.load13 3102; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3103; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 3104; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3105; RV64ZVE32F-NEXT: slli a2, a2, 2 3106; RV64ZVE32F-NEXT: add a2, a0, a2 3107; RV64ZVE32F-NEXT: lw a2, 0(a2) 3108; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3109; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3110; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 3111; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 3112; RV64ZVE32F-NEXT: .LBB39_9: # %else14 3113; RV64ZVE32F-NEXT: andi a2, a1, 64 3114; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 3115; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 3116; RV64ZVE32F-NEXT: beqz a2, .LBB39_11 3117; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 3118; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3119; RV64ZVE32F-NEXT: slli a2, a2, 2 3120; RV64ZVE32F-NEXT: add a2, a0, a2 3121; RV64ZVE32F-NEXT: lw a2, 0(a2) 3122; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3123; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3124; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 3125; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 3126; RV64ZVE32F-NEXT: .LBB39_11: # %else17 3127; RV64ZVE32F-NEXT: andi a1, a1, -128 3128; RV64ZVE32F-NEXT: beqz a1, .LBB39_13 3129; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 3130; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3131; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3132; RV64ZVE32F-NEXT: vmv.x.s a1, v8 3133; RV64ZVE32F-NEXT: slli a1, a1, 2 3134; RV64ZVE32F-NEXT: add a0, a0, a1 3135; RV64ZVE32F-NEXT: lw a0, 0(a0) 3136; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3137; RV64ZVE32F-NEXT: vmv.s.x v8, a0 3138; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3139; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 3140; RV64ZVE32F-NEXT: .LBB39_13: # %else20 3141; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3142; RV64ZVE32F-NEXT: vmv2r.v v8, v10 3143; RV64ZVE32F-NEXT: ret 3144; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load4 3145; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3146; RV64ZVE32F-NEXT: slli a2, a2, 2 3147; RV64ZVE32F-NEXT: add a2, a0, a2 3148; RV64ZVE32F-NEXT: lw a2, 0(a2) 3149; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3150; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3151; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 3152; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 3153; RV64ZVE32F-NEXT: andi a2, a1, 8 3154; RV64ZVE32F-NEXT: beqz a2, .LBB39_6 3155; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load7 3156; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3157; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3158; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3159; RV64ZVE32F-NEXT: slli a2, a2, 2 3160; RV64ZVE32F-NEXT: add a2, a0, a2 3161; RV64ZVE32F-NEXT: lw a2, 0(a2) 3162; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3163; RV64ZVE32F-NEXT: vmv.s.x v8, a2 3164; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 3165; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 3166; RV64ZVE32F-NEXT: andi a2, a1, 16 3167; RV64ZVE32F-NEXT: beqz a2, .LBB39_7 3168; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load10 3169; RV64ZVE32F-NEXT: vsetivli 
zero, 1, e16, m1, ta, ma 3170; RV64ZVE32F-NEXT: vmv.x.s a2, v9 3171; RV64ZVE32F-NEXT: slli a2, a2, 2 3172; RV64ZVE32F-NEXT: add a2, a0, a2 3173; RV64ZVE32F-NEXT: lw a2, 0(a2) 3174; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3175; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3176; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 3177; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 3178; RV64ZVE32F-NEXT: andi a2, a1, 32 3179; RV64ZVE32F-NEXT: bnez a2, .LBB39_8 3180; RV64ZVE32F-NEXT: j .LBB39_9 3181 %eidxs = sext <8 x i16> %idxs to <8 x i32> 3182 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 3183 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 3184 ret <8 x i32> %v 3185} 3186 3187define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) { 3188; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i32: 3189; RV32: # %bb.0: 3190; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 3191; RV32-NEXT: vzext.vf2 v12, v8 3192; RV32-NEXT: vsll.vi v8, v12, 2 3193; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 3194; RV32-NEXT: vmv.v.v v8, v10 3195; RV32-NEXT: ret 3196; 3197; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32: 3198; RV64V: # %bb.0: 3199; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu 3200; RV64V-NEXT: vzext.vf2 v12, v8 3201; RV64V-NEXT: vsll.vi v8, v12, 2 3202; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t 3203; RV64V-NEXT: vmv.v.v v8, v10 3204; RV64V-NEXT: ret 3205; 3206; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32: 3207; RV64ZVE32F: # %bb.0: 3208; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3209; RV64ZVE32F-NEXT: vmv.x.s a1, v0 3210; RV64ZVE32F-NEXT: andi a2, a1, 1 3211; RV64ZVE32F-NEXT: beqz a2, .LBB40_2 3212; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 3213; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 3214; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3215; RV64ZVE32F-NEXT: slli a2, a2, 48 3216; RV64ZVE32F-NEXT: srli a2, a2, 46 3217; RV64ZVE32F-NEXT: add a2, a0, a2 3218; RV64ZVE32F-NEXT: lw a2, 0(a2) 3219; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 3220; RV64ZVE32F-NEXT: vmv.s.x v10, a2 3221; RV64ZVE32F-NEXT: .LBB40_2: # %else 3222; RV64ZVE32F-NEXT: andi a2, a1, 2 3223; RV64ZVE32F-NEXT: beqz a2, .LBB40_4 3224; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 3225; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3226; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3227; RV64ZVE32F-NEXT: vmv.x.s a2, v9 3228; RV64ZVE32F-NEXT: slli a2, a2, 48 3229; RV64ZVE32F-NEXT: srli a2, a2, 46 3230; RV64ZVE32F-NEXT: add a2, a0, a2 3231; RV64ZVE32F-NEXT: lw a2, 0(a2) 3232; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3233; RV64ZVE32F-NEXT: vmv.s.x v9, a2 3234; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 3235; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 3236; RV64ZVE32F-NEXT: .LBB40_4: # %else2 3237; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 3238; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 3239; RV64ZVE32F-NEXT: andi a2, a1, 4 3240; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 3241; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 3242; RV64ZVE32F-NEXT: bnez a2, .LBB40_14 3243; RV64ZVE32F-NEXT: # %bb.5: # %else5 3244; RV64ZVE32F-NEXT: andi a2, a1, 8 3245; RV64ZVE32F-NEXT: bnez a2, .LBB40_15 3246; RV64ZVE32F-NEXT: .LBB40_6: # %else8 3247; RV64ZVE32F-NEXT: andi a2, a1, 16 3248; RV64ZVE32F-NEXT: bnez a2, .LBB40_16 3249; RV64ZVE32F-NEXT: .LBB40_7: # %else11 3250; RV64ZVE32F-NEXT: andi a2, a1, 32 3251; RV64ZVE32F-NEXT: beqz a2, .LBB40_9 3252; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13 3253; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3254; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 3255; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3256; RV64ZVE32F-NEXT: slli a2, a2, 48 3257; RV64ZVE32F-NEXT: srli a2, a2, 46 3258; RV64ZVE32F-NEXT: add a2, a0, a2 3259; RV64ZVE32F-NEXT: lw a2, 0(a2) 3260; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3261; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3262; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 3263; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 3264; RV64ZVE32F-NEXT: .LBB40_9: # %else14 3265; RV64ZVE32F-NEXT: andi a2, a1, 64 3266; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 3267; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 3268; RV64ZVE32F-NEXT: beqz a2, .LBB40_11 3269; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 3270; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3271; RV64ZVE32F-NEXT: slli a2, a2, 48 3272; RV64ZVE32F-NEXT: srli a2, a2, 46 3273; RV64ZVE32F-NEXT: add a2, a0, a2 3274; RV64ZVE32F-NEXT: lw a2, 0(a2) 3275; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3276; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3277; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 3278; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 3279; RV64ZVE32F-NEXT: .LBB40_11: # %else17 3280; RV64ZVE32F-NEXT: andi a1, a1, -128 3281; RV64ZVE32F-NEXT: beqz a1, .LBB40_13 3282; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 3283; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3284; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3285; RV64ZVE32F-NEXT: vmv.x.s a1, v8 3286; RV64ZVE32F-NEXT: slli a1, a1, 48 3287; RV64ZVE32F-NEXT: srli a1, a1, 46 3288; RV64ZVE32F-NEXT: add a0, a0, a1 3289; RV64ZVE32F-NEXT: lw a0, 0(a0) 3290; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3291; RV64ZVE32F-NEXT: vmv.s.x v8, a0 3292; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3293; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 3294; RV64ZVE32F-NEXT: .LBB40_13: # %else20 3295; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3296; RV64ZVE32F-NEXT: vmv2r.v v8, v10 3297; RV64ZVE32F-NEXT: ret 3298; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load4 3299; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3300; RV64ZVE32F-NEXT: slli a2, a2, 48 3301; RV64ZVE32F-NEXT: srli a2, a2, 46 3302; RV64ZVE32F-NEXT: add a2, a0, a2 3303; RV64ZVE32F-NEXT: lw a2, 0(a2) 3304; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3305; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3306; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 3307; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 3308; RV64ZVE32F-NEXT: andi a2, a1, 8 3309; RV64ZVE32F-NEXT: beqz a2, .LBB40_6 3310; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load7 3311; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 3312; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3313; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3314; RV64ZVE32F-NEXT: slli a2, a2, 48 3315; RV64ZVE32F-NEXT: srli a2, a2, 46 3316; RV64ZVE32F-NEXT: add a2, a0, a2 3317; RV64ZVE32F-NEXT: lw a2, 0(a2) 3318; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3319; RV64ZVE32F-NEXT: vmv.s.x v8, a2 3320; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 3321; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 3322; RV64ZVE32F-NEXT: andi a2, a1, 16 3323; RV64ZVE32F-NEXT: beqz a2, .LBB40_7 3324; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load10 3325; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 3326; RV64ZVE32F-NEXT: vmv.x.s a2, v9 3327; RV64ZVE32F-NEXT: slli a2, a2, 48 3328; RV64ZVE32F-NEXT: srli a2, a2, 46 3329; RV64ZVE32F-NEXT: add a2, a0, a2 3330; RV64ZVE32F-NEXT: lw a2, 0(a2) 3331; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3332; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3333; RV64ZVE32F-NEXT: 
vsetivli zero, 5, e32, m2, tu, ma 3334; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 3335; RV64ZVE32F-NEXT: andi a2, a1, 32 3336; RV64ZVE32F-NEXT: bnez a2, .LBB40_8 3337; RV64ZVE32F-NEXT: j .LBB40_9 3338 %eidxs = zext <8 x i16> %idxs to <8 x i32> 3339 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs 3340 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 3341 ret <8 x i32> %v 3342} 3343 3344define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i32> %passthru) { 3345; RV32-LABEL: mgather_baseidx_v8i32: 3346; RV32: # %bb.0: 3347; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 3348; RV32-NEXT: vsll.vi v8, v8, 2 3349; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 3350; RV32-NEXT: vmv.v.v v8, v10 3351; RV32-NEXT: ret 3352; 3353; RV64V-LABEL: mgather_baseidx_v8i32: 3354; RV64V: # %bb.0: 3355; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 3356; RV64V-NEXT: vsext.vf2 v12, v8 3357; RV64V-NEXT: vsll.vi v12, v12, 2 3358; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 3359; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 3360; RV64V-NEXT: vmv.v.v v8, v10 3361; RV64V-NEXT: ret 3362; 3363; RV64ZVE32F-LABEL: mgather_baseidx_v8i32: 3364; RV64ZVE32F: # %bb.0: 3365; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3366; RV64ZVE32F-NEXT: vmv.x.s a1, v0 3367; RV64ZVE32F-NEXT: andi a2, a1, 1 3368; RV64ZVE32F-NEXT: beqz a2, .LBB41_2 3369; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 3370; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 3371; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3372; RV64ZVE32F-NEXT: slli a2, a2, 2 3373; RV64ZVE32F-NEXT: add a2, a0, a2 3374; RV64ZVE32F-NEXT: lw a2, 0(a2) 3375; RV64ZVE32F-NEXT: vmv.s.x v10, a2 3376; RV64ZVE32F-NEXT: .LBB41_2: # %else 3377; RV64ZVE32F-NEXT: andi a2, a1, 2 3378; RV64ZVE32F-NEXT: beqz a2, .LBB41_4 3379; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 3380; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 3381; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 3382; RV64ZVE32F-NEXT: vmv.x.s a2, v12 3383; RV64ZVE32F-NEXT: slli a2, a2, 2 3384; RV64ZVE32F-NEXT: add a2, a0, a2 3385; RV64ZVE32F-NEXT: lw a2, 0(a2) 3386; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3387; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 3388; RV64ZVE32F-NEXT: .LBB41_4: # %else2 3389; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 3390; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 3391; RV64ZVE32F-NEXT: andi a2, a1, 4 3392; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 3393; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 3394; RV64ZVE32F-NEXT: bnez a2, .LBB41_14 3395; RV64ZVE32F-NEXT: # %bb.5: # %else5 3396; RV64ZVE32F-NEXT: andi a2, a1, 8 3397; RV64ZVE32F-NEXT: bnez a2, .LBB41_15 3398; RV64ZVE32F-NEXT: .LBB41_6: # %else8 3399; RV64ZVE32F-NEXT: andi a2, a1, 16 3400; RV64ZVE32F-NEXT: bnez a2, .LBB41_16 3401; RV64ZVE32F-NEXT: .LBB41_7: # %else11 3402; RV64ZVE32F-NEXT: andi a2, a1, 32 3403; RV64ZVE32F-NEXT: beqz a2, .LBB41_9 3404; RV64ZVE32F-NEXT: .LBB41_8: # %cond.load13 3405; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3406; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1 3407; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3408; RV64ZVE32F-NEXT: slli a2, a2, 2 3409; RV64ZVE32F-NEXT: add a2, a0, a2 3410; RV64ZVE32F-NEXT: lw a2, 0(a2) 3411; RV64ZVE32F-NEXT: vmv.s.x v8, a2 3412; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 3413; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 3414; RV64ZVE32F-NEXT: .LBB41_9: # %else14 3415; RV64ZVE32F-NEXT: andi a2, a1, 64 3416; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 3417; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 
2 3418; RV64ZVE32F-NEXT: beqz a2, .LBB41_11 3419; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 3420; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3421; RV64ZVE32F-NEXT: slli a2, a2, 2 3422; RV64ZVE32F-NEXT: add a2, a0, a2 3423; RV64ZVE32F-NEXT: lw a2, 0(a2) 3424; RV64ZVE32F-NEXT: vmv.s.x v12, a2 3425; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 3426; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 3427; RV64ZVE32F-NEXT: .LBB41_11: # %else17 3428; RV64ZVE32F-NEXT: andi a1, a1, -128 3429; RV64ZVE32F-NEXT: beqz a1, .LBB41_13 3430; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 3431; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3432; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3433; RV64ZVE32F-NEXT: vmv.x.s a1, v8 3434; RV64ZVE32F-NEXT: slli a1, a1, 2 3435; RV64ZVE32F-NEXT: add a0, a0, a1 3436; RV64ZVE32F-NEXT: lw a0, 0(a0) 3437; RV64ZVE32F-NEXT: vmv.s.x v8, a0 3438; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3439; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 3440; RV64ZVE32F-NEXT: .LBB41_13: # %else20 3441; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3442; RV64ZVE32F-NEXT: vmv2r.v v8, v10 3443; RV64ZVE32F-NEXT: ret 3444; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load4 3445; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3446; RV64ZVE32F-NEXT: slli a2, a2, 2 3447; RV64ZVE32F-NEXT: add a2, a0, a2 3448; RV64ZVE32F-NEXT: lw a2, 0(a2) 3449; RV64ZVE32F-NEXT: vmv.s.x v9, a2 3450; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 3451; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 3452; RV64ZVE32F-NEXT: andi a2, a1, 8 3453; RV64ZVE32F-NEXT: beqz a2, .LBB41_6 3454; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load7 3455; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 3456; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3457; RV64ZVE32F-NEXT: vmv.x.s a2, v8 3458; RV64ZVE32F-NEXT: slli a2, a2, 2 3459; RV64ZVE32F-NEXT: add a2, a0, a2 3460; RV64ZVE32F-NEXT: lw a2, 0(a2) 3461; RV64ZVE32F-NEXT: vmv.s.x v8, a2 3462; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 3463; RV64ZVE32F-NEXT: andi a2, a1, 16 3464; RV64ZVE32F-NEXT: beqz a2, .LBB41_7 3465; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load10 3466; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 3467; RV64ZVE32F-NEXT: vmv.x.s a2, v12 3468; RV64ZVE32F-NEXT: slli a2, a2, 2 3469; RV64ZVE32F-NEXT: add a2, a0, a2 3470; RV64ZVE32F-NEXT: lw a2, 0(a2) 3471; RV64ZVE32F-NEXT: vmv.s.x v8, a2 3472; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 3473; RV64ZVE32F-NEXT: andi a2, a1, 32 3474; RV64ZVE32F-NEXT: bnez a2, .LBB41_8 3475; RV64ZVE32F-NEXT: j .LBB41_9 3476 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs 3477 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) 3478 ret <8 x i32> %v 3479} 3480 3481declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>) 3482 3483define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthru) { 3484; RV32V-LABEL: mgather_v1i64: 3485; RV32V: # %bb.0: 3486; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu 3487; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 3488; RV32V-NEXT: vmv.v.v v8, v9 3489; RV32V-NEXT: ret 3490; 3491; RV64V-LABEL: mgather_v1i64: 3492; RV64V: # %bb.0: 3493; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu 3494; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 3495; RV64V-NEXT: vmv.v.v v8, v9 3496; RV64V-NEXT: ret 3497; 3498; RV32ZVE32F-LABEL: mgather_v1i64: 3499; RV32ZVE32F: # %bb.0: 3500; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 3501; RV32ZVE32F-NEXT: vfirst.m a2, v0 3502; RV32ZVE32F-NEXT: bnez a2, .LBB42_2 3503; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 3504; 
RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3505; RV32ZVE32F-NEXT: vmv.x.s a1, v8 3506; RV32ZVE32F-NEXT: lw a0, 0(a1) 3507; RV32ZVE32F-NEXT: lw a1, 4(a1) 3508; RV32ZVE32F-NEXT: .LBB42_2: # %else 3509; RV32ZVE32F-NEXT: ret 3510; 3511; RV64ZVE32F-LABEL: mgather_v1i64: 3512; RV64ZVE32F: # %bb.0: 3513; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma 3514; RV64ZVE32F-NEXT: vfirst.m a2, v0 3515; RV64ZVE32F-NEXT: bnez a2, .LBB42_2 3516; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 3517; RV64ZVE32F-NEXT: ld a1, 0(a0) 3518; RV64ZVE32F-NEXT: .LBB42_2: # %else 3519; RV64ZVE32F-NEXT: mv a0, a1 3520; RV64ZVE32F-NEXT: ret 3521 %v = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x i64> %passthru) 3522 ret <1 x i64> %v 3523} 3524 3525declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) 3526 3527define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthru) { 3528; RV32V-LABEL: mgather_v2i64: 3529; RV32V: # %bb.0: 3530; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu 3531; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 3532; RV32V-NEXT: vmv.v.v v8, v9 3533; RV32V-NEXT: ret 3534; 3535; RV64V-LABEL: mgather_v2i64: 3536; RV64V: # %bb.0: 3537; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu 3538; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 3539; RV64V-NEXT: vmv.v.v v8, v9 3540; RV64V-NEXT: ret 3541; 3542; RV32ZVE32F-LABEL: mgather_v2i64: 3543; RV32ZVE32F: # %bb.0: 3544; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3545; RV32ZVE32F-NEXT: vmv.x.s a4, v0 3546; RV32ZVE32F-NEXT: andi a2, a4, 1 3547; RV32ZVE32F-NEXT: beqz a2, .LBB43_3 3548; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 3549; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 3550; RV32ZVE32F-NEXT: vmv.x.s a3, v8 3551; RV32ZVE32F-NEXT: lw a2, 0(a3) 3552; RV32ZVE32F-NEXT: lw a3, 4(a3) 3553; RV32ZVE32F-NEXT: andi a4, a4, 2 3554; RV32ZVE32F-NEXT: bnez a4, .LBB43_4 3555; RV32ZVE32F-NEXT: .LBB43_2: 3556; RV32ZVE32F-NEXT: lw a4, 8(a1) 3557; RV32ZVE32F-NEXT: lw a1, 12(a1) 3558; RV32ZVE32F-NEXT: j .LBB43_5 3559; RV32ZVE32F-NEXT: .LBB43_3: 3560; RV32ZVE32F-NEXT: lw a2, 0(a1) 3561; RV32ZVE32F-NEXT: lw a3, 4(a1) 3562; RV32ZVE32F-NEXT: andi a4, a4, 2 3563; RV32ZVE32F-NEXT: beqz a4, .LBB43_2 3564; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1 3565; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3566; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 3567; RV32ZVE32F-NEXT: vmv.x.s a1, v8 3568; RV32ZVE32F-NEXT: lw a4, 0(a1) 3569; RV32ZVE32F-NEXT: lw a1, 4(a1) 3570; RV32ZVE32F-NEXT: .LBB43_5: # %else2 3571; RV32ZVE32F-NEXT: sw a2, 0(a0) 3572; RV32ZVE32F-NEXT: sw a3, 4(a0) 3573; RV32ZVE32F-NEXT: sw a4, 8(a0) 3574; RV32ZVE32F-NEXT: sw a1, 12(a0) 3575; RV32ZVE32F-NEXT: ret 3576; 3577; RV64ZVE32F-LABEL: mgather_v2i64: 3578; RV64ZVE32F: # %bb.0: 3579; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3580; RV64ZVE32F-NEXT: vmv.x.s a4, v0 3581; RV64ZVE32F-NEXT: andi a5, a4, 1 3582; RV64ZVE32F-NEXT: beqz a5, .LBB43_2 3583; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 3584; RV64ZVE32F-NEXT: ld a2, 0(a0) 3585; RV64ZVE32F-NEXT: .LBB43_2: # %else 3586; RV64ZVE32F-NEXT: andi a4, a4, 2 3587; RV64ZVE32F-NEXT: beqz a4, .LBB43_4 3588; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 3589; RV64ZVE32F-NEXT: ld a3, 0(a1) 3590; RV64ZVE32F-NEXT: .LBB43_4: # %else2 3591; RV64ZVE32F-NEXT: mv a0, a2 3592; RV64ZVE32F-NEXT: mv a1, a3 3593; RV64ZVE32F-NEXT: ret 3594 %v = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x i64> %passthru) 3595 ret <2 x i64> %v 3596} 3597 3598declare <4 x i64> 
@llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>) 3599 3600define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthru) { 3601; RV32V-LABEL: mgather_v4i64: 3602; RV32V: # %bb.0: 3603; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu 3604; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t 3605; RV32V-NEXT: vmv.v.v v8, v10 3606; RV32V-NEXT: ret 3607; 3608; RV64V-LABEL: mgather_v4i64: 3609; RV64V: # %bb.0: 3610; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu 3611; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t 3612; RV64V-NEXT: vmv.v.v v8, v10 3613; RV64V-NEXT: ret 3614; 3615; RV32ZVE32F-LABEL: mgather_v4i64: 3616; RV32ZVE32F: # %bb.0: 3617; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3618; RV32ZVE32F-NEXT: vmv.x.s a6, v0 3619; RV32ZVE32F-NEXT: andi a2, a6, 1 3620; RV32ZVE32F-NEXT: beqz a2, .LBB44_5 3621; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 3622; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 3623; RV32ZVE32F-NEXT: vmv.x.s a3, v8 3624; RV32ZVE32F-NEXT: lw a2, 0(a3) 3625; RV32ZVE32F-NEXT: lw a3, 4(a3) 3626; RV32ZVE32F-NEXT: andi a4, a6, 2 3627; RV32ZVE32F-NEXT: bnez a4, .LBB44_6 3628; RV32ZVE32F-NEXT: .LBB44_2: 3629; RV32ZVE32F-NEXT: lw a4, 8(a1) 3630; RV32ZVE32F-NEXT: lw a5, 12(a1) 3631; RV32ZVE32F-NEXT: andi a7, a6, 4 3632; RV32ZVE32F-NEXT: bnez a7, .LBB44_7 3633; RV32ZVE32F-NEXT: .LBB44_3: 3634; RV32ZVE32F-NEXT: lw a7, 16(a1) 3635; RV32ZVE32F-NEXT: lw t0, 20(a1) 3636; RV32ZVE32F-NEXT: andi a6, a6, 8 3637; RV32ZVE32F-NEXT: bnez a6, .LBB44_8 3638; RV32ZVE32F-NEXT: .LBB44_4: 3639; RV32ZVE32F-NEXT: lw a6, 24(a1) 3640; RV32ZVE32F-NEXT: lw a1, 28(a1) 3641; RV32ZVE32F-NEXT: j .LBB44_9 3642; RV32ZVE32F-NEXT: .LBB44_5: 3643; RV32ZVE32F-NEXT: lw a2, 0(a1) 3644; RV32ZVE32F-NEXT: lw a3, 4(a1) 3645; RV32ZVE32F-NEXT: andi a4, a6, 2 3646; RV32ZVE32F-NEXT: beqz a4, .LBB44_2 3647; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1 3648; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3649; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3650; RV32ZVE32F-NEXT: vmv.x.s a5, v9 3651; RV32ZVE32F-NEXT: lw a4, 0(a5) 3652; RV32ZVE32F-NEXT: lw a5, 4(a5) 3653; RV32ZVE32F-NEXT: andi a7, a6, 4 3654; RV32ZVE32F-NEXT: beqz a7, .LBB44_3 3655; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4 3656; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3657; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 3658; RV32ZVE32F-NEXT: vmv.x.s t0, v9 3659; RV32ZVE32F-NEXT: lw a7, 0(t0) 3660; RV32ZVE32F-NEXT: lw t0, 4(t0) 3661; RV32ZVE32F-NEXT: andi a6, a6, 8 3662; RV32ZVE32F-NEXT: beqz a6, .LBB44_4 3663; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7 3664; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3665; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 3666; RV32ZVE32F-NEXT: vmv.x.s a1, v8 3667; RV32ZVE32F-NEXT: lw a6, 0(a1) 3668; RV32ZVE32F-NEXT: lw a1, 4(a1) 3669; RV32ZVE32F-NEXT: .LBB44_9: # %else8 3670; RV32ZVE32F-NEXT: sw a2, 0(a0) 3671; RV32ZVE32F-NEXT: sw a3, 4(a0) 3672; RV32ZVE32F-NEXT: sw a4, 8(a0) 3673; RV32ZVE32F-NEXT: sw a5, 12(a0) 3674; RV32ZVE32F-NEXT: sw a7, 16(a0) 3675; RV32ZVE32F-NEXT: sw t0, 20(a0) 3676; RV32ZVE32F-NEXT: sw a6, 24(a0) 3677; RV32ZVE32F-NEXT: sw a1, 28(a0) 3678; RV32ZVE32F-NEXT: ret 3679; 3680; RV64ZVE32F-LABEL: mgather_v4i64: 3681; RV64ZVE32F: # %bb.0: 3682; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3683; RV64ZVE32F-NEXT: vmv.x.s a5, v0 3684; RV64ZVE32F-NEXT: andi a3, a5, 1 3685; RV64ZVE32F-NEXT: beqz a3, .LBB44_5 3686; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 3687; RV64ZVE32F-NEXT: ld a3, 0(a1) 3688; RV64ZVE32F-NEXT: ld a3, 0(a3) 3689; RV64ZVE32F-NEXT: andi a4, a5, 2 3690; 
RV64ZVE32F-NEXT: bnez a4, .LBB44_6 3691; RV64ZVE32F-NEXT: .LBB44_2: 3692; RV64ZVE32F-NEXT: ld a4, 8(a2) 3693; RV64ZVE32F-NEXT: andi a6, a5, 4 3694; RV64ZVE32F-NEXT: bnez a6, .LBB44_7 3695; RV64ZVE32F-NEXT: .LBB44_3: 3696; RV64ZVE32F-NEXT: ld a6, 16(a2) 3697; RV64ZVE32F-NEXT: andi a5, a5, 8 3698; RV64ZVE32F-NEXT: bnez a5, .LBB44_8 3699; RV64ZVE32F-NEXT: .LBB44_4: 3700; RV64ZVE32F-NEXT: ld a1, 24(a2) 3701; RV64ZVE32F-NEXT: j .LBB44_9 3702; RV64ZVE32F-NEXT: .LBB44_5: 3703; RV64ZVE32F-NEXT: ld a3, 0(a2) 3704; RV64ZVE32F-NEXT: andi a4, a5, 2 3705; RV64ZVE32F-NEXT: beqz a4, .LBB44_2 3706; RV64ZVE32F-NEXT: .LBB44_6: # %cond.load1 3707; RV64ZVE32F-NEXT: ld a4, 8(a1) 3708; RV64ZVE32F-NEXT: ld a4, 0(a4) 3709; RV64ZVE32F-NEXT: andi a6, a5, 4 3710; RV64ZVE32F-NEXT: beqz a6, .LBB44_3 3711; RV64ZVE32F-NEXT: .LBB44_7: # %cond.load4 3712; RV64ZVE32F-NEXT: ld a6, 16(a1) 3713; RV64ZVE32F-NEXT: ld a6, 0(a6) 3714; RV64ZVE32F-NEXT: andi a5, a5, 8 3715; RV64ZVE32F-NEXT: beqz a5, .LBB44_4 3716; RV64ZVE32F-NEXT: .LBB44_8: # %cond.load7 3717; RV64ZVE32F-NEXT: ld a1, 24(a1) 3718; RV64ZVE32F-NEXT: ld a1, 0(a1) 3719; RV64ZVE32F-NEXT: .LBB44_9: # %else8 3720; RV64ZVE32F-NEXT: sd a3, 0(a0) 3721; RV64ZVE32F-NEXT: sd a4, 8(a0) 3722; RV64ZVE32F-NEXT: sd a6, 16(a0) 3723; RV64ZVE32F-NEXT: sd a1, 24(a0) 3724; RV64ZVE32F-NEXT: ret 3725 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x i64> %passthru) 3726 ret <4 x i64> %v 3727} 3728 3729define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) { 3730; RV32V-LABEL: mgather_truemask_v4i64: 3731; RV32V: # %bb.0: 3732; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 3733; RV32V-NEXT: vluxei32.v v10, (zero), v8 3734; RV32V-NEXT: vmv.v.v v8, v10 3735; RV32V-NEXT: ret 3736; 3737; RV64V-LABEL: mgather_truemask_v4i64: 3738; RV64V: # %bb.0: 3739; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 3740; RV64V-NEXT: vluxei64.v v8, (zero), v8 3741; RV64V-NEXT: ret 3742; 3743; RV32ZVE32F-LABEL: mgather_truemask_v4i64: 3744; RV32ZVE32F: # %bb.0: 3745; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3746; RV32ZVE32F-NEXT: vmv.x.s a1, v8 3747; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 3748; RV32ZVE32F-NEXT: vmv.x.s a2, v9 3749; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 3750; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 3751; RV32ZVE32F-NEXT: lw a3, 0(a1) 3752; RV32ZVE32F-NEXT: lw a1, 4(a1) 3753; RV32ZVE32F-NEXT: vmv.x.s a4, v9 3754; RV32ZVE32F-NEXT: vmv.x.s a5, v8 3755; RV32ZVE32F-NEXT: lw a6, 0(a2) 3756; RV32ZVE32F-NEXT: lw a2, 4(a2) 3757; RV32ZVE32F-NEXT: lw a7, 0(a4) 3758; RV32ZVE32F-NEXT: lw a4, 4(a4) 3759; RV32ZVE32F-NEXT: lw t0, 0(a5) 3760; RV32ZVE32F-NEXT: lw a5, 4(a5) 3761; RV32ZVE32F-NEXT: sw a7, 16(a0) 3762; RV32ZVE32F-NEXT: sw a4, 20(a0) 3763; RV32ZVE32F-NEXT: sw t0, 24(a0) 3764; RV32ZVE32F-NEXT: sw a5, 28(a0) 3765; RV32ZVE32F-NEXT: sw a3, 0(a0) 3766; RV32ZVE32F-NEXT: sw a1, 4(a0) 3767; RV32ZVE32F-NEXT: sw a6, 8(a0) 3768; RV32ZVE32F-NEXT: sw a2, 12(a0) 3769; RV32ZVE32F-NEXT: ret 3770; 3771; RV64ZVE32F-LABEL: mgather_truemask_v4i64: 3772; RV64ZVE32F: # %bb.0: 3773; RV64ZVE32F-NEXT: ld a2, 0(a1) 3774; RV64ZVE32F-NEXT: ld a3, 8(a1) 3775; RV64ZVE32F-NEXT: ld a4, 16(a1) 3776; RV64ZVE32F-NEXT: ld a1, 24(a1) 3777; RV64ZVE32F-NEXT: ld a2, 0(a2) 3778; RV64ZVE32F-NEXT: ld a3, 0(a3) 3779; RV64ZVE32F-NEXT: ld a4, 0(a4) 3780; RV64ZVE32F-NEXT: ld a1, 0(a1) 3781; RV64ZVE32F-NEXT: sd a2, 0(a0) 3782; RV64ZVE32F-NEXT: sd a3, 8(a0) 3783; RV64ZVE32F-NEXT: sd a4, 16(a0) 3784; RV64ZVE32F-NEXT: sd a1, 24(a0) 3785; RV64ZVE32F-NEXT: ret 3786 %v = call 
<4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru) 3787 ret <4 x i64> %v 3788} 3789 3790define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) { 3791; RV32V-LABEL: mgather_falsemask_v4i64: 3792; RV32V: # %bb.0: 3793; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3794; RV32V-NEXT: vmv2r.v v8, v10 3795; RV32V-NEXT: ret 3796; 3797; RV64V-LABEL: mgather_falsemask_v4i64: 3798; RV64V: # %bb.0: 3799; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3800; RV64V-NEXT: vmv2r.v v8, v10 3801; RV64V-NEXT: ret 3802; 3803; RV32ZVE32F-LABEL: mgather_falsemask_v4i64: 3804; RV32ZVE32F: # %bb.0: 3805; RV32ZVE32F-NEXT: lw a2, 0(a1) 3806; RV32ZVE32F-NEXT: lw a3, 4(a1) 3807; RV32ZVE32F-NEXT: lw a4, 8(a1) 3808; RV32ZVE32F-NEXT: lw a5, 12(a1) 3809; RV32ZVE32F-NEXT: lw a6, 16(a1) 3810; RV32ZVE32F-NEXT: lw a7, 20(a1) 3811; RV32ZVE32F-NEXT: lw t0, 24(a1) 3812; RV32ZVE32F-NEXT: lw a1, 28(a1) 3813; RV32ZVE32F-NEXT: sw a6, 16(a0) 3814; RV32ZVE32F-NEXT: sw a7, 20(a0) 3815; RV32ZVE32F-NEXT: sw t0, 24(a0) 3816; RV32ZVE32F-NEXT: sw a1, 28(a0) 3817; RV32ZVE32F-NEXT: sw a2, 0(a0) 3818; RV32ZVE32F-NEXT: sw a3, 4(a0) 3819; RV32ZVE32F-NEXT: sw a4, 8(a0) 3820; RV32ZVE32F-NEXT: sw a5, 12(a0) 3821; RV32ZVE32F-NEXT: ret 3822; 3823; RV64ZVE32F-LABEL: mgather_falsemask_v4i64: 3824; RV64ZVE32F: # %bb.0: 3825; RV64ZVE32F-NEXT: ld a1, 0(a2) 3826; RV64ZVE32F-NEXT: ld a3, 8(a2) 3827; RV64ZVE32F-NEXT: ld a4, 16(a2) 3828; RV64ZVE32F-NEXT: ld a2, 24(a2) 3829; RV64ZVE32F-NEXT: sd a1, 0(a0) 3830; RV64ZVE32F-NEXT: sd a3, 8(a0) 3831; RV64ZVE32F-NEXT: sd a4, 16(a0) 3832; RV64ZVE32F-NEXT: sd a2, 24(a0) 3833; RV64ZVE32F-NEXT: ret 3834 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru) 3835 ret <4 x i64> %v 3836} 3837 3838declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>) 3839 3840define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthru) { 3841; RV32V-LABEL: mgather_v8i64: 3842; RV32V: # %bb.0: 3843; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 3844; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t 3845; RV32V-NEXT: vmv.v.v v8, v12 3846; RV32V-NEXT: ret 3847; 3848; RV64V-LABEL: mgather_v8i64: 3849; RV64V: # %bb.0: 3850; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 3851; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t 3852; RV64V-NEXT: vmv.v.v v8, v12 3853; RV64V-NEXT: ret 3854; 3855; RV32ZVE32F-LABEL: mgather_v8i64: 3856; RV32ZVE32F: # %bb.0: 3857; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3858; RV32ZVE32F-NEXT: vmv.x.s t0, v0 3859; RV32ZVE32F-NEXT: andi a2, t0, 1 3860; RV32ZVE32F-NEXT: beqz a2, .LBB47_7 3861; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 3862; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 3863; RV32ZVE32F-NEXT: vmv.x.s a3, v8 3864; RV32ZVE32F-NEXT: lw a2, 0(a3) 3865; RV32ZVE32F-NEXT: lw a3, 4(a3) 3866; RV32ZVE32F-NEXT: andi a4, t0, 2 3867; RV32ZVE32F-NEXT: bnez a4, .LBB47_8 3868; RV32ZVE32F-NEXT: .LBB47_2: 3869; RV32ZVE32F-NEXT: lw a4, 8(a1) 3870; RV32ZVE32F-NEXT: lw a5, 12(a1) 3871; RV32ZVE32F-NEXT: andi a6, t0, 4 3872; RV32ZVE32F-NEXT: bnez a6, .LBB47_9 3873; RV32ZVE32F-NEXT: .LBB47_3: 3874; RV32ZVE32F-NEXT: lw a6, 16(a1) 3875; RV32ZVE32F-NEXT: lw a7, 20(a1) 3876; RV32ZVE32F-NEXT: andi t1, t0, 8 3877; RV32ZVE32F-NEXT: bnez t1, .LBB47_10 3878; RV32ZVE32F-NEXT: .LBB47_4: 3879; RV32ZVE32F-NEXT: lw t1, 24(a1) 3880; RV32ZVE32F-NEXT: lw t2, 28(a1) 3881; RV32ZVE32F-NEXT: andi t3, t0, 16 3882; RV32ZVE32F-NEXT: bnez t3, .LBB47_11 
3883; RV32ZVE32F-NEXT: .LBB47_5: 3884; RV32ZVE32F-NEXT: lw t3, 32(a1) 3885; RV32ZVE32F-NEXT: lw t4, 36(a1) 3886; RV32ZVE32F-NEXT: andi t5, t0, 32 3887; RV32ZVE32F-NEXT: bnez t5, .LBB47_12 3888; RV32ZVE32F-NEXT: .LBB47_6: 3889; RV32ZVE32F-NEXT: lw t5, 40(a1) 3890; RV32ZVE32F-NEXT: lw t6, 44(a1) 3891; RV32ZVE32F-NEXT: j .LBB47_13 3892; RV32ZVE32F-NEXT: .LBB47_7: 3893; RV32ZVE32F-NEXT: lw a2, 0(a1) 3894; RV32ZVE32F-NEXT: lw a3, 4(a1) 3895; RV32ZVE32F-NEXT: andi a4, t0, 2 3896; RV32ZVE32F-NEXT: beqz a4, .LBB47_2 3897; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1 3898; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3899; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 3900; RV32ZVE32F-NEXT: vmv.x.s a5, v10 3901; RV32ZVE32F-NEXT: lw a4, 0(a5) 3902; RV32ZVE32F-NEXT: lw a5, 4(a5) 3903; RV32ZVE32F-NEXT: andi a6, t0, 4 3904; RV32ZVE32F-NEXT: beqz a6, .LBB47_3 3905; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4 3906; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3907; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 3908; RV32ZVE32F-NEXT: vmv.x.s a7, v10 3909; RV32ZVE32F-NEXT: lw a6, 0(a7) 3910; RV32ZVE32F-NEXT: lw a7, 4(a7) 3911; RV32ZVE32F-NEXT: andi t1, t0, 8 3912; RV32ZVE32F-NEXT: beqz t1, .LBB47_4 3913; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7 3914; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 3915; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 3916; RV32ZVE32F-NEXT: vmv.x.s t2, v10 3917; RV32ZVE32F-NEXT: lw t1, 0(t2) 3918; RV32ZVE32F-NEXT: lw t2, 4(t2) 3919; RV32ZVE32F-NEXT: andi t3, t0, 16 3920; RV32ZVE32F-NEXT: beqz t3, .LBB47_5 3921; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10 3922; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3923; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 3924; RV32ZVE32F-NEXT: vmv.x.s t4, v10 3925; RV32ZVE32F-NEXT: lw t3, 0(t4) 3926; RV32ZVE32F-NEXT: lw t4, 4(t4) 3927; RV32ZVE32F-NEXT: andi t5, t0, 32 3928; RV32ZVE32F-NEXT: beqz t5, .LBB47_6 3929; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13 3930; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3931; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 3932; RV32ZVE32F-NEXT: vmv.x.s t6, v10 3933; RV32ZVE32F-NEXT: lw t5, 0(t6) 3934; RV32ZVE32F-NEXT: lw t6, 4(t6) 3935; RV32ZVE32F-NEXT: .LBB47_13: # %else14 3936; RV32ZVE32F-NEXT: addi sp, sp, -16 3937; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 3938; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 3939; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 3940; RV32ZVE32F-NEXT: .cfi_offset s0, -4 3941; RV32ZVE32F-NEXT: .cfi_offset s1, -8 3942; RV32ZVE32F-NEXT: andi s0, t0, 64 3943; RV32ZVE32F-NEXT: beqz s0, .LBB47_16 3944; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 3945; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3946; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 3947; RV32ZVE32F-NEXT: vmv.x.s s1, v10 3948; RV32ZVE32F-NEXT: lw s0, 0(s1) 3949; RV32ZVE32F-NEXT: lw s1, 4(s1) 3950; RV32ZVE32F-NEXT: andi t0, t0, -128 3951; RV32ZVE32F-NEXT: bnez t0, .LBB47_17 3952; RV32ZVE32F-NEXT: .LBB47_15: 3953; RV32ZVE32F-NEXT: lw t0, 56(a1) 3954; RV32ZVE32F-NEXT: lw a1, 60(a1) 3955; RV32ZVE32F-NEXT: j .LBB47_18 3956; RV32ZVE32F-NEXT: .LBB47_16: 3957; RV32ZVE32F-NEXT: lw s0, 48(a1) 3958; RV32ZVE32F-NEXT: lw s1, 52(a1) 3959; RV32ZVE32F-NEXT: andi t0, t0, -128 3960; RV32ZVE32F-NEXT: beqz t0, .LBB47_15 3961; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19 3962; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 3963; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 3964; RV32ZVE32F-NEXT: vmv.x.s a1, v8 3965; RV32ZVE32F-NEXT: lw t0, 0(a1) 3966; RV32ZVE32F-NEXT: lw a1, 4(a1) 3967; RV32ZVE32F-NEXT: .LBB47_18: # %else20 
3968; RV32ZVE32F-NEXT: sw a2, 0(a0) 3969; RV32ZVE32F-NEXT: sw a3, 4(a0) 3970; RV32ZVE32F-NEXT: sw a4, 8(a0) 3971; RV32ZVE32F-NEXT: sw a5, 12(a0) 3972; RV32ZVE32F-NEXT: sw a6, 16(a0) 3973; RV32ZVE32F-NEXT: sw a7, 20(a0) 3974; RV32ZVE32F-NEXT: sw t1, 24(a0) 3975; RV32ZVE32F-NEXT: sw t2, 28(a0) 3976; RV32ZVE32F-NEXT: sw t3, 32(a0) 3977; RV32ZVE32F-NEXT: sw t4, 36(a0) 3978; RV32ZVE32F-NEXT: sw t5, 40(a0) 3979; RV32ZVE32F-NEXT: sw t6, 44(a0) 3980; RV32ZVE32F-NEXT: sw s0, 48(a0) 3981; RV32ZVE32F-NEXT: sw s1, 52(a0) 3982; RV32ZVE32F-NEXT: sw t0, 56(a0) 3983; RV32ZVE32F-NEXT: sw a1, 60(a0) 3984; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 3985; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 3986; RV32ZVE32F-NEXT: .cfi_restore s0 3987; RV32ZVE32F-NEXT: .cfi_restore s1 3988; RV32ZVE32F-NEXT: addi sp, sp, 16 3989; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 3990; RV32ZVE32F-NEXT: ret 3991; 3992; RV64ZVE32F-LABEL: mgather_v8i64: 3993; RV64ZVE32F: # %bb.0: 3994; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 3995; RV64ZVE32F-NEXT: vmv.x.s a6, v0 3996; RV64ZVE32F-NEXT: andi a3, a6, 1 3997; RV64ZVE32F-NEXT: beqz a3, .LBB47_9 3998; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 3999; RV64ZVE32F-NEXT: ld a3, 0(a1) 4000; RV64ZVE32F-NEXT: ld a3, 0(a3) 4001; RV64ZVE32F-NEXT: andi a4, a6, 2 4002; RV64ZVE32F-NEXT: bnez a4, .LBB47_10 4003; RV64ZVE32F-NEXT: .LBB47_2: 4004; RV64ZVE32F-NEXT: ld a4, 8(a2) 4005; RV64ZVE32F-NEXT: andi a5, a6, 4 4006; RV64ZVE32F-NEXT: bnez a5, .LBB47_11 4007; RV64ZVE32F-NEXT: .LBB47_3: 4008; RV64ZVE32F-NEXT: ld a5, 16(a2) 4009; RV64ZVE32F-NEXT: andi a7, a6, 8 4010; RV64ZVE32F-NEXT: bnez a7, .LBB47_12 4011; RV64ZVE32F-NEXT: .LBB47_4: 4012; RV64ZVE32F-NEXT: ld a7, 24(a2) 4013; RV64ZVE32F-NEXT: andi t0, a6, 16 4014; RV64ZVE32F-NEXT: bnez t0, .LBB47_13 4015; RV64ZVE32F-NEXT: .LBB47_5: 4016; RV64ZVE32F-NEXT: ld t0, 32(a2) 4017; RV64ZVE32F-NEXT: andi t1, a6, 32 4018; RV64ZVE32F-NEXT: bnez t1, .LBB47_14 4019; RV64ZVE32F-NEXT: .LBB47_6: 4020; RV64ZVE32F-NEXT: ld t1, 40(a2) 4021; RV64ZVE32F-NEXT: andi t2, a6, 64 4022; RV64ZVE32F-NEXT: bnez t2, .LBB47_15 4023; RV64ZVE32F-NEXT: .LBB47_7: 4024; RV64ZVE32F-NEXT: ld t2, 48(a2) 4025; RV64ZVE32F-NEXT: andi a6, a6, -128 4026; RV64ZVE32F-NEXT: bnez a6, .LBB47_16 4027; RV64ZVE32F-NEXT: .LBB47_8: 4028; RV64ZVE32F-NEXT: ld a1, 56(a2) 4029; RV64ZVE32F-NEXT: j .LBB47_17 4030; RV64ZVE32F-NEXT: .LBB47_9: 4031; RV64ZVE32F-NEXT: ld a3, 0(a2) 4032; RV64ZVE32F-NEXT: andi a4, a6, 2 4033; RV64ZVE32F-NEXT: beqz a4, .LBB47_2 4034; RV64ZVE32F-NEXT: .LBB47_10: # %cond.load1 4035; RV64ZVE32F-NEXT: ld a4, 8(a1) 4036; RV64ZVE32F-NEXT: ld a4, 0(a4) 4037; RV64ZVE32F-NEXT: andi a5, a6, 4 4038; RV64ZVE32F-NEXT: beqz a5, .LBB47_3 4039; RV64ZVE32F-NEXT: .LBB47_11: # %cond.load4 4040; RV64ZVE32F-NEXT: ld a5, 16(a1) 4041; RV64ZVE32F-NEXT: ld a5, 0(a5) 4042; RV64ZVE32F-NEXT: andi a7, a6, 8 4043; RV64ZVE32F-NEXT: beqz a7, .LBB47_4 4044; RV64ZVE32F-NEXT: .LBB47_12: # %cond.load7 4045; RV64ZVE32F-NEXT: ld a7, 24(a1) 4046; RV64ZVE32F-NEXT: ld a7, 0(a7) 4047; RV64ZVE32F-NEXT: andi t0, a6, 16 4048; RV64ZVE32F-NEXT: beqz t0, .LBB47_5 4049; RV64ZVE32F-NEXT: .LBB47_13: # %cond.load10 4050; RV64ZVE32F-NEXT: ld t0, 32(a1) 4051; RV64ZVE32F-NEXT: ld t0, 0(t0) 4052; RV64ZVE32F-NEXT: andi t1, a6, 32 4053; RV64ZVE32F-NEXT: beqz t1, .LBB47_6 4054; RV64ZVE32F-NEXT: .LBB47_14: # %cond.load13 4055; RV64ZVE32F-NEXT: ld t1, 40(a1) 4056; RV64ZVE32F-NEXT: ld t1, 0(t1) 4057; RV64ZVE32F-NEXT: andi t2, a6, 64 4058; RV64ZVE32F-NEXT: beqz t2, .LBB47_7 4059; RV64ZVE32F-NEXT: .LBB47_15: # %cond.load16 
4060; RV64ZVE32F-NEXT: ld t2, 48(a1) 4061; RV64ZVE32F-NEXT: ld t2, 0(t2) 4062; RV64ZVE32F-NEXT: andi a6, a6, -128 4063; RV64ZVE32F-NEXT: beqz a6, .LBB47_8 4064; RV64ZVE32F-NEXT: .LBB47_16: # %cond.load19 4065; RV64ZVE32F-NEXT: ld a1, 56(a1) 4066; RV64ZVE32F-NEXT: ld a1, 0(a1) 4067; RV64ZVE32F-NEXT: .LBB47_17: # %else20 4068; RV64ZVE32F-NEXT: sd a3, 0(a0) 4069; RV64ZVE32F-NEXT: sd a4, 8(a0) 4070; RV64ZVE32F-NEXT: sd a5, 16(a0) 4071; RV64ZVE32F-NEXT: sd a7, 24(a0) 4072; RV64ZVE32F-NEXT: sd t0, 32(a0) 4073; RV64ZVE32F-NEXT: sd t1, 40(a0) 4074; RV64ZVE32F-NEXT: sd t2, 48(a0) 4075; RV64ZVE32F-NEXT: sd a1, 56(a0) 4076; RV64ZVE32F-NEXT: ret 4077 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 4078 ret <8 x i64> %v 4079} 4080 4081define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 4082; RV32V-LABEL: mgather_baseidx_v8i8_v8i64: 4083; RV32V: # %bb.0: 4084; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4085; RV32V-NEXT: vsext.vf4 v10, v8 4086; RV32V-NEXT: vsll.vi v8, v10, 3 4087; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 4088; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 4089; RV32V-NEXT: vmv.v.v v8, v12 4090; RV32V-NEXT: ret 4091; 4092; RV64V-LABEL: mgather_baseidx_v8i8_v8i64: 4093; RV64V: # %bb.0: 4094; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 4095; RV64V-NEXT: vsext.vf8 v16, v8 4096; RV64V-NEXT: vsll.vi v8, v16, 3 4097; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 4098; RV64V-NEXT: vmv.v.v v8, v12 4099; RV64V-NEXT: ret 4100; 4101; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64: 4102; RV32ZVE32F: # %bb.0: 4103; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4104; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 4105; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 4106; RV32ZVE32F-NEXT: vmv.x.s t0, v0 4107; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4108; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 4109; RV32ZVE32F-NEXT: andi a3, t0, 1 4110; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 4111; RV32ZVE32F-NEXT: beqz a3, .LBB48_7 4112; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 4113; RV32ZVE32F-NEXT: vmv.x.s a3, v8 4114; RV32ZVE32F-NEXT: lw a1, 0(a3) 4115; RV32ZVE32F-NEXT: lw a3, 4(a3) 4116; RV32ZVE32F-NEXT: andi a4, t0, 2 4117; RV32ZVE32F-NEXT: bnez a4, .LBB48_8 4118; RV32ZVE32F-NEXT: .LBB48_2: 4119; RV32ZVE32F-NEXT: lw a4, 8(a2) 4120; RV32ZVE32F-NEXT: lw a5, 12(a2) 4121; RV32ZVE32F-NEXT: andi a6, t0, 4 4122; RV32ZVE32F-NEXT: bnez a6, .LBB48_9 4123; RV32ZVE32F-NEXT: .LBB48_3: 4124; RV32ZVE32F-NEXT: lw a6, 16(a2) 4125; RV32ZVE32F-NEXT: lw a7, 20(a2) 4126; RV32ZVE32F-NEXT: andi t1, t0, 8 4127; RV32ZVE32F-NEXT: bnez t1, .LBB48_10 4128; RV32ZVE32F-NEXT: .LBB48_4: 4129; RV32ZVE32F-NEXT: lw t1, 24(a2) 4130; RV32ZVE32F-NEXT: lw t2, 28(a2) 4131; RV32ZVE32F-NEXT: andi t3, t0, 16 4132; RV32ZVE32F-NEXT: bnez t3, .LBB48_11 4133; RV32ZVE32F-NEXT: .LBB48_5: 4134; RV32ZVE32F-NEXT: lw t3, 32(a2) 4135; RV32ZVE32F-NEXT: lw t4, 36(a2) 4136; RV32ZVE32F-NEXT: andi t5, t0, 32 4137; RV32ZVE32F-NEXT: bnez t5, .LBB48_12 4138; RV32ZVE32F-NEXT: .LBB48_6: 4139; RV32ZVE32F-NEXT: lw t5, 40(a2) 4140; RV32ZVE32F-NEXT: lw t6, 44(a2) 4141; RV32ZVE32F-NEXT: j .LBB48_13 4142; RV32ZVE32F-NEXT: .LBB48_7: 4143; RV32ZVE32F-NEXT: lw a1, 0(a2) 4144; RV32ZVE32F-NEXT: lw a3, 4(a2) 4145; RV32ZVE32F-NEXT: andi a4, t0, 2 4146; RV32ZVE32F-NEXT: beqz a4, .LBB48_2 4147; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1 4148; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4149; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 4150; RV32ZVE32F-NEXT: vmv.x.s 
a5, v10 4151; RV32ZVE32F-NEXT: lw a4, 0(a5) 4152; RV32ZVE32F-NEXT: lw a5, 4(a5) 4153; RV32ZVE32F-NEXT: andi a6, t0, 4 4154; RV32ZVE32F-NEXT: beqz a6, .LBB48_3 4155; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4 4156; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4157; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 4158; RV32ZVE32F-NEXT: vmv.x.s a7, v10 4159; RV32ZVE32F-NEXT: lw a6, 0(a7) 4160; RV32ZVE32F-NEXT: lw a7, 4(a7) 4161; RV32ZVE32F-NEXT: andi t1, t0, 8 4162; RV32ZVE32F-NEXT: beqz t1, .LBB48_4 4163; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7 4164; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4165; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 4166; RV32ZVE32F-NEXT: vmv.x.s t2, v10 4167; RV32ZVE32F-NEXT: lw t1, 0(t2) 4168; RV32ZVE32F-NEXT: lw t2, 4(t2) 4169; RV32ZVE32F-NEXT: andi t3, t0, 16 4170; RV32ZVE32F-NEXT: beqz t3, .LBB48_5 4171; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10 4172; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4173; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 4174; RV32ZVE32F-NEXT: vmv.x.s t4, v10 4175; RV32ZVE32F-NEXT: lw t3, 0(t4) 4176; RV32ZVE32F-NEXT: lw t4, 4(t4) 4177; RV32ZVE32F-NEXT: andi t5, t0, 32 4178; RV32ZVE32F-NEXT: beqz t5, .LBB48_6 4179; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13 4180; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4181; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 4182; RV32ZVE32F-NEXT: vmv.x.s t6, v10 4183; RV32ZVE32F-NEXT: lw t5, 0(t6) 4184; RV32ZVE32F-NEXT: lw t6, 4(t6) 4185; RV32ZVE32F-NEXT: .LBB48_13: # %else14 4186; RV32ZVE32F-NEXT: addi sp, sp, -16 4187; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 4188; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 4189; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 4190; RV32ZVE32F-NEXT: .cfi_offset s0, -4 4191; RV32ZVE32F-NEXT: .cfi_offset s1, -8 4192; RV32ZVE32F-NEXT: andi s0, t0, 64 4193; RV32ZVE32F-NEXT: beqz s0, .LBB48_16 4194; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 4195; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4196; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 4197; RV32ZVE32F-NEXT: vmv.x.s s1, v10 4198; RV32ZVE32F-NEXT: lw s0, 0(s1) 4199; RV32ZVE32F-NEXT: lw s1, 4(s1) 4200; RV32ZVE32F-NEXT: andi t0, t0, -128 4201; RV32ZVE32F-NEXT: bnez t0, .LBB48_17 4202; RV32ZVE32F-NEXT: .LBB48_15: 4203; RV32ZVE32F-NEXT: lw t0, 56(a2) 4204; RV32ZVE32F-NEXT: lw a2, 60(a2) 4205; RV32ZVE32F-NEXT: j .LBB48_18 4206; RV32ZVE32F-NEXT: .LBB48_16: 4207; RV32ZVE32F-NEXT: lw s0, 48(a2) 4208; RV32ZVE32F-NEXT: lw s1, 52(a2) 4209; RV32ZVE32F-NEXT: andi t0, t0, -128 4210; RV32ZVE32F-NEXT: beqz t0, .LBB48_15 4211; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19 4212; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4213; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 4214; RV32ZVE32F-NEXT: vmv.x.s a2, v8 4215; RV32ZVE32F-NEXT: lw t0, 0(a2) 4216; RV32ZVE32F-NEXT: lw a2, 4(a2) 4217; RV32ZVE32F-NEXT: .LBB48_18: # %else20 4218; RV32ZVE32F-NEXT: sw a1, 0(a0) 4219; RV32ZVE32F-NEXT: sw a3, 4(a0) 4220; RV32ZVE32F-NEXT: sw a4, 8(a0) 4221; RV32ZVE32F-NEXT: sw a5, 12(a0) 4222; RV32ZVE32F-NEXT: sw a6, 16(a0) 4223; RV32ZVE32F-NEXT: sw a7, 20(a0) 4224; RV32ZVE32F-NEXT: sw t1, 24(a0) 4225; RV32ZVE32F-NEXT: sw t2, 28(a0) 4226; RV32ZVE32F-NEXT: sw t3, 32(a0) 4227; RV32ZVE32F-NEXT: sw t4, 36(a0) 4228; RV32ZVE32F-NEXT: sw t5, 40(a0) 4229; RV32ZVE32F-NEXT: sw t6, 44(a0) 4230; RV32ZVE32F-NEXT: sw s0, 48(a0) 4231; RV32ZVE32F-NEXT: sw s1, 52(a0) 4232; RV32ZVE32F-NEXT: sw t0, 56(a0) 4233; RV32ZVE32F-NEXT: sw a2, 60(a0) 4234; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 4235; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte 
Folded Reload 4236; RV32ZVE32F-NEXT: .cfi_restore s0 4237; RV32ZVE32F-NEXT: .cfi_restore s1 4238; RV32ZVE32F-NEXT: addi sp, sp, 16 4239; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 4240; RV32ZVE32F-NEXT: ret 4241; 4242; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64: 4243; RV64ZVE32F: # %bb.0: 4244; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 4245; RV64ZVE32F-NEXT: vmv.x.s a5, v0 4246; RV64ZVE32F-NEXT: andi a3, a5, 1 4247; RV64ZVE32F-NEXT: beqz a3, .LBB48_3 4248; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 4249; RV64ZVE32F-NEXT: vmv.x.s a3, v8 4250; RV64ZVE32F-NEXT: slli a3, a3, 3 4251; RV64ZVE32F-NEXT: add a3, a1, a3 4252; RV64ZVE32F-NEXT: ld a3, 0(a3) 4253; RV64ZVE32F-NEXT: andi a4, a5, 2 4254; RV64ZVE32F-NEXT: bnez a4, .LBB48_4 4255; RV64ZVE32F-NEXT: .LBB48_2: 4256; RV64ZVE32F-NEXT: ld a4, 8(a2) 4257; RV64ZVE32F-NEXT: j .LBB48_5 4258; RV64ZVE32F-NEXT: .LBB48_3: 4259; RV64ZVE32F-NEXT: ld a3, 0(a2) 4260; RV64ZVE32F-NEXT: andi a4, a5, 2 4261; RV64ZVE32F-NEXT: beqz a4, .LBB48_2 4262; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1 4263; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 4264; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 4265; RV64ZVE32F-NEXT: vmv.x.s a4, v9 4266; RV64ZVE32F-NEXT: slli a4, a4, 3 4267; RV64ZVE32F-NEXT: add a4, a1, a4 4268; RV64ZVE32F-NEXT: ld a4, 0(a4) 4269; RV64ZVE32F-NEXT: .LBB48_5: # %else2 4270; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 4271; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 4272; RV64ZVE32F-NEXT: andi a6, a5, 4 4273; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 4274; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 4275; RV64ZVE32F-NEXT: beqz a6, .LBB48_10 4276; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 4277; RV64ZVE32F-NEXT: vmv.x.s a6, v8 4278; RV64ZVE32F-NEXT: slli a6, a6, 3 4279; RV64ZVE32F-NEXT: add a6, a1, a6 4280; RV64ZVE32F-NEXT: ld a6, 0(a6) 4281; RV64ZVE32F-NEXT: andi a7, a5, 8 4282; RV64ZVE32F-NEXT: bnez a7, .LBB48_11 4283; RV64ZVE32F-NEXT: .LBB48_7: 4284; RV64ZVE32F-NEXT: ld a7, 24(a2) 4285; RV64ZVE32F-NEXT: andi t0, a5, 16 4286; RV64ZVE32F-NEXT: bnez t0, .LBB48_12 4287; RV64ZVE32F-NEXT: .LBB48_8: 4288; RV64ZVE32F-NEXT: ld t0, 32(a2) 4289; RV64ZVE32F-NEXT: andi t1, a5, 32 4290; RV64ZVE32F-NEXT: bnez t1, .LBB48_13 4291; RV64ZVE32F-NEXT: .LBB48_9: 4292; RV64ZVE32F-NEXT: ld t1, 40(a2) 4293; RV64ZVE32F-NEXT: j .LBB48_14 4294; RV64ZVE32F-NEXT: .LBB48_10: 4295; RV64ZVE32F-NEXT: ld a6, 16(a2) 4296; RV64ZVE32F-NEXT: andi a7, a5, 8 4297; RV64ZVE32F-NEXT: beqz a7, .LBB48_7 4298; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7 4299; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4300; RV64ZVE32F-NEXT: vmv.x.s a7, v8 4301; RV64ZVE32F-NEXT: slli a7, a7, 3 4302; RV64ZVE32F-NEXT: add a7, a1, a7 4303; RV64ZVE32F-NEXT: ld a7, 0(a7) 4304; RV64ZVE32F-NEXT: andi t0, a5, 16 4305; RV64ZVE32F-NEXT: beqz t0, .LBB48_8 4306; RV64ZVE32F-NEXT: .LBB48_12: # %cond.load10 4307; RV64ZVE32F-NEXT: vmv.x.s t0, v9 4308; RV64ZVE32F-NEXT: slli t0, t0, 3 4309; RV64ZVE32F-NEXT: add t0, a1, t0 4310; RV64ZVE32F-NEXT: ld t0, 0(t0) 4311; RV64ZVE32F-NEXT: andi t1, a5, 32 4312; RV64ZVE32F-NEXT: beqz t1, .LBB48_9 4313; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load13 4314; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 4315; RV64ZVE32F-NEXT: vmv.x.s t1, v8 4316; RV64ZVE32F-NEXT: slli t1, t1, 3 4317; RV64ZVE32F-NEXT: add t1, a1, t1 4318; RV64ZVE32F-NEXT: ld t1, 0(t1) 4319; RV64ZVE32F-NEXT: .LBB48_14: # %else14 4320; RV64ZVE32F-NEXT: andi t2, a5, 64 4321; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 4322; RV64ZVE32F-NEXT: beqz t2, .LBB48_17 4323; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 4324; RV64ZVE32F-NEXT: vmv.x.s t2, 
v8 4325; RV64ZVE32F-NEXT: slli t2, t2, 3 4326; RV64ZVE32F-NEXT: add t2, a1, t2 4327; RV64ZVE32F-NEXT: ld t2, 0(t2) 4328; RV64ZVE32F-NEXT: andi a5, a5, -128 4329; RV64ZVE32F-NEXT: bnez a5, .LBB48_18 4330; RV64ZVE32F-NEXT: .LBB48_16: 4331; RV64ZVE32F-NEXT: ld a1, 56(a2) 4332; RV64ZVE32F-NEXT: j .LBB48_19 4333; RV64ZVE32F-NEXT: .LBB48_17: 4334; RV64ZVE32F-NEXT: ld t2, 48(a2) 4335; RV64ZVE32F-NEXT: andi a5, a5, -128 4336; RV64ZVE32F-NEXT: beqz a5, .LBB48_16 4337; RV64ZVE32F-NEXT: .LBB48_18: # %cond.load19 4338; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4339; RV64ZVE32F-NEXT: vmv.x.s a2, v8 4340; RV64ZVE32F-NEXT: slli a2, a2, 3 4341; RV64ZVE32F-NEXT: add a1, a1, a2 4342; RV64ZVE32F-NEXT: ld a1, 0(a1) 4343; RV64ZVE32F-NEXT: .LBB48_19: # %else20 4344; RV64ZVE32F-NEXT: sd a3, 0(a0) 4345; RV64ZVE32F-NEXT: sd a4, 8(a0) 4346; RV64ZVE32F-NEXT: sd a6, 16(a0) 4347; RV64ZVE32F-NEXT: sd a7, 24(a0) 4348; RV64ZVE32F-NEXT: sd t0, 32(a0) 4349; RV64ZVE32F-NEXT: sd t1, 40(a0) 4350; RV64ZVE32F-NEXT: sd t2, 48(a0) 4351; RV64ZVE32F-NEXT: sd a1, 56(a0) 4352; RV64ZVE32F-NEXT: ret 4353 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs 4354 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 4355 ret <8 x i64> %v 4356} 4357 4358define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 4359; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8i64: 4360; RV32V: # %bb.0: 4361; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4362; RV32V-NEXT: vsext.vf4 v10, v8 4363; RV32V-NEXT: vsll.vi v8, v10, 3 4364; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 4365; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 4366; RV32V-NEXT: vmv.v.v v8, v12 4367; RV32V-NEXT: ret 4368; 4369; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i64: 4370; RV64V: # %bb.0: 4371; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 4372; RV64V-NEXT: vsext.vf8 v16, v8 4373; RV64V-NEXT: vsll.vi v8, v16, 3 4374; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 4375; RV64V-NEXT: vmv.v.v v8, v12 4376; RV64V-NEXT: ret 4377; 4378; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64: 4379; RV32ZVE32F: # %bb.0: 4380; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4381; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 4382; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 4383; RV32ZVE32F-NEXT: vmv.x.s t0, v0 4384; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4385; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 4386; RV32ZVE32F-NEXT: andi a3, t0, 1 4387; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 4388; RV32ZVE32F-NEXT: beqz a3, .LBB49_7 4389; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 4390; RV32ZVE32F-NEXT: vmv.x.s a3, v8 4391; RV32ZVE32F-NEXT: lw a1, 0(a3) 4392; RV32ZVE32F-NEXT: lw a3, 4(a3) 4393; RV32ZVE32F-NEXT: andi a4, t0, 2 4394; RV32ZVE32F-NEXT: bnez a4, .LBB49_8 4395; RV32ZVE32F-NEXT: .LBB49_2: 4396; RV32ZVE32F-NEXT: lw a4, 8(a2) 4397; RV32ZVE32F-NEXT: lw a5, 12(a2) 4398; RV32ZVE32F-NEXT: andi a6, t0, 4 4399; RV32ZVE32F-NEXT: bnez a6, .LBB49_9 4400; RV32ZVE32F-NEXT: .LBB49_3: 4401; RV32ZVE32F-NEXT: lw a6, 16(a2) 4402; RV32ZVE32F-NEXT: lw a7, 20(a2) 4403; RV32ZVE32F-NEXT: andi t1, t0, 8 4404; RV32ZVE32F-NEXT: bnez t1, .LBB49_10 4405; RV32ZVE32F-NEXT: .LBB49_4: 4406; RV32ZVE32F-NEXT: lw t1, 24(a2) 4407; RV32ZVE32F-NEXT: lw t2, 28(a2) 4408; RV32ZVE32F-NEXT: andi t3, t0, 16 4409; RV32ZVE32F-NEXT: bnez t3, .LBB49_11 4410; RV32ZVE32F-NEXT: .LBB49_5: 4411; RV32ZVE32F-NEXT: lw t3, 32(a2) 4412; RV32ZVE32F-NEXT: lw t4, 36(a2) 4413; RV32ZVE32F-NEXT: andi t5, t0, 32 4414; RV32ZVE32F-NEXT: bnez t5, 
.LBB49_12 4415; RV32ZVE32F-NEXT: .LBB49_6: 4416; RV32ZVE32F-NEXT: lw t5, 40(a2) 4417; RV32ZVE32F-NEXT: lw t6, 44(a2) 4418; RV32ZVE32F-NEXT: j .LBB49_13 4419; RV32ZVE32F-NEXT: .LBB49_7: 4420; RV32ZVE32F-NEXT: lw a1, 0(a2) 4421; RV32ZVE32F-NEXT: lw a3, 4(a2) 4422; RV32ZVE32F-NEXT: andi a4, t0, 2 4423; RV32ZVE32F-NEXT: beqz a4, .LBB49_2 4424; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1 4425; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4426; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 4427; RV32ZVE32F-NEXT: vmv.x.s a5, v10 4428; RV32ZVE32F-NEXT: lw a4, 0(a5) 4429; RV32ZVE32F-NEXT: lw a5, 4(a5) 4430; RV32ZVE32F-NEXT: andi a6, t0, 4 4431; RV32ZVE32F-NEXT: beqz a6, .LBB49_3 4432; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4 4433; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4434; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 4435; RV32ZVE32F-NEXT: vmv.x.s a7, v10 4436; RV32ZVE32F-NEXT: lw a6, 0(a7) 4437; RV32ZVE32F-NEXT: lw a7, 4(a7) 4438; RV32ZVE32F-NEXT: andi t1, t0, 8 4439; RV32ZVE32F-NEXT: beqz t1, .LBB49_4 4440; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7 4441; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4442; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 4443; RV32ZVE32F-NEXT: vmv.x.s t2, v10 4444; RV32ZVE32F-NEXT: lw t1, 0(t2) 4445; RV32ZVE32F-NEXT: lw t2, 4(t2) 4446; RV32ZVE32F-NEXT: andi t3, t0, 16 4447; RV32ZVE32F-NEXT: beqz t3, .LBB49_5 4448; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10 4449; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4450; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 4451; RV32ZVE32F-NEXT: vmv.x.s t4, v10 4452; RV32ZVE32F-NEXT: lw t3, 0(t4) 4453; RV32ZVE32F-NEXT: lw t4, 4(t4) 4454; RV32ZVE32F-NEXT: andi t5, t0, 32 4455; RV32ZVE32F-NEXT: beqz t5, .LBB49_6 4456; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13 4457; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4458; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 4459; RV32ZVE32F-NEXT: vmv.x.s t6, v10 4460; RV32ZVE32F-NEXT: lw t5, 0(t6) 4461; RV32ZVE32F-NEXT: lw t6, 4(t6) 4462; RV32ZVE32F-NEXT: .LBB49_13: # %else14 4463; RV32ZVE32F-NEXT: addi sp, sp, -16 4464; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 4465; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 4466; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 4467; RV32ZVE32F-NEXT: .cfi_offset s0, -4 4468; RV32ZVE32F-NEXT: .cfi_offset s1, -8 4469; RV32ZVE32F-NEXT: andi s0, t0, 64 4470; RV32ZVE32F-NEXT: beqz s0, .LBB49_16 4471; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 4472; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4473; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 4474; RV32ZVE32F-NEXT: vmv.x.s s1, v10 4475; RV32ZVE32F-NEXT: lw s0, 0(s1) 4476; RV32ZVE32F-NEXT: lw s1, 4(s1) 4477; RV32ZVE32F-NEXT: andi t0, t0, -128 4478; RV32ZVE32F-NEXT: bnez t0, .LBB49_17 4479; RV32ZVE32F-NEXT: .LBB49_15: 4480; RV32ZVE32F-NEXT: lw t0, 56(a2) 4481; RV32ZVE32F-NEXT: lw a2, 60(a2) 4482; RV32ZVE32F-NEXT: j .LBB49_18 4483; RV32ZVE32F-NEXT: .LBB49_16: 4484; RV32ZVE32F-NEXT: lw s0, 48(a2) 4485; RV32ZVE32F-NEXT: lw s1, 52(a2) 4486; RV32ZVE32F-NEXT: andi t0, t0, -128 4487; RV32ZVE32F-NEXT: beqz t0, .LBB49_15 4488; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19 4489; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4490; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 4491; RV32ZVE32F-NEXT: vmv.x.s a2, v8 4492; RV32ZVE32F-NEXT: lw t0, 0(a2) 4493; RV32ZVE32F-NEXT: lw a2, 4(a2) 4494; RV32ZVE32F-NEXT: .LBB49_18: # %else20 4495; RV32ZVE32F-NEXT: sw a1, 0(a0) 4496; RV32ZVE32F-NEXT: sw a3, 4(a0) 4497; RV32ZVE32F-NEXT: sw a4, 8(a0) 4498; RV32ZVE32F-NEXT: sw a5, 12(a0) 4499; RV32ZVE32F-NEXT: sw a6, 
16(a0) 4500; RV32ZVE32F-NEXT: sw a7, 20(a0) 4501; RV32ZVE32F-NEXT: sw t1, 24(a0) 4502; RV32ZVE32F-NEXT: sw t2, 28(a0) 4503; RV32ZVE32F-NEXT: sw t3, 32(a0) 4504; RV32ZVE32F-NEXT: sw t4, 36(a0) 4505; RV32ZVE32F-NEXT: sw t5, 40(a0) 4506; RV32ZVE32F-NEXT: sw t6, 44(a0) 4507; RV32ZVE32F-NEXT: sw s0, 48(a0) 4508; RV32ZVE32F-NEXT: sw s1, 52(a0) 4509; RV32ZVE32F-NEXT: sw t0, 56(a0) 4510; RV32ZVE32F-NEXT: sw a2, 60(a0) 4511; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 4512; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 4513; RV32ZVE32F-NEXT: .cfi_restore s0 4514; RV32ZVE32F-NEXT: .cfi_restore s1 4515; RV32ZVE32F-NEXT: addi sp, sp, 16 4516; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 4517; RV32ZVE32F-NEXT: ret 4518; 4519; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64: 4520; RV64ZVE32F: # %bb.0: 4521; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 4522; RV64ZVE32F-NEXT: vmv.x.s a5, v0 4523; RV64ZVE32F-NEXT: andi a3, a5, 1 4524; RV64ZVE32F-NEXT: beqz a3, .LBB49_3 4525; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 4526; RV64ZVE32F-NEXT: vmv.x.s a3, v8 4527; RV64ZVE32F-NEXT: slli a3, a3, 3 4528; RV64ZVE32F-NEXT: add a3, a1, a3 4529; RV64ZVE32F-NEXT: ld a3, 0(a3) 4530; RV64ZVE32F-NEXT: andi a4, a5, 2 4531; RV64ZVE32F-NEXT: bnez a4, .LBB49_4 4532; RV64ZVE32F-NEXT: .LBB49_2: 4533; RV64ZVE32F-NEXT: ld a4, 8(a2) 4534; RV64ZVE32F-NEXT: j .LBB49_5 4535; RV64ZVE32F-NEXT: .LBB49_3: 4536; RV64ZVE32F-NEXT: ld a3, 0(a2) 4537; RV64ZVE32F-NEXT: andi a4, a5, 2 4538; RV64ZVE32F-NEXT: beqz a4, .LBB49_2 4539; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1 4540; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 4541; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 4542; RV64ZVE32F-NEXT: vmv.x.s a4, v9 4543; RV64ZVE32F-NEXT: slli a4, a4, 3 4544; RV64ZVE32F-NEXT: add a4, a1, a4 4545; RV64ZVE32F-NEXT: ld a4, 0(a4) 4546; RV64ZVE32F-NEXT: .LBB49_5: # %else2 4547; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 4548; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 4549; RV64ZVE32F-NEXT: andi a6, a5, 4 4550; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 4551; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 4552; RV64ZVE32F-NEXT: beqz a6, .LBB49_10 4553; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 4554; RV64ZVE32F-NEXT: vmv.x.s a6, v8 4555; RV64ZVE32F-NEXT: slli a6, a6, 3 4556; RV64ZVE32F-NEXT: add a6, a1, a6 4557; RV64ZVE32F-NEXT: ld a6, 0(a6) 4558; RV64ZVE32F-NEXT: andi a7, a5, 8 4559; RV64ZVE32F-NEXT: bnez a7, .LBB49_11 4560; RV64ZVE32F-NEXT: .LBB49_7: 4561; RV64ZVE32F-NEXT: ld a7, 24(a2) 4562; RV64ZVE32F-NEXT: andi t0, a5, 16 4563; RV64ZVE32F-NEXT: bnez t0, .LBB49_12 4564; RV64ZVE32F-NEXT: .LBB49_8: 4565; RV64ZVE32F-NEXT: ld t0, 32(a2) 4566; RV64ZVE32F-NEXT: andi t1, a5, 32 4567; RV64ZVE32F-NEXT: bnez t1, .LBB49_13 4568; RV64ZVE32F-NEXT: .LBB49_9: 4569; RV64ZVE32F-NEXT: ld t1, 40(a2) 4570; RV64ZVE32F-NEXT: j .LBB49_14 4571; RV64ZVE32F-NEXT: .LBB49_10: 4572; RV64ZVE32F-NEXT: ld a6, 16(a2) 4573; RV64ZVE32F-NEXT: andi a7, a5, 8 4574; RV64ZVE32F-NEXT: beqz a7, .LBB49_7 4575; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7 4576; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4577; RV64ZVE32F-NEXT: vmv.x.s a7, v8 4578; RV64ZVE32F-NEXT: slli a7, a7, 3 4579; RV64ZVE32F-NEXT: add a7, a1, a7 4580; RV64ZVE32F-NEXT: ld a7, 0(a7) 4581; RV64ZVE32F-NEXT: andi t0, a5, 16 4582; RV64ZVE32F-NEXT: beqz t0, .LBB49_8 4583; RV64ZVE32F-NEXT: .LBB49_12: # %cond.load10 4584; RV64ZVE32F-NEXT: vmv.x.s t0, v9 4585; RV64ZVE32F-NEXT: slli t0, t0, 3 4586; RV64ZVE32F-NEXT: add t0, a1, t0 4587; RV64ZVE32F-NEXT: ld t0, 0(t0) 4588; RV64ZVE32F-NEXT: andi t1, a5, 32 4589; 
RV64ZVE32F-NEXT: beqz t1, .LBB49_9 4590; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load13 4591; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 4592; RV64ZVE32F-NEXT: vmv.x.s t1, v8 4593; RV64ZVE32F-NEXT: slli t1, t1, 3 4594; RV64ZVE32F-NEXT: add t1, a1, t1 4595; RV64ZVE32F-NEXT: ld t1, 0(t1) 4596; RV64ZVE32F-NEXT: .LBB49_14: # %else14 4597; RV64ZVE32F-NEXT: andi t2, a5, 64 4598; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 4599; RV64ZVE32F-NEXT: beqz t2, .LBB49_17 4600; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 4601; RV64ZVE32F-NEXT: vmv.x.s t2, v8 4602; RV64ZVE32F-NEXT: slli t2, t2, 3 4603; RV64ZVE32F-NEXT: add t2, a1, t2 4604; RV64ZVE32F-NEXT: ld t2, 0(t2) 4605; RV64ZVE32F-NEXT: andi a5, a5, -128 4606; RV64ZVE32F-NEXT: bnez a5, .LBB49_18 4607; RV64ZVE32F-NEXT: .LBB49_16: 4608; RV64ZVE32F-NEXT: ld a1, 56(a2) 4609; RV64ZVE32F-NEXT: j .LBB49_19 4610; RV64ZVE32F-NEXT: .LBB49_17: 4611; RV64ZVE32F-NEXT: ld t2, 48(a2) 4612; RV64ZVE32F-NEXT: andi a5, a5, -128 4613; RV64ZVE32F-NEXT: beqz a5, .LBB49_16 4614; RV64ZVE32F-NEXT: .LBB49_18: # %cond.load19 4615; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4616; RV64ZVE32F-NEXT: vmv.x.s a2, v8 4617; RV64ZVE32F-NEXT: slli a2, a2, 3 4618; RV64ZVE32F-NEXT: add a1, a1, a2 4619; RV64ZVE32F-NEXT: ld a1, 0(a1) 4620; RV64ZVE32F-NEXT: .LBB49_19: # %else20 4621; RV64ZVE32F-NEXT: sd a3, 0(a0) 4622; RV64ZVE32F-NEXT: sd a4, 8(a0) 4623; RV64ZVE32F-NEXT: sd a6, 16(a0) 4624; RV64ZVE32F-NEXT: sd a7, 24(a0) 4625; RV64ZVE32F-NEXT: sd t0, 32(a0) 4626; RV64ZVE32F-NEXT: sd t1, 40(a0) 4627; RV64ZVE32F-NEXT: sd t2, 48(a0) 4628; RV64ZVE32F-NEXT: sd a1, 56(a0) 4629; RV64ZVE32F-NEXT: ret 4630 %eidxs = sext <8 x i8> %idxs to <8 x i64> 4631 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 4632 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 4633 ret <8 x i64> %v 4634} 4635 4636define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 4637; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64: 4638; RV32V: # %bb.0: 4639; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4640; RV32V-NEXT: vzext.vf2 v9, v8 4641; RV32V-NEXT: vsll.vi v8, v9, 3 4642; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 4643; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t 4644; RV32V-NEXT: vmv.v.v v8, v12 4645; RV32V-NEXT: ret 4646; 4647; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64: 4648; RV64V: # %bb.0: 4649; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4650; RV64V-NEXT: vzext.vf2 v9, v8 4651; RV64V-NEXT: vsll.vi v8, v9, 3 4652; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 4653; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t 4654; RV64V-NEXT: vmv.v.v v8, v12 4655; RV64V-NEXT: ret 4656; 4657; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64: 4658; RV32ZVE32F: # %bb.0: 4659; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4660; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 4661; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 4662; RV32ZVE32F-NEXT: vmv.x.s t0, v0 4663; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4664; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 4665; RV32ZVE32F-NEXT: andi a3, t0, 1 4666; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 4667; RV32ZVE32F-NEXT: beqz a3, .LBB50_7 4668; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 4669; RV32ZVE32F-NEXT: vmv.x.s a3, v8 4670; RV32ZVE32F-NEXT: lw a1, 0(a3) 4671; RV32ZVE32F-NEXT: lw a3, 4(a3) 4672; RV32ZVE32F-NEXT: andi a4, t0, 2 4673; RV32ZVE32F-NEXT: bnez a4, .LBB50_8 4674; RV32ZVE32F-NEXT: .LBB50_2: 4675; RV32ZVE32F-NEXT: lw a4, 8(a2) 4676; RV32ZVE32F-NEXT: lw a5, 12(a2) 
4677; RV32ZVE32F-NEXT: andi a6, t0, 4 4678; RV32ZVE32F-NEXT: bnez a6, .LBB50_9 4679; RV32ZVE32F-NEXT: .LBB50_3: 4680; RV32ZVE32F-NEXT: lw a6, 16(a2) 4681; RV32ZVE32F-NEXT: lw a7, 20(a2) 4682; RV32ZVE32F-NEXT: andi t1, t0, 8 4683; RV32ZVE32F-NEXT: bnez t1, .LBB50_10 4684; RV32ZVE32F-NEXT: .LBB50_4: 4685; RV32ZVE32F-NEXT: lw t1, 24(a2) 4686; RV32ZVE32F-NEXT: lw t2, 28(a2) 4687; RV32ZVE32F-NEXT: andi t3, t0, 16 4688; RV32ZVE32F-NEXT: bnez t3, .LBB50_11 4689; RV32ZVE32F-NEXT: .LBB50_5: 4690; RV32ZVE32F-NEXT: lw t3, 32(a2) 4691; RV32ZVE32F-NEXT: lw t4, 36(a2) 4692; RV32ZVE32F-NEXT: andi t5, t0, 32 4693; RV32ZVE32F-NEXT: bnez t5, .LBB50_12 4694; RV32ZVE32F-NEXT: .LBB50_6: 4695; RV32ZVE32F-NEXT: lw t5, 40(a2) 4696; RV32ZVE32F-NEXT: lw t6, 44(a2) 4697; RV32ZVE32F-NEXT: j .LBB50_13 4698; RV32ZVE32F-NEXT: .LBB50_7: 4699; RV32ZVE32F-NEXT: lw a1, 0(a2) 4700; RV32ZVE32F-NEXT: lw a3, 4(a2) 4701; RV32ZVE32F-NEXT: andi a4, t0, 2 4702; RV32ZVE32F-NEXT: beqz a4, .LBB50_2 4703; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1 4704; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4705; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 4706; RV32ZVE32F-NEXT: vmv.x.s a5, v10 4707; RV32ZVE32F-NEXT: lw a4, 0(a5) 4708; RV32ZVE32F-NEXT: lw a5, 4(a5) 4709; RV32ZVE32F-NEXT: andi a6, t0, 4 4710; RV32ZVE32F-NEXT: beqz a6, .LBB50_3 4711; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4 4712; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4713; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 4714; RV32ZVE32F-NEXT: vmv.x.s a7, v10 4715; RV32ZVE32F-NEXT: lw a6, 0(a7) 4716; RV32ZVE32F-NEXT: lw a7, 4(a7) 4717; RV32ZVE32F-NEXT: andi t1, t0, 8 4718; RV32ZVE32F-NEXT: beqz t1, .LBB50_4 4719; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7 4720; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4721; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 4722; RV32ZVE32F-NEXT: vmv.x.s t2, v10 4723; RV32ZVE32F-NEXT: lw t1, 0(t2) 4724; RV32ZVE32F-NEXT: lw t2, 4(t2) 4725; RV32ZVE32F-NEXT: andi t3, t0, 16 4726; RV32ZVE32F-NEXT: beqz t3, .LBB50_5 4727; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10 4728; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4729; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 4730; RV32ZVE32F-NEXT: vmv.x.s t4, v10 4731; RV32ZVE32F-NEXT: lw t3, 0(t4) 4732; RV32ZVE32F-NEXT: lw t4, 4(t4) 4733; RV32ZVE32F-NEXT: andi t5, t0, 32 4734; RV32ZVE32F-NEXT: beqz t5, .LBB50_6 4735; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13 4736; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4737; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 4738; RV32ZVE32F-NEXT: vmv.x.s t6, v10 4739; RV32ZVE32F-NEXT: lw t5, 0(t6) 4740; RV32ZVE32F-NEXT: lw t6, 4(t6) 4741; RV32ZVE32F-NEXT: .LBB50_13: # %else14 4742; RV32ZVE32F-NEXT: addi sp, sp, -16 4743; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 4744; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 4745; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 4746; RV32ZVE32F-NEXT: .cfi_offset s0, -4 4747; RV32ZVE32F-NEXT: .cfi_offset s1, -8 4748; RV32ZVE32F-NEXT: andi s0, t0, 64 4749; RV32ZVE32F-NEXT: beqz s0, .LBB50_16 4750; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 4751; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4752; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 4753; RV32ZVE32F-NEXT: vmv.x.s s1, v10 4754; RV32ZVE32F-NEXT: lw s0, 0(s1) 4755; RV32ZVE32F-NEXT: lw s1, 4(s1) 4756; RV32ZVE32F-NEXT: andi t0, t0, -128 4757; RV32ZVE32F-NEXT: bnez t0, .LBB50_17 4758; RV32ZVE32F-NEXT: .LBB50_15: 4759; RV32ZVE32F-NEXT: lw t0, 56(a2) 4760; RV32ZVE32F-NEXT: lw a2, 60(a2) 4761; RV32ZVE32F-NEXT: j .LBB50_18 4762; RV32ZVE32F-NEXT: .LBB50_16: 4763; 
RV32ZVE32F-NEXT: lw s0, 48(a2) 4764; RV32ZVE32F-NEXT: lw s1, 52(a2) 4765; RV32ZVE32F-NEXT: andi t0, t0, -128 4766; RV32ZVE32F-NEXT: beqz t0, .LBB50_15 4767; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19 4768; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 4769; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 4770; RV32ZVE32F-NEXT: vmv.x.s a2, v8 4771; RV32ZVE32F-NEXT: lw t0, 0(a2) 4772; RV32ZVE32F-NEXT: lw a2, 4(a2) 4773; RV32ZVE32F-NEXT: .LBB50_18: # %else20 4774; RV32ZVE32F-NEXT: sw a1, 0(a0) 4775; RV32ZVE32F-NEXT: sw a3, 4(a0) 4776; RV32ZVE32F-NEXT: sw a4, 8(a0) 4777; RV32ZVE32F-NEXT: sw a5, 12(a0) 4778; RV32ZVE32F-NEXT: sw a6, 16(a0) 4779; RV32ZVE32F-NEXT: sw a7, 20(a0) 4780; RV32ZVE32F-NEXT: sw t1, 24(a0) 4781; RV32ZVE32F-NEXT: sw t2, 28(a0) 4782; RV32ZVE32F-NEXT: sw t3, 32(a0) 4783; RV32ZVE32F-NEXT: sw t4, 36(a0) 4784; RV32ZVE32F-NEXT: sw t5, 40(a0) 4785; RV32ZVE32F-NEXT: sw t6, 44(a0) 4786; RV32ZVE32F-NEXT: sw s0, 48(a0) 4787; RV32ZVE32F-NEXT: sw s1, 52(a0) 4788; RV32ZVE32F-NEXT: sw t0, 56(a0) 4789; RV32ZVE32F-NEXT: sw a2, 60(a0) 4790; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 4791; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 4792; RV32ZVE32F-NEXT: .cfi_restore s0 4793; RV32ZVE32F-NEXT: .cfi_restore s1 4794; RV32ZVE32F-NEXT: addi sp, sp, 16 4795; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 4796; RV32ZVE32F-NEXT: ret 4797; 4798; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64: 4799; RV64ZVE32F: # %bb.0: 4800; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 4801; RV64ZVE32F-NEXT: vmv.x.s a5, v0 4802; RV64ZVE32F-NEXT: andi a3, a5, 1 4803; RV64ZVE32F-NEXT: beqz a3, .LBB50_3 4804; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 4805; RV64ZVE32F-NEXT: vmv.x.s a3, v8 4806; RV64ZVE32F-NEXT: andi a3, a3, 255 4807; RV64ZVE32F-NEXT: slli a3, a3, 3 4808; RV64ZVE32F-NEXT: add a3, a1, a3 4809; RV64ZVE32F-NEXT: ld a3, 0(a3) 4810; RV64ZVE32F-NEXT: andi a4, a5, 2 4811; RV64ZVE32F-NEXT: bnez a4, .LBB50_4 4812; RV64ZVE32F-NEXT: .LBB50_2: 4813; RV64ZVE32F-NEXT: ld a4, 8(a2) 4814; RV64ZVE32F-NEXT: j .LBB50_5 4815; RV64ZVE32F-NEXT: .LBB50_3: 4816; RV64ZVE32F-NEXT: ld a3, 0(a2) 4817; RV64ZVE32F-NEXT: andi a4, a5, 2 4818; RV64ZVE32F-NEXT: beqz a4, .LBB50_2 4819; RV64ZVE32F-NEXT: .LBB50_4: # %cond.load1 4820; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 4821; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 4822; RV64ZVE32F-NEXT: vmv.x.s a4, v9 4823; RV64ZVE32F-NEXT: andi a4, a4, 255 4824; RV64ZVE32F-NEXT: slli a4, a4, 3 4825; RV64ZVE32F-NEXT: add a4, a1, a4 4826; RV64ZVE32F-NEXT: ld a4, 0(a4) 4827; RV64ZVE32F-NEXT: .LBB50_5: # %else2 4828; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 4829; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 4830; RV64ZVE32F-NEXT: andi a6, a5, 4 4831; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 4832; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 4833; RV64ZVE32F-NEXT: beqz a6, .LBB50_10 4834; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 4835; RV64ZVE32F-NEXT: vmv.x.s a6, v8 4836; RV64ZVE32F-NEXT: andi a6, a6, 255 4837; RV64ZVE32F-NEXT: slli a6, a6, 3 4838; RV64ZVE32F-NEXT: add a6, a1, a6 4839; RV64ZVE32F-NEXT: ld a6, 0(a6) 4840; RV64ZVE32F-NEXT: andi a7, a5, 8 4841; RV64ZVE32F-NEXT: bnez a7, .LBB50_11 4842; RV64ZVE32F-NEXT: .LBB50_7: 4843; RV64ZVE32F-NEXT: ld a7, 24(a2) 4844; RV64ZVE32F-NEXT: andi t0, a5, 16 4845; RV64ZVE32F-NEXT: bnez t0, .LBB50_12 4846; RV64ZVE32F-NEXT: .LBB50_8: 4847; RV64ZVE32F-NEXT: ld t0, 32(a2) 4848; RV64ZVE32F-NEXT: andi t1, a5, 32 4849; RV64ZVE32F-NEXT: bnez t1, .LBB50_13 4850; RV64ZVE32F-NEXT: .LBB50_9: 4851; RV64ZVE32F-NEXT: ld t1, 40(a2) 4852; 
RV64ZVE32F-NEXT: j .LBB50_14 4853; RV64ZVE32F-NEXT: .LBB50_10: 4854; RV64ZVE32F-NEXT: ld a6, 16(a2) 4855; RV64ZVE32F-NEXT: andi a7, a5, 8 4856; RV64ZVE32F-NEXT: beqz a7, .LBB50_7 4857; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7 4858; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4859; RV64ZVE32F-NEXT: vmv.x.s a7, v8 4860; RV64ZVE32F-NEXT: andi a7, a7, 255 4861; RV64ZVE32F-NEXT: slli a7, a7, 3 4862; RV64ZVE32F-NEXT: add a7, a1, a7 4863; RV64ZVE32F-NEXT: ld a7, 0(a7) 4864; RV64ZVE32F-NEXT: andi t0, a5, 16 4865; RV64ZVE32F-NEXT: beqz t0, .LBB50_8 4866; RV64ZVE32F-NEXT: .LBB50_12: # %cond.load10 4867; RV64ZVE32F-NEXT: vmv.x.s t0, v9 4868; RV64ZVE32F-NEXT: andi t0, t0, 255 4869; RV64ZVE32F-NEXT: slli t0, t0, 3 4870; RV64ZVE32F-NEXT: add t0, a1, t0 4871; RV64ZVE32F-NEXT: ld t0, 0(t0) 4872; RV64ZVE32F-NEXT: andi t1, a5, 32 4873; RV64ZVE32F-NEXT: beqz t1, .LBB50_9 4874; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load13 4875; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 4876; RV64ZVE32F-NEXT: vmv.x.s t1, v8 4877; RV64ZVE32F-NEXT: andi t1, t1, 255 4878; RV64ZVE32F-NEXT: slli t1, t1, 3 4879; RV64ZVE32F-NEXT: add t1, a1, t1 4880; RV64ZVE32F-NEXT: ld t1, 0(t1) 4881; RV64ZVE32F-NEXT: .LBB50_14: # %else14 4882; RV64ZVE32F-NEXT: andi t2, a5, 64 4883; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 4884; RV64ZVE32F-NEXT: beqz t2, .LBB50_17 4885; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 4886; RV64ZVE32F-NEXT: vmv.x.s t2, v8 4887; RV64ZVE32F-NEXT: andi t2, t2, 255 4888; RV64ZVE32F-NEXT: slli t2, t2, 3 4889; RV64ZVE32F-NEXT: add t2, a1, t2 4890; RV64ZVE32F-NEXT: ld t2, 0(t2) 4891; RV64ZVE32F-NEXT: andi a5, a5, -128 4892; RV64ZVE32F-NEXT: bnez a5, .LBB50_18 4893; RV64ZVE32F-NEXT: .LBB50_16: 4894; RV64ZVE32F-NEXT: ld a1, 56(a2) 4895; RV64ZVE32F-NEXT: j .LBB50_19 4896; RV64ZVE32F-NEXT: .LBB50_17: 4897; RV64ZVE32F-NEXT: ld t2, 48(a2) 4898; RV64ZVE32F-NEXT: andi a5, a5, -128 4899; RV64ZVE32F-NEXT: beqz a5, .LBB50_16 4900; RV64ZVE32F-NEXT: .LBB50_18: # %cond.load19 4901; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 4902; RV64ZVE32F-NEXT: vmv.x.s a2, v8 4903; RV64ZVE32F-NEXT: andi a2, a2, 255 4904; RV64ZVE32F-NEXT: slli a2, a2, 3 4905; RV64ZVE32F-NEXT: add a1, a1, a2 4906; RV64ZVE32F-NEXT: ld a1, 0(a1) 4907; RV64ZVE32F-NEXT: .LBB50_19: # %else20 4908; RV64ZVE32F-NEXT: sd a3, 0(a0) 4909; RV64ZVE32F-NEXT: sd a4, 8(a0) 4910; RV64ZVE32F-NEXT: sd a6, 16(a0) 4911; RV64ZVE32F-NEXT: sd a7, 24(a0) 4912; RV64ZVE32F-NEXT: sd t0, 32(a0) 4913; RV64ZVE32F-NEXT: sd t1, 40(a0) 4914; RV64ZVE32F-NEXT: sd t2, 48(a0) 4915; RV64ZVE32F-NEXT: sd a1, 56(a0) 4916; RV64ZVE32F-NEXT: ret 4917 %eidxs = zext <8 x i8> %idxs to <8 x i64> 4918 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 4919 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 4920 ret <8 x i64> %v 4921} 4922 4923define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 4924; RV32V-LABEL: mgather_baseidx_v8i16_v8i64: 4925; RV32V: # %bb.0: 4926; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4927; RV32V-NEXT: vsext.vf2 v10, v8 4928; RV32V-NEXT: vsll.vi v8, v10, 3 4929; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 4930; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 4931; RV32V-NEXT: vmv.v.v v8, v12 4932; RV32V-NEXT: ret 4933; 4934; RV64V-LABEL: mgather_baseidx_v8i16_v8i64: 4935; RV64V: # %bb.0: 4936; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 4937; RV64V-NEXT: vsext.vf4 v16, v8 4938; RV64V-NEXT: vsll.vi v8, v16, 3 4939; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 4940; RV64V-NEXT: 
vmv.v.v v8, v12 4941; RV64V-NEXT: ret 4942; 4943; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64: 4944; RV32ZVE32F: # %bb.0: 4945; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 4946; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 4947; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 4948; RV32ZVE32F-NEXT: vmv.x.s t0, v0 4949; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4950; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 4951; RV32ZVE32F-NEXT: andi a3, t0, 1 4952; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 4953; RV32ZVE32F-NEXT: beqz a3, .LBB51_7 4954; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 4955; RV32ZVE32F-NEXT: vmv.x.s a3, v8 4956; RV32ZVE32F-NEXT: lw a1, 0(a3) 4957; RV32ZVE32F-NEXT: lw a3, 4(a3) 4958; RV32ZVE32F-NEXT: andi a4, t0, 2 4959; RV32ZVE32F-NEXT: bnez a4, .LBB51_8 4960; RV32ZVE32F-NEXT: .LBB51_2: 4961; RV32ZVE32F-NEXT: lw a4, 8(a2) 4962; RV32ZVE32F-NEXT: lw a5, 12(a2) 4963; RV32ZVE32F-NEXT: andi a6, t0, 4 4964; RV32ZVE32F-NEXT: bnez a6, .LBB51_9 4965; RV32ZVE32F-NEXT: .LBB51_3: 4966; RV32ZVE32F-NEXT: lw a6, 16(a2) 4967; RV32ZVE32F-NEXT: lw a7, 20(a2) 4968; RV32ZVE32F-NEXT: andi t1, t0, 8 4969; RV32ZVE32F-NEXT: bnez t1, .LBB51_10 4970; RV32ZVE32F-NEXT: .LBB51_4: 4971; RV32ZVE32F-NEXT: lw t1, 24(a2) 4972; RV32ZVE32F-NEXT: lw t2, 28(a2) 4973; RV32ZVE32F-NEXT: andi t3, t0, 16 4974; RV32ZVE32F-NEXT: bnez t3, .LBB51_11 4975; RV32ZVE32F-NEXT: .LBB51_5: 4976; RV32ZVE32F-NEXT: lw t3, 32(a2) 4977; RV32ZVE32F-NEXT: lw t4, 36(a2) 4978; RV32ZVE32F-NEXT: andi t5, t0, 32 4979; RV32ZVE32F-NEXT: bnez t5, .LBB51_12 4980; RV32ZVE32F-NEXT: .LBB51_6: 4981; RV32ZVE32F-NEXT: lw t5, 40(a2) 4982; RV32ZVE32F-NEXT: lw t6, 44(a2) 4983; RV32ZVE32F-NEXT: j .LBB51_13 4984; RV32ZVE32F-NEXT: .LBB51_7: 4985; RV32ZVE32F-NEXT: lw a1, 0(a2) 4986; RV32ZVE32F-NEXT: lw a3, 4(a2) 4987; RV32ZVE32F-NEXT: andi a4, t0, 2 4988; RV32ZVE32F-NEXT: beqz a4, .LBB51_2 4989; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1 4990; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4991; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 4992; RV32ZVE32F-NEXT: vmv.x.s a5, v10 4993; RV32ZVE32F-NEXT: lw a4, 0(a5) 4994; RV32ZVE32F-NEXT: lw a5, 4(a5) 4995; RV32ZVE32F-NEXT: andi a6, t0, 4 4996; RV32ZVE32F-NEXT: beqz a6, .LBB51_3 4997; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4 4998; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 4999; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 5000; RV32ZVE32F-NEXT: vmv.x.s a7, v10 5001; RV32ZVE32F-NEXT: lw a6, 0(a7) 5002; RV32ZVE32F-NEXT: lw a7, 4(a7) 5003; RV32ZVE32F-NEXT: andi t1, t0, 8 5004; RV32ZVE32F-NEXT: beqz t1, .LBB51_4 5005; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7 5006; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5007; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 5008; RV32ZVE32F-NEXT: vmv.x.s t2, v10 5009; RV32ZVE32F-NEXT: lw t1, 0(t2) 5010; RV32ZVE32F-NEXT: lw t2, 4(t2) 5011; RV32ZVE32F-NEXT: andi t3, t0, 16 5012; RV32ZVE32F-NEXT: beqz t3, .LBB51_5 5013; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10 5014; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5015; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 5016; RV32ZVE32F-NEXT: vmv.x.s t4, v10 5017; RV32ZVE32F-NEXT: lw t3, 0(t4) 5018; RV32ZVE32F-NEXT: lw t4, 4(t4) 5019; RV32ZVE32F-NEXT: andi t5, t0, 32 5020; RV32ZVE32F-NEXT: beqz t5, .LBB51_6 5021; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13 5022; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5023; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 5024; RV32ZVE32F-NEXT: vmv.x.s t6, v10 5025; RV32ZVE32F-NEXT: lw t5, 0(t6) 5026; RV32ZVE32F-NEXT: lw t6, 4(t6) 5027; RV32ZVE32F-NEXT: .LBB51_13: # %else14 5028; RV32ZVE32F-NEXT: 
addi sp, sp, -16 5029; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 5030; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 5031; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 5032; RV32ZVE32F-NEXT: .cfi_offset s0, -4 5033; RV32ZVE32F-NEXT: .cfi_offset s1, -8 5034; RV32ZVE32F-NEXT: andi s0, t0, 64 5035; RV32ZVE32F-NEXT: beqz s0, .LBB51_16 5036; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 5037; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5038; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 5039; RV32ZVE32F-NEXT: vmv.x.s s1, v10 5040; RV32ZVE32F-NEXT: lw s0, 0(s1) 5041; RV32ZVE32F-NEXT: lw s1, 4(s1) 5042; RV32ZVE32F-NEXT: andi t0, t0, -128 5043; RV32ZVE32F-NEXT: bnez t0, .LBB51_17 5044; RV32ZVE32F-NEXT: .LBB51_15: 5045; RV32ZVE32F-NEXT: lw t0, 56(a2) 5046; RV32ZVE32F-NEXT: lw a2, 60(a2) 5047; RV32ZVE32F-NEXT: j .LBB51_18 5048; RV32ZVE32F-NEXT: .LBB51_16: 5049; RV32ZVE32F-NEXT: lw s0, 48(a2) 5050; RV32ZVE32F-NEXT: lw s1, 52(a2) 5051; RV32ZVE32F-NEXT: andi t0, t0, -128 5052; RV32ZVE32F-NEXT: beqz t0, .LBB51_15 5053; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19 5054; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5055; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 5056; RV32ZVE32F-NEXT: vmv.x.s a2, v8 5057; RV32ZVE32F-NEXT: lw t0, 0(a2) 5058; RV32ZVE32F-NEXT: lw a2, 4(a2) 5059; RV32ZVE32F-NEXT: .LBB51_18: # %else20 5060; RV32ZVE32F-NEXT: sw a1, 0(a0) 5061; RV32ZVE32F-NEXT: sw a3, 4(a0) 5062; RV32ZVE32F-NEXT: sw a4, 8(a0) 5063; RV32ZVE32F-NEXT: sw a5, 12(a0) 5064; RV32ZVE32F-NEXT: sw a6, 16(a0) 5065; RV32ZVE32F-NEXT: sw a7, 20(a0) 5066; RV32ZVE32F-NEXT: sw t1, 24(a0) 5067; RV32ZVE32F-NEXT: sw t2, 28(a0) 5068; RV32ZVE32F-NEXT: sw t3, 32(a0) 5069; RV32ZVE32F-NEXT: sw t4, 36(a0) 5070; RV32ZVE32F-NEXT: sw t5, 40(a0) 5071; RV32ZVE32F-NEXT: sw t6, 44(a0) 5072; RV32ZVE32F-NEXT: sw s0, 48(a0) 5073; RV32ZVE32F-NEXT: sw s1, 52(a0) 5074; RV32ZVE32F-NEXT: sw t0, 56(a0) 5075; RV32ZVE32F-NEXT: sw a2, 60(a0) 5076; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 5077; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 5078; RV32ZVE32F-NEXT: .cfi_restore s0 5079; RV32ZVE32F-NEXT: .cfi_restore s1 5080; RV32ZVE32F-NEXT: addi sp, sp, 16 5081; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 5082; RV32ZVE32F-NEXT: ret 5083; 5084; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64: 5085; RV64ZVE32F: # %bb.0: 5086; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 5087; RV64ZVE32F-NEXT: vmv.x.s a5, v0 5088; RV64ZVE32F-NEXT: andi a3, a5, 1 5089; RV64ZVE32F-NEXT: beqz a3, .LBB51_3 5090; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 5091; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 5092; RV64ZVE32F-NEXT: vmv.x.s a3, v8 5093; RV64ZVE32F-NEXT: slli a3, a3, 3 5094; RV64ZVE32F-NEXT: add a3, a1, a3 5095; RV64ZVE32F-NEXT: ld a3, 0(a3) 5096; RV64ZVE32F-NEXT: andi a4, a5, 2 5097; RV64ZVE32F-NEXT: bnez a4, .LBB51_4 5098; RV64ZVE32F-NEXT: .LBB51_2: 5099; RV64ZVE32F-NEXT: ld a4, 8(a2) 5100; RV64ZVE32F-NEXT: j .LBB51_5 5101; RV64ZVE32F-NEXT: .LBB51_3: 5102; RV64ZVE32F-NEXT: ld a3, 0(a2) 5103; RV64ZVE32F-NEXT: andi a4, a5, 2 5104; RV64ZVE32F-NEXT: beqz a4, .LBB51_2 5105; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1 5106; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 5107; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 5108; RV64ZVE32F-NEXT: vmv.x.s a4, v9 5109; RV64ZVE32F-NEXT: slli a4, a4, 3 5110; RV64ZVE32F-NEXT: add a4, a1, a4 5111; RV64ZVE32F-NEXT: ld a4, 0(a4) 5112; RV64ZVE32F-NEXT: .LBB51_5: # %else2 5113; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 5114; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 5115; RV64ZVE32F-NEXT: andi a6, 
a5, 4 5116; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 5117; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 5118; RV64ZVE32F-NEXT: beqz a6, .LBB51_10 5119; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 5120; RV64ZVE32F-NEXT: vmv.x.s a6, v8 5121; RV64ZVE32F-NEXT: slli a6, a6, 3 5122; RV64ZVE32F-NEXT: add a6, a1, a6 5123; RV64ZVE32F-NEXT: ld a6, 0(a6) 5124; RV64ZVE32F-NEXT: andi a7, a5, 8 5125; RV64ZVE32F-NEXT: bnez a7, .LBB51_11 5126; RV64ZVE32F-NEXT: .LBB51_7: 5127; RV64ZVE32F-NEXT: ld a7, 24(a2) 5128; RV64ZVE32F-NEXT: andi t0, a5, 16 5129; RV64ZVE32F-NEXT: bnez t0, .LBB51_12 5130; RV64ZVE32F-NEXT: .LBB51_8: 5131; RV64ZVE32F-NEXT: ld t0, 32(a2) 5132; RV64ZVE32F-NEXT: andi t1, a5, 32 5133; RV64ZVE32F-NEXT: bnez t1, .LBB51_13 5134; RV64ZVE32F-NEXT: .LBB51_9: 5135; RV64ZVE32F-NEXT: ld t1, 40(a2) 5136; RV64ZVE32F-NEXT: j .LBB51_14 5137; RV64ZVE32F-NEXT: .LBB51_10: 5138; RV64ZVE32F-NEXT: ld a6, 16(a2) 5139; RV64ZVE32F-NEXT: andi a7, a5, 8 5140; RV64ZVE32F-NEXT: beqz a7, .LBB51_7 5141; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7 5142; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 5143; RV64ZVE32F-NEXT: vmv.x.s a7, v8 5144; RV64ZVE32F-NEXT: slli a7, a7, 3 5145; RV64ZVE32F-NEXT: add a7, a1, a7 5146; RV64ZVE32F-NEXT: ld a7, 0(a7) 5147; RV64ZVE32F-NEXT: andi t0, a5, 16 5148; RV64ZVE32F-NEXT: beqz t0, .LBB51_8 5149; RV64ZVE32F-NEXT: .LBB51_12: # %cond.load10 5150; RV64ZVE32F-NEXT: vmv.x.s t0, v9 5151; RV64ZVE32F-NEXT: slli t0, t0, 3 5152; RV64ZVE32F-NEXT: add t0, a1, t0 5153; RV64ZVE32F-NEXT: ld t0, 0(t0) 5154; RV64ZVE32F-NEXT: andi t1, a5, 32 5155; RV64ZVE32F-NEXT: beqz t1, .LBB51_9 5156; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load13 5157; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 5158; RV64ZVE32F-NEXT: vmv.x.s t1, v8 5159; RV64ZVE32F-NEXT: slli t1, t1, 3 5160; RV64ZVE32F-NEXT: add t1, a1, t1 5161; RV64ZVE32F-NEXT: ld t1, 0(t1) 5162; RV64ZVE32F-NEXT: .LBB51_14: # %else14 5163; RV64ZVE32F-NEXT: andi t2, a5, 64 5164; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 5165; RV64ZVE32F-NEXT: beqz t2, .LBB51_17 5166; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 5167; RV64ZVE32F-NEXT: vmv.x.s t2, v8 5168; RV64ZVE32F-NEXT: slli t2, t2, 3 5169; RV64ZVE32F-NEXT: add t2, a1, t2 5170; RV64ZVE32F-NEXT: ld t2, 0(t2) 5171; RV64ZVE32F-NEXT: andi a5, a5, -128 5172; RV64ZVE32F-NEXT: bnez a5, .LBB51_18 5173; RV64ZVE32F-NEXT: .LBB51_16: 5174; RV64ZVE32F-NEXT: ld a1, 56(a2) 5175; RV64ZVE32F-NEXT: j .LBB51_19 5176; RV64ZVE32F-NEXT: .LBB51_17: 5177; RV64ZVE32F-NEXT: ld t2, 48(a2) 5178; RV64ZVE32F-NEXT: andi a5, a5, -128 5179; RV64ZVE32F-NEXT: beqz a5, .LBB51_16 5180; RV64ZVE32F-NEXT: .LBB51_18: # %cond.load19 5181; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 5182; RV64ZVE32F-NEXT: vmv.x.s a2, v8 5183; RV64ZVE32F-NEXT: slli a2, a2, 3 5184; RV64ZVE32F-NEXT: add a1, a1, a2 5185; RV64ZVE32F-NEXT: ld a1, 0(a1) 5186; RV64ZVE32F-NEXT: .LBB51_19: # %else20 5187; RV64ZVE32F-NEXT: sd a3, 0(a0) 5188; RV64ZVE32F-NEXT: sd a4, 8(a0) 5189; RV64ZVE32F-NEXT: sd a6, 16(a0) 5190; RV64ZVE32F-NEXT: sd a7, 24(a0) 5191; RV64ZVE32F-NEXT: sd t0, 32(a0) 5192; RV64ZVE32F-NEXT: sd t1, 40(a0) 5193; RV64ZVE32F-NEXT: sd t2, 48(a0) 5194; RV64ZVE32F-NEXT: sd a1, 56(a0) 5195; RV64ZVE32F-NEXT: ret 5196 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs 5197 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 5198 ret <8 x i64> %v 5199} 5200 5201define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 5202; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64: 5203; 
RV32V: # %bb.0: 5204; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 5205; RV32V-NEXT: vsext.vf2 v10, v8 5206; RV32V-NEXT: vsll.vi v8, v10, 3 5207; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 5208; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 5209; RV32V-NEXT: vmv.v.v v8, v12 5210; RV32V-NEXT: ret 5211; 5212; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i64: 5213; RV64V: # %bb.0: 5214; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 5215; RV64V-NEXT: vsext.vf4 v16, v8 5216; RV64V-NEXT: vsll.vi v8, v16, 3 5217; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 5218; RV64V-NEXT: vmv.v.v v8, v12 5219; RV64V-NEXT: ret 5220; 5221; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64: 5222; RV32ZVE32F: # %bb.0: 5223; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 5224; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 5225; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 5226; RV32ZVE32F-NEXT: vmv.x.s t0, v0 5227; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 5228; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 5229; RV32ZVE32F-NEXT: andi a3, t0, 1 5230; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 5231; RV32ZVE32F-NEXT: beqz a3, .LBB52_7 5232; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 5233; RV32ZVE32F-NEXT: vmv.x.s a3, v8 5234; RV32ZVE32F-NEXT: lw a1, 0(a3) 5235; RV32ZVE32F-NEXT: lw a3, 4(a3) 5236; RV32ZVE32F-NEXT: andi a4, t0, 2 5237; RV32ZVE32F-NEXT: bnez a4, .LBB52_8 5238; RV32ZVE32F-NEXT: .LBB52_2: 5239; RV32ZVE32F-NEXT: lw a4, 8(a2) 5240; RV32ZVE32F-NEXT: lw a5, 12(a2) 5241; RV32ZVE32F-NEXT: andi a6, t0, 4 5242; RV32ZVE32F-NEXT: bnez a6, .LBB52_9 5243; RV32ZVE32F-NEXT: .LBB52_3: 5244; RV32ZVE32F-NEXT: lw a6, 16(a2) 5245; RV32ZVE32F-NEXT: lw a7, 20(a2) 5246; RV32ZVE32F-NEXT: andi t1, t0, 8 5247; RV32ZVE32F-NEXT: bnez t1, .LBB52_10 5248; RV32ZVE32F-NEXT: .LBB52_4: 5249; RV32ZVE32F-NEXT: lw t1, 24(a2) 5250; RV32ZVE32F-NEXT: lw t2, 28(a2) 5251; RV32ZVE32F-NEXT: andi t3, t0, 16 5252; RV32ZVE32F-NEXT: bnez t3, .LBB52_11 5253; RV32ZVE32F-NEXT: .LBB52_5: 5254; RV32ZVE32F-NEXT: lw t3, 32(a2) 5255; RV32ZVE32F-NEXT: lw t4, 36(a2) 5256; RV32ZVE32F-NEXT: andi t5, t0, 32 5257; RV32ZVE32F-NEXT: bnez t5, .LBB52_12 5258; RV32ZVE32F-NEXT: .LBB52_6: 5259; RV32ZVE32F-NEXT: lw t5, 40(a2) 5260; RV32ZVE32F-NEXT: lw t6, 44(a2) 5261; RV32ZVE32F-NEXT: j .LBB52_13 5262; RV32ZVE32F-NEXT: .LBB52_7: 5263; RV32ZVE32F-NEXT: lw a1, 0(a2) 5264; RV32ZVE32F-NEXT: lw a3, 4(a2) 5265; RV32ZVE32F-NEXT: andi a4, t0, 2 5266; RV32ZVE32F-NEXT: beqz a4, .LBB52_2 5267; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1 5268; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5269; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 5270; RV32ZVE32F-NEXT: vmv.x.s a5, v10 5271; RV32ZVE32F-NEXT: lw a4, 0(a5) 5272; RV32ZVE32F-NEXT: lw a5, 4(a5) 5273; RV32ZVE32F-NEXT: andi a6, t0, 4 5274; RV32ZVE32F-NEXT: beqz a6, .LBB52_3 5275; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4 5276; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5277; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 5278; RV32ZVE32F-NEXT: vmv.x.s a7, v10 5279; RV32ZVE32F-NEXT: lw a6, 0(a7) 5280; RV32ZVE32F-NEXT: lw a7, 4(a7) 5281; RV32ZVE32F-NEXT: andi t1, t0, 8 5282; RV32ZVE32F-NEXT: beqz t1, .LBB52_4 5283; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7 5284; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5285; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 5286; RV32ZVE32F-NEXT: vmv.x.s t2, v10 5287; RV32ZVE32F-NEXT: lw t1, 0(t2) 5288; RV32ZVE32F-NEXT: lw t2, 4(t2) 5289; RV32ZVE32F-NEXT: andi t3, t0, 16 5290; RV32ZVE32F-NEXT: beqz t3, .LBB52_5 5291; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10 5292; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, 
ta, ma 5293; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 5294; RV32ZVE32F-NEXT: vmv.x.s t4, v10 5295; RV32ZVE32F-NEXT: lw t3, 0(t4) 5296; RV32ZVE32F-NEXT: lw t4, 4(t4) 5297; RV32ZVE32F-NEXT: andi t5, t0, 32 5298; RV32ZVE32F-NEXT: beqz t5, .LBB52_6 5299; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13 5300; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5301; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 5302; RV32ZVE32F-NEXT: vmv.x.s t6, v10 5303; RV32ZVE32F-NEXT: lw t5, 0(t6) 5304; RV32ZVE32F-NEXT: lw t6, 4(t6) 5305; RV32ZVE32F-NEXT: .LBB52_13: # %else14 5306; RV32ZVE32F-NEXT: addi sp, sp, -16 5307; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 5308; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 5309; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 5310; RV32ZVE32F-NEXT: .cfi_offset s0, -4 5311; RV32ZVE32F-NEXT: .cfi_offset s1, -8 5312; RV32ZVE32F-NEXT: andi s0, t0, 64 5313; RV32ZVE32F-NEXT: beqz s0, .LBB52_16 5314; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 5315; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5316; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 5317; RV32ZVE32F-NEXT: vmv.x.s s1, v10 5318; RV32ZVE32F-NEXT: lw s0, 0(s1) 5319; RV32ZVE32F-NEXT: lw s1, 4(s1) 5320; RV32ZVE32F-NEXT: andi t0, t0, -128 5321; RV32ZVE32F-NEXT: bnez t0, .LBB52_17 5322; RV32ZVE32F-NEXT: .LBB52_15: 5323; RV32ZVE32F-NEXT: lw t0, 56(a2) 5324; RV32ZVE32F-NEXT: lw a2, 60(a2) 5325; RV32ZVE32F-NEXT: j .LBB52_18 5326; RV32ZVE32F-NEXT: .LBB52_16: 5327; RV32ZVE32F-NEXT: lw s0, 48(a2) 5328; RV32ZVE32F-NEXT: lw s1, 52(a2) 5329; RV32ZVE32F-NEXT: andi t0, t0, -128 5330; RV32ZVE32F-NEXT: beqz t0, .LBB52_15 5331; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19 5332; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5333; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 5334; RV32ZVE32F-NEXT: vmv.x.s a2, v8 5335; RV32ZVE32F-NEXT: lw t0, 0(a2) 5336; RV32ZVE32F-NEXT: lw a2, 4(a2) 5337; RV32ZVE32F-NEXT: .LBB52_18: # %else20 5338; RV32ZVE32F-NEXT: sw a1, 0(a0) 5339; RV32ZVE32F-NEXT: sw a3, 4(a0) 5340; RV32ZVE32F-NEXT: sw a4, 8(a0) 5341; RV32ZVE32F-NEXT: sw a5, 12(a0) 5342; RV32ZVE32F-NEXT: sw a6, 16(a0) 5343; RV32ZVE32F-NEXT: sw a7, 20(a0) 5344; RV32ZVE32F-NEXT: sw t1, 24(a0) 5345; RV32ZVE32F-NEXT: sw t2, 28(a0) 5346; RV32ZVE32F-NEXT: sw t3, 32(a0) 5347; RV32ZVE32F-NEXT: sw t4, 36(a0) 5348; RV32ZVE32F-NEXT: sw t5, 40(a0) 5349; RV32ZVE32F-NEXT: sw t6, 44(a0) 5350; RV32ZVE32F-NEXT: sw s0, 48(a0) 5351; RV32ZVE32F-NEXT: sw s1, 52(a0) 5352; RV32ZVE32F-NEXT: sw t0, 56(a0) 5353; RV32ZVE32F-NEXT: sw a2, 60(a0) 5354; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 5355; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 5356; RV32ZVE32F-NEXT: .cfi_restore s0 5357; RV32ZVE32F-NEXT: .cfi_restore s1 5358; RV32ZVE32F-NEXT: addi sp, sp, 16 5359; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 5360; RV32ZVE32F-NEXT: ret 5361; 5362; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64: 5363; RV64ZVE32F: # %bb.0: 5364; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 5365; RV64ZVE32F-NEXT: vmv.x.s a5, v0 5366; RV64ZVE32F-NEXT: andi a3, a5, 1 5367; RV64ZVE32F-NEXT: beqz a3, .LBB52_3 5368; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 5369; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 5370; RV64ZVE32F-NEXT: vmv.x.s a3, v8 5371; RV64ZVE32F-NEXT: slli a3, a3, 3 5372; RV64ZVE32F-NEXT: add a3, a1, a3 5373; RV64ZVE32F-NEXT: ld a3, 0(a3) 5374; RV64ZVE32F-NEXT: andi a4, a5, 2 5375; RV64ZVE32F-NEXT: bnez a4, .LBB52_4 5376; RV64ZVE32F-NEXT: .LBB52_2: 5377; RV64ZVE32F-NEXT: ld a4, 8(a2) 5378; RV64ZVE32F-NEXT: j .LBB52_5 5379; RV64ZVE32F-NEXT: .LBB52_3: 5380; 
RV64ZVE32F-NEXT: ld a3, 0(a2) 5381; RV64ZVE32F-NEXT: andi a4, a5, 2 5382; RV64ZVE32F-NEXT: beqz a4, .LBB52_2 5383; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1 5384; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 5385; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 5386; RV64ZVE32F-NEXT: vmv.x.s a4, v9 5387; RV64ZVE32F-NEXT: slli a4, a4, 3 5388; RV64ZVE32F-NEXT: add a4, a1, a4 5389; RV64ZVE32F-NEXT: ld a4, 0(a4) 5390; RV64ZVE32F-NEXT: .LBB52_5: # %else2 5391; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 5392; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 5393; RV64ZVE32F-NEXT: andi a6, a5, 4 5394; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 5395; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 5396; RV64ZVE32F-NEXT: beqz a6, .LBB52_10 5397; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 5398; RV64ZVE32F-NEXT: vmv.x.s a6, v8 5399; RV64ZVE32F-NEXT: slli a6, a6, 3 5400; RV64ZVE32F-NEXT: add a6, a1, a6 5401; RV64ZVE32F-NEXT: ld a6, 0(a6) 5402; RV64ZVE32F-NEXT: andi a7, a5, 8 5403; RV64ZVE32F-NEXT: bnez a7, .LBB52_11 5404; RV64ZVE32F-NEXT: .LBB52_7: 5405; RV64ZVE32F-NEXT: ld a7, 24(a2) 5406; RV64ZVE32F-NEXT: andi t0, a5, 16 5407; RV64ZVE32F-NEXT: bnez t0, .LBB52_12 5408; RV64ZVE32F-NEXT: .LBB52_8: 5409; RV64ZVE32F-NEXT: ld t0, 32(a2) 5410; RV64ZVE32F-NEXT: andi t1, a5, 32 5411; RV64ZVE32F-NEXT: bnez t1, .LBB52_13 5412; RV64ZVE32F-NEXT: .LBB52_9: 5413; RV64ZVE32F-NEXT: ld t1, 40(a2) 5414; RV64ZVE32F-NEXT: j .LBB52_14 5415; RV64ZVE32F-NEXT: .LBB52_10: 5416; RV64ZVE32F-NEXT: ld a6, 16(a2) 5417; RV64ZVE32F-NEXT: andi a7, a5, 8 5418; RV64ZVE32F-NEXT: beqz a7, .LBB52_7 5419; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7 5420; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 5421; RV64ZVE32F-NEXT: vmv.x.s a7, v8 5422; RV64ZVE32F-NEXT: slli a7, a7, 3 5423; RV64ZVE32F-NEXT: add a7, a1, a7 5424; RV64ZVE32F-NEXT: ld a7, 0(a7) 5425; RV64ZVE32F-NEXT: andi t0, a5, 16 5426; RV64ZVE32F-NEXT: beqz t0, .LBB52_8 5427; RV64ZVE32F-NEXT: .LBB52_12: # %cond.load10 5428; RV64ZVE32F-NEXT: vmv.x.s t0, v9 5429; RV64ZVE32F-NEXT: slli t0, t0, 3 5430; RV64ZVE32F-NEXT: add t0, a1, t0 5431; RV64ZVE32F-NEXT: ld t0, 0(t0) 5432; RV64ZVE32F-NEXT: andi t1, a5, 32 5433; RV64ZVE32F-NEXT: beqz t1, .LBB52_9 5434; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load13 5435; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 5436; RV64ZVE32F-NEXT: vmv.x.s t1, v8 5437; RV64ZVE32F-NEXT: slli t1, t1, 3 5438; RV64ZVE32F-NEXT: add t1, a1, t1 5439; RV64ZVE32F-NEXT: ld t1, 0(t1) 5440; RV64ZVE32F-NEXT: .LBB52_14: # %else14 5441; RV64ZVE32F-NEXT: andi t2, a5, 64 5442; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 5443; RV64ZVE32F-NEXT: beqz t2, .LBB52_17 5444; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 5445; RV64ZVE32F-NEXT: vmv.x.s t2, v8 5446; RV64ZVE32F-NEXT: slli t2, t2, 3 5447; RV64ZVE32F-NEXT: add t2, a1, t2 5448; RV64ZVE32F-NEXT: ld t2, 0(t2) 5449; RV64ZVE32F-NEXT: andi a5, a5, -128 5450; RV64ZVE32F-NEXT: bnez a5, .LBB52_18 5451; RV64ZVE32F-NEXT: .LBB52_16: 5452; RV64ZVE32F-NEXT: ld a1, 56(a2) 5453; RV64ZVE32F-NEXT: j .LBB52_19 5454; RV64ZVE32F-NEXT: .LBB52_17: 5455; RV64ZVE32F-NEXT: ld t2, 48(a2) 5456; RV64ZVE32F-NEXT: andi a5, a5, -128 5457; RV64ZVE32F-NEXT: beqz a5, .LBB52_16 5458; RV64ZVE32F-NEXT: .LBB52_18: # %cond.load19 5459; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 5460; RV64ZVE32F-NEXT: vmv.x.s a2, v8 5461; RV64ZVE32F-NEXT: slli a2, a2, 3 5462; RV64ZVE32F-NEXT: add a1, a1, a2 5463; RV64ZVE32F-NEXT: ld a1, 0(a1) 5464; RV64ZVE32F-NEXT: .LBB52_19: # %else20 5465; RV64ZVE32F-NEXT: sd a3, 0(a0) 5466; RV64ZVE32F-NEXT: sd a4, 8(a0) 5467; RV64ZVE32F-NEXT: sd a6, 16(a0) 5468; 
RV64ZVE32F-NEXT: sd a7, 24(a0) 5469; RV64ZVE32F-NEXT: sd t0, 32(a0) 5470; RV64ZVE32F-NEXT: sd t1, 40(a0) 5471; RV64ZVE32F-NEXT: sd t2, 48(a0) 5472; RV64ZVE32F-NEXT: sd a1, 56(a0) 5473; RV64ZVE32F-NEXT: ret 5474 %eidxs = sext <8 x i16> %idxs to <8 x i64> 5475 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 5476 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 5477 ret <8 x i64> %v 5478} 5479 5480define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 5481; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64: 5482; RV32V: # %bb.0: 5483; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 5484; RV32V-NEXT: vzext.vf2 v10, v8 5485; RV32V-NEXT: vsll.vi v8, v10, 3 5486; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 5487; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 5488; RV32V-NEXT: vmv.v.v v8, v12 5489; RV32V-NEXT: ret 5490; 5491; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64: 5492; RV64V: # %bb.0: 5493; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 5494; RV64V-NEXT: vzext.vf2 v10, v8 5495; RV64V-NEXT: vsll.vi v8, v10, 3 5496; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 5497; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t 5498; RV64V-NEXT: vmv.v.v v8, v12 5499; RV64V-NEXT: ret 5500; 5501; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64: 5502; RV32ZVE32F: # %bb.0: 5503; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 5504; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 5505; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 5506; RV32ZVE32F-NEXT: vmv.x.s t0, v0 5507; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 5508; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 5509; RV32ZVE32F-NEXT: andi a3, t0, 1 5510; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 5511; RV32ZVE32F-NEXT: beqz a3, .LBB53_7 5512; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 5513; RV32ZVE32F-NEXT: vmv.x.s a3, v8 5514; RV32ZVE32F-NEXT: lw a1, 0(a3) 5515; RV32ZVE32F-NEXT: lw a3, 4(a3) 5516; RV32ZVE32F-NEXT: andi a4, t0, 2 5517; RV32ZVE32F-NEXT: bnez a4, .LBB53_8 5518; RV32ZVE32F-NEXT: .LBB53_2: 5519; RV32ZVE32F-NEXT: lw a4, 8(a2) 5520; RV32ZVE32F-NEXT: lw a5, 12(a2) 5521; RV32ZVE32F-NEXT: andi a6, t0, 4 5522; RV32ZVE32F-NEXT: bnez a6, .LBB53_9 5523; RV32ZVE32F-NEXT: .LBB53_3: 5524; RV32ZVE32F-NEXT: lw a6, 16(a2) 5525; RV32ZVE32F-NEXT: lw a7, 20(a2) 5526; RV32ZVE32F-NEXT: andi t1, t0, 8 5527; RV32ZVE32F-NEXT: bnez t1, .LBB53_10 5528; RV32ZVE32F-NEXT: .LBB53_4: 5529; RV32ZVE32F-NEXT: lw t1, 24(a2) 5530; RV32ZVE32F-NEXT: lw t2, 28(a2) 5531; RV32ZVE32F-NEXT: andi t3, t0, 16 5532; RV32ZVE32F-NEXT: bnez t3, .LBB53_11 5533; RV32ZVE32F-NEXT: .LBB53_5: 5534; RV32ZVE32F-NEXT: lw t3, 32(a2) 5535; RV32ZVE32F-NEXT: lw t4, 36(a2) 5536; RV32ZVE32F-NEXT: andi t5, t0, 32 5537; RV32ZVE32F-NEXT: bnez t5, .LBB53_12 5538; RV32ZVE32F-NEXT: .LBB53_6: 5539; RV32ZVE32F-NEXT: lw t5, 40(a2) 5540; RV32ZVE32F-NEXT: lw t6, 44(a2) 5541; RV32ZVE32F-NEXT: j .LBB53_13 5542; RV32ZVE32F-NEXT: .LBB53_7: 5543; RV32ZVE32F-NEXT: lw a1, 0(a2) 5544; RV32ZVE32F-NEXT: lw a3, 4(a2) 5545; RV32ZVE32F-NEXT: andi a4, t0, 2 5546; RV32ZVE32F-NEXT: beqz a4, .LBB53_2 5547; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1 5548; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5549; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 5550; RV32ZVE32F-NEXT: vmv.x.s a5, v10 5551; RV32ZVE32F-NEXT: lw a4, 0(a5) 5552; RV32ZVE32F-NEXT: lw a5, 4(a5) 5553; RV32ZVE32F-NEXT: andi a6, t0, 4 5554; RV32ZVE32F-NEXT: beqz a6, .LBB53_3 5555; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4 5556; RV32ZVE32F-NEXT: vsetivli 
zero, 1, e32, m1, ta, ma 5557; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 5558; RV32ZVE32F-NEXT: vmv.x.s a7, v10 5559; RV32ZVE32F-NEXT: lw a6, 0(a7) 5560; RV32ZVE32F-NEXT: lw a7, 4(a7) 5561; RV32ZVE32F-NEXT: andi t1, t0, 8 5562; RV32ZVE32F-NEXT: beqz t1, .LBB53_4 5563; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7 5564; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5565; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 5566; RV32ZVE32F-NEXT: vmv.x.s t2, v10 5567; RV32ZVE32F-NEXT: lw t1, 0(t2) 5568; RV32ZVE32F-NEXT: lw t2, 4(t2) 5569; RV32ZVE32F-NEXT: andi t3, t0, 16 5570; RV32ZVE32F-NEXT: beqz t3, .LBB53_5 5571; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10 5572; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5573; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 5574; RV32ZVE32F-NEXT: vmv.x.s t4, v10 5575; RV32ZVE32F-NEXT: lw t3, 0(t4) 5576; RV32ZVE32F-NEXT: lw t4, 4(t4) 5577; RV32ZVE32F-NEXT: andi t5, t0, 32 5578; RV32ZVE32F-NEXT: beqz t5, .LBB53_6 5579; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13 5580; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5581; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 5582; RV32ZVE32F-NEXT: vmv.x.s t6, v10 5583; RV32ZVE32F-NEXT: lw t5, 0(t6) 5584; RV32ZVE32F-NEXT: lw t6, 4(t6) 5585; RV32ZVE32F-NEXT: .LBB53_13: # %else14 5586; RV32ZVE32F-NEXT: addi sp, sp, -16 5587; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 5588; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 5589; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 5590; RV32ZVE32F-NEXT: .cfi_offset s0, -4 5591; RV32ZVE32F-NEXT: .cfi_offset s1, -8 5592; RV32ZVE32F-NEXT: andi s0, t0, 64 5593; RV32ZVE32F-NEXT: beqz s0, .LBB53_16 5594; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 5595; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5596; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 5597; RV32ZVE32F-NEXT: vmv.x.s s1, v10 5598; RV32ZVE32F-NEXT: lw s0, 0(s1) 5599; RV32ZVE32F-NEXT: lw s1, 4(s1) 5600; RV32ZVE32F-NEXT: andi t0, t0, -128 5601; RV32ZVE32F-NEXT: bnez t0, .LBB53_17 5602; RV32ZVE32F-NEXT: .LBB53_15: 5603; RV32ZVE32F-NEXT: lw t0, 56(a2) 5604; RV32ZVE32F-NEXT: lw a2, 60(a2) 5605; RV32ZVE32F-NEXT: j .LBB53_18 5606; RV32ZVE32F-NEXT: .LBB53_16: 5607; RV32ZVE32F-NEXT: lw s0, 48(a2) 5608; RV32ZVE32F-NEXT: lw s1, 52(a2) 5609; RV32ZVE32F-NEXT: andi t0, t0, -128 5610; RV32ZVE32F-NEXT: beqz t0, .LBB53_15 5611; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19 5612; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5613; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 5614; RV32ZVE32F-NEXT: vmv.x.s a2, v8 5615; RV32ZVE32F-NEXT: lw t0, 0(a2) 5616; RV32ZVE32F-NEXT: lw a2, 4(a2) 5617; RV32ZVE32F-NEXT: .LBB53_18: # %else20 5618; RV32ZVE32F-NEXT: sw a1, 0(a0) 5619; RV32ZVE32F-NEXT: sw a3, 4(a0) 5620; RV32ZVE32F-NEXT: sw a4, 8(a0) 5621; RV32ZVE32F-NEXT: sw a5, 12(a0) 5622; RV32ZVE32F-NEXT: sw a6, 16(a0) 5623; RV32ZVE32F-NEXT: sw a7, 20(a0) 5624; RV32ZVE32F-NEXT: sw t1, 24(a0) 5625; RV32ZVE32F-NEXT: sw t2, 28(a0) 5626; RV32ZVE32F-NEXT: sw t3, 32(a0) 5627; RV32ZVE32F-NEXT: sw t4, 36(a0) 5628; RV32ZVE32F-NEXT: sw t5, 40(a0) 5629; RV32ZVE32F-NEXT: sw t6, 44(a0) 5630; RV32ZVE32F-NEXT: sw s0, 48(a0) 5631; RV32ZVE32F-NEXT: sw s1, 52(a0) 5632; RV32ZVE32F-NEXT: sw t0, 56(a0) 5633; RV32ZVE32F-NEXT: sw a2, 60(a0) 5634; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 5635; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 5636; RV32ZVE32F-NEXT: .cfi_restore s0 5637; RV32ZVE32F-NEXT: .cfi_restore s1 5638; RV32ZVE32F-NEXT: addi sp, sp, 16 5639; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 5640; RV32ZVE32F-NEXT: ret 5641; 5642; RV64ZVE32F-LABEL: 
mgather_baseidx_zext_v8i16_v8i64: 5643; RV64ZVE32F: # %bb.0: 5644; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 5645; RV64ZVE32F-NEXT: vmv.x.s a5, v0 5646; RV64ZVE32F-NEXT: andi a3, a5, 1 5647; RV64ZVE32F-NEXT: beqz a3, .LBB53_3 5648; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 5649; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 5650; RV64ZVE32F-NEXT: vmv.x.s a3, v8 5651; RV64ZVE32F-NEXT: slli a3, a3, 48 5652; RV64ZVE32F-NEXT: srli a3, a3, 45 5653; RV64ZVE32F-NEXT: add a3, a1, a3 5654; RV64ZVE32F-NEXT: ld a3, 0(a3) 5655; RV64ZVE32F-NEXT: andi a4, a5, 2 5656; RV64ZVE32F-NEXT: bnez a4, .LBB53_4 5657; RV64ZVE32F-NEXT: .LBB53_2: 5658; RV64ZVE32F-NEXT: ld a4, 8(a2) 5659; RV64ZVE32F-NEXT: j .LBB53_5 5660; RV64ZVE32F-NEXT: .LBB53_3: 5661; RV64ZVE32F-NEXT: ld a3, 0(a2) 5662; RV64ZVE32F-NEXT: andi a4, a5, 2 5663; RV64ZVE32F-NEXT: beqz a4, .LBB53_2 5664; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1 5665; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 5666; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 5667; RV64ZVE32F-NEXT: vmv.x.s a4, v9 5668; RV64ZVE32F-NEXT: slli a4, a4, 48 5669; RV64ZVE32F-NEXT: srli a4, a4, 45 5670; RV64ZVE32F-NEXT: add a4, a1, a4 5671; RV64ZVE32F-NEXT: ld a4, 0(a4) 5672; RV64ZVE32F-NEXT: .LBB53_5: # %else2 5673; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 5674; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 5675; RV64ZVE32F-NEXT: andi a6, a5, 4 5676; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 5677; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 5678; RV64ZVE32F-NEXT: beqz a6, .LBB53_10 5679; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 5680; RV64ZVE32F-NEXT: vmv.x.s a6, v8 5681; RV64ZVE32F-NEXT: slli a6, a6, 48 5682; RV64ZVE32F-NEXT: srli a6, a6, 45 5683; RV64ZVE32F-NEXT: add a6, a1, a6 5684; RV64ZVE32F-NEXT: ld a6, 0(a6) 5685; RV64ZVE32F-NEXT: andi a7, a5, 8 5686; RV64ZVE32F-NEXT: bnez a7, .LBB53_11 5687; RV64ZVE32F-NEXT: .LBB53_7: 5688; RV64ZVE32F-NEXT: ld a7, 24(a2) 5689; RV64ZVE32F-NEXT: andi t0, a5, 16 5690; RV64ZVE32F-NEXT: bnez t0, .LBB53_12 5691; RV64ZVE32F-NEXT: .LBB53_8: 5692; RV64ZVE32F-NEXT: ld t0, 32(a2) 5693; RV64ZVE32F-NEXT: andi t1, a5, 32 5694; RV64ZVE32F-NEXT: bnez t1, .LBB53_13 5695; RV64ZVE32F-NEXT: .LBB53_9: 5696; RV64ZVE32F-NEXT: ld t1, 40(a2) 5697; RV64ZVE32F-NEXT: j .LBB53_14 5698; RV64ZVE32F-NEXT: .LBB53_10: 5699; RV64ZVE32F-NEXT: ld a6, 16(a2) 5700; RV64ZVE32F-NEXT: andi a7, a5, 8 5701; RV64ZVE32F-NEXT: beqz a7, .LBB53_7 5702; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7 5703; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 5704; RV64ZVE32F-NEXT: vmv.x.s a7, v8 5705; RV64ZVE32F-NEXT: slli a7, a7, 48 5706; RV64ZVE32F-NEXT: srli a7, a7, 45 5707; RV64ZVE32F-NEXT: add a7, a1, a7 5708; RV64ZVE32F-NEXT: ld a7, 0(a7) 5709; RV64ZVE32F-NEXT: andi t0, a5, 16 5710; RV64ZVE32F-NEXT: beqz t0, .LBB53_8 5711; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10 5712; RV64ZVE32F-NEXT: vmv.x.s t0, v9 5713; RV64ZVE32F-NEXT: slli t0, t0, 48 5714; RV64ZVE32F-NEXT: srli t0, t0, 45 5715; RV64ZVE32F-NEXT: add t0, a1, t0 5716; RV64ZVE32F-NEXT: ld t0, 0(t0) 5717; RV64ZVE32F-NEXT: andi t1, a5, 32 5718; RV64ZVE32F-NEXT: beqz t1, .LBB53_9 5719; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13 5720; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 5721; RV64ZVE32F-NEXT: vmv.x.s t1, v8 5722; RV64ZVE32F-NEXT: slli t1, t1, 48 5723; RV64ZVE32F-NEXT: srli t1, t1, 45 5724; RV64ZVE32F-NEXT: add t1, a1, t1 5725; RV64ZVE32F-NEXT: ld t1, 0(t1) 5726; RV64ZVE32F-NEXT: .LBB53_14: # %else14 5727; RV64ZVE32F-NEXT: andi t2, a5, 64 5728; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 5729; RV64ZVE32F-NEXT: beqz t2, .LBB53_17 5730; 
RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 5731; RV64ZVE32F-NEXT: vmv.x.s t2, v8 5732; RV64ZVE32F-NEXT: slli t2, t2, 48 5733; RV64ZVE32F-NEXT: srli t2, t2, 45 5734; RV64ZVE32F-NEXT: add t2, a1, t2 5735; RV64ZVE32F-NEXT: ld t2, 0(t2) 5736; RV64ZVE32F-NEXT: andi a5, a5, -128 5737; RV64ZVE32F-NEXT: bnez a5, .LBB53_18 5738; RV64ZVE32F-NEXT: .LBB53_16: 5739; RV64ZVE32F-NEXT: ld a1, 56(a2) 5740; RV64ZVE32F-NEXT: j .LBB53_19 5741; RV64ZVE32F-NEXT: .LBB53_17: 5742; RV64ZVE32F-NEXT: ld t2, 48(a2) 5743; RV64ZVE32F-NEXT: andi a5, a5, -128 5744; RV64ZVE32F-NEXT: beqz a5, .LBB53_16 5745; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19 5746; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 5747; RV64ZVE32F-NEXT: vmv.x.s a2, v8 5748; RV64ZVE32F-NEXT: slli a2, a2, 48 5749; RV64ZVE32F-NEXT: srli a2, a2, 45 5750; RV64ZVE32F-NEXT: add a1, a1, a2 5751; RV64ZVE32F-NEXT: ld a1, 0(a1) 5752; RV64ZVE32F-NEXT: .LBB53_19: # %else20 5753; RV64ZVE32F-NEXT: sd a3, 0(a0) 5754; RV64ZVE32F-NEXT: sd a4, 8(a0) 5755; RV64ZVE32F-NEXT: sd a6, 16(a0) 5756; RV64ZVE32F-NEXT: sd a7, 24(a0) 5757; RV64ZVE32F-NEXT: sd t0, 32(a0) 5758; RV64ZVE32F-NEXT: sd t1, 40(a0) 5759; RV64ZVE32F-NEXT: sd t2, 48(a0) 5760; RV64ZVE32F-NEXT: sd a1, 56(a0) 5761; RV64ZVE32F-NEXT: ret 5762 %eidxs = zext <8 x i16> %idxs to <8 x i64> 5763 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 5764 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 5765 ret <8 x i64> %v 5766} 5767 5768define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 5769; RV32V-LABEL: mgather_baseidx_v8i32_v8i64: 5770; RV32V: # %bb.0: 5771; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 5772; RV32V-NEXT: vsll.vi v8, v8, 3 5773; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 5774; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 5775; RV32V-NEXT: vmv.v.v v8, v12 5776; RV32V-NEXT: ret 5777; 5778; RV64V-LABEL: mgather_baseidx_v8i32_v8i64: 5779; RV64V: # %bb.0: 5780; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 5781; RV64V-NEXT: vsext.vf2 v16, v8 5782; RV64V-NEXT: vsll.vi v8, v16, 3 5783; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 5784; RV64V-NEXT: vmv.v.v v8, v12 5785; RV64V-NEXT: ret 5786; 5787; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64: 5788; RV32ZVE32F: # %bb.0: 5789; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 5790; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 5791; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 5792; RV32ZVE32F-NEXT: vmv.x.s t0, v0 5793; RV32ZVE32F-NEXT: andi a3, t0, 1 5794; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 5795; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 5796; RV32ZVE32F-NEXT: beqz a3, .LBB54_7 5797; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 5798; RV32ZVE32F-NEXT: vmv.x.s a3, v8 5799; RV32ZVE32F-NEXT: lw a1, 0(a3) 5800; RV32ZVE32F-NEXT: lw a3, 4(a3) 5801; RV32ZVE32F-NEXT: andi a4, t0, 2 5802; RV32ZVE32F-NEXT: bnez a4, .LBB54_8 5803; RV32ZVE32F-NEXT: .LBB54_2: 5804; RV32ZVE32F-NEXT: lw a4, 8(a2) 5805; RV32ZVE32F-NEXT: lw a5, 12(a2) 5806; RV32ZVE32F-NEXT: andi a6, t0, 4 5807; RV32ZVE32F-NEXT: bnez a6, .LBB54_9 5808; RV32ZVE32F-NEXT: .LBB54_3: 5809; RV32ZVE32F-NEXT: lw a6, 16(a2) 5810; RV32ZVE32F-NEXT: lw a7, 20(a2) 5811; RV32ZVE32F-NEXT: andi t1, t0, 8 5812; RV32ZVE32F-NEXT: bnez t1, .LBB54_10 5813; RV32ZVE32F-NEXT: .LBB54_4: 5814; RV32ZVE32F-NEXT: lw t1, 24(a2) 5815; RV32ZVE32F-NEXT: lw t2, 28(a2) 5816; RV32ZVE32F-NEXT: andi t3, t0, 16 5817; RV32ZVE32F-NEXT: bnez t3, .LBB54_11 5818; RV32ZVE32F-NEXT: .LBB54_5: 5819; RV32ZVE32F-NEXT: lw t3, 32(a2) 
5820; RV32ZVE32F-NEXT: lw t4, 36(a2) 5821; RV32ZVE32F-NEXT: andi t5, t0, 32 5822; RV32ZVE32F-NEXT: bnez t5, .LBB54_12 5823; RV32ZVE32F-NEXT: .LBB54_6: 5824; RV32ZVE32F-NEXT: lw t5, 40(a2) 5825; RV32ZVE32F-NEXT: lw t6, 44(a2) 5826; RV32ZVE32F-NEXT: j .LBB54_13 5827; RV32ZVE32F-NEXT: .LBB54_7: 5828; RV32ZVE32F-NEXT: lw a1, 0(a2) 5829; RV32ZVE32F-NEXT: lw a3, 4(a2) 5830; RV32ZVE32F-NEXT: andi a4, t0, 2 5831; RV32ZVE32F-NEXT: beqz a4, .LBB54_2 5832; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1 5833; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5834; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 5835; RV32ZVE32F-NEXT: vmv.x.s a5, v10 5836; RV32ZVE32F-NEXT: lw a4, 0(a5) 5837; RV32ZVE32F-NEXT: lw a5, 4(a5) 5838; RV32ZVE32F-NEXT: andi a6, t0, 4 5839; RV32ZVE32F-NEXT: beqz a6, .LBB54_3 5840; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4 5841; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5842; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 5843; RV32ZVE32F-NEXT: vmv.x.s a7, v10 5844; RV32ZVE32F-NEXT: lw a6, 0(a7) 5845; RV32ZVE32F-NEXT: lw a7, 4(a7) 5846; RV32ZVE32F-NEXT: andi t1, t0, 8 5847; RV32ZVE32F-NEXT: beqz t1, .LBB54_4 5848; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7 5849; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5850; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 5851; RV32ZVE32F-NEXT: vmv.x.s t2, v10 5852; RV32ZVE32F-NEXT: lw t1, 0(t2) 5853; RV32ZVE32F-NEXT: lw t2, 4(t2) 5854; RV32ZVE32F-NEXT: andi t3, t0, 16 5855; RV32ZVE32F-NEXT: beqz t3, .LBB54_5 5856; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10 5857; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5858; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 5859; RV32ZVE32F-NEXT: vmv.x.s t4, v10 5860; RV32ZVE32F-NEXT: lw t3, 0(t4) 5861; RV32ZVE32F-NEXT: lw t4, 4(t4) 5862; RV32ZVE32F-NEXT: andi t5, t0, 32 5863; RV32ZVE32F-NEXT: beqz t5, .LBB54_6 5864; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13 5865; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5866; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 5867; RV32ZVE32F-NEXT: vmv.x.s t6, v10 5868; RV32ZVE32F-NEXT: lw t5, 0(t6) 5869; RV32ZVE32F-NEXT: lw t6, 4(t6) 5870; RV32ZVE32F-NEXT: .LBB54_13: # %else14 5871; RV32ZVE32F-NEXT: addi sp, sp, -16 5872; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 5873; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 5874; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 5875; RV32ZVE32F-NEXT: .cfi_offset s0, -4 5876; RV32ZVE32F-NEXT: .cfi_offset s1, -8 5877; RV32ZVE32F-NEXT: andi s0, t0, 64 5878; RV32ZVE32F-NEXT: beqz s0, .LBB54_16 5879; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 5880; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5881; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 5882; RV32ZVE32F-NEXT: vmv.x.s s1, v10 5883; RV32ZVE32F-NEXT: lw s0, 0(s1) 5884; RV32ZVE32F-NEXT: lw s1, 4(s1) 5885; RV32ZVE32F-NEXT: andi t0, t0, -128 5886; RV32ZVE32F-NEXT: bnez t0, .LBB54_17 5887; RV32ZVE32F-NEXT: .LBB54_15: 5888; RV32ZVE32F-NEXT: lw t0, 56(a2) 5889; RV32ZVE32F-NEXT: lw a2, 60(a2) 5890; RV32ZVE32F-NEXT: j .LBB54_18 5891; RV32ZVE32F-NEXT: .LBB54_16: 5892; RV32ZVE32F-NEXT: lw s0, 48(a2) 5893; RV32ZVE32F-NEXT: lw s1, 52(a2) 5894; RV32ZVE32F-NEXT: andi t0, t0, -128 5895; RV32ZVE32F-NEXT: beqz t0, .LBB54_15 5896; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19 5897; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 5898; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 5899; RV32ZVE32F-NEXT: vmv.x.s a2, v8 5900; RV32ZVE32F-NEXT: lw t0, 0(a2) 5901; RV32ZVE32F-NEXT: lw a2, 4(a2) 5902; RV32ZVE32F-NEXT: .LBB54_18: # %else20 5903; RV32ZVE32F-NEXT: sw a1, 0(a0) 5904; RV32ZVE32F-NEXT: sw a3, 4(a0) 
5905; RV32ZVE32F-NEXT: sw a4, 8(a0) 5906; RV32ZVE32F-NEXT: sw a5, 12(a0) 5907; RV32ZVE32F-NEXT: sw a6, 16(a0) 5908; RV32ZVE32F-NEXT: sw a7, 20(a0) 5909; RV32ZVE32F-NEXT: sw t1, 24(a0) 5910; RV32ZVE32F-NEXT: sw t2, 28(a0) 5911; RV32ZVE32F-NEXT: sw t3, 32(a0) 5912; RV32ZVE32F-NEXT: sw t4, 36(a0) 5913; RV32ZVE32F-NEXT: sw t5, 40(a0) 5914; RV32ZVE32F-NEXT: sw t6, 44(a0) 5915; RV32ZVE32F-NEXT: sw s0, 48(a0) 5916; RV32ZVE32F-NEXT: sw s1, 52(a0) 5917; RV32ZVE32F-NEXT: sw t0, 56(a0) 5918; RV32ZVE32F-NEXT: sw a2, 60(a0) 5919; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 5920; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 5921; RV32ZVE32F-NEXT: .cfi_restore s0 5922; RV32ZVE32F-NEXT: .cfi_restore s1 5923; RV32ZVE32F-NEXT: addi sp, sp, 16 5924; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 5925; RV32ZVE32F-NEXT: ret 5926; 5927; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64: 5928; RV64ZVE32F: # %bb.0: 5929; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 5930; RV64ZVE32F-NEXT: vmv.x.s a5, v0 5931; RV64ZVE32F-NEXT: andi a3, a5, 1 5932; RV64ZVE32F-NEXT: beqz a3, .LBB54_3 5933; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 5934; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 5935; RV64ZVE32F-NEXT: vmv.x.s a3, v8 5936; RV64ZVE32F-NEXT: slli a3, a3, 3 5937; RV64ZVE32F-NEXT: add a3, a1, a3 5938; RV64ZVE32F-NEXT: ld a3, 0(a3) 5939; RV64ZVE32F-NEXT: andi a4, a5, 2 5940; RV64ZVE32F-NEXT: bnez a4, .LBB54_4 5941; RV64ZVE32F-NEXT: .LBB54_2: 5942; RV64ZVE32F-NEXT: ld a4, 8(a2) 5943; RV64ZVE32F-NEXT: j .LBB54_5 5944; RV64ZVE32F-NEXT: .LBB54_3: 5945; RV64ZVE32F-NEXT: ld a3, 0(a2) 5946; RV64ZVE32F-NEXT: andi a4, a5, 2 5947; RV64ZVE32F-NEXT: beqz a4, .LBB54_2 5948; RV64ZVE32F-NEXT: .LBB54_4: # %cond.load1 5949; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 5950; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 5951; RV64ZVE32F-NEXT: vmv.x.s a4, v10 5952; RV64ZVE32F-NEXT: slli a4, a4, 3 5953; RV64ZVE32F-NEXT: add a4, a1, a4 5954; RV64ZVE32F-NEXT: ld a4, 0(a4) 5955; RV64ZVE32F-NEXT: .LBB54_5: # %else2 5956; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 5957; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 5958; RV64ZVE32F-NEXT: andi a6, a5, 4 5959; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 5960; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 5961; RV64ZVE32F-NEXT: beqz a6, .LBB54_10 5962; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 5963; RV64ZVE32F-NEXT: vmv.x.s a6, v8 5964; RV64ZVE32F-NEXT: slli a6, a6, 3 5965; RV64ZVE32F-NEXT: add a6, a1, a6 5966; RV64ZVE32F-NEXT: ld a6, 0(a6) 5967; RV64ZVE32F-NEXT: andi a7, a5, 8 5968; RV64ZVE32F-NEXT: bnez a7, .LBB54_11 5969; RV64ZVE32F-NEXT: .LBB54_7: 5970; RV64ZVE32F-NEXT: ld a7, 24(a2) 5971; RV64ZVE32F-NEXT: andi t0, a5, 16 5972; RV64ZVE32F-NEXT: bnez t0, .LBB54_12 5973; RV64ZVE32F-NEXT: .LBB54_8: 5974; RV64ZVE32F-NEXT: ld t0, 32(a2) 5975; RV64ZVE32F-NEXT: andi t1, a5, 32 5976; RV64ZVE32F-NEXT: bnez t1, .LBB54_13 5977; RV64ZVE32F-NEXT: .LBB54_9: 5978; RV64ZVE32F-NEXT: ld t1, 40(a2) 5979; RV64ZVE32F-NEXT: j .LBB54_14 5980; RV64ZVE32F-NEXT: .LBB54_10: 5981; RV64ZVE32F-NEXT: ld a6, 16(a2) 5982; RV64ZVE32F-NEXT: andi a7, a5, 8 5983; RV64ZVE32F-NEXT: beqz a7, .LBB54_7 5984; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7 5985; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 5986; RV64ZVE32F-NEXT: vmv.x.s a7, v8 5987; RV64ZVE32F-NEXT: slli a7, a7, 3 5988; RV64ZVE32F-NEXT: add a7, a1, a7 5989; RV64ZVE32F-NEXT: ld a7, 0(a7) 5990; RV64ZVE32F-NEXT: andi t0, a5, 16 5991; RV64ZVE32F-NEXT: beqz t0, .LBB54_8 5992; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10 5993; RV64ZVE32F-NEXT: vmv.x.s t0, v10 5994; 
RV64ZVE32F-NEXT: slli t0, t0, 3 5995; RV64ZVE32F-NEXT: add t0, a1, t0 5996; RV64ZVE32F-NEXT: ld t0, 0(t0) 5997; RV64ZVE32F-NEXT: andi t1, a5, 32 5998; RV64ZVE32F-NEXT: beqz t1, .LBB54_9 5999; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load13 6000; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 6001; RV64ZVE32F-NEXT: vmv.x.s t1, v8 6002; RV64ZVE32F-NEXT: slli t1, t1, 3 6003; RV64ZVE32F-NEXT: add t1, a1, t1 6004; RV64ZVE32F-NEXT: ld t1, 0(t1) 6005; RV64ZVE32F-NEXT: .LBB54_14: # %else14 6006; RV64ZVE32F-NEXT: andi t2, a5, 64 6007; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 6008; RV64ZVE32F-NEXT: beqz t2, .LBB54_17 6009; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 6010; RV64ZVE32F-NEXT: vmv.x.s t2, v8 6011; RV64ZVE32F-NEXT: slli t2, t2, 3 6012; RV64ZVE32F-NEXT: add t2, a1, t2 6013; RV64ZVE32F-NEXT: ld t2, 0(t2) 6014; RV64ZVE32F-NEXT: andi a5, a5, -128 6015; RV64ZVE32F-NEXT: bnez a5, .LBB54_18 6016; RV64ZVE32F-NEXT: .LBB54_16: 6017; RV64ZVE32F-NEXT: ld a1, 56(a2) 6018; RV64ZVE32F-NEXT: j .LBB54_19 6019; RV64ZVE32F-NEXT: .LBB54_17: 6020; RV64ZVE32F-NEXT: ld t2, 48(a2) 6021; RV64ZVE32F-NEXT: andi a5, a5, -128 6022; RV64ZVE32F-NEXT: beqz a5, .LBB54_16 6023; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19 6024; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 6025; RV64ZVE32F-NEXT: vmv.x.s a2, v8 6026; RV64ZVE32F-NEXT: slli a2, a2, 3 6027; RV64ZVE32F-NEXT: add a1, a1, a2 6028; RV64ZVE32F-NEXT: ld a1, 0(a1) 6029; RV64ZVE32F-NEXT: .LBB54_19: # %else20 6030; RV64ZVE32F-NEXT: sd a3, 0(a0) 6031; RV64ZVE32F-NEXT: sd a4, 8(a0) 6032; RV64ZVE32F-NEXT: sd a6, 16(a0) 6033; RV64ZVE32F-NEXT: sd a7, 24(a0) 6034; RV64ZVE32F-NEXT: sd t0, 32(a0) 6035; RV64ZVE32F-NEXT: sd t1, 40(a0) 6036; RV64ZVE32F-NEXT: sd t2, 48(a0) 6037; RV64ZVE32F-NEXT: sd a1, 56(a0) 6038; RV64ZVE32F-NEXT: ret 6039 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs 6040 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 6041 ret <8 x i64> %v 6042} 6043 6044define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 6045; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8i64: 6046; RV32V: # %bb.0: 6047; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 6048; RV32V-NEXT: vsll.vi v8, v8, 3 6049; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 6050; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 6051; RV32V-NEXT: vmv.v.v v8, v12 6052; RV32V-NEXT: ret 6053; 6054; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8i64: 6055; RV64V: # %bb.0: 6056; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 6057; RV64V-NEXT: vsext.vf2 v16, v8 6058; RV64V-NEXT: vsll.vi v8, v16, 3 6059; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 6060; RV64V-NEXT: vmv.v.v v8, v12 6061; RV64V-NEXT: ret 6062; 6063; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64: 6064; RV32ZVE32F: # %bb.0: 6065; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 6066; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 6067; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 6068; RV32ZVE32F-NEXT: vmv.x.s t0, v0 6069; RV32ZVE32F-NEXT: andi a3, t0, 1 6070; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 6071; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 6072; RV32ZVE32F-NEXT: beqz a3, .LBB55_7 6073; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 6074; RV32ZVE32F-NEXT: vmv.x.s a3, v8 6075; RV32ZVE32F-NEXT: lw a1, 0(a3) 6076; RV32ZVE32F-NEXT: lw a3, 4(a3) 6077; RV32ZVE32F-NEXT: andi a4, t0, 2 6078; RV32ZVE32F-NEXT: bnez a4, .LBB55_8 6079; RV32ZVE32F-NEXT: .LBB55_2: 6080; RV32ZVE32F-NEXT: lw a4, 8(a2) 6081; RV32ZVE32F-NEXT: lw a5, 12(a2) 6082; RV32ZVE32F-NEXT: 
andi a6, t0, 4 6083; RV32ZVE32F-NEXT: bnez a6, .LBB55_9 6084; RV32ZVE32F-NEXT: .LBB55_3: 6085; RV32ZVE32F-NEXT: lw a6, 16(a2) 6086; RV32ZVE32F-NEXT: lw a7, 20(a2) 6087; RV32ZVE32F-NEXT: andi t1, t0, 8 6088; RV32ZVE32F-NEXT: bnez t1, .LBB55_10 6089; RV32ZVE32F-NEXT: .LBB55_4: 6090; RV32ZVE32F-NEXT: lw t1, 24(a2) 6091; RV32ZVE32F-NEXT: lw t2, 28(a2) 6092; RV32ZVE32F-NEXT: andi t3, t0, 16 6093; RV32ZVE32F-NEXT: bnez t3, .LBB55_11 6094; RV32ZVE32F-NEXT: .LBB55_5: 6095; RV32ZVE32F-NEXT: lw t3, 32(a2) 6096; RV32ZVE32F-NEXT: lw t4, 36(a2) 6097; RV32ZVE32F-NEXT: andi t5, t0, 32 6098; RV32ZVE32F-NEXT: bnez t5, .LBB55_12 6099; RV32ZVE32F-NEXT: .LBB55_6: 6100; RV32ZVE32F-NEXT: lw t5, 40(a2) 6101; RV32ZVE32F-NEXT: lw t6, 44(a2) 6102; RV32ZVE32F-NEXT: j .LBB55_13 6103; RV32ZVE32F-NEXT: .LBB55_7: 6104; RV32ZVE32F-NEXT: lw a1, 0(a2) 6105; RV32ZVE32F-NEXT: lw a3, 4(a2) 6106; RV32ZVE32F-NEXT: andi a4, t0, 2 6107; RV32ZVE32F-NEXT: beqz a4, .LBB55_2 6108; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1 6109; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6110; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 6111; RV32ZVE32F-NEXT: vmv.x.s a5, v10 6112; RV32ZVE32F-NEXT: lw a4, 0(a5) 6113; RV32ZVE32F-NEXT: lw a5, 4(a5) 6114; RV32ZVE32F-NEXT: andi a6, t0, 4 6115; RV32ZVE32F-NEXT: beqz a6, .LBB55_3 6116; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4 6117; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6118; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 6119; RV32ZVE32F-NEXT: vmv.x.s a7, v10 6120; RV32ZVE32F-NEXT: lw a6, 0(a7) 6121; RV32ZVE32F-NEXT: lw a7, 4(a7) 6122; RV32ZVE32F-NEXT: andi t1, t0, 8 6123; RV32ZVE32F-NEXT: beqz t1, .LBB55_4 6124; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7 6125; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6126; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 6127; RV32ZVE32F-NEXT: vmv.x.s t2, v10 6128; RV32ZVE32F-NEXT: lw t1, 0(t2) 6129; RV32ZVE32F-NEXT: lw t2, 4(t2) 6130; RV32ZVE32F-NEXT: andi t3, t0, 16 6131; RV32ZVE32F-NEXT: beqz t3, .LBB55_5 6132; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10 6133; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6134; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 6135; RV32ZVE32F-NEXT: vmv.x.s t4, v10 6136; RV32ZVE32F-NEXT: lw t3, 0(t4) 6137; RV32ZVE32F-NEXT: lw t4, 4(t4) 6138; RV32ZVE32F-NEXT: andi t5, t0, 32 6139; RV32ZVE32F-NEXT: beqz t5, .LBB55_6 6140; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13 6141; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6142; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 6143; RV32ZVE32F-NEXT: vmv.x.s t6, v10 6144; RV32ZVE32F-NEXT: lw t5, 0(t6) 6145; RV32ZVE32F-NEXT: lw t6, 4(t6) 6146; RV32ZVE32F-NEXT: .LBB55_13: # %else14 6147; RV32ZVE32F-NEXT: addi sp, sp, -16 6148; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 6149; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 6150; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 6151; RV32ZVE32F-NEXT: .cfi_offset s0, -4 6152; RV32ZVE32F-NEXT: .cfi_offset s1, -8 6153; RV32ZVE32F-NEXT: andi s0, t0, 64 6154; RV32ZVE32F-NEXT: beqz s0, .LBB55_16 6155; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 6156; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6157; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 6158; RV32ZVE32F-NEXT: vmv.x.s s1, v10 6159; RV32ZVE32F-NEXT: lw s0, 0(s1) 6160; RV32ZVE32F-NEXT: lw s1, 4(s1) 6161; RV32ZVE32F-NEXT: andi t0, t0, -128 6162; RV32ZVE32F-NEXT: bnez t0, .LBB55_17 6163; RV32ZVE32F-NEXT: .LBB55_15: 6164; RV32ZVE32F-NEXT: lw t0, 56(a2) 6165; RV32ZVE32F-NEXT: lw a2, 60(a2) 6166; RV32ZVE32F-NEXT: j .LBB55_18 6167; RV32ZVE32F-NEXT: .LBB55_16: 6168; RV32ZVE32F-NEXT: lw s0, 
48(a2) 6169; RV32ZVE32F-NEXT: lw s1, 52(a2) 6170; RV32ZVE32F-NEXT: andi t0, t0, -128 6171; RV32ZVE32F-NEXT: beqz t0, .LBB55_15 6172; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19 6173; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6174; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 6175; RV32ZVE32F-NEXT: vmv.x.s a2, v8 6176; RV32ZVE32F-NEXT: lw t0, 0(a2) 6177; RV32ZVE32F-NEXT: lw a2, 4(a2) 6178; RV32ZVE32F-NEXT: .LBB55_18: # %else20 6179; RV32ZVE32F-NEXT: sw a1, 0(a0) 6180; RV32ZVE32F-NEXT: sw a3, 4(a0) 6181; RV32ZVE32F-NEXT: sw a4, 8(a0) 6182; RV32ZVE32F-NEXT: sw a5, 12(a0) 6183; RV32ZVE32F-NEXT: sw a6, 16(a0) 6184; RV32ZVE32F-NEXT: sw a7, 20(a0) 6185; RV32ZVE32F-NEXT: sw t1, 24(a0) 6186; RV32ZVE32F-NEXT: sw t2, 28(a0) 6187; RV32ZVE32F-NEXT: sw t3, 32(a0) 6188; RV32ZVE32F-NEXT: sw t4, 36(a0) 6189; RV32ZVE32F-NEXT: sw t5, 40(a0) 6190; RV32ZVE32F-NEXT: sw t6, 44(a0) 6191; RV32ZVE32F-NEXT: sw s0, 48(a0) 6192; RV32ZVE32F-NEXT: sw s1, 52(a0) 6193; RV32ZVE32F-NEXT: sw t0, 56(a0) 6194; RV32ZVE32F-NEXT: sw a2, 60(a0) 6195; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 6196; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 6197; RV32ZVE32F-NEXT: .cfi_restore s0 6198; RV32ZVE32F-NEXT: .cfi_restore s1 6199; RV32ZVE32F-NEXT: addi sp, sp, 16 6200; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 6201; RV32ZVE32F-NEXT: ret 6202; 6203; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64: 6204; RV64ZVE32F: # %bb.0: 6205; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 6206; RV64ZVE32F-NEXT: vmv.x.s a5, v0 6207; RV64ZVE32F-NEXT: andi a3, a5, 1 6208; RV64ZVE32F-NEXT: beqz a3, .LBB55_3 6209; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 6210; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 6211; RV64ZVE32F-NEXT: vmv.x.s a3, v8 6212; RV64ZVE32F-NEXT: slli a3, a3, 3 6213; RV64ZVE32F-NEXT: add a3, a1, a3 6214; RV64ZVE32F-NEXT: ld a3, 0(a3) 6215; RV64ZVE32F-NEXT: andi a4, a5, 2 6216; RV64ZVE32F-NEXT: bnez a4, .LBB55_4 6217; RV64ZVE32F-NEXT: .LBB55_2: 6218; RV64ZVE32F-NEXT: ld a4, 8(a2) 6219; RV64ZVE32F-NEXT: j .LBB55_5 6220; RV64ZVE32F-NEXT: .LBB55_3: 6221; RV64ZVE32F-NEXT: ld a3, 0(a2) 6222; RV64ZVE32F-NEXT: andi a4, a5, 2 6223; RV64ZVE32F-NEXT: beqz a4, .LBB55_2 6224; RV64ZVE32F-NEXT: .LBB55_4: # %cond.load1 6225; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6226; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 6227; RV64ZVE32F-NEXT: vmv.x.s a4, v10 6228; RV64ZVE32F-NEXT: slli a4, a4, 3 6229; RV64ZVE32F-NEXT: add a4, a1, a4 6230; RV64ZVE32F-NEXT: ld a4, 0(a4) 6231; RV64ZVE32F-NEXT: .LBB55_5: # %else2 6232; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 6233; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 6234; RV64ZVE32F-NEXT: andi a6, a5, 4 6235; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 6236; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 6237; RV64ZVE32F-NEXT: beqz a6, .LBB55_10 6238; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 6239; RV64ZVE32F-NEXT: vmv.x.s a6, v8 6240; RV64ZVE32F-NEXT: slli a6, a6, 3 6241; RV64ZVE32F-NEXT: add a6, a1, a6 6242; RV64ZVE32F-NEXT: ld a6, 0(a6) 6243; RV64ZVE32F-NEXT: andi a7, a5, 8 6244; RV64ZVE32F-NEXT: bnez a7, .LBB55_11 6245; RV64ZVE32F-NEXT: .LBB55_7: 6246; RV64ZVE32F-NEXT: ld a7, 24(a2) 6247; RV64ZVE32F-NEXT: andi t0, a5, 16 6248; RV64ZVE32F-NEXT: bnez t0, .LBB55_12 6249; RV64ZVE32F-NEXT: .LBB55_8: 6250; RV64ZVE32F-NEXT: ld t0, 32(a2) 6251; RV64ZVE32F-NEXT: andi t1, a5, 32 6252; RV64ZVE32F-NEXT: bnez t1, .LBB55_13 6253; RV64ZVE32F-NEXT: .LBB55_9: 6254; RV64ZVE32F-NEXT: ld t1, 40(a2) 6255; RV64ZVE32F-NEXT: j .LBB55_14 6256; RV64ZVE32F-NEXT: .LBB55_10: 6257; RV64ZVE32F-NEXT: 
ld a6, 16(a2) 6258; RV64ZVE32F-NEXT: andi a7, a5, 8 6259; RV64ZVE32F-NEXT: beqz a7, .LBB55_7 6260; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7 6261; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 6262; RV64ZVE32F-NEXT: vmv.x.s a7, v8 6263; RV64ZVE32F-NEXT: slli a7, a7, 3 6264; RV64ZVE32F-NEXT: add a7, a1, a7 6265; RV64ZVE32F-NEXT: ld a7, 0(a7) 6266; RV64ZVE32F-NEXT: andi t0, a5, 16 6267; RV64ZVE32F-NEXT: beqz t0, .LBB55_8 6268; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10 6269; RV64ZVE32F-NEXT: vmv.x.s t0, v10 6270; RV64ZVE32F-NEXT: slli t0, t0, 3 6271; RV64ZVE32F-NEXT: add t0, a1, t0 6272; RV64ZVE32F-NEXT: ld t0, 0(t0) 6273; RV64ZVE32F-NEXT: andi t1, a5, 32 6274; RV64ZVE32F-NEXT: beqz t1, .LBB55_9 6275; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13 6276; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 6277; RV64ZVE32F-NEXT: vmv.x.s t1, v8 6278; RV64ZVE32F-NEXT: slli t1, t1, 3 6279; RV64ZVE32F-NEXT: add t1, a1, t1 6280; RV64ZVE32F-NEXT: ld t1, 0(t1) 6281; RV64ZVE32F-NEXT: .LBB55_14: # %else14 6282; RV64ZVE32F-NEXT: andi t2, a5, 64 6283; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 6284; RV64ZVE32F-NEXT: beqz t2, .LBB55_17 6285; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 6286; RV64ZVE32F-NEXT: vmv.x.s t2, v8 6287; RV64ZVE32F-NEXT: slli t2, t2, 3 6288; RV64ZVE32F-NEXT: add t2, a1, t2 6289; RV64ZVE32F-NEXT: ld t2, 0(t2) 6290; RV64ZVE32F-NEXT: andi a5, a5, -128 6291; RV64ZVE32F-NEXT: bnez a5, .LBB55_18 6292; RV64ZVE32F-NEXT: .LBB55_16: 6293; RV64ZVE32F-NEXT: ld a1, 56(a2) 6294; RV64ZVE32F-NEXT: j .LBB55_19 6295; RV64ZVE32F-NEXT: .LBB55_17: 6296; RV64ZVE32F-NEXT: ld t2, 48(a2) 6297; RV64ZVE32F-NEXT: andi a5, a5, -128 6298; RV64ZVE32F-NEXT: beqz a5, .LBB55_16 6299; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19 6300; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 6301; RV64ZVE32F-NEXT: vmv.x.s a2, v8 6302; RV64ZVE32F-NEXT: slli a2, a2, 3 6303; RV64ZVE32F-NEXT: add a1, a1, a2 6304; RV64ZVE32F-NEXT: ld a1, 0(a1) 6305; RV64ZVE32F-NEXT: .LBB55_19: # %else20 6306; RV64ZVE32F-NEXT: sd a3, 0(a0) 6307; RV64ZVE32F-NEXT: sd a4, 8(a0) 6308; RV64ZVE32F-NEXT: sd a6, 16(a0) 6309; RV64ZVE32F-NEXT: sd a7, 24(a0) 6310; RV64ZVE32F-NEXT: sd t0, 32(a0) 6311; RV64ZVE32F-NEXT: sd t1, 40(a0) 6312; RV64ZVE32F-NEXT: sd t2, 48(a0) 6313; RV64ZVE32F-NEXT: sd a1, 56(a0) 6314; RV64ZVE32F-NEXT: ret 6315 %eidxs = sext <8 x i32> %idxs to <8 x i64> 6316 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 6317 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 6318 ret <8 x i64> %v 6319} 6320 6321define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 6322; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8i64: 6323; RV32V: # %bb.0: 6324; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 6325; RV32V-NEXT: vsll.vi v8, v8, 3 6326; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 6327; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 6328; RV32V-NEXT: vmv.v.v v8, v12 6329; RV32V-NEXT: ret 6330; 6331; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8i64: 6332; RV64V: # %bb.0: 6333; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 6334; RV64V-NEXT: vzext.vf2 v16, v8 6335; RV64V-NEXT: vsll.vi v8, v16, 3 6336; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 6337; RV64V-NEXT: vmv.v.v v8, v12 6338; RV64V-NEXT: ret 6339; 6340; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64: 6341; RV32ZVE32F: # %bb.0: 6342; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 6343; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 6344; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 6345; 
RV32ZVE32F-NEXT: vmv.x.s t0, v0 6346; RV32ZVE32F-NEXT: andi a3, t0, 1 6347; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 6348; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 6349; RV32ZVE32F-NEXT: beqz a3, .LBB56_7 6350; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 6351; RV32ZVE32F-NEXT: vmv.x.s a3, v8 6352; RV32ZVE32F-NEXT: lw a1, 0(a3) 6353; RV32ZVE32F-NEXT: lw a3, 4(a3) 6354; RV32ZVE32F-NEXT: andi a4, t0, 2 6355; RV32ZVE32F-NEXT: bnez a4, .LBB56_8 6356; RV32ZVE32F-NEXT: .LBB56_2: 6357; RV32ZVE32F-NEXT: lw a4, 8(a2) 6358; RV32ZVE32F-NEXT: lw a5, 12(a2) 6359; RV32ZVE32F-NEXT: andi a6, t0, 4 6360; RV32ZVE32F-NEXT: bnez a6, .LBB56_9 6361; RV32ZVE32F-NEXT: .LBB56_3: 6362; RV32ZVE32F-NEXT: lw a6, 16(a2) 6363; RV32ZVE32F-NEXT: lw a7, 20(a2) 6364; RV32ZVE32F-NEXT: andi t1, t0, 8 6365; RV32ZVE32F-NEXT: bnez t1, .LBB56_10 6366; RV32ZVE32F-NEXT: .LBB56_4: 6367; RV32ZVE32F-NEXT: lw t1, 24(a2) 6368; RV32ZVE32F-NEXT: lw t2, 28(a2) 6369; RV32ZVE32F-NEXT: andi t3, t0, 16 6370; RV32ZVE32F-NEXT: bnez t3, .LBB56_11 6371; RV32ZVE32F-NEXT: .LBB56_5: 6372; RV32ZVE32F-NEXT: lw t3, 32(a2) 6373; RV32ZVE32F-NEXT: lw t4, 36(a2) 6374; RV32ZVE32F-NEXT: andi t5, t0, 32 6375; RV32ZVE32F-NEXT: bnez t5, .LBB56_12 6376; RV32ZVE32F-NEXT: .LBB56_6: 6377; RV32ZVE32F-NEXT: lw t5, 40(a2) 6378; RV32ZVE32F-NEXT: lw t6, 44(a2) 6379; RV32ZVE32F-NEXT: j .LBB56_13 6380; RV32ZVE32F-NEXT: .LBB56_7: 6381; RV32ZVE32F-NEXT: lw a1, 0(a2) 6382; RV32ZVE32F-NEXT: lw a3, 4(a2) 6383; RV32ZVE32F-NEXT: andi a4, t0, 2 6384; RV32ZVE32F-NEXT: beqz a4, .LBB56_2 6385; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1 6386; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6387; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 6388; RV32ZVE32F-NEXT: vmv.x.s a5, v10 6389; RV32ZVE32F-NEXT: lw a4, 0(a5) 6390; RV32ZVE32F-NEXT: lw a5, 4(a5) 6391; RV32ZVE32F-NEXT: andi a6, t0, 4 6392; RV32ZVE32F-NEXT: beqz a6, .LBB56_3 6393; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4 6394; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6395; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 6396; RV32ZVE32F-NEXT: vmv.x.s a7, v10 6397; RV32ZVE32F-NEXT: lw a6, 0(a7) 6398; RV32ZVE32F-NEXT: lw a7, 4(a7) 6399; RV32ZVE32F-NEXT: andi t1, t0, 8 6400; RV32ZVE32F-NEXT: beqz t1, .LBB56_4 6401; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7 6402; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6403; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 6404; RV32ZVE32F-NEXT: vmv.x.s t2, v10 6405; RV32ZVE32F-NEXT: lw t1, 0(t2) 6406; RV32ZVE32F-NEXT: lw t2, 4(t2) 6407; RV32ZVE32F-NEXT: andi t3, t0, 16 6408; RV32ZVE32F-NEXT: beqz t3, .LBB56_5 6409; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10 6410; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6411; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 6412; RV32ZVE32F-NEXT: vmv.x.s t4, v10 6413; RV32ZVE32F-NEXT: lw t3, 0(t4) 6414; RV32ZVE32F-NEXT: lw t4, 4(t4) 6415; RV32ZVE32F-NEXT: andi t5, t0, 32 6416; RV32ZVE32F-NEXT: beqz t5, .LBB56_6 6417; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13 6418; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6419; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 6420; RV32ZVE32F-NEXT: vmv.x.s t6, v10 6421; RV32ZVE32F-NEXT: lw t5, 0(t6) 6422; RV32ZVE32F-NEXT: lw t6, 4(t6) 6423; RV32ZVE32F-NEXT: .LBB56_13: # %else14 6424; RV32ZVE32F-NEXT: addi sp, sp, -16 6425; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 6426; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 6427; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 6428; RV32ZVE32F-NEXT: .cfi_offset s0, -4 6429; RV32ZVE32F-NEXT: .cfi_offset s1, -8 6430; RV32ZVE32F-NEXT: andi s0, t0, 64 6431; RV32ZVE32F-NEXT: 
beqz s0, .LBB56_16 6432; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 6433; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6434; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 6435; RV32ZVE32F-NEXT: vmv.x.s s1, v10 6436; RV32ZVE32F-NEXT: lw s0, 0(s1) 6437; RV32ZVE32F-NEXT: lw s1, 4(s1) 6438; RV32ZVE32F-NEXT: andi t0, t0, -128 6439; RV32ZVE32F-NEXT: bnez t0, .LBB56_17 6440; RV32ZVE32F-NEXT: .LBB56_15: 6441; RV32ZVE32F-NEXT: lw t0, 56(a2) 6442; RV32ZVE32F-NEXT: lw a2, 60(a2) 6443; RV32ZVE32F-NEXT: j .LBB56_18 6444; RV32ZVE32F-NEXT: .LBB56_16: 6445; RV32ZVE32F-NEXT: lw s0, 48(a2) 6446; RV32ZVE32F-NEXT: lw s1, 52(a2) 6447; RV32ZVE32F-NEXT: andi t0, t0, -128 6448; RV32ZVE32F-NEXT: beqz t0, .LBB56_15 6449; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19 6450; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6451; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 6452; RV32ZVE32F-NEXT: vmv.x.s a2, v8 6453; RV32ZVE32F-NEXT: lw t0, 0(a2) 6454; RV32ZVE32F-NEXT: lw a2, 4(a2) 6455; RV32ZVE32F-NEXT: .LBB56_18: # %else20 6456; RV32ZVE32F-NEXT: sw a1, 0(a0) 6457; RV32ZVE32F-NEXT: sw a3, 4(a0) 6458; RV32ZVE32F-NEXT: sw a4, 8(a0) 6459; RV32ZVE32F-NEXT: sw a5, 12(a0) 6460; RV32ZVE32F-NEXT: sw a6, 16(a0) 6461; RV32ZVE32F-NEXT: sw a7, 20(a0) 6462; RV32ZVE32F-NEXT: sw t1, 24(a0) 6463; RV32ZVE32F-NEXT: sw t2, 28(a0) 6464; RV32ZVE32F-NEXT: sw t3, 32(a0) 6465; RV32ZVE32F-NEXT: sw t4, 36(a0) 6466; RV32ZVE32F-NEXT: sw t5, 40(a0) 6467; RV32ZVE32F-NEXT: sw t6, 44(a0) 6468; RV32ZVE32F-NEXT: sw s0, 48(a0) 6469; RV32ZVE32F-NEXT: sw s1, 52(a0) 6470; RV32ZVE32F-NEXT: sw t0, 56(a0) 6471; RV32ZVE32F-NEXT: sw a2, 60(a0) 6472; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 6473; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 6474; RV32ZVE32F-NEXT: .cfi_restore s0 6475; RV32ZVE32F-NEXT: .cfi_restore s1 6476; RV32ZVE32F-NEXT: addi sp, sp, 16 6477; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 6478; RV32ZVE32F-NEXT: ret 6479; 6480; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64: 6481; RV64ZVE32F: # %bb.0: 6482; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 6483; RV64ZVE32F-NEXT: vmv.x.s a5, v0 6484; RV64ZVE32F-NEXT: andi a3, a5, 1 6485; RV64ZVE32F-NEXT: beqz a3, .LBB56_3 6486; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 6487; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 6488; RV64ZVE32F-NEXT: vmv.x.s a3, v8 6489; RV64ZVE32F-NEXT: slli a3, a3, 32 6490; RV64ZVE32F-NEXT: srli a3, a3, 29 6491; RV64ZVE32F-NEXT: add a3, a1, a3 6492; RV64ZVE32F-NEXT: ld a3, 0(a3) 6493; RV64ZVE32F-NEXT: andi a4, a5, 2 6494; RV64ZVE32F-NEXT: bnez a4, .LBB56_4 6495; RV64ZVE32F-NEXT: .LBB56_2: 6496; RV64ZVE32F-NEXT: ld a4, 8(a2) 6497; RV64ZVE32F-NEXT: j .LBB56_5 6498; RV64ZVE32F-NEXT: .LBB56_3: 6499; RV64ZVE32F-NEXT: ld a3, 0(a2) 6500; RV64ZVE32F-NEXT: andi a4, a5, 2 6501; RV64ZVE32F-NEXT: beqz a4, .LBB56_2 6502; RV64ZVE32F-NEXT: .LBB56_4: # %cond.load1 6503; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6504; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 6505; RV64ZVE32F-NEXT: vmv.x.s a4, v10 6506; RV64ZVE32F-NEXT: slli a4, a4, 32 6507; RV64ZVE32F-NEXT: srli a4, a4, 29 6508; RV64ZVE32F-NEXT: add a4, a1, a4 6509; RV64ZVE32F-NEXT: ld a4, 0(a4) 6510; RV64ZVE32F-NEXT: .LBB56_5: # %else2 6511; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 6512; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 6513; RV64ZVE32F-NEXT: andi a6, a5, 4 6514; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 6515; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 6516; RV64ZVE32F-NEXT: beqz a6, .LBB56_10 6517; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 6518; RV64ZVE32F-NEXT: vmv.x.s a6, v8 
6519; RV64ZVE32F-NEXT: slli a6, a6, 32 6520; RV64ZVE32F-NEXT: srli a6, a6, 29 6521; RV64ZVE32F-NEXT: add a6, a1, a6 6522; RV64ZVE32F-NEXT: ld a6, 0(a6) 6523; RV64ZVE32F-NEXT: andi a7, a5, 8 6524; RV64ZVE32F-NEXT: bnez a7, .LBB56_11 6525; RV64ZVE32F-NEXT: .LBB56_7: 6526; RV64ZVE32F-NEXT: ld a7, 24(a2) 6527; RV64ZVE32F-NEXT: andi t0, a5, 16 6528; RV64ZVE32F-NEXT: bnez t0, .LBB56_12 6529; RV64ZVE32F-NEXT: .LBB56_8: 6530; RV64ZVE32F-NEXT: ld t0, 32(a2) 6531; RV64ZVE32F-NEXT: andi t1, a5, 32 6532; RV64ZVE32F-NEXT: bnez t1, .LBB56_13 6533; RV64ZVE32F-NEXT: .LBB56_9: 6534; RV64ZVE32F-NEXT: ld t1, 40(a2) 6535; RV64ZVE32F-NEXT: j .LBB56_14 6536; RV64ZVE32F-NEXT: .LBB56_10: 6537; RV64ZVE32F-NEXT: ld a6, 16(a2) 6538; RV64ZVE32F-NEXT: andi a7, a5, 8 6539; RV64ZVE32F-NEXT: beqz a7, .LBB56_7 6540; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7 6541; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 6542; RV64ZVE32F-NEXT: vmv.x.s a7, v8 6543; RV64ZVE32F-NEXT: slli a7, a7, 32 6544; RV64ZVE32F-NEXT: srli a7, a7, 29 6545; RV64ZVE32F-NEXT: add a7, a1, a7 6546; RV64ZVE32F-NEXT: ld a7, 0(a7) 6547; RV64ZVE32F-NEXT: andi t0, a5, 16 6548; RV64ZVE32F-NEXT: beqz t0, .LBB56_8 6549; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10 6550; RV64ZVE32F-NEXT: vmv.x.s t0, v10 6551; RV64ZVE32F-NEXT: slli t0, t0, 32 6552; RV64ZVE32F-NEXT: srli t0, t0, 29 6553; RV64ZVE32F-NEXT: add t0, a1, t0 6554; RV64ZVE32F-NEXT: ld t0, 0(t0) 6555; RV64ZVE32F-NEXT: andi t1, a5, 32 6556; RV64ZVE32F-NEXT: beqz t1, .LBB56_9 6557; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13 6558; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 6559; RV64ZVE32F-NEXT: vmv.x.s t1, v8 6560; RV64ZVE32F-NEXT: slli t1, t1, 32 6561; RV64ZVE32F-NEXT: srli t1, t1, 29 6562; RV64ZVE32F-NEXT: add t1, a1, t1 6563; RV64ZVE32F-NEXT: ld t1, 0(t1) 6564; RV64ZVE32F-NEXT: .LBB56_14: # %else14 6565; RV64ZVE32F-NEXT: andi t2, a5, 64 6566; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 6567; RV64ZVE32F-NEXT: beqz t2, .LBB56_17 6568; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 6569; RV64ZVE32F-NEXT: vmv.x.s t2, v8 6570; RV64ZVE32F-NEXT: slli t2, t2, 32 6571; RV64ZVE32F-NEXT: srli t2, t2, 29 6572; RV64ZVE32F-NEXT: add t2, a1, t2 6573; RV64ZVE32F-NEXT: ld t2, 0(t2) 6574; RV64ZVE32F-NEXT: andi a5, a5, -128 6575; RV64ZVE32F-NEXT: bnez a5, .LBB56_18 6576; RV64ZVE32F-NEXT: .LBB56_16: 6577; RV64ZVE32F-NEXT: ld a1, 56(a2) 6578; RV64ZVE32F-NEXT: j .LBB56_19 6579; RV64ZVE32F-NEXT: .LBB56_17: 6580; RV64ZVE32F-NEXT: ld t2, 48(a2) 6581; RV64ZVE32F-NEXT: andi a5, a5, -128 6582; RV64ZVE32F-NEXT: beqz a5, .LBB56_16 6583; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19 6584; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 6585; RV64ZVE32F-NEXT: vmv.x.s a2, v8 6586; RV64ZVE32F-NEXT: slli a2, a2, 32 6587; RV64ZVE32F-NEXT: srli a2, a2, 29 6588; RV64ZVE32F-NEXT: add a1, a1, a2 6589; RV64ZVE32F-NEXT: ld a1, 0(a1) 6590; RV64ZVE32F-NEXT: .LBB56_19: # %else20 6591; RV64ZVE32F-NEXT: sd a3, 0(a0) 6592; RV64ZVE32F-NEXT: sd a4, 8(a0) 6593; RV64ZVE32F-NEXT: sd a6, 16(a0) 6594; RV64ZVE32F-NEXT: sd a7, 24(a0) 6595; RV64ZVE32F-NEXT: sd t0, 32(a0) 6596; RV64ZVE32F-NEXT: sd t1, 40(a0) 6597; RV64ZVE32F-NEXT: sd t2, 48(a0) 6598; RV64ZVE32F-NEXT: sd a1, 56(a0) 6599; RV64ZVE32F-NEXT: ret 6600 %eidxs = zext <8 x i32> %idxs to <8 x i64> 6601 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs 6602 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 6603 ret <8 x i64> %v 6604} 6605 6606define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) { 6607; 
RV32V-LABEL: mgather_baseidx_v8i64: 6608; RV32V: # %bb.0: 6609; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 6610; RV32V-NEXT: vnsrl.wi v16, v8, 0 6611; RV32V-NEXT: vsll.vi v8, v16, 3 6612; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 6613; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 6614; RV32V-NEXT: vmv.v.v v8, v12 6615; RV32V-NEXT: ret 6616; 6617; RV64V-LABEL: mgather_baseidx_v8i64: 6618; RV64V: # %bb.0: 6619; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 6620; RV64V-NEXT: vsll.vi v8, v8, 3 6621; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 6622; RV64V-NEXT: vmv.v.v v8, v12 6623; RV64V-NEXT: ret 6624; 6625; RV32ZVE32F-LABEL: mgather_baseidx_v8i64: 6626; RV32ZVE32F: # %bb.0: 6627; RV32ZVE32F-NEXT: lw a4, 32(a2) 6628; RV32ZVE32F-NEXT: lw a5, 40(a2) 6629; RV32ZVE32F-NEXT: lw a6, 48(a2) 6630; RV32ZVE32F-NEXT: lw a7, 56(a2) 6631; RV32ZVE32F-NEXT: lw t0, 0(a2) 6632; RV32ZVE32F-NEXT: lw t1, 8(a2) 6633; RV32ZVE32F-NEXT: lw t2, 16(a2) 6634; RV32ZVE32F-NEXT: lw a2, 24(a2) 6635; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 6636; RV32ZVE32F-NEXT: vmv.v.x v8, t0 6637; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 6638; RV32ZVE32F-NEXT: vmv.x.s t0, v0 6639; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 6640; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 6641; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2 6642; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 6643; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 6644; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 6645; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 6646; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 6647; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 6648; RV32ZVE32F-NEXT: andi a2, t0, 1 6649; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 6650; RV32ZVE32F-NEXT: beqz a2, .LBB57_7 6651; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 6652; RV32ZVE32F-NEXT: vmv.x.s a2, v8 6653; RV32ZVE32F-NEXT: lw a1, 0(a2) 6654; RV32ZVE32F-NEXT: lw a2, 4(a2) 6655; RV32ZVE32F-NEXT: andi a4, t0, 2 6656; RV32ZVE32F-NEXT: bnez a4, .LBB57_8 6657; RV32ZVE32F-NEXT: .LBB57_2: 6658; RV32ZVE32F-NEXT: lw a4, 8(a3) 6659; RV32ZVE32F-NEXT: lw a5, 12(a3) 6660; RV32ZVE32F-NEXT: andi a6, t0, 4 6661; RV32ZVE32F-NEXT: bnez a6, .LBB57_9 6662; RV32ZVE32F-NEXT: .LBB57_3: 6663; RV32ZVE32F-NEXT: lw a6, 16(a3) 6664; RV32ZVE32F-NEXT: lw a7, 20(a3) 6665; RV32ZVE32F-NEXT: andi t1, t0, 8 6666; RV32ZVE32F-NEXT: bnez t1, .LBB57_10 6667; RV32ZVE32F-NEXT: .LBB57_4: 6668; RV32ZVE32F-NEXT: lw t1, 24(a3) 6669; RV32ZVE32F-NEXT: lw t2, 28(a3) 6670; RV32ZVE32F-NEXT: andi t3, t0, 16 6671; RV32ZVE32F-NEXT: bnez t3, .LBB57_11 6672; RV32ZVE32F-NEXT: .LBB57_5: 6673; RV32ZVE32F-NEXT: lw t3, 32(a3) 6674; RV32ZVE32F-NEXT: lw t4, 36(a3) 6675; RV32ZVE32F-NEXT: andi t5, t0, 32 6676; RV32ZVE32F-NEXT: bnez t5, .LBB57_12 6677; RV32ZVE32F-NEXT: .LBB57_6: 6678; RV32ZVE32F-NEXT: lw t5, 40(a3) 6679; RV32ZVE32F-NEXT: lw t6, 44(a3) 6680; RV32ZVE32F-NEXT: j .LBB57_13 6681; RV32ZVE32F-NEXT: .LBB57_7: 6682; RV32ZVE32F-NEXT: lw a1, 0(a3) 6683; RV32ZVE32F-NEXT: lw a2, 4(a3) 6684; RV32ZVE32F-NEXT: andi a4, t0, 2 6685; RV32ZVE32F-NEXT: beqz a4, .LBB57_2 6686; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1 6687; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6688; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 6689; RV32ZVE32F-NEXT: vmv.x.s a5, v10 6690; RV32ZVE32F-NEXT: lw a4, 0(a5) 6691; RV32ZVE32F-NEXT: lw a5, 4(a5) 6692; RV32ZVE32F-NEXT: andi a6, t0, 4 6693; RV32ZVE32F-NEXT: beqz a6, .LBB57_3 6694; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4 6695; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6696; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 6697; 
RV32ZVE32F-NEXT: vmv.x.s a7, v10 6698; RV32ZVE32F-NEXT: lw a6, 0(a7) 6699; RV32ZVE32F-NEXT: lw a7, 4(a7) 6700; RV32ZVE32F-NEXT: andi t1, t0, 8 6701; RV32ZVE32F-NEXT: beqz t1, .LBB57_4 6702; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7 6703; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 6704; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 6705; RV32ZVE32F-NEXT: vmv.x.s t2, v10 6706; RV32ZVE32F-NEXT: lw t1, 0(t2) 6707; RV32ZVE32F-NEXT: lw t2, 4(t2) 6708; RV32ZVE32F-NEXT: andi t3, t0, 16 6709; RV32ZVE32F-NEXT: beqz t3, .LBB57_5 6710; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10 6711; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6712; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 6713; RV32ZVE32F-NEXT: vmv.x.s t4, v10 6714; RV32ZVE32F-NEXT: lw t3, 0(t4) 6715; RV32ZVE32F-NEXT: lw t4, 4(t4) 6716; RV32ZVE32F-NEXT: andi t5, t0, 32 6717; RV32ZVE32F-NEXT: beqz t5, .LBB57_6 6718; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13 6719; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6720; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 6721; RV32ZVE32F-NEXT: vmv.x.s t6, v10 6722; RV32ZVE32F-NEXT: lw t5, 0(t6) 6723; RV32ZVE32F-NEXT: lw t6, 4(t6) 6724; RV32ZVE32F-NEXT: .LBB57_13: # %else14 6725; RV32ZVE32F-NEXT: addi sp, sp, -16 6726; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 6727; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill 6728; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill 6729; RV32ZVE32F-NEXT: .cfi_offset s0, -4 6730; RV32ZVE32F-NEXT: .cfi_offset s1, -8 6731; RV32ZVE32F-NEXT: andi s0, t0, 64 6732; RV32ZVE32F-NEXT: beqz s0, .LBB57_16 6733; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16 6734; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6735; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 6736; RV32ZVE32F-NEXT: vmv.x.s s1, v10 6737; RV32ZVE32F-NEXT: lw s0, 0(s1) 6738; RV32ZVE32F-NEXT: lw s1, 4(s1) 6739; RV32ZVE32F-NEXT: andi t0, t0, -128 6740; RV32ZVE32F-NEXT: bnez t0, .LBB57_17 6741; RV32ZVE32F-NEXT: .LBB57_15: 6742; RV32ZVE32F-NEXT: lw t0, 56(a3) 6743; RV32ZVE32F-NEXT: lw a3, 60(a3) 6744; RV32ZVE32F-NEXT: j .LBB57_18 6745; RV32ZVE32F-NEXT: .LBB57_16: 6746; RV32ZVE32F-NEXT: lw s0, 48(a3) 6747; RV32ZVE32F-NEXT: lw s1, 52(a3) 6748; RV32ZVE32F-NEXT: andi t0, t0, -128 6749; RV32ZVE32F-NEXT: beqz t0, .LBB57_15 6750; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19 6751; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 6752; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 6753; RV32ZVE32F-NEXT: vmv.x.s a3, v8 6754; RV32ZVE32F-NEXT: lw t0, 0(a3) 6755; RV32ZVE32F-NEXT: lw a3, 4(a3) 6756; RV32ZVE32F-NEXT: .LBB57_18: # %else20 6757; RV32ZVE32F-NEXT: sw a1, 0(a0) 6758; RV32ZVE32F-NEXT: sw a2, 4(a0) 6759; RV32ZVE32F-NEXT: sw a4, 8(a0) 6760; RV32ZVE32F-NEXT: sw a5, 12(a0) 6761; RV32ZVE32F-NEXT: sw a6, 16(a0) 6762; RV32ZVE32F-NEXT: sw a7, 20(a0) 6763; RV32ZVE32F-NEXT: sw t1, 24(a0) 6764; RV32ZVE32F-NEXT: sw t2, 28(a0) 6765; RV32ZVE32F-NEXT: sw t3, 32(a0) 6766; RV32ZVE32F-NEXT: sw t4, 36(a0) 6767; RV32ZVE32F-NEXT: sw t5, 40(a0) 6768; RV32ZVE32F-NEXT: sw t6, 44(a0) 6769; RV32ZVE32F-NEXT: sw s0, 48(a0) 6770; RV32ZVE32F-NEXT: sw s1, 52(a0) 6771; RV32ZVE32F-NEXT: sw t0, 56(a0) 6772; RV32ZVE32F-NEXT: sw a3, 60(a0) 6773; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload 6774; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload 6775; RV32ZVE32F-NEXT: .cfi_restore s0 6776; RV32ZVE32F-NEXT: .cfi_restore s1 6777; RV32ZVE32F-NEXT: addi sp, sp, 16 6778; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 6779; RV32ZVE32F-NEXT: ret 6780; 6781; RV64ZVE32F-LABEL: mgather_baseidx_v8i64: 6782; RV64ZVE32F: # %bb.0: 6783; RV64ZVE32F-NEXT: vsetivli zero, 
1, e8, m1, ta, ma 6784; RV64ZVE32F-NEXT: vmv.x.s a7, v0 6785; RV64ZVE32F-NEXT: andi a4, a7, 1 6786; RV64ZVE32F-NEXT: beqz a4, .LBB57_9 6787; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 6788; RV64ZVE32F-NEXT: ld a4, 0(a2) 6789; RV64ZVE32F-NEXT: slli a4, a4, 3 6790; RV64ZVE32F-NEXT: add a4, a1, a4 6791; RV64ZVE32F-NEXT: ld a4, 0(a4) 6792; RV64ZVE32F-NEXT: andi a5, a7, 2 6793; RV64ZVE32F-NEXT: bnez a5, .LBB57_10 6794; RV64ZVE32F-NEXT: .LBB57_2: 6795; RV64ZVE32F-NEXT: ld a5, 8(a3) 6796; RV64ZVE32F-NEXT: andi a6, a7, 4 6797; RV64ZVE32F-NEXT: bnez a6, .LBB57_11 6798; RV64ZVE32F-NEXT: .LBB57_3: 6799; RV64ZVE32F-NEXT: ld a6, 16(a3) 6800; RV64ZVE32F-NEXT: andi t0, a7, 8 6801; RV64ZVE32F-NEXT: bnez t0, .LBB57_12 6802; RV64ZVE32F-NEXT: .LBB57_4: 6803; RV64ZVE32F-NEXT: ld t0, 24(a3) 6804; RV64ZVE32F-NEXT: andi t1, a7, 16 6805; RV64ZVE32F-NEXT: bnez t1, .LBB57_13 6806; RV64ZVE32F-NEXT: .LBB57_5: 6807; RV64ZVE32F-NEXT: ld t1, 32(a3) 6808; RV64ZVE32F-NEXT: andi t2, a7, 32 6809; RV64ZVE32F-NEXT: bnez t2, .LBB57_14 6810; RV64ZVE32F-NEXT: .LBB57_6: 6811; RV64ZVE32F-NEXT: ld t2, 40(a3) 6812; RV64ZVE32F-NEXT: andi t3, a7, 64 6813; RV64ZVE32F-NEXT: bnez t3, .LBB57_15 6814; RV64ZVE32F-NEXT: .LBB57_7: 6815; RV64ZVE32F-NEXT: ld t3, 48(a3) 6816; RV64ZVE32F-NEXT: andi a7, a7, -128 6817; RV64ZVE32F-NEXT: bnez a7, .LBB57_16 6818; RV64ZVE32F-NEXT: .LBB57_8: 6819; RV64ZVE32F-NEXT: ld a1, 56(a3) 6820; RV64ZVE32F-NEXT: j .LBB57_17 6821; RV64ZVE32F-NEXT: .LBB57_9: 6822; RV64ZVE32F-NEXT: ld a4, 0(a3) 6823; RV64ZVE32F-NEXT: andi a5, a7, 2 6824; RV64ZVE32F-NEXT: beqz a5, .LBB57_2 6825; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1 6826; RV64ZVE32F-NEXT: ld a5, 8(a2) 6827; RV64ZVE32F-NEXT: slli a5, a5, 3 6828; RV64ZVE32F-NEXT: add a5, a1, a5 6829; RV64ZVE32F-NEXT: ld a5, 0(a5) 6830; RV64ZVE32F-NEXT: andi a6, a7, 4 6831; RV64ZVE32F-NEXT: beqz a6, .LBB57_3 6832; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4 6833; RV64ZVE32F-NEXT: ld a6, 16(a2) 6834; RV64ZVE32F-NEXT: slli a6, a6, 3 6835; RV64ZVE32F-NEXT: add a6, a1, a6 6836; RV64ZVE32F-NEXT: ld a6, 0(a6) 6837; RV64ZVE32F-NEXT: andi t0, a7, 8 6838; RV64ZVE32F-NEXT: beqz t0, .LBB57_4 6839; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7 6840; RV64ZVE32F-NEXT: ld t0, 24(a2) 6841; RV64ZVE32F-NEXT: slli t0, t0, 3 6842; RV64ZVE32F-NEXT: add t0, a1, t0 6843; RV64ZVE32F-NEXT: ld t0, 0(t0) 6844; RV64ZVE32F-NEXT: andi t1, a7, 16 6845; RV64ZVE32F-NEXT: beqz t1, .LBB57_5 6846; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10 6847; RV64ZVE32F-NEXT: ld t1, 32(a2) 6848; RV64ZVE32F-NEXT: slli t1, t1, 3 6849; RV64ZVE32F-NEXT: add t1, a1, t1 6850; RV64ZVE32F-NEXT: ld t1, 0(t1) 6851; RV64ZVE32F-NEXT: andi t2, a7, 32 6852; RV64ZVE32F-NEXT: beqz t2, .LBB57_6 6853; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13 6854; RV64ZVE32F-NEXT: ld t2, 40(a2) 6855; RV64ZVE32F-NEXT: slli t2, t2, 3 6856; RV64ZVE32F-NEXT: add t2, a1, t2 6857; RV64ZVE32F-NEXT: ld t2, 0(t2) 6858; RV64ZVE32F-NEXT: andi t3, a7, 64 6859; RV64ZVE32F-NEXT: beqz t3, .LBB57_7 6860; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16 6861; RV64ZVE32F-NEXT: ld t3, 48(a2) 6862; RV64ZVE32F-NEXT: slli t3, t3, 3 6863; RV64ZVE32F-NEXT: add t3, a1, t3 6864; RV64ZVE32F-NEXT: ld t3, 0(t3) 6865; RV64ZVE32F-NEXT: andi a7, a7, -128 6866; RV64ZVE32F-NEXT: beqz a7, .LBB57_8 6867; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19 6868; RV64ZVE32F-NEXT: ld a2, 56(a2) 6869; RV64ZVE32F-NEXT: slli a2, a2, 3 6870; RV64ZVE32F-NEXT: add a1, a1, a2 6871; RV64ZVE32F-NEXT: ld a1, 0(a1) 6872; RV64ZVE32F-NEXT: .LBB57_17: # %else20 6873; RV64ZVE32F-NEXT: sd a4, 0(a0) 6874; RV64ZVE32F-NEXT: sd a5, 8(a0) 6875; 
RV64ZVE32F-NEXT: sd a6, 16(a0) 6876; RV64ZVE32F-NEXT: sd t0, 24(a0) 6877; RV64ZVE32F-NEXT: sd t1, 32(a0) 6878; RV64ZVE32F-NEXT: sd t2, 40(a0) 6879; RV64ZVE32F-NEXT: sd t3, 48(a0) 6880; RV64ZVE32F-NEXT: sd a1, 56(a0) 6881; RV64ZVE32F-NEXT: ret 6882 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs 6883 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru) 6884 ret <8 x i64> %v 6885} 6886 6887declare <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x bfloat>) 6888 6889define <1 x bfloat> @mgather_v1bf16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x bfloat> %passthru) { 6890; RV32V-LABEL: mgather_v1bf16: 6891; RV32V: # %bb.0: 6892; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu 6893; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 6894; RV32V-NEXT: vmv1r.v v8, v9 6895; RV32V-NEXT: ret 6896; 6897; RV64V-LABEL: mgather_v1bf16: 6898; RV64V: # %bb.0: 6899; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu 6900; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 6901; RV64V-NEXT: vmv1r.v v8, v9 6902; RV64V-NEXT: ret 6903; 6904; RV32ZVE32F-LABEL: mgather_v1bf16: 6905; RV32ZVE32F: # %bb.0: 6906; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu 6907; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 6908; RV32ZVE32F-NEXT: vmv1r.v v8, v9 6909; RV32ZVE32F-NEXT: ret 6910; 6911; RV64ZVE32F-LABEL: mgather_v1bf16: 6912; RV64ZVE32F: # %bb.0: 6913; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 6914; RV64ZVE32F-NEXT: vfirst.m a1, v0 6915; RV64ZVE32F-NEXT: bnez a1, .LBB58_2 6916; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 6917; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 6918; RV64ZVE32F-NEXT: vle16.v v8, (a0) 6919; RV64ZVE32F-NEXT: .LBB58_2: # %else 6920; RV64ZVE32F-NEXT: ret 6921 %v = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x bfloat> %passthru) 6922 ret <1 x bfloat> %v 6923} 6924 6925declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x bfloat>) 6926 6927define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %passthru) { 6928; RV32V-LABEL: mgather_v2bf16: 6929; RV32V: # %bb.0: 6930; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu 6931; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 6932; RV32V-NEXT: vmv1r.v v8, v9 6933; RV32V-NEXT: ret 6934; 6935; RV64V-LABEL: mgather_v2bf16: 6936; RV64V: # %bb.0: 6937; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu 6938; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 6939; RV64V-NEXT: vmv1r.v v8, v9 6940; RV64V-NEXT: ret 6941; 6942; RV32ZVE32F-LABEL: mgather_v2bf16: 6943; RV32ZVE32F: # %bb.0: 6944; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu 6945; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 6946; RV32ZVE32F-NEXT: vmv1r.v v8, v9 6947; RV32ZVE32F-NEXT: ret 6948; 6949; RV64ZVE32F-LABEL: mgather_v2bf16: 6950; RV64ZVE32F: # %bb.0: 6951; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 6952; RV64ZVE32F-NEXT: vmv.x.s a2, v0 6953; RV64ZVE32F-NEXT: andi a3, a2, 1 6954; RV64ZVE32F-NEXT: bnez a3, .LBB59_3 6955; RV64ZVE32F-NEXT: # %bb.1: # %else 6956; RV64ZVE32F-NEXT: andi a2, a2, 2 6957; RV64ZVE32F-NEXT: bnez a2, .LBB59_4 6958; RV64ZVE32F-NEXT: .LBB59_2: # %else2 6959; RV64ZVE32F-NEXT: ret 6960; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load 6961; RV64ZVE32F-NEXT: lh a0, 0(a0) 6962; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 6963; RV64ZVE32F-NEXT: vmv.s.x v8, a0 6964; RV64ZVE32F-NEXT: andi a2, a2, 2 6965; RV64ZVE32F-NEXT: beqz a2, .LBB59_2 6966; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1 6967; 
RV64ZVE32F-NEXT: lh a0, 0(a1) 6968; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 6969; RV64ZVE32F-NEXT: vmv.s.x v9, a0 6970; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 6971; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 6972; RV64ZVE32F-NEXT: ret 6973 %v = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x bfloat> %passthru) 6974 ret <2 x bfloat> %v 6975} 6976 6977declare <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x bfloat>) 6978 6979define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %passthru) { 6980; RV32-LABEL: mgather_v4bf16: 6981; RV32: # %bb.0: 6982; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu 6983; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 6984; RV32-NEXT: vmv1r.v v8, v9 6985; RV32-NEXT: ret 6986; 6987; RV64V-LABEL: mgather_v4bf16: 6988; RV64V: # %bb.0: 6989; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu 6990; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t 6991; RV64V-NEXT: vmv1r.v v8, v10 6992; RV64V-NEXT: ret 6993; 6994; RV64ZVE32F-LABEL: mgather_v4bf16: 6995; RV64ZVE32F: # %bb.0: 6996; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 6997; RV64ZVE32F-NEXT: vmv.x.s a1, v0 6998; RV64ZVE32F-NEXT: andi a2, a1, 1 6999; RV64ZVE32F-NEXT: bnez a2, .LBB60_5 7000; RV64ZVE32F-NEXT: # %bb.1: # %else 7001; RV64ZVE32F-NEXT: andi a2, a1, 2 7002; RV64ZVE32F-NEXT: bnez a2, .LBB60_6 7003; RV64ZVE32F-NEXT: .LBB60_2: # %else2 7004; RV64ZVE32F-NEXT: andi a2, a1, 4 7005; RV64ZVE32F-NEXT: bnez a2, .LBB60_7 7006; RV64ZVE32F-NEXT: .LBB60_3: # %else5 7007; RV64ZVE32F-NEXT: andi a1, a1, 8 7008; RV64ZVE32F-NEXT: bnez a1, .LBB60_8 7009; RV64ZVE32F-NEXT: .LBB60_4: # %else8 7010; RV64ZVE32F-NEXT: ret 7011; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load 7012; RV64ZVE32F-NEXT: ld a2, 0(a0) 7013; RV64ZVE32F-NEXT: lh a2, 0(a2) 7014; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7015; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7016; RV64ZVE32F-NEXT: andi a2, a1, 2 7017; RV64ZVE32F-NEXT: beqz a2, .LBB60_2 7018; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1 7019; RV64ZVE32F-NEXT: ld a2, 8(a0) 7020; RV64ZVE32F-NEXT: lh a2, 0(a2) 7021; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7022; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7023; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma 7024; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 7025; RV64ZVE32F-NEXT: andi a2, a1, 4 7026; RV64ZVE32F-NEXT: beqz a2, .LBB60_3 7027; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4 7028; RV64ZVE32F-NEXT: ld a2, 16(a0) 7029; RV64ZVE32F-NEXT: lh a2, 0(a2) 7030; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma 7031; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7032; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 7033; RV64ZVE32F-NEXT: andi a1, a1, 8 7034; RV64ZVE32F-NEXT: beqz a1, .LBB60_4 7035; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7 7036; RV64ZVE32F-NEXT: ld a0, 24(a0) 7037; RV64ZVE32F-NEXT: lh a0, 0(a0) 7038; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 7039; RV64ZVE32F-NEXT: vmv.s.x v9, a0 7040; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 7041; RV64ZVE32F-NEXT: ret 7042 %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x bfloat> %passthru) 7043 ret <4 x bfloat> %v 7044} 7045 7046define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) { 7047; RV32-LABEL: mgather_truemask_v4bf16: 7048; RV32: # %bb.0: 7049; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 7050; RV32-NEXT: vluxei32.v v9, (zero), v8 7051; RV32-NEXT: vmv1r.v v8, v9 7052; RV32-NEXT: ret 7053; 7054; RV64V-LABEL: mgather_truemask_v4bf16: 
7055; RV64V: # %bb.0: 7056; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 7057; RV64V-NEXT: vluxei64.v v10, (zero), v8 7058; RV64V-NEXT: vmv1r.v v8, v10 7059; RV64V-NEXT: ret 7060; 7061; RV64ZVE32F-LABEL: mgather_truemask_v4bf16: 7062; RV64ZVE32F: # %bb.0: 7063; RV64ZVE32F-NEXT: ld a1, 0(a0) 7064; RV64ZVE32F-NEXT: ld a2, 8(a0) 7065; RV64ZVE32F-NEXT: ld a3, 16(a0) 7066; RV64ZVE32F-NEXT: ld a0, 24(a0) 7067; RV64ZVE32F-NEXT: lh a1, 0(a1) 7068; RV64ZVE32F-NEXT: lh a2, 0(a2) 7069; RV64ZVE32F-NEXT: lh a3, 0(a3) 7070; RV64ZVE32F-NEXT: lh a0, 0(a0) 7071; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 7072; RV64ZVE32F-NEXT: vmv.v.x v8, a1 7073; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 7074; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 7075; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 7076; RV64ZVE32F-NEXT: ret 7077 %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x bfloat> %passthru) 7078 ret <4 x bfloat> %v 7079} 7080 7081define <4 x bfloat> @mgather_falsemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) { 7082; RV32-LABEL: mgather_falsemask_v4bf16: 7083; RV32: # %bb.0: 7084; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7085; RV32-NEXT: vmv1r.v v8, v9 7086; RV32-NEXT: ret 7087; 7088; RV64V-LABEL: mgather_falsemask_v4bf16: 7089; RV64V: # %bb.0: 7090; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7091; RV64V-NEXT: vmv1r.v v8, v10 7092; RV64V-NEXT: ret 7093; 7094; RV64ZVE32F-LABEL: mgather_falsemask_v4bf16: 7095; RV64ZVE32F: # %bb.0: 7096; RV64ZVE32F-NEXT: ret 7097 %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x bfloat> %passthru) 7098 ret <4 x bfloat> %v 7099} 7100 7101declare <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x bfloat>) 7102 7103define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %passthru) { 7104; RV32-LABEL: mgather_v8bf16: 7105; RV32: # %bb.0: 7106; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu 7107; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 7108; RV32-NEXT: vmv.v.v v8, v10 7109; RV32-NEXT: ret 7110; 7111; RV64V-LABEL: mgather_v8bf16: 7112; RV64V: # %bb.0: 7113; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu 7114; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t 7115; RV64V-NEXT: vmv.v.v v8, v12 7116; RV64V-NEXT: ret 7117; 7118; RV64ZVE32F-LABEL: mgather_v8bf16: 7119; RV64ZVE32F: # %bb.0: 7120; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7121; RV64ZVE32F-NEXT: vmv.x.s a1, v0 7122; RV64ZVE32F-NEXT: andi a2, a1, 1 7123; RV64ZVE32F-NEXT: bnez a2, .LBB63_9 7124; RV64ZVE32F-NEXT: # %bb.1: # %else 7125; RV64ZVE32F-NEXT: andi a2, a1, 2 7126; RV64ZVE32F-NEXT: bnez a2, .LBB63_10 7127; RV64ZVE32F-NEXT: .LBB63_2: # %else2 7128; RV64ZVE32F-NEXT: andi a2, a1, 4 7129; RV64ZVE32F-NEXT: bnez a2, .LBB63_11 7130; RV64ZVE32F-NEXT: .LBB63_3: # %else5 7131; RV64ZVE32F-NEXT: andi a2, a1, 8 7132; RV64ZVE32F-NEXT: bnez a2, .LBB63_12 7133; RV64ZVE32F-NEXT: .LBB63_4: # %else8 7134; RV64ZVE32F-NEXT: andi a2, a1, 16 7135; RV64ZVE32F-NEXT: bnez a2, .LBB63_13 7136; RV64ZVE32F-NEXT: .LBB63_5: # %else11 7137; RV64ZVE32F-NEXT: andi a2, a1, 32 7138; RV64ZVE32F-NEXT: bnez a2, .LBB63_14 7139; RV64ZVE32F-NEXT: .LBB63_6: # %else14 7140; RV64ZVE32F-NEXT: andi a2, a1, 64 7141; RV64ZVE32F-NEXT: bnez a2, .LBB63_15 7142; RV64ZVE32F-NEXT: .LBB63_7: # %else17 7143; RV64ZVE32F-NEXT: andi a1, a1, -128 7144; RV64ZVE32F-NEXT: bnez a1, .LBB63_16 7145; RV64ZVE32F-NEXT: .LBB63_8: # %else20 7146; RV64ZVE32F-NEXT: ret 7147; RV64ZVE32F-NEXT: .LBB63_9: # 
%cond.load 7148; RV64ZVE32F-NEXT: ld a2, 0(a0) 7149; RV64ZVE32F-NEXT: lh a2, 0(a2) 7150; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7151; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7152; RV64ZVE32F-NEXT: andi a2, a1, 2 7153; RV64ZVE32F-NEXT: beqz a2, .LBB63_2 7154; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1 7155; RV64ZVE32F-NEXT: ld a2, 8(a0) 7156; RV64ZVE32F-NEXT: lh a2, 0(a2) 7157; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7158; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7159; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 7160; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 7161; RV64ZVE32F-NEXT: andi a2, a1, 4 7162; RV64ZVE32F-NEXT: beqz a2, .LBB63_3 7163; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4 7164; RV64ZVE32F-NEXT: ld a2, 16(a0) 7165; RV64ZVE32F-NEXT: lh a2, 0(a2) 7166; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 7167; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7168; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 7169; RV64ZVE32F-NEXT: andi a2, a1, 8 7170; RV64ZVE32F-NEXT: beqz a2, .LBB63_4 7171; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7 7172; RV64ZVE32F-NEXT: ld a2, 24(a0) 7173; RV64ZVE32F-NEXT: lh a2, 0(a2) 7174; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 7175; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7176; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 7177; RV64ZVE32F-NEXT: andi a2, a1, 16 7178; RV64ZVE32F-NEXT: beqz a2, .LBB63_5 7179; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10 7180; RV64ZVE32F-NEXT: ld a2, 32(a0) 7181; RV64ZVE32F-NEXT: lh a2, 0(a2) 7182; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 7183; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7184; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 7185; RV64ZVE32F-NEXT: andi a2, a1, 32 7186; RV64ZVE32F-NEXT: beqz a2, .LBB63_6 7187; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13 7188; RV64ZVE32F-NEXT: ld a2, 40(a0) 7189; RV64ZVE32F-NEXT: lh a2, 0(a2) 7190; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 7191; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7192; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 7193; RV64ZVE32F-NEXT: andi a2, a1, 64 7194; RV64ZVE32F-NEXT: beqz a2, .LBB63_7 7195; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16 7196; RV64ZVE32F-NEXT: ld a2, 48(a0) 7197; RV64ZVE32F-NEXT: lh a2, 0(a2) 7198; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 7199; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7200; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 7201; RV64ZVE32F-NEXT: andi a1, a1, -128 7202; RV64ZVE32F-NEXT: beqz a1, .LBB63_8 7203; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19 7204; RV64ZVE32F-NEXT: ld a0, 56(a0) 7205; RV64ZVE32F-NEXT: lh a0, 0(a0) 7206; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 7207; RV64ZVE32F-NEXT: vmv.s.x v9, a0 7208; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 7209; RV64ZVE32F-NEXT: ret 7210 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) 7211 ret <8 x bfloat> %v 7212} 7213 7214define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { 7215; RV32-LABEL: mgather_baseidx_v8i8_v8bf16: 7216; RV32: # %bb.0: 7217; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 7218; RV32-NEXT: vsext.vf4 v10, v8 7219; RV32-NEXT: vadd.vv v10, v10, v10 7220; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 7221; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 7222; RV32-NEXT: vmv.v.v v8, v9 7223; RV32-NEXT: ret 7224; 7225; RV64V-LABEL: mgather_baseidx_v8i8_v8bf16: 7226; RV64V: # %bb.0: 7227; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 7228; RV64V-NEXT: vsext.vf8 v12, v8 7229; RV64V-NEXT: vadd.vv v12, v12, v12 7230; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 7231; RV64V-NEXT: 
vluxei64.v v9, (a0), v12, v0.t 7232; RV64V-NEXT: vmv.v.v v8, v9 7233; RV64V-NEXT: ret 7234; 7235; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8bf16: 7236; RV64ZVE32F: # %bb.0: 7237; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7238; RV64ZVE32F-NEXT: vmv.x.s a1, v0 7239; RV64ZVE32F-NEXT: andi a2, a1, 1 7240; RV64ZVE32F-NEXT: beqz a2, .LBB64_2 7241; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 7242; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7243; RV64ZVE32F-NEXT: slli a2, a2, 1 7244; RV64ZVE32F-NEXT: add a2, a0, a2 7245; RV64ZVE32F-NEXT: lh a2, 0(a2) 7246; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7247; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7248; RV64ZVE32F-NEXT: .LBB64_2: # %else 7249; RV64ZVE32F-NEXT: andi a2, a1, 2 7250; RV64ZVE32F-NEXT: beqz a2, .LBB64_4 7251; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 7252; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7253; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 7254; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7255; RV64ZVE32F-NEXT: slli a2, a2, 1 7256; RV64ZVE32F-NEXT: add a2, a0, a2 7257; RV64ZVE32F-NEXT: lh a2, 0(a2) 7258; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7259; RV64ZVE32F-NEXT: vmv.s.x v10, a2 7260; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 7261; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 7262; RV64ZVE32F-NEXT: .LBB64_4: # %else2 7263; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 7264; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 7265; RV64ZVE32F-NEXT: andi a2, a1, 4 7266; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 7267; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 7268; RV64ZVE32F-NEXT: bnez a2, .LBB64_14 7269; RV64ZVE32F-NEXT: # %bb.5: # %else5 7270; RV64ZVE32F-NEXT: andi a2, a1, 8 7271; RV64ZVE32F-NEXT: bnez a2, .LBB64_15 7272; RV64ZVE32F-NEXT: .LBB64_6: # %else8 7273; RV64ZVE32F-NEXT: andi a2, a1, 16 7274; RV64ZVE32F-NEXT: bnez a2, .LBB64_16 7275; RV64ZVE32F-NEXT: .LBB64_7: # %else11 7276; RV64ZVE32F-NEXT: andi a2, a1, 32 7277; RV64ZVE32F-NEXT: beqz a2, .LBB64_9 7278; RV64ZVE32F-NEXT: .LBB64_8: # %cond.load13 7279; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7280; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 7281; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7282; RV64ZVE32F-NEXT: slli a2, a2, 1 7283; RV64ZVE32F-NEXT: add a2, a0, a2 7284; RV64ZVE32F-NEXT: lh a2, 0(a2) 7285; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7286; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7287; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 7288; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 7289; RV64ZVE32F-NEXT: .LBB64_9: # %else14 7290; RV64ZVE32F-NEXT: andi a2, a1, 64 7291; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 7292; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 7293; RV64ZVE32F-NEXT: beqz a2, .LBB64_11 7294; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 7295; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7296; RV64ZVE32F-NEXT: slli a2, a2, 1 7297; RV64ZVE32F-NEXT: add a2, a0, a2 7298; RV64ZVE32F-NEXT: lh a2, 0(a2) 7299; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7300; RV64ZVE32F-NEXT: vmv.s.x v10, a2 7301; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 7302; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 7303; RV64ZVE32F-NEXT: .LBB64_11: # %else17 7304; RV64ZVE32F-NEXT: andi a1, a1, -128 7305; RV64ZVE32F-NEXT: beqz a1, .LBB64_13 7306; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 7307; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7308; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7309; RV64ZVE32F-NEXT: vmv.x.s a1, v8 7310; RV64ZVE32F-NEXT: slli a1, a1, 1 7311; RV64ZVE32F-NEXT: add a0, a0, a1 7312; RV64ZVE32F-NEXT: lh a0, 0(a0) 7313; RV64ZVE32F-NEXT: vsetvli 
zero, zero, e16, mf2, ta, ma 7314; RV64ZVE32F-NEXT: vmv.s.x v8, a0 7315; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 7316; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 7317; RV64ZVE32F-NEXT: .LBB64_13: # %else20 7318; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7319; RV64ZVE32F-NEXT: vmv1r.v v8, v9 7320; RV64ZVE32F-NEXT: ret 7321; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load4 7322; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7323; RV64ZVE32F-NEXT: slli a2, a2, 1 7324; RV64ZVE32F-NEXT: add a2, a0, a2 7325; RV64ZVE32F-NEXT: lh a2, 0(a2) 7326; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7327; RV64ZVE32F-NEXT: vmv.s.x v11, a2 7328; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 7329; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 7330; RV64ZVE32F-NEXT: andi a2, a1, 8 7331; RV64ZVE32F-NEXT: beqz a2, .LBB64_6 7332; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load7 7333; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7334; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7335; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7336; RV64ZVE32F-NEXT: slli a2, a2, 1 7337; RV64ZVE32F-NEXT: add a2, a0, a2 7338; RV64ZVE32F-NEXT: lh a2, 0(a2) 7339; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7340; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7341; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 7342; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 7343; RV64ZVE32F-NEXT: andi a2, a1, 16 7344; RV64ZVE32F-NEXT: beqz a2, .LBB64_7 7345; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load10 7346; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7347; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7348; RV64ZVE32F-NEXT: slli a2, a2, 1 7349; RV64ZVE32F-NEXT: add a2, a0, a2 7350; RV64ZVE32F-NEXT: lh a2, 0(a2) 7351; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7352; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7353; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 7354; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 7355; RV64ZVE32F-NEXT: andi a2, a1, 32 7356; RV64ZVE32F-NEXT: bnez a2, .LBB64_8 7357; RV64ZVE32F-NEXT: j .LBB64_9 7358 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs 7359 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) 7360 ret <8 x bfloat> %v 7361} 7362 7363define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { 7364; RV32-LABEL: mgather_baseidx_sext_v8i8_v8bf16: 7365; RV32: # %bb.0: 7366; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 7367; RV32-NEXT: vsext.vf4 v10, v8 7368; RV32-NEXT: vadd.vv v10, v10, v10 7369; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 7370; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 7371; RV32-NEXT: vmv.v.v v8, v9 7372; RV32-NEXT: ret 7373; 7374; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8bf16: 7375; RV64V: # %bb.0: 7376; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 7377; RV64V-NEXT: vsext.vf8 v12, v8 7378; RV64V-NEXT: vadd.vv v12, v12, v12 7379; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 7380; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t 7381; RV64V-NEXT: vmv.v.v v8, v9 7382; RV64V-NEXT: ret 7383; 7384; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8bf16: 7385; RV64ZVE32F: # %bb.0: 7386; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7387; RV64ZVE32F-NEXT: vmv.x.s a1, v0 7388; RV64ZVE32F-NEXT: andi a2, a1, 1 7389; RV64ZVE32F-NEXT: beqz a2, .LBB65_2 7390; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 7391; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7392; RV64ZVE32F-NEXT: slli a2, a2, 1 7393; RV64ZVE32F-NEXT: add a2, a0, a2 7394; RV64ZVE32F-NEXT: lh a2, 0(a2) 7395; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 
7396; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7397; RV64ZVE32F-NEXT: .LBB65_2: # %else 7398; RV64ZVE32F-NEXT: andi a2, a1, 2 7399; RV64ZVE32F-NEXT: beqz a2, .LBB65_4 7400; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 7401; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7402; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 7403; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7404; RV64ZVE32F-NEXT: slli a2, a2, 1 7405; RV64ZVE32F-NEXT: add a2, a0, a2 7406; RV64ZVE32F-NEXT: lh a2, 0(a2) 7407; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7408; RV64ZVE32F-NEXT: vmv.s.x v10, a2 7409; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 7410; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 7411; RV64ZVE32F-NEXT: .LBB65_4: # %else2 7412; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 7413; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 7414; RV64ZVE32F-NEXT: andi a2, a1, 4 7415; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 7416; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 7417; RV64ZVE32F-NEXT: bnez a2, .LBB65_14 7418; RV64ZVE32F-NEXT: # %bb.5: # %else5 7419; RV64ZVE32F-NEXT: andi a2, a1, 8 7420; RV64ZVE32F-NEXT: bnez a2, .LBB65_15 7421; RV64ZVE32F-NEXT: .LBB65_6: # %else8 7422; RV64ZVE32F-NEXT: andi a2, a1, 16 7423; RV64ZVE32F-NEXT: bnez a2, .LBB65_16 7424; RV64ZVE32F-NEXT: .LBB65_7: # %else11 7425; RV64ZVE32F-NEXT: andi a2, a1, 32 7426; RV64ZVE32F-NEXT: beqz a2, .LBB65_9 7427; RV64ZVE32F-NEXT: .LBB65_8: # %cond.load13 7428; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7429; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 7430; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7431; RV64ZVE32F-NEXT: slli a2, a2, 1 7432; RV64ZVE32F-NEXT: add a2, a0, a2 7433; RV64ZVE32F-NEXT: lh a2, 0(a2) 7434; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7435; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7436; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 7437; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 7438; RV64ZVE32F-NEXT: .LBB65_9: # %else14 7439; RV64ZVE32F-NEXT: andi a2, a1, 64 7440; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 7441; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 7442; RV64ZVE32F-NEXT: beqz a2, .LBB65_11 7443; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 7444; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7445; RV64ZVE32F-NEXT: slli a2, a2, 1 7446; RV64ZVE32F-NEXT: add a2, a0, a2 7447; RV64ZVE32F-NEXT: lh a2, 0(a2) 7448; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7449; RV64ZVE32F-NEXT: vmv.s.x v10, a2 7450; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 7451; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 7452; RV64ZVE32F-NEXT: .LBB65_11: # %else17 7453; RV64ZVE32F-NEXT: andi a1, a1, -128 7454; RV64ZVE32F-NEXT: beqz a1, .LBB65_13 7455; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 7456; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7457; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7458; RV64ZVE32F-NEXT: vmv.x.s a1, v8 7459; RV64ZVE32F-NEXT: slli a1, a1, 1 7460; RV64ZVE32F-NEXT: add a0, a0, a1 7461; RV64ZVE32F-NEXT: lh a0, 0(a0) 7462; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7463; RV64ZVE32F-NEXT: vmv.s.x v8, a0 7464; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 7465; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 7466; RV64ZVE32F-NEXT: .LBB65_13: # %else20 7467; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7468; RV64ZVE32F-NEXT: vmv1r.v v8, v9 7469; RV64ZVE32F-NEXT: ret 7470; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load4 7471; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7472; RV64ZVE32F-NEXT: slli a2, a2, 1 7473; RV64ZVE32F-NEXT: add a2, a0, a2 7474; RV64ZVE32F-NEXT: lh a2, 0(a2) 7475; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7476; 
RV64ZVE32F-NEXT: vmv.s.x v11, a2 7477; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 7478; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 7479; RV64ZVE32F-NEXT: andi a2, a1, 8 7480; RV64ZVE32F-NEXT: beqz a2, .LBB65_6 7481; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load7 7482; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7483; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7484; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7485; RV64ZVE32F-NEXT: slli a2, a2, 1 7486; RV64ZVE32F-NEXT: add a2, a0, a2 7487; RV64ZVE32F-NEXT: lh a2, 0(a2) 7488; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7489; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7490; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 7491; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 7492; RV64ZVE32F-NEXT: andi a2, a1, 16 7493; RV64ZVE32F-NEXT: beqz a2, .LBB65_7 7494; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load10 7495; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7496; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7497; RV64ZVE32F-NEXT: slli a2, a2, 1 7498; RV64ZVE32F-NEXT: add a2, a0, a2 7499; RV64ZVE32F-NEXT: lh a2, 0(a2) 7500; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7501; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7502; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 7503; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 7504; RV64ZVE32F-NEXT: andi a2, a1, 32 7505; RV64ZVE32F-NEXT: bnez a2, .LBB65_8 7506; RV64ZVE32F-NEXT: j .LBB65_9 7507 %eidxs = sext <8 x i8> %idxs to <8 x i16> 7508 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs 7509 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) 7510 ret <8 x bfloat> %v 7511} 7512 7513define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { 7514; RV32-LABEL: mgather_baseidx_zext_v8i8_v8bf16: 7515; RV32: # %bb.0: 7516; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma 7517; RV32-NEXT: vwaddu.vv v10, v8, v8 7518; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 7519; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t 7520; RV32-NEXT: vmv.v.v v8, v9 7521; RV32-NEXT: ret 7522; 7523; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8bf16: 7524; RV64V: # %bb.0: 7525; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma 7526; RV64V-NEXT: vwaddu.vv v10, v8, v8 7527; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 7528; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t 7529; RV64V-NEXT: vmv.v.v v8, v9 7530; RV64V-NEXT: ret 7531; 7532; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8bf16: 7533; RV64ZVE32F: # %bb.0: 7534; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7535; RV64ZVE32F-NEXT: vmv.x.s a1, v0 7536; RV64ZVE32F-NEXT: andi a2, a1, 1 7537; RV64ZVE32F-NEXT: beqz a2, .LBB66_2 7538; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 7539; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7540; RV64ZVE32F-NEXT: andi a2, a2, 255 7541; RV64ZVE32F-NEXT: slli a2, a2, 1 7542; RV64ZVE32F-NEXT: add a2, a0, a2 7543; RV64ZVE32F-NEXT: lh a2, 0(a2) 7544; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7545; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7546; RV64ZVE32F-NEXT: .LBB66_2: # %else 7547; RV64ZVE32F-NEXT: andi a2, a1, 2 7548; RV64ZVE32F-NEXT: beqz a2, .LBB66_4 7549; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 7550; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7551; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 7552; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7553; RV64ZVE32F-NEXT: andi a2, a2, 255 7554; RV64ZVE32F-NEXT: slli a2, a2, 1 7555; RV64ZVE32F-NEXT: add a2, a0, a2 7556; RV64ZVE32F-NEXT: lh a2, 0(a2) 7557; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7558; RV64ZVE32F-NEXT: 
vmv.s.x v10, a2 7559; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 7560; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 7561; RV64ZVE32F-NEXT: .LBB66_4: # %else2 7562; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 7563; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 7564; RV64ZVE32F-NEXT: andi a2, a1, 4 7565; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 7566; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 7567; RV64ZVE32F-NEXT: bnez a2, .LBB66_14 7568; RV64ZVE32F-NEXT: # %bb.5: # %else5 7569; RV64ZVE32F-NEXT: andi a2, a1, 8 7570; RV64ZVE32F-NEXT: bnez a2, .LBB66_15 7571; RV64ZVE32F-NEXT: .LBB66_6: # %else8 7572; RV64ZVE32F-NEXT: andi a2, a1, 16 7573; RV64ZVE32F-NEXT: bnez a2, .LBB66_16 7574; RV64ZVE32F-NEXT: .LBB66_7: # %else11 7575; RV64ZVE32F-NEXT: andi a2, a1, 32 7576; RV64ZVE32F-NEXT: beqz a2, .LBB66_9 7577; RV64ZVE32F-NEXT: .LBB66_8: # %cond.load13 7578; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7579; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 7580; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7581; RV64ZVE32F-NEXT: andi a2, a2, 255 7582; RV64ZVE32F-NEXT: slli a2, a2, 1 7583; RV64ZVE32F-NEXT: add a2, a0, a2 7584; RV64ZVE32F-NEXT: lh a2, 0(a2) 7585; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7586; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7587; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 7588; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 7589; RV64ZVE32F-NEXT: .LBB66_9: # %else14 7590; RV64ZVE32F-NEXT: andi a2, a1, 64 7591; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 7592; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 7593; RV64ZVE32F-NEXT: beqz a2, .LBB66_11 7594; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 7595; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7596; RV64ZVE32F-NEXT: andi a2, a2, 255 7597; RV64ZVE32F-NEXT: slli a2, a2, 1 7598; RV64ZVE32F-NEXT: add a2, a0, a2 7599; RV64ZVE32F-NEXT: lh a2, 0(a2) 7600; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7601; RV64ZVE32F-NEXT: vmv.s.x v10, a2 7602; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 7603; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 7604; RV64ZVE32F-NEXT: .LBB66_11: # %else17 7605; RV64ZVE32F-NEXT: andi a1, a1, -128 7606; RV64ZVE32F-NEXT: beqz a1, .LBB66_13 7607; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 7608; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7609; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7610; RV64ZVE32F-NEXT: vmv.x.s a1, v8 7611; RV64ZVE32F-NEXT: andi a1, a1, 255 7612; RV64ZVE32F-NEXT: slli a1, a1, 1 7613; RV64ZVE32F-NEXT: add a0, a0, a1 7614; RV64ZVE32F-NEXT: lh a0, 0(a0) 7615; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7616; RV64ZVE32F-NEXT: vmv.s.x v8, a0 7617; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 7618; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 7619; RV64ZVE32F-NEXT: .LBB66_13: # %else20 7620; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7621; RV64ZVE32F-NEXT: vmv1r.v v8, v9 7622; RV64ZVE32F-NEXT: ret 7623; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load4 7624; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7625; RV64ZVE32F-NEXT: andi a2, a2, 255 7626; RV64ZVE32F-NEXT: slli a2, a2, 1 7627; RV64ZVE32F-NEXT: add a2, a0, a2 7628; RV64ZVE32F-NEXT: lh a2, 0(a2) 7629; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7630; RV64ZVE32F-NEXT: vmv.s.x v11, a2 7631; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 7632; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 7633; RV64ZVE32F-NEXT: andi a2, a1, 8 7634; RV64ZVE32F-NEXT: beqz a2, .LBB66_6 7635; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load7 7636; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 7637; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7638; 
RV64ZVE32F-NEXT: vmv.x.s a2, v8 7639; RV64ZVE32F-NEXT: andi a2, a2, 255 7640; RV64ZVE32F-NEXT: slli a2, a2, 1 7641; RV64ZVE32F-NEXT: add a2, a0, a2 7642; RV64ZVE32F-NEXT: lh a2, 0(a2) 7643; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 7644; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7645; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 7646; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 7647; RV64ZVE32F-NEXT: andi a2, a1, 16 7648; RV64ZVE32F-NEXT: beqz a2, .LBB66_7 7649; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load10 7650; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7651; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7652; RV64ZVE32F-NEXT: andi a2, a2, 255 7653; RV64ZVE32F-NEXT: slli a2, a2, 1 7654; RV64ZVE32F-NEXT: add a2, a0, a2 7655; RV64ZVE32F-NEXT: lh a2, 0(a2) 7656; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7657; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7658; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 7659; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 7660; RV64ZVE32F-NEXT: andi a2, a1, 32 7661; RV64ZVE32F-NEXT: bnez a2, .LBB66_8 7662; RV64ZVE32F-NEXT: j .LBB66_9 7663 %eidxs = zext <8 x i8> %idxs to <8 x i16> 7664 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs 7665 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) 7666 ret <8 x bfloat> %v 7667} 7668 7669define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) { 7670; RV32-LABEL: mgather_baseidx_v8bf16: 7671; RV32: # %bb.0: 7672; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu 7673; RV32-NEXT: vwadd.vv v10, v8, v8 7674; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 7675; RV32-NEXT: vmv.v.v v8, v9 7676; RV32-NEXT: ret 7677; 7678; RV64V-LABEL: mgather_baseidx_v8bf16: 7679; RV64V: # %bb.0: 7680; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 7681; RV64V-NEXT: vsext.vf4 v12, v8 7682; RV64V-NEXT: vadd.vv v12, v12, v12 7683; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 7684; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t 7685; RV64V-NEXT: vmv.v.v v8, v9 7686; RV64V-NEXT: ret 7687; 7688; RV64ZVE32F-LABEL: mgather_baseidx_v8bf16: 7689; RV64ZVE32F: # %bb.0: 7690; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7691; RV64ZVE32F-NEXT: vmv.x.s a1, v0 7692; RV64ZVE32F-NEXT: andi a2, a1, 1 7693; RV64ZVE32F-NEXT: beqz a2, .LBB67_2 7694; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 7695; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7696; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7697; RV64ZVE32F-NEXT: slli a2, a2, 1 7698; RV64ZVE32F-NEXT: add a2, a0, a2 7699; RV64ZVE32F-NEXT: lh a2, 0(a2) 7700; RV64ZVE32F-NEXT: vmv.s.x v9, a2 7701; RV64ZVE32F-NEXT: .LBB67_2: # %else 7702; RV64ZVE32F-NEXT: andi a2, a1, 2 7703; RV64ZVE32F-NEXT: beqz a2, .LBB67_4 7704; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 7705; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 7706; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 7707; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7708; RV64ZVE32F-NEXT: slli a2, a2, 1 7709; RV64ZVE32F-NEXT: add a2, a0, a2 7710; RV64ZVE32F-NEXT: lh a2, 0(a2) 7711; RV64ZVE32F-NEXT: vmv.s.x v10, a2 7712; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma 7713; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 7714; RV64ZVE32F-NEXT: .LBB67_4: # %else2 7715; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 7716; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 7717; RV64ZVE32F-NEXT: andi a2, a1, 4 7718; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 7719; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 7720; RV64ZVE32F-NEXT: bnez a2, .LBB67_14 7721; RV64ZVE32F-NEXT: # %bb.5: # 
%else5 7722; RV64ZVE32F-NEXT: andi a2, a1, 8 7723; RV64ZVE32F-NEXT: bnez a2, .LBB67_15 7724; RV64ZVE32F-NEXT: .LBB67_6: # %else8 7725; RV64ZVE32F-NEXT: andi a2, a1, 16 7726; RV64ZVE32F-NEXT: bnez a2, .LBB67_16 7727; RV64ZVE32F-NEXT: .LBB67_7: # %else11 7728; RV64ZVE32F-NEXT: andi a2, a1, 32 7729; RV64ZVE32F-NEXT: beqz a2, .LBB67_9 7730; RV64ZVE32F-NEXT: .LBB67_8: # %cond.load13 7731; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 7732; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 7733; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7734; RV64ZVE32F-NEXT: slli a2, a2, 1 7735; RV64ZVE32F-NEXT: add a2, a0, a2 7736; RV64ZVE32F-NEXT: lh a2, 0(a2) 7737; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7738; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma 7739; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 7740; RV64ZVE32F-NEXT: .LBB67_9: # %else14 7741; RV64ZVE32F-NEXT: andi a2, a1, 64 7742; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 7743; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 7744; RV64ZVE32F-NEXT: beqz a2, .LBB67_11 7745; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 7746; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7747; RV64ZVE32F-NEXT: slli a2, a2, 1 7748; RV64ZVE32F-NEXT: add a2, a0, a2 7749; RV64ZVE32F-NEXT: lh a2, 0(a2) 7750; RV64ZVE32F-NEXT: vmv.s.x v10, a2 7751; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma 7752; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 7753; RV64ZVE32F-NEXT: .LBB67_11: # %else17 7754; RV64ZVE32F-NEXT: andi a1, a1, -128 7755; RV64ZVE32F-NEXT: beqz a1, .LBB67_13 7756; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 7757; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 7758; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7759; RV64ZVE32F-NEXT: vmv.x.s a1, v8 7760; RV64ZVE32F-NEXT: slli a1, a1, 1 7761; RV64ZVE32F-NEXT: add a0, a0, a1 7762; RV64ZVE32F-NEXT: lh a0, 0(a0) 7763; RV64ZVE32F-NEXT: vmv.s.x v8, a0 7764; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma 7765; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 7766; RV64ZVE32F-NEXT: .LBB67_13: # %else20 7767; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7768; RV64ZVE32F-NEXT: vmv1r.v v8, v9 7769; RV64ZVE32F-NEXT: ret 7770; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load4 7771; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7772; RV64ZVE32F-NEXT: slli a2, a2, 1 7773; RV64ZVE32F-NEXT: add a2, a0, a2 7774; RV64ZVE32F-NEXT: lh a2, 0(a2) 7775; RV64ZVE32F-NEXT: vmv.s.x v11, a2 7776; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma 7777; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 7778; RV64ZVE32F-NEXT: andi a2, a1, 8 7779; RV64ZVE32F-NEXT: beqz a2, .LBB67_6 7780; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load7 7781; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 7782; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 7783; RV64ZVE32F-NEXT: vmv.x.s a2, v8 7784; RV64ZVE32F-NEXT: slli a2, a2, 1 7785; RV64ZVE32F-NEXT: add a2, a0, a2 7786; RV64ZVE32F-NEXT: lh a2, 0(a2) 7787; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7788; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma 7789; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 7790; RV64ZVE32F-NEXT: andi a2, a1, 16 7791; RV64ZVE32F-NEXT: beqz a2, .LBB67_7 7792; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load10 7793; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma 7794; RV64ZVE32F-NEXT: vmv.x.s a2, v10 7795; RV64ZVE32F-NEXT: slli a2, a2, 1 7796; RV64ZVE32F-NEXT: add a2, a0, a2 7797; RV64ZVE32F-NEXT: lh a2, 0(a2) 7798; RV64ZVE32F-NEXT: vmv.s.x v8, a2 7799; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 7800; RV64ZVE32F-NEXT: andi a2, a1, 32 7801; RV64ZVE32F-NEXT: bnez a2, .LBB67_8 7802; RV64ZVE32F-NEXT: j .LBB67_9 7803 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs 7804 
%v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) 7805 ret <8 x bfloat> %v 7806} 7807 7808declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>) 7809 7810define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) { 7811; RV32V-LABEL: mgather_v1f16: 7812; RV32V: # %bb.0: 7813; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu 7814; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 7815; RV32V-NEXT: vmv1r.v v8, v9 7816; RV32V-NEXT: ret 7817; 7818; RV64V-LABEL: mgather_v1f16: 7819; RV64V: # %bb.0: 7820; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu 7821; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 7822; RV64V-NEXT: vmv1r.v v8, v9 7823; RV64V-NEXT: ret 7824; 7825; RV32ZVE32F-LABEL: mgather_v1f16: 7826; RV32ZVE32F: # %bb.0: 7827; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu 7828; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 7829; RV32ZVE32F-NEXT: vmv1r.v v8, v9 7830; RV32ZVE32F-NEXT: ret 7831; 7832; RV64ZVE32F-LABEL: mgather_v1f16: 7833; RV64ZVE32F: # %bb.0: 7834; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 7835; RV64ZVE32F-NEXT: vfirst.m a1, v0 7836; RV64ZVE32F-NEXT: bnez a1, .LBB68_2 7837; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 7838; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 7839; RV64ZVE32F-NEXT: vle16.v v8, (a0) 7840; RV64ZVE32F-NEXT: .LBB68_2: # %else 7841; RV64ZVE32F-NEXT: ret 7842 %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru) 7843 ret <1 x half> %v 7844} 7845 7846declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>) 7847 7848define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) { 7849; RV32V-LABEL: mgather_v2f16: 7850; RV32V: # %bb.0: 7851; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu 7852; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 7853; RV32V-NEXT: vmv1r.v v8, v9 7854; RV32V-NEXT: ret 7855; 7856; RV64V-LABEL: mgather_v2f16: 7857; RV64V: # %bb.0: 7858; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu 7859; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 7860; RV64V-NEXT: vmv1r.v v8, v9 7861; RV64V-NEXT: ret 7862; 7863; RV32ZVE32F-LABEL: mgather_v2f16: 7864; RV32ZVE32F: # %bb.0: 7865; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu 7866; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 7867; RV32ZVE32F-NEXT: vmv1r.v v8, v9 7868; RV32ZVE32F-NEXT: ret 7869; 7870; RV64ZVE32F-ZVFH-LABEL: mgather_v2f16: 7871; RV64ZVE32F-ZVFH: # %bb.0: 7872; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7873; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0 7874; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1 7875; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB69_3 7876; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else 7877; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2 7878; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_4 7879; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else2 7880; RV64ZVE32F-ZVFH-NEXT: ret 7881; RV64ZVE32F-ZVFH-NEXT: .LBB69_3: # %cond.load 7882; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) 7883; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7884; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 7885; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2 7886; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2 7887; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %cond.load1 7888; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1) 7889; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7890; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 7891; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 7892; RV64ZVE32F-ZVFH-NEXT: vslideup.vi 
v8, v9, 1 7893; RV64ZVE32F-ZVFH-NEXT: ret 7894; 7895; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v2f16: 7896; RV64ZVE32F-ZVFHMIN: # %bb.0: 7897; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7898; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0 7899; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1 7900; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB69_3 7901; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else 7902; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2 7903; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_4 7904; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2 7905; RV64ZVE32F-ZVFHMIN-NEXT: ret 7906; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load 7907; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 7908; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7909; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 7910; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2 7911; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2 7912; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1 7913; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a1) 7914; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7915; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0 7916; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 7917; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1 7918; RV64ZVE32F-ZVFHMIN-NEXT: ret 7919 %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru) 7920 ret <2 x half> %v 7921} 7922 7923declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>) 7924 7925define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) { 7926; RV32-LABEL: mgather_v4f16: 7927; RV32: # %bb.0: 7928; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu 7929; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 7930; RV32-NEXT: vmv1r.v v8, v9 7931; RV32-NEXT: ret 7932; 7933; RV64V-LABEL: mgather_v4f16: 7934; RV64V: # %bb.0: 7935; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu 7936; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t 7937; RV64V-NEXT: vmv1r.v v8, v10 7938; RV64V-NEXT: ret 7939; 7940; RV64ZVE32F-ZVFH-LABEL: mgather_v4f16: 7941; RV64ZVE32F-ZVFH: # %bb.0: 7942; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7943; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 7944; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 7945; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_5 7946; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else 7947; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 7948; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_6 7949; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else2 7950; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 7951; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_7 7952; RV64ZVE32F-ZVFH-NEXT: .LBB70_3: # %else5 7953; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8 7954; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_8 7955; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else8 7956; RV64ZVE32F-ZVFH-NEXT: ret 7957; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %cond.load 7958; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0) 7959; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 7960; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma 7961; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 7962; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 7963; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2 7964; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %cond.load1 7965; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0) 7966; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 7967; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma 7968; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 7969; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, tu, ma 7970; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1 7971; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 7972; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_3 7973; 
RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %cond.load4 7974; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0) 7975; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 7976; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, tu, ma 7977; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 7978; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2 7979; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8 7980; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_4 7981; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.load7 7982; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0) 7983; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) 7984; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 7985; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 7986; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3 7987; RV64ZVE32F-ZVFH-NEXT: ret 7988; 7989; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v4f16: 7990; RV64ZVE32F-ZVFHMIN: # %bb.0: 7991; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 7992; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 7993; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 7994; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_5 7995; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else 7996; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 7997; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_6 7998; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2 7999; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 8000; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_7 8001; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5 8002; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8 8003; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_8 8004; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8 8005; RV64ZVE32F-ZVFHMIN-NEXT: ret 8006; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load 8007; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0) 8008; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8009; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8010; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8011; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 8012; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2 8013; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1 8014; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) 8015; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8016; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 8017; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8018; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma 8019; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1 8020; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 8021; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_3 8022; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4 8023; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0) 8024; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8025; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma 8026; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8027; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2 8028; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8 8029; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_4 8030; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7 8031; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0) 8032; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 8033; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 8034; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0 8035; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 8036; RV64ZVE32F-ZVFHMIN-NEXT: ret 8037 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru) 8038 ret <4 x half> %v 8039} 8040 8041define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) { 8042; RV32-LABEL: mgather_truemask_v4f16: 8043; RV32: # %bb.0: 8044; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 8045; RV32-NEXT: vluxei32.v v9, (zero), v8 8046; RV32-NEXT: vmv1r.v v8, v9 8047; RV32-NEXT: ret 8048; 8049; 
RV64V-LABEL: mgather_truemask_v4f16: 8050; RV64V: # %bb.0: 8051; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 8052; RV64V-NEXT: vluxei64.v v10, (zero), v8 8053; RV64V-NEXT: vmv1r.v v8, v10 8054; RV64V-NEXT: ret 8055; 8056; RV64ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16: 8057; RV64ZVE32F-ZVFH: # %bb.0: 8058; RV64ZVE32F-ZVFH-NEXT: ld a1, 0(a0) 8059; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0) 8060; RV64ZVE32F-ZVFH-NEXT: ld a3, 16(a0) 8061; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0) 8062; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1) 8063; RV64ZVE32F-ZVFH-NEXT: flh fa4, 0(a2) 8064; RV64ZVE32F-ZVFH-NEXT: flh fa3, 0(a3) 8065; RV64ZVE32F-ZVFH-NEXT: flh fa2, 0(a0) 8066; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 8067; RV64ZVE32F-ZVFH-NEXT: vfmv.v.f v8, fa5 8068; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa4 8069; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa3 8070; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa2 8071; RV64ZVE32F-ZVFH-NEXT: ret 8072; 8073; RV64ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16: 8074; RV64ZVE32F-ZVFHMIN: # %bb.0: 8075; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0) 8076; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) 8077; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0) 8078; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0) 8079; RV64ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1) 8080; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8081; RV64ZVE32F-ZVFHMIN-NEXT: lh a3, 0(a3) 8082; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 8083; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 8084; RV64ZVE32F-ZVFHMIN-NEXT: vmv.v.x v8, a1 8085; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2 8086; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3 8087; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 8088; RV64ZVE32F-ZVFHMIN-NEXT: ret 8089 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru) 8090 ret <4 x half> %v 8091} 8092 8093define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) { 8094; RV32-LABEL: mgather_falsemask_v4f16: 8095; RV32: # %bb.0: 8096; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8097; RV32-NEXT: vmv1r.v v8, v9 8098; RV32-NEXT: ret 8099; 8100; RV64V-LABEL: mgather_falsemask_v4f16: 8101; RV64V: # %bb.0: 8102; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8103; RV64V-NEXT: vmv1r.v v8, v10 8104; RV64V-NEXT: ret 8105; 8106; RV64ZVE32F-LABEL: mgather_falsemask_v4f16: 8107; RV64ZVE32F: # %bb.0: 8108; RV64ZVE32F-NEXT: ret 8109 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru) 8110 ret <4 x half> %v 8111} 8112 8113declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>) 8114 8115define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) { 8116; RV32-LABEL: mgather_v8f16: 8117; RV32: # %bb.0: 8118; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu 8119; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 8120; RV32-NEXT: vmv.v.v v8, v10 8121; RV32-NEXT: ret 8122; 8123; RV64V-LABEL: mgather_v8f16: 8124; RV64V: # %bb.0: 8125; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu 8126; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t 8127; RV64V-NEXT: vmv.v.v v8, v12 8128; RV64V-NEXT: ret 8129; 8130; RV64ZVE32F-ZVFH-LABEL: mgather_v8f16: 8131; RV64ZVE32F-ZVFH: # %bb.0: 8132; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8133; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 8134; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 8135; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_9 8136; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else 8137; RV64ZVE32F-ZVFH-NEXT: 
andi a2, a1, 2 8138; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_10 8139; RV64ZVE32F-ZVFH-NEXT: .LBB73_2: # %else2 8140; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 8141; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_11 8142; RV64ZVE32F-ZVFH-NEXT: .LBB73_3: # %else5 8143; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8144; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_12 8145; RV64ZVE32F-ZVFH-NEXT: .LBB73_4: # %else8 8146; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 8147; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_13 8148; RV64ZVE32F-ZVFH-NEXT: .LBB73_5: # %else11 8149; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 8150; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_14 8151; RV64ZVE32F-ZVFH-NEXT: .LBB73_6: # %else14 8152; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 8153; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_15 8154; RV64ZVE32F-ZVFH-NEXT: .LBB73_7: # %else17 8155; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 8156; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB73_16 8157; RV64ZVE32F-ZVFH-NEXT: .LBB73_8: # %else20 8158; RV64ZVE32F-ZVFH-NEXT: ret 8159; RV64ZVE32F-ZVFH-NEXT: .LBB73_9: # %cond.load 8160; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0) 8161; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8162; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8163; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8164; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 8165; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_2 8166; RV64ZVE32F-ZVFH-NEXT: .LBB73_10: # %cond.load1 8167; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0) 8168; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8169; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma 8170; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8171; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma 8172; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1 8173; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 8174; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_3 8175; RV64ZVE32F-ZVFH-NEXT: .LBB73_11: # %cond.load4 8176; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0) 8177; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8178; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma 8179; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8180; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2 8181; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8182; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_4 8183; RV64ZVE32F-ZVFH-NEXT: .LBB73_12: # %cond.load7 8184; RV64ZVE32F-ZVFH-NEXT: ld a2, 24(a0) 8185; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8186; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma 8187; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8188; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3 8189; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 8190; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_5 8191; RV64ZVE32F-ZVFH-NEXT: .LBB73_13: # %cond.load10 8192; RV64ZVE32F-ZVFH-NEXT: ld a2, 32(a0) 8193; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8194; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma 8195; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8196; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 4 8197; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 8198; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_6 8199; RV64ZVE32F-ZVFH-NEXT: .LBB73_14: # %cond.load13 8200; RV64ZVE32F-ZVFH-NEXT: ld a2, 40(a0) 8201; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8202; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma 8203; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8204; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 5 8205; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 8206; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_7 8207; RV64ZVE32F-ZVFH-NEXT: .LBB73_15: # %cond.load16 8208; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0) 8209; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8210; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma 8211; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8212; 
RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 6 8213; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 8214; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB73_8 8215; RV64ZVE32F-ZVFH-NEXT: .LBB73_16: # %cond.load19 8216; RV64ZVE32F-ZVFH-NEXT: ld a0, 56(a0) 8217; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) 8218; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 8219; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8220; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 7 8221; RV64ZVE32F-ZVFH-NEXT: ret 8222; 8223; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v8f16: 8224; RV64ZVE32F-ZVFHMIN: # %bb.0: 8225; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8226; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 8227; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 8228; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_9 8229; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else 8230; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 8231; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_10 8232; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2 8233; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 8234; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_11 8235; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_3: # %else5 8236; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 8237; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_12 8238; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_4: # %else8 8239; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 8240; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_13 8241; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_5: # %else11 8242; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 8243; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_14 8244; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_6: # %else14 8245; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 8246; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_15 8247; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17 8248; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 8249; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_16 8250; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20 8251; RV64ZVE32F-ZVFHMIN-NEXT: ret 8252; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load 8253; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0) 8254; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8255; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8256; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8257; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 8258; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_2 8259; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1 8260; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) 8261; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8262; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 8263; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8264; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma 8265; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1 8266; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 8267; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_3 8268; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4 8269; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0) 8270; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8271; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma 8272; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8273; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2 8274; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 8275; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_4 8276; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7 8277; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 24(a0) 8278; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8279; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma 8280; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8281; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 8282; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 8283; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_5 8284; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # 
%cond.load10 8285; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 32(a0) 8286; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8287; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma 8288; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8289; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 4 8290; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 8291; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_6 8292; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13 8293; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 40(a0) 8294; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8295; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma 8296; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8297; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 5 8298; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 8299; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_7 8300; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16 8301; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0) 8302; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8303; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma 8304; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8305; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 6 8306; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 8307; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_8 8308; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19 8309; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 56(a0) 8310; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 8311; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 8312; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0 8313; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 7 8314; RV64ZVE32F-ZVFHMIN-NEXT: ret 8315 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) 8316 ret <8 x half> %v 8317} 8318 8319define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { 8320; RV32-LABEL: mgather_baseidx_v8i8_v8f16: 8321; RV32: # %bb.0: 8322; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 8323; RV32-NEXT: vsext.vf4 v10, v8 8324; RV32-NEXT: vadd.vv v10, v10, v10 8325; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 8326; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 8327; RV32-NEXT: vmv.v.v v8, v9 8328; RV32-NEXT: ret 8329; 8330; RV64V-LABEL: mgather_baseidx_v8i8_v8f16: 8331; RV64V: # %bb.0: 8332; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 8333; RV64V-NEXT: vsext.vf8 v12, v8 8334; RV64V-NEXT: vadd.vv v12, v12, v12 8335; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 8336; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t 8337; RV64V-NEXT: vmv.v.v v8, v9 8338; RV64V-NEXT: ret 8339; 8340; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16: 8341; RV64ZVE32F-ZVFH: # %bb.0: 8342; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8343; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 8344; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 8345; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_2 8346; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load 8347; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8348; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8349; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8350; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8351; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8352; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8353; RV64ZVE32F-ZVFH-NEXT: .LBB74_2: # %else 8354; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 8355; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_4 8356; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 8357; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8358; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 8359; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 8360; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8361; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8362; 
RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8363; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8364; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 8365; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma 8366; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 8367; RV64ZVE32F-ZVFH-NEXT: .LBB74_4: # %else2 8368; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 8369; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 8370; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 8371; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8372; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 8373; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14 8374; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 8375; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8376; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15 8377; RV64ZVE32F-ZVFH-NEXT: .LBB74_6: # %else8 8378; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 8379; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_16 8380; RV64ZVE32F-ZVFH-NEXT: .LBB74_7: # %else11 8381; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 8382; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_9 8383; RV64ZVE32F-ZVFH-NEXT: .LBB74_8: # %cond.load13 8384; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8385; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1 8386; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8387; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8388; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8389; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8390; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8391; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8392; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma 8393; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 8394; RV64ZVE32F-ZVFH-NEXT: .LBB74_9: # %else14 8395; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 8396; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8397; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 8398; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_11 8399; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 8400; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8401; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8402; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8403; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8404; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8405; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 8406; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma 8407; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 8408; RV64ZVE32F-ZVFH-NEXT: .LBB74_11: # %else17 8409; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 8410; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB74_13 8411; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 8412; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8413; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 8414; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 8415; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 8416; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 8417; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) 8418; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8419; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8420; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 8421; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 8422; RV64ZVE32F-ZVFH-NEXT: .LBB74_13: # %else20 8423; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8424; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 8425; RV64ZVE32F-ZVFH-NEXT: ret 8426; RV64ZVE32F-ZVFH-NEXT: .LBB74_14: # %cond.load4 8427; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8428; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8429; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8430; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8431; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8432; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5 8433; 
RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma 8434; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 8435; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8436; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_6 8437; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load7 8438; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8439; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 8440; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8441; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8442; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8443; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8444; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8445; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8446; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma 8447; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 8448; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 8449; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_7 8450; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load10 8451; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8452; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 8453; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8454; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8455; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8456; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma 8457; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8458; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma 8459; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 8460; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 8461; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_8 8462; RV64ZVE32F-ZVFH-NEXT: j .LBB74_9 8463; 8464; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16: 8465; RV64ZVE32F-ZVFHMIN: # %bb.0: 8466; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8467; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 8468; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 8469; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_2 8470; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load 8471; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8472; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8473; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8474; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8475; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8476; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8477; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_2: # %else 8478; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 8479; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_4 8480; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 8481; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8482; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 8483; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 8484; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8485; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8486; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8487; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8488; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 8489; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma 8490; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 8491; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else2 8492; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 8493; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 8494; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 8495; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8496; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 8497; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14 8498; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 8499; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 8500; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15 8501; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else8 8502; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 8503; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_16 
8504; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else11 8505; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 8506; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_9 8507; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_8: # %cond.load13 8508; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8509; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 8510; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8511; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8512; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8513; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8514; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8515; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8516; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma 8517; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 8518; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_9: # %else14 8519; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 8520; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8521; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 8522; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_11 8523; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 8524; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8525; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8526; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8527; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8528; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8529; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 8530; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma 8531; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 8532; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %else17 8533; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 8534; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB74_13 8535; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 8536; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8537; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 8538; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 8539; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 8540; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 8541; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 8542; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8543; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 8544; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 8545; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 8546; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %else20 8547; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8548; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 8549; RV64ZVE32F-ZVFHMIN-NEXT: ret 8550; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load4 8551; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8552; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8553; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8554; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8555; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8556; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 8557; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma 8558; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 8559; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 8560; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_6 8561; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load7 8562; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8563; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 8564; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8565; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8566; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8567; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8568; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8569; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8570; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma 8571; RV64ZVE32F-ZVFHMIN-NEXT: 
vslideup.vi v9, v8, 3 8572; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 8573; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_7 8574; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load10 8575; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8576; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 8577; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8578; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8579; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8580; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 8581; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8582; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma 8583; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 8584; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 8585; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_8 8586; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_9 8587 %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs 8588 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) 8589 ret <8 x half> %v 8590} 8591 8592define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { 8593; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16: 8594; RV32: # %bb.0: 8595; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 8596; RV32-NEXT: vsext.vf4 v10, v8 8597; RV32-NEXT: vadd.vv v10, v10, v10 8598; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 8599; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 8600; RV32-NEXT: vmv.v.v v8, v9 8601; RV32-NEXT: ret 8602; 8603; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16: 8604; RV64V: # %bb.0: 8605; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 8606; RV64V-NEXT: vsext.vf8 v12, v8 8607; RV64V-NEXT: vadd.vv v12, v12, v12 8608; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 8609; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t 8610; RV64V-NEXT: vmv.v.v v8, v9 8611; RV64V-NEXT: ret 8612; 8613; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16: 8614; RV64ZVE32F-ZVFH: # %bb.0: 8615; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8616; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 8617; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 8618; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_2 8619; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load 8620; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8621; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8622; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8623; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8624; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8625; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8626; RV64ZVE32F-ZVFH-NEXT: .LBB75_2: # %else 8627; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 8628; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_4 8629; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 8630; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8631; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 8632; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 8633; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8634; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8635; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8636; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8637; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 8638; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma 8639; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 8640; RV64ZVE32F-ZVFH-NEXT: .LBB75_4: # %else2 8641; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 8642; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 8643; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 8644; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8645; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 8646; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14 8647; 
RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 8648; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8649; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15 8650; RV64ZVE32F-ZVFH-NEXT: .LBB75_6: # %else8 8651; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 8652; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_16 8653; RV64ZVE32F-ZVFH-NEXT: .LBB75_7: # %else11 8654; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 8655; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_9 8656; RV64ZVE32F-ZVFH-NEXT: .LBB75_8: # %cond.load13 8657; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8658; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1 8659; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8660; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8661; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8662; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8663; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8664; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8665; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma 8666; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 8667; RV64ZVE32F-ZVFH-NEXT: .LBB75_9: # %else14 8668; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 8669; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8670; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 8671; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_11 8672; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 8673; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8674; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8675; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8676; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8677; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8678; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 8679; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma 8680; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 8681; RV64ZVE32F-ZVFH-NEXT: .LBB75_11: # %else17 8682; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 8683; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB75_13 8684; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 8685; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8686; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 8687; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 8688; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 8689; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 8690; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) 8691; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8692; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8693; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 8694; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 8695; RV64ZVE32F-ZVFH-NEXT: .LBB75_13: # %else20 8696; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8697; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 8698; RV64ZVE32F-ZVFH-NEXT: ret 8699; RV64ZVE32F-ZVFH-NEXT: .LBB75_14: # %cond.load4 8700; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8701; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8702; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8703; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8704; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8705; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5 8706; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma 8707; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 8708; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8709; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_6 8710; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load7 8711; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8712; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 8713; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8714; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8715; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8716; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8717; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8718; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8719; 
RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma 8720; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 8721; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 8722; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_7 8723; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load10 8724; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8725; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 8726; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8727; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8728; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8729; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma 8730; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8731; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma 8732; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 8733; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 8734; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_8 8735; RV64ZVE32F-ZVFH-NEXT: j .LBB75_9 8736; 8737; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16: 8738; RV64ZVE32F-ZVFHMIN: # %bb.0: 8739; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8740; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 8741; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 8742; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_2 8743; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load 8744; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8745; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8746; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8747; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8748; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8749; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 8750; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_2: # %else 8751; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 8752; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_4 8753; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 8754; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8755; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 8756; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 8757; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8758; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8759; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8760; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8761; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 8762; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma 8763; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 8764; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else2 8765; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 8766; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 8767; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 8768; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8769; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 8770; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14 8771; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 8772; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 8773; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15 8774; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else8 8775; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 8776; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_16 8777; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else11 8778; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 8779; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_9 8780; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_8: # %cond.load13 8781; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8782; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 8783; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8784; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8785; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8786; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8787; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8788; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8789; 
RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma 8790; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 8791; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_9: # %else14 8792; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 8793; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8794; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 8795; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_11 8796; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 8797; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8798; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8799; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8800; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8801; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8802; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 8803; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma 8804; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 8805; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %else17 8806; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 8807; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB75_13 8808; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 8809; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8810; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 8811; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 8812; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 8813; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 8814; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 8815; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8816; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 8817; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 8818; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 8819; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %else20 8820; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8821; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 8822; RV64ZVE32F-ZVFHMIN-NEXT: ret 8823; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load4 8824; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8825; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8826; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8827; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8828; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8829; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 8830; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma 8831; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 8832; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 8833; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_6 8834; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load7 8835; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8836; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 8837; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 8838; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8839; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8840; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8841; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8842; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8843; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma 8844; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 8845; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 8846; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_7 8847; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load10 8848; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8849; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 8850; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 8851; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 8852; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 8853; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 8854; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 8855; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma 8856; 
RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 8857; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 8858; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_8 8859; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_9 8860 %eidxs = sext <8 x i8> %idxs to <8 x i16> 8861 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs 8862 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) 8863 ret <8 x half> %v 8864} 8865 8866define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { 8867; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16: 8868; RV32: # %bb.0: 8869; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma 8870; RV32-NEXT: vwaddu.vv v10, v8, v8 8871; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu 8872; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t 8873; RV32-NEXT: vmv.v.v v8, v9 8874; RV32-NEXT: ret 8875; 8876; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16: 8877; RV64V: # %bb.0: 8878; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma 8879; RV64V-NEXT: vwaddu.vv v10, v8, v8 8880; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 8881; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t 8882; RV64V-NEXT: vmv.v.v v8, v9 8883; RV64V-NEXT: ret 8884; 8885; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16: 8886; RV64ZVE32F-ZVFH: # %bb.0: 8887; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8888; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 8889; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 8890; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_2 8891; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load 8892; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8893; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 8894; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8895; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8896; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8897; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma 8898; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 8899; RV64ZVE32F-ZVFH-NEXT: .LBB76_2: # %else 8900; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 8901; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_4 8902; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 8903; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8904; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 8905; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 8906; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 8907; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8908; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8909; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8910; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8911; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 8912; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma 8913; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 8914; RV64ZVE32F-ZVFH-NEXT: .LBB76_4: # %else2 8915; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 8916; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 8917; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 8918; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8919; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 8920; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14 8921; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 8922; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8923; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15 8924; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else8 8925; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 8926; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_16 8927; RV64ZVE32F-ZVFH-NEXT: .LBB76_7: # %else11 8928; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 8929; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_9 8930; RV64ZVE32F-ZVFH-NEXT: .LBB76_8: # %cond.load13 8931; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8932; RV64ZVE32F-ZVFH-NEXT: 
vslidedown.vi v8, v10, 1 8933; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8934; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 8935; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8936; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8937; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8938; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8939; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8940; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma 8941; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 8942; RV64ZVE32F-ZVFH-NEXT: .LBB76_9: # %else14 8943; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 8944; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 8945; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 8946; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_11 8947; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 8948; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8949; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 8950; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8951; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8952; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8953; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8954; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 8955; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma 8956; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 8957; RV64ZVE32F-ZVFH-NEXT: .LBB76_11: # %else17 8958; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 8959; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_13 8960; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 8961; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8962; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 8963; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 8964; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255 8965; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 8966; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 8967; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) 8968; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8969; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8970; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 8971; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 8972; RV64ZVE32F-ZVFH-NEXT: .LBB76_13: # %else20 8973; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 8974; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 8975; RV64ZVE32F-ZVFH-NEXT: ret 8976; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %cond.load4 8977; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8978; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 8979; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8980; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8981; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8982; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8983; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5 8984; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma 8985; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 8986; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 8987; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_6 8988; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load7 8989; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 8990; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 8991; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 8992; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 8993; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 8994; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 8995; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 8996; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 8997; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 8998; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma 8999; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 9000; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 9001; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_7 9002; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load10 9003; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9004; 
RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 9005; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 9006; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9007; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 9008; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9009; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma 9010; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 9011; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma 9012; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 9013; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 9014; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_8 9015; RV64ZVE32F-ZVFH-NEXT: j .LBB76_9 9016; 9017; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16: 9018; RV64ZVE32F-ZVFHMIN: # %bb.0: 9019; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9020; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 9021; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 9022; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_2 9023; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load 9024; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9025; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 9026; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9027; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9028; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9029; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma 9030; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 9031; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_2: # %else 9032; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 9033; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_4 9034; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 9035; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9036; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 9037; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 9038; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 9039; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9040; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9041; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9042; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 9043; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 9044; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma 9045; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 9046; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else2 9047; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 9048; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 9049; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 9050; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9051; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 9052; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14 9053; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 9054; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 9055; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15 9056; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else8 9057; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 9058; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_16 9059; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else11 9060; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 9061; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_9 9062; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %cond.load13 9063; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9064; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 9065; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9066; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 9067; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9068; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9069; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9070; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 9071; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 9072; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma 9073; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 9074; RV64ZVE32F-ZVFHMIN-NEXT: 
.LBB76_9: # %else14 9075; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 9076; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9077; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 9078; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_11 9079; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 9080; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9081; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 9082; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9083; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9084; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9085; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 9086; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 9087; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma 9088; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 9089; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %else17 9090; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 9091; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_13 9092; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 9093; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9094; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 9095; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 9096; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255 9097; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 9098; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 9099; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 9100; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 9101; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 9102; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 9103; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 9104; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %else20 9105; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9106; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 9107; RV64ZVE32F-ZVFHMIN-NEXT: ret 9108; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load4 9109; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9110; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 9111; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9112; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9113; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9114; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 9115; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 9116; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma 9117; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 9118; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 9119; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_6 9120; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load7 9121; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9122; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 9123; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9124; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 9125; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9126; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9127; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9128; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 9129; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 9130; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma 9131; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 9132; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 9133; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_7 9134; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load10 9135; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9136; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 9137; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 9138; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9139; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9140; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9141; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 9142; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, 
a2 9143; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma 9144; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 9145; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 9146; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_8 9147; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_9 9148 %eidxs = zext <8 x i8> %idxs to <8 x i16> 9149 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs 9150 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) 9151 ret <8 x half> %v 9152} 9153 9154define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) { 9155; RV32-LABEL: mgather_baseidx_v8f16: 9156; RV32: # %bb.0: 9157; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu 9158; RV32-NEXT: vwadd.vv v10, v8, v8 9159; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t 9160; RV32-NEXT: vmv.v.v v8, v9 9161; RV32-NEXT: ret 9162; 9163; RV64V-LABEL: mgather_baseidx_v8f16: 9164; RV64V: # %bb.0: 9165; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 9166; RV64V-NEXT: vsext.vf4 v12, v8 9167; RV64V-NEXT: vadd.vv v12, v12, v12 9168; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu 9169; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t 9170; RV64V-NEXT: vmv.v.v v8, v9 9171; RV64V-NEXT: ret 9172; 9173; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16: 9174; RV64ZVE32F-ZVFH: # %bb.0: 9175; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9176; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0 9177; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1 9178; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_2 9179; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load 9180; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma 9181; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 9182; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9183; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 9184; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9185; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5 9186; RV64ZVE32F-ZVFH-NEXT: .LBB77_2: # %else 9187; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2 9188; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_4 9189; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1 9190; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9191; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1 9192; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 9193; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9194; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 9195; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9196; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 9197; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma 9198; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1 9199; RV64ZVE32F-ZVFH-NEXT: .LBB77_4: # %else2 9200; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma 9201; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 9202; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 9203; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9204; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 9205; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14 9206; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 9207; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 9208; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15 9209; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else8 9210; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 9211; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_16 9212; RV64ZVE32F-ZVFH-NEXT: .LBB77_7: # %else11 9213; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 9214; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_9 9215; RV64ZVE32F-ZVFH-NEXT: .LBB77_8: # %cond.load13 9216; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9217; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1 9218; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 9219; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9220; RV64ZVE32F-ZVFH-NEXT: 
add a2, a0, a2 9221; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9222; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 9223; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma 9224; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 9225; RV64ZVE32F-ZVFH-NEXT: .LBB77_9: # %else14 9226; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 9227; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9228; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 9229; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_11 9230; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 9231; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 9232; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9233; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 9234; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9235; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 9236; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma 9237; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 9238; RV64ZVE32F-ZVFH-NEXT: .LBB77_11: # %else17 9239; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 9240; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB77_13 9241; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 9242; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9243; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 9244; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 9245; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 9246; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 9247; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) 9248; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 9249; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 9250; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 9251; RV64ZVE32F-ZVFH-NEXT: .LBB77_13: # %else20 9252; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9253; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 9254; RV64ZVE32F-ZVFH-NEXT: ret 9255; RV64ZVE32F-ZVFH-NEXT: .LBB77_14: # %cond.load4 9256; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 9257; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9258; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 9259; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9260; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5 9261; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma 9262; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 9263; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 9264; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_6 9265; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load7 9266; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9267; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 9268; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 9269; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9270; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 9271; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9272; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 9273; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma 9274; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 9275; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 9276; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_7 9277; RV64ZVE32F-ZVFH-NEXT: .LBB77_16: # %cond.load10 9278; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma 9279; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 9280; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 9281; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 9282; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) 9283; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 9284; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4 9285; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 9286; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_8 9287; RV64ZVE32F-ZVFH-NEXT: j .LBB77_9 9288; 9289; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16: 9290; RV64ZVE32F-ZVFHMIN: # %bb.0: 9291; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9292; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0 9293; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1 9294; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_2 
9295; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load 9296; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma 9297; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9298; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9299; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9300; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9301; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2 9302; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_2: # %else 9303; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2 9304; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_4 9305; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1 9306; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9307; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1 9308; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 9309; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9310; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9311; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9312; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 9313; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma 9314; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1 9315; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else2 9316; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma 9317; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 9318; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 9319; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9320; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 9321; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14 9322; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 9323; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 9324; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15 9325; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else8 9326; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 9327; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_16 9328; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else11 9329; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 9330; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_9 9331; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_8: # %cond.load13 9332; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9333; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1 9334; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9335; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9336; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9337; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9338; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 9339; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma 9340; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 9341; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_9: # %else14 9342; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 9343; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 9344; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 9345; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_11 9346; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 9347; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9348; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9349; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9350; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9351; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 9352; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma 9353; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 9354; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %else17 9355; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 9356; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB77_13 9357; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 9358; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9359; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 9360; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 9361; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 9362; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 9363; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) 
9364; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 9365; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 9366; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 9367; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %else20 9368; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9369; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 9370; RV64ZVE32F-ZVFHMIN-NEXT: ret 9371; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load4 9372; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9373; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9374; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9375; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9376; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2 9377; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma 9378; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 9379; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 9380; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_6 9381; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load7 9382; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 9383; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 9384; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 9385; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9386; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9387; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9388; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 9389; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma 9390; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 9391; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 9392; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_7 9393; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load10 9394; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma 9395; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 9396; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 9397; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 9398; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) 9399; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2 9400; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4 9401; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 9402; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_8 9403; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_9 9404 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs 9405 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) 9406 ret <8 x half> %v 9407} 9408 9409declare <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x float>) 9410 9411define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %passthru) { 9412; RV32V-LABEL: mgather_v1f32: 9413; RV32V: # %bb.0: 9414; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu 9415; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 9416; RV32V-NEXT: vmv1r.v v8, v9 9417; RV32V-NEXT: ret 9418; 9419; RV64V-LABEL: mgather_v1f32: 9420; RV64V: # %bb.0: 9421; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu 9422; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 9423; RV64V-NEXT: vmv1r.v v8, v9 9424; RV64V-NEXT: ret 9425; 9426; RV32ZVE32F-LABEL: mgather_v1f32: 9427; RV32ZVE32F: # %bb.0: 9428; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu 9429; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 9430; RV32ZVE32F-NEXT: vmv.v.v v8, v9 9431; RV32ZVE32F-NEXT: ret 9432; 9433; RV64ZVE32F-LABEL: mgather_v1f32: 9434; RV64ZVE32F: # %bb.0: 9435; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 9436; RV64ZVE32F-NEXT: vfirst.m a1, v0 9437; RV64ZVE32F-NEXT: bnez a1, .LBB78_2 9438; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 9439; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 9440; RV64ZVE32F-NEXT: vle32.v v8, (a0) 9441; RV64ZVE32F-NEXT: .LBB78_2: # %else 9442; RV64ZVE32F-NEXT: ret 9443 %v = call <1 x 
float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru) 9444 ret <1 x float> %v 9445} 9446 9447declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>) 9448 9449define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %passthru) { 9450; RV32V-LABEL: mgather_v2f32: 9451; RV32V: # %bb.0: 9452; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 9453; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 9454; RV32V-NEXT: vmv1r.v v8, v9 9455; RV32V-NEXT: ret 9456; 9457; RV64V-LABEL: mgather_v2f32: 9458; RV64V: # %bb.0: 9459; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu 9460; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 9461; RV64V-NEXT: vmv1r.v v8, v9 9462; RV64V-NEXT: ret 9463; 9464; RV32ZVE32F-LABEL: mgather_v2f32: 9465; RV32ZVE32F: # %bb.0: 9466; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu 9467; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t 9468; RV32ZVE32F-NEXT: vmv.v.v v8, v9 9469; RV32ZVE32F-NEXT: ret 9470; 9471; RV64ZVE32F-LABEL: mgather_v2f32: 9472; RV64ZVE32F: # %bb.0: 9473; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9474; RV64ZVE32F-NEXT: vmv.x.s a2, v0 9475; RV64ZVE32F-NEXT: andi a3, a2, 1 9476; RV64ZVE32F-NEXT: bnez a3, .LBB79_3 9477; RV64ZVE32F-NEXT: # %bb.1: # %else 9478; RV64ZVE32F-NEXT: andi a2, a2, 2 9479; RV64ZVE32F-NEXT: bnez a2, .LBB79_4 9480; RV64ZVE32F-NEXT: .LBB79_2: # %else2 9481; RV64ZVE32F-NEXT: ret 9482; RV64ZVE32F-NEXT: .LBB79_3: # %cond.load 9483; RV64ZVE32F-NEXT: flw fa5, 0(a0) 9484; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 9485; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 9486; RV64ZVE32F-NEXT: andi a2, a2, 2 9487; RV64ZVE32F-NEXT: beqz a2, .LBB79_2 9488; RV64ZVE32F-NEXT: .LBB79_4: # %cond.load1 9489; RV64ZVE32F-NEXT: flw fa5, 0(a1) 9490; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 9491; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 9492; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 9493; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 9494; RV64ZVE32F-NEXT: ret 9495 %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru) 9496 ret <2 x float> %v 9497} 9498 9499declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) 9500 9501define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %passthru) { 9502; RV32-LABEL: mgather_v4f32: 9503; RV32: # %bb.0: 9504; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu 9505; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 9506; RV32-NEXT: vmv.v.v v8, v9 9507; RV32-NEXT: ret 9508; 9509; RV64V-LABEL: mgather_v4f32: 9510; RV64V: # %bb.0: 9511; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu 9512; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t 9513; RV64V-NEXT: vmv.v.v v8, v10 9514; RV64V-NEXT: ret 9515; 9516; RV64ZVE32F-LABEL: mgather_v4f32: 9517; RV64ZVE32F: # %bb.0: 9518; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9519; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9520; RV64ZVE32F-NEXT: andi a2, a1, 1 9521; RV64ZVE32F-NEXT: bnez a2, .LBB80_5 9522; RV64ZVE32F-NEXT: # %bb.1: # %else 9523; RV64ZVE32F-NEXT: andi a2, a1, 2 9524; RV64ZVE32F-NEXT: bnez a2, .LBB80_6 9525; RV64ZVE32F-NEXT: .LBB80_2: # %else2 9526; RV64ZVE32F-NEXT: andi a2, a1, 4 9527; RV64ZVE32F-NEXT: bnez a2, .LBB80_7 9528; RV64ZVE32F-NEXT: .LBB80_3: # %else5 9529; RV64ZVE32F-NEXT: andi a1, a1, 8 9530; RV64ZVE32F-NEXT: bnez a1, .LBB80_8 9531; RV64ZVE32F-NEXT: .LBB80_4: # %else8 9532; RV64ZVE32F-NEXT: ret 9533; RV64ZVE32F-NEXT: .LBB80_5: # %cond.load 9534; RV64ZVE32F-NEXT: ld a2, 0(a0) 9535; 
RV64ZVE32F-NEXT: flw fa5, 0(a2) 9536; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 9537; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 9538; RV64ZVE32F-NEXT: andi a2, a1, 2 9539; RV64ZVE32F-NEXT: beqz a2, .LBB80_2 9540; RV64ZVE32F-NEXT: .LBB80_6: # %cond.load1 9541; RV64ZVE32F-NEXT: ld a2, 8(a0) 9542; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9543; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 9544; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 9545; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 9546; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 9547; RV64ZVE32F-NEXT: andi a2, a1, 4 9548; RV64ZVE32F-NEXT: beqz a2, .LBB80_3 9549; RV64ZVE32F-NEXT: .LBB80_7: # %cond.load4 9550; RV64ZVE32F-NEXT: ld a2, 16(a0) 9551; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9552; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 9553; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 9554; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 9555; RV64ZVE32F-NEXT: andi a1, a1, 8 9556; RV64ZVE32F-NEXT: beqz a1, .LBB80_4 9557; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load7 9558; RV64ZVE32F-NEXT: ld a0, 24(a0) 9559; RV64ZVE32F-NEXT: flw fa5, 0(a0) 9560; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma 9561; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 9562; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 9563; RV64ZVE32F-NEXT: ret 9564 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x float> %passthru) 9565 ret <4 x float> %v 9566} 9567 9568define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) { 9569; RV32-LABEL: mgather_truemask_v4f32: 9570; RV32: # %bb.0: 9571; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 9572; RV32-NEXT: vluxei32.v v8, (zero), v8 9573; RV32-NEXT: ret 9574; 9575; RV64V-LABEL: mgather_truemask_v4f32: 9576; RV64V: # %bb.0: 9577; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 9578; RV64V-NEXT: vluxei64.v v10, (zero), v8 9579; RV64V-NEXT: vmv.v.v v8, v10 9580; RV64V-NEXT: ret 9581; 9582; RV64ZVE32F-LABEL: mgather_truemask_v4f32: 9583; RV64ZVE32F: # %bb.0: 9584; RV64ZVE32F-NEXT: ld a1, 0(a0) 9585; RV64ZVE32F-NEXT: ld a2, 8(a0) 9586; RV64ZVE32F-NEXT: ld a3, 16(a0) 9587; RV64ZVE32F-NEXT: ld a0, 24(a0) 9588; RV64ZVE32F-NEXT: flw fa5, 0(a1) 9589; RV64ZVE32F-NEXT: flw fa4, 0(a2) 9590; RV64ZVE32F-NEXT: flw fa3, 0(a3) 9591; RV64ZVE32F-NEXT: flw fa2, 0(a0) 9592; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma 9593; RV64ZVE32F-NEXT: vfmv.v.f v8, fa5 9594; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 9595; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 9596; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa2 9597; RV64ZVE32F-NEXT: ret 9598 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x float> %passthru) 9599 ret <4 x float> %v 9600} 9601 9602define <4 x float> @mgather_falsemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) { 9603; RV32-LABEL: mgather_falsemask_v4f32: 9604; RV32: # %bb.0: 9605; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9606; RV32-NEXT: vmv1r.v v8, v9 9607; RV32-NEXT: ret 9608; 9609; RV64V-LABEL: mgather_falsemask_v4f32: 9610; RV64V: # %bb.0: 9611; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9612; RV64V-NEXT: vmv1r.v v8, v10 9613; RV64V-NEXT: ret 9614; 9615; RV64ZVE32F-LABEL: mgather_falsemask_v4f32: 9616; RV64ZVE32F: # %bb.0: 9617; RV64ZVE32F-NEXT: ret 9618 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x float> %passthru) 9619 ret <4 x float> %v 9620} 9621 9622declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>) 9623 9624define <8 x float> 
@mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %passthru) { 9625; RV32-LABEL: mgather_v8f32: 9626; RV32: # %bb.0: 9627; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 9628; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 9629; RV32-NEXT: vmv.v.v v8, v10 9630; RV32-NEXT: ret 9631; 9632; RV64V-LABEL: mgather_v8f32: 9633; RV64V: # %bb.0: 9634; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu 9635; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t 9636; RV64V-NEXT: vmv.v.v v8, v12 9637; RV64V-NEXT: ret 9638; 9639; RV64ZVE32F-LABEL: mgather_v8f32: 9640; RV64ZVE32F: # %bb.0: 9641; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9642; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9643; RV64ZVE32F-NEXT: andi a2, a1, 1 9644; RV64ZVE32F-NEXT: bnez a2, .LBB83_9 9645; RV64ZVE32F-NEXT: # %bb.1: # %else 9646; RV64ZVE32F-NEXT: andi a2, a1, 2 9647; RV64ZVE32F-NEXT: bnez a2, .LBB83_10 9648; RV64ZVE32F-NEXT: .LBB83_2: # %else2 9649; RV64ZVE32F-NEXT: andi a2, a1, 4 9650; RV64ZVE32F-NEXT: bnez a2, .LBB83_11 9651; RV64ZVE32F-NEXT: .LBB83_3: # %else5 9652; RV64ZVE32F-NEXT: andi a2, a1, 8 9653; RV64ZVE32F-NEXT: bnez a2, .LBB83_12 9654; RV64ZVE32F-NEXT: .LBB83_4: # %else8 9655; RV64ZVE32F-NEXT: andi a2, a1, 16 9656; RV64ZVE32F-NEXT: bnez a2, .LBB83_13 9657; RV64ZVE32F-NEXT: .LBB83_5: # %else11 9658; RV64ZVE32F-NEXT: andi a2, a1, 32 9659; RV64ZVE32F-NEXT: bnez a2, .LBB83_14 9660; RV64ZVE32F-NEXT: .LBB83_6: # %else14 9661; RV64ZVE32F-NEXT: andi a2, a1, 64 9662; RV64ZVE32F-NEXT: bnez a2, .LBB83_15 9663; RV64ZVE32F-NEXT: .LBB83_7: # %else17 9664; RV64ZVE32F-NEXT: andi a1, a1, -128 9665; RV64ZVE32F-NEXT: bnez a1, .LBB83_16 9666; RV64ZVE32F-NEXT: .LBB83_8: # %else20 9667; RV64ZVE32F-NEXT: ret 9668; RV64ZVE32F-NEXT: .LBB83_9: # %cond.load 9669; RV64ZVE32F-NEXT: ld a2, 0(a0) 9670; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9671; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 9672; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 9673; RV64ZVE32F-NEXT: andi a2, a1, 2 9674; RV64ZVE32F-NEXT: beqz a2, .LBB83_2 9675; RV64ZVE32F-NEXT: .LBB83_10: # %cond.load1 9676; RV64ZVE32F-NEXT: ld a2, 8(a0) 9677; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9678; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 9679; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9680; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 9681; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 9682; RV64ZVE32F-NEXT: andi a2, a1, 4 9683; RV64ZVE32F-NEXT: beqz a2, .LBB83_3 9684; RV64ZVE32F-NEXT: .LBB83_11: # %cond.load4 9685; RV64ZVE32F-NEXT: ld a2, 16(a0) 9686; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9687; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 9688; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9689; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 9690; RV64ZVE32F-NEXT: andi a2, a1, 8 9691; RV64ZVE32F-NEXT: beqz a2, .LBB83_4 9692; RV64ZVE32F-NEXT: .LBB83_12: # %cond.load7 9693; RV64ZVE32F-NEXT: ld a2, 24(a0) 9694; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9695; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 9696; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9697; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 9698; RV64ZVE32F-NEXT: andi a2, a1, 16 9699; RV64ZVE32F-NEXT: beqz a2, .LBB83_5 9700; RV64ZVE32F-NEXT: .LBB83_13: # %cond.load10 9701; RV64ZVE32F-NEXT: ld a2, 32(a0) 9702; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9703; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 9704; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9705; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4 9706; RV64ZVE32F-NEXT: andi a2, a1, 32 9707; RV64ZVE32F-NEXT: beqz a2, .LBB83_6 9708; RV64ZVE32F-NEXT: .LBB83_14: # %cond.load13 9709; RV64ZVE32F-NEXT: ld a2, 40(a0) 9710; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9711; 
RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 9712; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9713; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5 9714; RV64ZVE32F-NEXT: andi a2, a1, 64 9715; RV64ZVE32F-NEXT: beqz a2, .LBB83_7 9716; RV64ZVE32F-NEXT: .LBB83_15: # %cond.load16 9717; RV64ZVE32F-NEXT: ld a2, 48(a0) 9718; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9719; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 9720; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9721; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6 9722; RV64ZVE32F-NEXT: andi a1, a1, -128 9723; RV64ZVE32F-NEXT: beqz a1, .LBB83_8 9724; RV64ZVE32F-NEXT: .LBB83_16: # %cond.load19 9725; RV64ZVE32F-NEXT: ld a0, 56(a0) 9726; RV64ZVE32F-NEXT: flw fa5, 0(a0) 9727; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9728; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9729; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7 9730; RV64ZVE32F-NEXT: ret 9731 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 9732 ret <8 x float> %v 9733} 9734 9735define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) { 9736; RV32-LABEL: mgather_baseidx_v8i8_v8f32: 9737; RV32: # %bb.0: 9738; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 9739; RV32-NEXT: vsext.vf4 v12, v8 9740; RV32-NEXT: vsll.vi v8, v12, 2 9741; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 9742; RV32-NEXT: vmv.v.v v8, v10 9743; RV32-NEXT: ret 9744; 9745; RV64V-LABEL: mgather_baseidx_v8i8_v8f32: 9746; RV64V: # %bb.0: 9747; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 9748; RV64V-NEXT: vsext.vf8 v12, v8 9749; RV64V-NEXT: vsll.vi v12, v12, 2 9750; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 9751; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 9752; RV64V-NEXT: vmv.v.v v8, v10 9753; RV64V-NEXT: ret 9754; 9755; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f32: 9756; RV64ZVE32F: # %bb.0: 9757; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9758; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9759; RV64ZVE32F-NEXT: andi a2, a1, 1 9760; RV64ZVE32F-NEXT: beqz a2, .LBB84_2 9761; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 9762; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9763; RV64ZVE32F-NEXT: slli a2, a2, 2 9764; RV64ZVE32F-NEXT: add a2, a0, a2 9765; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9766; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 9767; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9768; RV64ZVE32F-NEXT: .LBB84_2: # %else 9769; RV64ZVE32F-NEXT: andi a2, a1, 2 9770; RV64ZVE32F-NEXT: beqz a2, .LBB84_4 9771; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 9772; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9773; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 9774; RV64ZVE32F-NEXT: vmv.x.s a2, v9 9775; RV64ZVE32F-NEXT: slli a2, a2, 2 9776; RV64ZVE32F-NEXT: add a2, a0, a2 9777; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9778; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9779; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 9780; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 9781; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 9782; RV64ZVE32F-NEXT: .LBB84_4: # %else2 9783; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 9784; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 9785; RV64ZVE32F-NEXT: andi a2, a1, 4 9786; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9787; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 9788; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 9789; RV64ZVE32F-NEXT: # %bb.5: # %else5 9790; RV64ZVE32F-NEXT: andi a2, a1, 8 9791; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 9792; RV64ZVE32F-NEXT: .LBB84_6: # %else8 9793; RV64ZVE32F-NEXT: andi a2, a1, 16 9794; RV64ZVE32F-NEXT: bnez a2, .LBB84_16 9795; 
RV64ZVE32F-NEXT: .LBB84_7: # %else11 9796; RV64ZVE32F-NEXT: andi a2, a1, 32 9797; RV64ZVE32F-NEXT: beqz a2, .LBB84_9 9798; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load13 9799; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9800; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 9801; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9802; RV64ZVE32F-NEXT: slli a2, a2, 2 9803; RV64ZVE32F-NEXT: add a2, a0, a2 9804; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9805; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9806; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 9807; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 9808; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 9809; RV64ZVE32F-NEXT: .LBB84_9: # %else14 9810; RV64ZVE32F-NEXT: andi a2, a1, 64 9811; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9812; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 9813; RV64ZVE32F-NEXT: beqz a2, .LBB84_11 9814; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 9815; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9816; RV64ZVE32F-NEXT: slli a2, a2, 2 9817; RV64ZVE32F-NEXT: add a2, a0, a2 9818; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9819; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9820; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 9821; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 9822; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 9823; RV64ZVE32F-NEXT: .LBB84_11: # %else17 9824; RV64ZVE32F-NEXT: andi a1, a1, -128 9825; RV64ZVE32F-NEXT: beqz a1, .LBB84_13 9826; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 9827; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9828; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 9829; RV64ZVE32F-NEXT: vmv.x.s a1, v8 9830; RV64ZVE32F-NEXT: slli a1, a1, 2 9831; RV64ZVE32F-NEXT: add a0, a0, a1 9832; RV64ZVE32F-NEXT: flw fa5, 0(a0) 9833; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9834; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 9835; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9836; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 9837; RV64ZVE32F-NEXT: .LBB84_13: # %else20 9838; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9839; RV64ZVE32F-NEXT: vmv2r.v v8, v10 9840; RV64ZVE32F-NEXT: ret 9841; RV64ZVE32F-NEXT: .LBB84_14: # %cond.load4 9842; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9843; RV64ZVE32F-NEXT: slli a2, a2, 2 9844; RV64ZVE32F-NEXT: add a2, a0, a2 9845; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9846; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9847; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 9848; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 9849; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 9850; RV64ZVE32F-NEXT: andi a2, a1, 8 9851; RV64ZVE32F-NEXT: beqz a2, .LBB84_6 9852; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load7 9853; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9854; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 9855; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9856; RV64ZVE32F-NEXT: slli a2, a2, 2 9857; RV64ZVE32F-NEXT: add a2, a0, a2 9858; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9859; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9860; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 9861; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 9862; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 9863; RV64ZVE32F-NEXT: andi a2, a1, 16 9864; RV64ZVE32F-NEXT: beqz a2, .LBB84_7 9865; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load10 9866; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9867; RV64ZVE32F-NEXT: vmv.x.s a2, v9 9868; RV64ZVE32F-NEXT: slli a2, a2, 2 9869; RV64ZVE32F-NEXT: add a2, a0, a2 9870; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9871; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 9872; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 9873; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 9874; 
RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 9875; RV64ZVE32F-NEXT: andi a2, a1, 32 9876; RV64ZVE32F-NEXT: bnez a2, .LBB84_8 9877; RV64ZVE32F-NEXT: j .LBB84_9 9878 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs 9879 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 9880 ret <8 x float> %v 9881} 9882 9883define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) { 9884; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f32: 9885; RV32: # %bb.0: 9886; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 9887; RV32-NEXT: vsext.vf4 v12, v8 9888; RV32-NEXT: vsll.vi v8, v12, 2 9889; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 9890; RV32-NEXT: vmv.v.v v8, v10 9891; RV32-NEXT: ret 9892; 9893; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f32: 9894; RV64V: # %bb.0: 9895; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 9896; RV64V-NEXT: vsext.vf8 v12, v8 9897; RV64V-NEXT: vsll.vi v12, v12, 2 9898; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 9899; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 9900; RV64V-NEXT: vmv.v.v v8, v10 9901; RV64V-NEXT: ret 9902; 9903; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f32: 9904; RV64ZVE32F: # %bb.0: 9905; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9906; RV64ZVE32F-NEXT: vmv.x.s a1, v0 9907; RV64ZVE32F-NEXT: andi a2, a1, 1 9908; RV64ZVE32F-NEXT: beqz a2, .LBB85_2 9909; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 9910; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9911; RV64ZVE32F-NEXT: slli a2, a2, 2 9912; RV64ZVE32F-NEXT: add a2, a0, a2 9913; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9914; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 9915; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 9916; RV64ZVE32F-NEXT: .LBB85_2: # %else 9917; RV64ZVE32F-NEXT: andi a2, a1, 2 9918; RV64ZVE32F-NEXT: beqz a2, .LBB85_4 9919; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 9920; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9921; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 9922; RV64ZVE32F-NEXT: vmv.x.s a2, v9 9923; RV64ZVE32F-NEXT: slli a2, a2, 2 9924; RV64ZVE32F-NEXT: add a2, a0, a2 9925; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9926; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9927; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 9928; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 9929; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 9930; RV64ZVE32F-NEXT: .LBB85_4: # %else2 9931; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 9932; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 9933; RV64ZVE32F-NEXT: andi a2, a1, 4 9934; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9935; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 9936; RV64ZVE32F-NEXT: bnez a2, .LBB85_14 9937; RV64ZVE32F-NEXT: # %bb.5: # %else5 9938; RV64ZVE32F-NEXT: andi a2, a1, 8 9939; RV64ZVE32F-NEXT: bnez a2, .LBB85_15 9940; RV64ZVE32F-NEXT: .LBB85_6: # %else8 9941; RV64ZVE32F-NEXT: andi a2, a1, 16 9942; RV64ZVE32F-NEXT: bnez a2, .LBB85_16 9943; RV64ZVE32F-NEXT: .LBB85_7: # %else11 9944; RV64ZVE32F-NEXT: andi a2, a1, 32 9945; RV64ZVE32F-NEXT: beqz a2, .LBB85_9 9946; RV64ZVE32F-NEXT: .LBB85_8: # %cond.load13 9947; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9948; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 9949; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9950; RV64ZVE32F-NEXT: slli a2, a2, 2 9951; RV64ZVE32F-NEXT: add a2, a0, a2 9952; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9953; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9954; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 9955; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 9956; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 9957; 
RV64ZVE32F-NEXT: .LBB85_9: # %else14 9958; RV64ZVE32F-NEXT: andi a2, a1, 64 9959; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 9960; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 9961; RV64ZVE32F-NEXT: beqz a2, .LBB85_11 9962; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 9963; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9964; RV64ZVE32F-NEXT: slli a2, a2, 2 9965; RV64ZVE32F-NEXT: add a2, a0, a2 9966; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9967; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9968; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 9969; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 9970; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 9971; RV64ZVE32F-NEXT: .LBB85_11: # %else17 9972; RV64ZVE32F-NEXT: andi a1, a1, -128 9973; RV64ZVE32F-NEXT: beqz a1, .LBB85_13 9974; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 9975; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 9976; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 9977; RV64ZVE32F-NEXT: vmv.x.s a1, v8 9978; RV64ZVE32F-NEXT: slli a1, a1, 2 9979; RV64ZVE32F-NEXT: add a0, a0, a1 9980; RV64ZVE32F-NEXT: flw fa5, 0(a0) 9981; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9982; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 9983; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 9984; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 9985; RV64ZVE32F-NEXT: .LBB85_13: # %else20 9986; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 9987; RV64ZVE32F-NEXT: vmv2r.v v8, v10 9988; RV64ZVE32F-NEXT: ret 9989; RV64ZVE32F-NEXT: .LBB85_14: # %cond.load4 9990; RV64ZVE32F-NEXT: vmv.x.s a2, v8 9991; RV64ZVE32F-NEXT: slli a2, a2, 2 9992; RV64ZVE32F-NEXT: add a2, a0, a2 9993; RV64ZVE32F-NEXT: flw fa5, 0(a2) 9994; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 9995; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 9996; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 9997; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 9998; RV64ZVE32F-NEXT: andi a2, a1, 8 9999; RV64ZVE32F-NEXT: beqz a2, .LBB85_6 10000; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load7 10001; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 10002; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10003; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10004; RV64ZVE32F-NEXT: slli a2, a2, 2 10005; RV64ZVE32F-NEXT: add a2, a0, a2 10006; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10007; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10008; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10009; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 10010; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 10011; RV64ZVE32F-NEXT: andi a2, a1, 16 10012; RV64ZVE32F-NEXT: beqz a2, .LBB85_7 10013; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load10 10014; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10015; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10016; RV64ZVE32F-NEXT: slli a2, a2, 2 10017; RV64ZVE32F-NEXT: add a2, a0, a2 10018; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10019; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 10020; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10021; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 10022; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 10023; RV64ZVE32F-NEXT: andi a2, a1, 32 10024; RV64ZVE32F-NEXT: bnez a2, .LBB85_8 10025; RV64ZVE32F-NEXT: j .LBB85_9 10026 %eidxs = sext <8 x i8> %idxs to <8 x i32> 10027 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 10028 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 10029 ret <8 x float> %v 10030} 10031 10032define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) { 10033; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32: 10034; 
RV32: # %bb.0: 10035; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 10036; RV32-NEXT: vzext.vf2 v9, v8 10037; RV32-NEXT: vsll.vi v8, v9, 2 10038; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu 10039; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t 10040; RV32-NEXT: vmv.v.v v8, v10 10041; RV32-NEXT: ret 10042; 10043; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32: 10044; RV64V: # %bb.0: 10045; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 10046; RV64V-NEXT: vzext.vf2 v9, v8 10047; RV64V-NEXT: vsll.vi v8, v9, 2 10048; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 10049; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t 10050; RV64V-NEXT: vmv.v.v v8, v10 10051; RV64V-NEXT: ret 10052; 10053; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f32: 10054; RV64ZVE32F: # %bb.0: 10055; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10056; RV64ZVE32F-NEXT: vmv.x.s a1, v0 10057; RV64ZVE32F-NEXT: andi a2, a1, 1 10058; RV64ZVE32F-NEXT: beqz a2, .LBB86_2 10059; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 10060; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10061; RV64ZVE32F-NEXT: andi a2, a2, 255 10062; RV64ZVE32F-NEXT: slli a2, a2, 2 10063; RV64ZVE32F-NEXT: add a2, a0, a2 10064; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10065; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 10066; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 10067; RV64ZVE32F-NEXT: .LBB86_2: # %else 10068; RV64ZVE32F-NEXT: andi a2, a1, 2 10069; RV64ZVE32F-NEXT: beqz a2, .LBB86_4 10070; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 10071; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 10072; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 10073; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10074; RV64ZVE32F-NEXT: andi a2, a2, 255 10075; RV64ZVE32F-NEXT: slli a2, a2, 2 10076; RV64ZVE32F-NEXT: add a2, a0, a2 10077; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10078; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10079; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 10080; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 10081; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 10082; RV64ZVE32F-NEXT: .LBB86_4: # %else2 10083; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 10084; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 10085; RV64ZVE32F-NEXT: andi a2, a1, 4 10086; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 10087; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 10088; RV64ZVE32F-NEXT: bnez a2, .LBB86_14 10089; RV64ZVE32F-NEXT: # %bb.5: # %else5 10090; RV64ZVE32F-NEXT: andi a2, a1, 8 10091; RV64ZVE32F-NEXT: bnez a2, .LBB86_15 10092; RV64ZVE32F-NEXT: .LBB86_6: # %else8 10093; RV64ZVE32F-NEXT: andi a2, a1, 16 10094; RV64ZVE32F-NEXT: bnez a2, .LBB86_16 10095; RV64ZVE32F-NEXT: .LBB86_7: # %else11 10096; RV64ZVE32F-NEXT: andi a2, a1, 32 10097; RV64ZVE32F-NEXT: beqz a2, .LBB86_9 10098; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load13 10099; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 10100; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 10101; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10102; RV64ZVE32F-NEXT: andi a2, a2, 255 10103; RV64ZVE32F-NEXT: slli a2, a2, 2 10104; RV64ZVE32F-NEXT: add a2, a0, a2 10105; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10106; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10107; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10108; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 10109; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 10110; RV64ZVE32F-NEXT: .LBB86_9: # %else14 10111; RV64ZVE32F-NEXT: andi a2, a1, 64 10112; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 10113; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 10114; RV64ZVE32F-NEXT: beqz a2, .LBB86_11 10115; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 10116; RV64ZVE32F-NEXT: vmv.x.s a2, 
v8 10117; RV64ZVE32F-NEXT: andi a2, a2, 255 10118; RV64ZVE32F-NEXT: slli a2, a2, 2 10119; RV64ZVE32F-NEXT: add a2, a0, a2 10120; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10121; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10122; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10123; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 10124; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 10125; RV64ZVE32F-NEXT: .LBB86_11: # %else17 10126; RV64ZVE32F-NEXT: andi a1, a1, -128 10127; RV64ZVE32F-NEXT: beqz a1, .LBB86_13 10128; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 10129; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 10130; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10131; RV64ZVE32F-NEXT: vmv.x.s a1, v8 10132; RV64ZVE32F-NEXT: andi a1, a1, 255 10133; RV64ZVE32F-NEXT: slli a1, a1, 2 10134; RV64ZVE32F-NEXT: add a0, a0, a1 10135; RV64ZVE32F-NEXT: flw fa5, 0(a0) 10136; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10137; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10138; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 10139; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 10140; RV64ZVE32F-NEXT: .LBB86_13: # %else20 10141; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10142; RV64ZVE32F-NEXT: vmv2r.v v8, v10 10143; RV64ZVE32F-NEXT: ret 10144; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load4 10145; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10146; RV64ZVE32F-NEXT: andi a2, a2, 255 10147; RV64ZVE32F-NEXT: slli a2, a2, 2 10148; RV64ZVE32F-NEXT: add a2, a0, a2 10149; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10150; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10151; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10152; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 10153; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 10154; RV64ZVE32F-NEXT: andi a2, a1, 8 10155; RV64ZVE32F-NEXT: beqz a2, .LBB86_6 10156; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load7 10157; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 10158; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10159; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10160; RV64ZVE32F-NEXT: andi a2, a2, 255 10161; RV64ZVE32F-NEXT: slli a2, a2, 2 10162; RV64ZVE32F-NEXT: add a2, a0, a2 10163; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10164; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10165; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10166; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 10167; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 10168; RV64ZVE32F-NEXT: andi a2, a1, 16 10169; RV64ZVE32F-NEXT: beqz a2, .LBB86_7 10170; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load10 10171; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10172; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10173; RV64ZVE32F-NEXT: andi a2, a2, 255 10174; RV64ZVE32F-NEXT: slli a2, a2, 2 10175; RV64ZVE32F-NEXT: add a2, a0, a2 10176; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10177; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 10178; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10179; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 10180; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 10181; RV64ZVE32F-NEXT: andi a2, a1, 32 10182; RV64ZVE32F-NEXT: bnez a2, .LBB86_8 10183; RV64ZVE32F-NEXT: j .LBB86_9 10184 %eidxs = zext <8 x i8> %idxs to <8 x i32> 10185 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 10186 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 10187 ret <8 x float> %v 10188} 10189 10190define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) { 10191; RV32-LABEL: mgather_baseidx_v8i16_v8f32: 10192; RV32: # %bb.0: 10193; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 
10194; RV32-NEXT: vsext.vf2 v12, v8 10195; RV32-NEXT: vsll.vi v8, v12, 2 10196; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 10197; RV32-NEXT: vmv.v.v v8, v10 10198; RV32-NEXT: ret 10199; 10200; RV64V-LABEL: mgather_baseidx_v8i16_v8f32: 10201; RV64V: # %bb.0: 10202; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 10203; RV64V-NEXT: vsext.vf4 v12, v8 10204; RV64V-NEXT: vsll.vi v12, v12, 2 10205; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 10206; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 10207; RV64V-NEXT: vmv.v.v v8, v10 10208; RV64V-NEXT: ret 10209; 10210; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f32: 10211; RV64ZVE32F: # %bb.0: 10212; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10213; RV64ZVE32F-NEXT: vmv.x.s a1, v0 10214; RV64ZVE32F-NEXT: andi a2, a1, 1 10215; RV64ZVE32F-NEXT: beqz a2, .LBB87_2 10216; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 10217; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 10218; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10219; RV64ZVE32F-NEXT: slli a2, a2, 2 10220; RV64ZVE32F-NEXT: add a2, a0, a2 10221; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10222; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 10223; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 10224; RV64ZVE32F-NEXT: .LBB87_2: # %else 10225; RV64ZVE32F-NEXT: andi a2, a1, 2 10226; RV64ZVE32F-NEXT: beqz a2, .LBB87_4 10227; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 10228; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10229; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 10230; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10231; RV64ZVE32F-NEXT: slli a2, a2, 2 10232; RV64ZVE32F-NEXT: add a2, a0, a2 10233; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10234; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10235; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 10236; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 10237; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 10238; RV64ZVE32F-NEXT: .LBB87_4: # %else2 10239; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 10240; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 10241; RV64ZVE32F-NEXT: andi a2, a1, 4 10242; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 10243; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 10244; RV64ZVE32F-NEXT: bnez a2, .LBB87_14 10245; RV64ZVE32F-NEXT: # %bb.5: # %else5 10246; RV64ZVE32F-NEXT: andi a2, a1, 8 10247; RV64ZVE32F-NEXT: bnez a2, .LBB87_15 10248; RV64ZVE32F-NEXT: .LBB87_6: # %else8 10249; RV64ZVE32F-NEXT: andi a2, a1, 16 10250; RV64ZVE32F-NEXT: bnez a2, .LBB87_16 10251; RV64ZVE32F-NEXT: .LBB87_7: # %else11 10252; RV64ZVE32F-NEXT: andi a2, a1, 32 10253; RV64ZVE32F-NEXT: beqz a2, .LBB87_9 10254; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13 10255; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10256; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 10257; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10258; RV64ZVE32F-NEXT: slli a2, a2, 2 10259; RV64ZVE32F-NEXT: add a2, a0, a2 10260; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10261; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10262; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10263; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 10264; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 10265; RV64ZVE32F-NEXT: .LBB87_9: # %else14 10266; RV64ZVE32F-NEXT: andi a2, a1, 64 10267; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 10268; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 10269; RV64ZVE32F-NEXT: beqz a2, .LBB87_11 10270; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 10271; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10272; RV64ZVE32F-NEXT: slli a2, a2, 2 10273; RV64ZVE32F-NEXT: add a2, a0, a2 10274; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10275; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 
10276; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10277; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 10278; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 10279; RV64ZVE32F-NEXT: .LBB87_11: # %else17 10280; RV64ZVE32F-NEXT: andi a1, a1, -128 10281; RV64ZVE32F-NEXT: beqz a1, .LBB87_13 10282; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 10283; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10284; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10285; RV64ZVE32F-NEXT: vmv.x.s a1, v8 10286; RV64ZVE32F-NEXT: slli a1, a1, 2 10287; RV64ZVE32F-NEXT: add a0, a0, a1 10288; RV64ZVE32F-NEXT: flw fa5, 0(a0) 10289; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10290; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10291; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 10292; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 10293; RV64ZVE32F-NEXT: .LBB87_13: # %else20 10294; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10295; RV64ZVE32F-NEXT: vmv2r.v v8, v10 10296; RV64ZVE32F-NEXT: ret 10297; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4 10298; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10299; RV64ZVE32F-NEXT: slli a2, a2, 2 10300; RV64ZVE32F-NEXT: add a2, a0, a2 10301; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10302; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10303; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10304; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 10305; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 10306; RV64ZVE32F-NEXT: andi a2, a1, 8 10307; RV64ZVE32F-NEXT: beqz a2, .LBB87_6 10308; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7 10309; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10310; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10311; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10312; RV64ZVE32F-NEXT: slli a2, a2, 2 10313; RV64ZVE32F-NEXT: add a2, a0, a2 10314; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10315; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10316; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10317; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 10318; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 10319; RV64ZVE32F-NEXT: andi a2, a1, 16 10320; RV64ZVE32F-NEXT: beqz a2, .LBB87_7 10321; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10 10322; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 10323; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10324; RV64ZVE32F-NEXT: slli a2, a2, 2 10325; RV64ZVE32F-NEXT: add a2, a0, a2 10326; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10327; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 10328; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10329; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 10330; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 10331; RV64ZVE32F-NEXT: andi a2, a1, 32 10332; RV64ZVE32F-NEXT: bnez a2, .LBB87_8 10333; RV64ZVE32F-NEXT: j .LBB87_9 10334 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs 10335 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 10336 ret <8 x float> %v 10337} 10338 10339define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) { 10340; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f32: 10341; RV32: # %bb.0: 10342; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 10343; RV32-NEXT: vsext.vf2 v12, v8 10344; RV32-NEXT: vsll.vi v8, v12, 2 10345; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 10346; RV32-NEXT: vmv.v.v v8, v10 10347; RV32-NEXT: ret 10348; 10349; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f32: 10350; RV64V: # %bb.0: 10351; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 10352; RV64V-NEXT: vsext.vf4 v12, v8 10353; RV64V-NEXT: vsll.vi v12, v12, 2 10354; RV64V-NEXT: vsetvli zero, 
zero, e32, m2, ta, mu 10355; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 10356; RV64V-NEXT: vmv.v.v v8, v10 10357; RV64V-NEXT: ret 10358; 10359; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f32: 10360; RV64ZVE32F: # %bb.0: 10361; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10362; RV64ZVE32F-NEXT: vmv.x.s a1, v0 10363; RV64ZVE32F-NEXT: andi a2, a1, 1 10364; RV64ZVE32F-NEXT: beqz a2, .LBB88_2 10365; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 10366; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 10367; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10368; RV64ZVE32F-NEXT: slli a2, a2, 2 10369; RV64ZVE32F-NEXT: add a2, a0, a2 10370; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10371; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 10372; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 10373; RV64ZVE32F-NEXT: .LBB88_2: # %else 10374; RV64ZVE32F-NEXT: andi a2, a1, 2 10375; RV64ZVE32F-NEXT: beqz a2, .LBB88_4 10376; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 10377; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10378; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 10379; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10380; RV64ZVE32F-NEXT: slli a2, a2, 2 10381; RV64ZVE32F-NEXT: add a2, a0, a2 10382; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10383; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10384; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 10385; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 10386; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 10387; RV64ZVE32F-NEXT: .LBB88_4: # %else2 10388; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 10389; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 10390; RV64ZVE32F-NEXT: andi a2, a1, 4 10391; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 10392; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 10393; RV64ZVE32F-NEXT: bnez a2, .LBB88_14 10394; RV64ZVE32F-NEXT: # %bb.5: # %else5 10395; RV64ZVE32F-NEXT: andi a2, a1, 8 10396; RV64ZVE32F-NEXT: bnez a2, .LBB88_15 10397; RV64ZVE32F-NEXT: .LBB88_6: # %else8 10398; RV64ZVE32F-NEXT: andi a2, a1, 16 10399; RV64ZVE32F-NEXT: bnez a2, .LBB88_16 10400; RV64ZVE32F-NEXT: .LBB88_7: # %else11 10401; RV64ZVE32F-NEXT: andi a2, a1, 32 10402; RV64ZVE32F-NEXT: beqz a2, .LBB88_9 10403; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13 10404; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10405; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 10406; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10407; RV64ZVE32F-NEXT: slli a2, a2, 2 10408; RV64ZVE32F-NEXT: add a2, a0, a2 10409; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10410; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10411; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10412; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 10413; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 10414; RV64ZVE32F-NEXT: .LBB88_9: # %else14 10415; RV64ZVE32F-NEXT: andi a2, a1, 64 10416; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 10417; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 10418; RV64ZVE32F-NEXT: beqz a2, .LBB88_11 10419; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 10420; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10421; RV64ZVE32F-NEXT: slli a2, a2, 2 10422; RV64ZVE32F-NEXT: add a2, a0, a2 10423; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10424; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10425; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10426; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 10427; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 10428; RV64ZVE32F-NEXT: .LBB88_11: # %else17 10429; RV64ZVE32F-NEXT: andi a1, a1, -128 10430; RV64ZVE32F-NEXT: beqz a1, .LBB88_13 10431; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 10432; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10433; RV64ZVE32F-NEXT: 
vslidedown.vi v8, v8, 1 10434; RV64ZVE32F-NEXT: vmv.x.s a1, v8 10435; RV64ZVE32F-NEXT: slli a1, a1, 2 10436; RV64ZVE32F-NEXT: add a0, a0, a1 10437; RV64ZVE32F-NEXT: flw fa5, 0(a0) 10438; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10439; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10440; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 10441; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 10442; RV64ZVE32F-NEXT: .LBB88_13: # %else20 10443; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10444; RV64ZVE32F-NEXT: vmv2r.v v8, v10 10445; RV64ZVE32F-NEXT: ret 10446; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4 10447; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10448; RV64ZVE32F-NEXT: slli a2, a2, 2 10449; RV64ZVE32F-NEXT: add a2, a0, a2 10450; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10451; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10452; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10453; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 10454; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 10455; RV64ZVE32F-NEXT: andi a2, a1, 8 10456; RV64ZVE32F-NEXT: beqz a2, .LBB88_6 10457; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7 10458; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10459; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10460; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10461; RV64ZVE32F-NEXT: slli a2, a2, 2 10462; RV64ZVE32F-NEXT: add a2, a0, a2 10463; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10464; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10465; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10466; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 10467; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 10468; RV64ZVE32F-NEXT: andi a2, a1, 16 10469; RV64ZVE32F-NEXT: beqz a2, .LBB88_7 10470; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10 10471; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 10472; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10473; RV64ZVE32F-NEXT: slli a2, a2, 2 10474; RV64ZVE32F-NEXT: add a2, a0, a2 10475; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10476; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 10477; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10478; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 10479; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 10480; RV64ZVE32F-NEXT: andi a2, a1, 32 10481; RV64ZVE32F-NEXT: bnez a2, .LBB88_8 10482; RV64ZVE32F-NEXT: j .LBB88_9 10483 %eidxs = sext <8 x i16> %idxs to <8 x i32> 10484 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 10485 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 10486 ret <8 x float> %v 10487} 10488 10489define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) { 10490; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f32: 10491; RV32: # %bb.0: 10492; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 10493; RV32-NEXT: vzext.vf2 v12, v8 10494; RV32-NEXT: vsll.vi v8, v12, 2 10495; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 10496; RV32-NEXT: vmv.v.v v8, v10 10497; RV32-NEXT: ret 10498; 10499; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32: 10500; RV64V: # %bb.0: 10501; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu 10502; RV64V-NEXT: vzext.vf2 v12, v8 10503; RV64V-NEXT: vsll.vi v8, v12, 2 10504; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t 10505; RV64V-NEXT: vmv.v.v v8, v10 10506; RV64V-NEXT: ret 10507; 10508; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32: 10509; RV64ZVE32F: # %bb.0: 10510; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10511; RV64ZVE32F-NEXT: vmv.x.s a1, v0 10512; RV64ZVE32F-NEXT: andi a2, a1, 1 10513; RV64ZVE32F-NEXT: beqz a2, .LBB89_2 10514; 
RV64ZVE32F-NEXT: # %bb.1: # %cond.load 10515; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 10516; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10517; RV64ZVE32F-NEXT: slli a2, a2, 48 10518; RV64ZVE32F-NEXT: srli a2, a2, 46 10519; RV64ZVE32F-NEXT: add a2, a0, a2 10520; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10521; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 10522; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 10523; RV64ZVE32F-NEXT: .LBB89_2: # %else 10524; RV64ZVE32F-NEXT: andi a2, a1, 2 10525; RV64ZVE32F-NEXT: beqz a2, .LBB89_4 10526; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 10527; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10528; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 10529; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10530; RV64ZVE32F-NEXT: slli a2, a2, 48 10531; RV64ZVE32F-NEXT: srli a2, a2, 46 10532; RV64ZVE32F-NEXT: add a2, a0, a2 10533; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10534; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10535; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 10536; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 10537; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 10538; RV64ZVE32F-NEXT: .LBB89_4: # %else2 10539; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 10540; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 10541; RV64ZVE32F-NEXT: andi a2, a1, 4 10542; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 10543; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 10544; RV64ZVE32F-NEXT: bnez a2, .LBB89_14 10545; RV64ZVE32F-NEXT: # %bb.5: # %else5 10546; RV64ZVE32F-NEXT: andi a2, a1, 8 10547; RV64ZVE32F-NEXT: bnez a2, .LBB89_15 10548; RV64ZVE32F-NEXT: .LBB89_6: # %else8 10549; RV64ZVE32F-NEXT: andi a2, a1, 16 10550; RV64ZVE32F-NEXT: bnez a2, .LBB89_16 10551; RV64ZVE32F-NEXT: .LBB89_7: # %else11 10552; RV64ZVE32F-NEXT: andi a2, a1, 32 10553; RV64ZVE32F-NEXT: beqz a2, .LBB89_9 10554; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13 10555; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10556; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 10557; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10558; RV64ZVE32F-NEXT: slli a2, a2, 48 10559; RV64ZVE32F-NEXT: srli a2, a2, 46 10560; RV64ZVE32F-NEXT: add a2, a0, a2 10561; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10562; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10563; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10564; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 10565; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 10566; RV64ZVE32F-NEXT: .LBB89_9: # %else14 10567; RV64ZVE32F-NEXT: andi a2, a1, 64 10568; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 10569; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 10570; RV64ZVE32F-NEXT: beqz a2, .LBB89_11 10571; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 10572; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10573; RV64ZVE32F-NEXT: slli a2, a2, 48 10574; RV64ZVE32F-NEXT: srli a2, a2, 46 10575; RV64ZVE32F-NEXT: add a2, a0, a2 10576; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10577; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10578; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10579; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 10580; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 10581; RV64ZVE32F-NEXT: .LBB89_11: # %else17 10582; RV64ZVE32F-NEXT: andi a1, a1, -128 10583; RV64ZVE32F-NEXT: beqz a1, .LBB89_13 10584; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 10585; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10586; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10587; RV64ZVE32F-NEXT: vmv.x.s a1, v8 10588; RV64ZVE32F-NEXT: slli a1, a1, 48 10589; RV64ZVE32F-NEXT: srli a1, a1, 46 10590; RV64ZVE32F-NEXT: add a0, a0, a1 10591; RV64ZVE32F-NEXT: flw fa5, 0(a0) 10592; RV64ZVE32F-NEXT: 
vsetvli zero, zero, e32, m1, ta, ma 10593; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10594; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 10595; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 10596; RV64ZVE32F-NEXT: .LBB89_13: # %else20 10597; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10598; RV64ZVE32F-NEXT: vmv2r.v v8, v10 10599; RV64ZVE32F-NEXT: ret 10600; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4 10601; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10602; RV64ZVE32F-NEXT: slli a2, a2, 48 10603; RV64ZVE32F-NEXT: srli a2, a2, 46 10604; RV64ZVE32F-NEXT: add a2, a0, a2 10605; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10606; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10607; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10608; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 10609; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 10610; RV64ZVE32F-NEXT: andi a2, a1, 8 10611; RV64ZVE32F-NEXT: beqz a2, .LBB89_6 10612; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7 10613; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 10614; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10615; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10616; RV64ZVE32F-NEXT: slli a2, a2, 48 10617; RV64ZVE32F-NEXT: srli a2, a2, 46 10618; RV64ZVE32F-NEXT: add a2, a0, a2 10619; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10620; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10621; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10622; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 10623; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 10624; RV64ZVE32F-NEXT: andi a2, a1, 16 10625; RV64ZVE32F-NEXT: beqz a2, .LBB89_7 10626; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10 10627; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 10628; RV64ZVE32F-NEXT: vmv.x.s a2, v9 10629; RV64ZVE32F-NEXT: slli a2, a2, 48 10630; RV64ZVE32F-NEXT: srli a2, a2, 46 10631; RV64ZVE32F-NEXT: add a2, a0, a2 10632; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10633; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 10634; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10635; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 10636; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 10637; RV64ZVE32F-NEXT: andi a2, a1, 32 10638; RV64ZVE32F-NEXT: bnez a2, .LBB89_8 10639; RV64ZVE32F-NEXT: j .LBB89_9 10640 %eidxs = zext <8 x i16> %idxs to <8 x i32> 10641 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs 10642 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 10643 ret <8 x float> %v 10644} 10645 10646define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x float> %passthru) { 10647; RV32-LABEL: mgather_baseidx_v8f32: 10648; RV32: # %bb.0: 10649; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu 10650; RV32-NEXT: vsll.vi v8, v8, 2 10651; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t 10652; RV32-NEXT: vmv.v.v v8, v10 10653; RV32-NEXT: ret 10654; 10655; RV64V-LABEL: mgather_baseidx_v8f32: 10656; RV64V: # %bb.0: 10657; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma 10658; RV64V-NEXT: vsext.vf2 v12, v8 10659; RV64V-NEXT: vsll.vi v12, v12, 2 10660; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu 10661; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t 10662; RV64V-NEXT: vmv.v.v v8, v10 10663; RV64V-NEXT: ret 10664; 10665; RV64ZVE32F-LABEL: mgather_baseidx_v8f32: 10666; RV64ZVE32F: # %bb.0: 10667; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10668; RV64ZVE32F-NEXT: vmv.x.s a1, v0 10669; RV64ZVE32F-NEXT: andi a2, a1, 1 10670; RV64ZVE32F-NEXT: beqz a2, .LBB90_2 10671; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 10672; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma 10673; 
RV64ZVE32F-NEXT: vmv.x.s a2, v8 10674; RV64ZVE32F-NEXT: slli a2, a2, 2 10675; RV64ZVE32F-NEXT: add a2, a0, a2 10676; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10677; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 10678; RV64ZVE32F-NEXT: .LBB90_2: # %else 10679; RV64ZVE32F-NEXT: andi a2, a1, 2 10680; RV64ZVE32F-NEXT: beqz a2, .LBB90_4 10681; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 10682; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma 10683; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 10684; RV64ZVE32F-NEXT: vmv.x.s a2, v12 10685; RV64ZVE32F-NEXT: slli a2, a2, 2 10686; RV64ZVE32F-NEXT: add a2, a0, a2 10687; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10688; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10689; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 10690; RV64ZVE32F-NEXT: .LBB90_4: # %else2 10691; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 10692; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 10693; RV64ZVE32F-NEXT: andi a2, a1, 4 10694; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 10695; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 10696; RV64ZVE32F-NEXT: bnez a2, .LBB90_14 10697; RV64ZVE32F-NEXT: # %bb.5: # %else5 10698; RV64ZVE32F-NEXT: andi a2, a1, 8 10699; RV64ZVE32F-NEXT: bnez a2, .LBB90_15 10700; RV64ZVE32F-NEXT: .LBB90_6: # %else8 10701; RV64ZVE32F-NEXT: andi a2, a1, 16 10702; RV64ZVE32F-NEXT: bnez a2, .LBB90_16 10703; RV64ZVE32F-NEXT: .LBB90_7: # %else11 10704; RV64ZVE32F-NEXT: andi a2, a1, 32 10705; RV64ZVE32F-NEXT: beqz a2, .LBB90_9 10706; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13 10707; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 10708; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1 10709; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10710; RV64ZVE32F-NEXT: slli a2, a2, 2 10711; RV64ZVE32F-NEXT: add a2, a0, a2 10712; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10713; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10714; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma 10715; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 10716; RV64ZVE32F-NEXT: .LBB90_9: # %else14 10717; RV64ZVE32F-NEXT: andi a2, a1, 64 10718; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 10719; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 10720; RV64ZVE32F-NEXT: beqz a2, .LBB90_11 10721; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 10722; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10723; RV64ZVE32F-NEXT: slli a2, a2, 2 10724; RV64ZVE32F-NEXT: add a2, a0, a2 10725; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10726; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 10727; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma 10728; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 10729; RV64ZVE32F-NEXT: .LBB90_11: # %else17 10730; RV64ZVE32F-NEXT: andi a1, a1, -128 10731; RV64ZVE32F-NEXT: beqz a1, .LBB90_13 10732; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 10733; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 10734; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10735; RV64ZVE32F-NEXT: vmv.x.s a1, v8 10736; RV64ZVE32F-NEXT: slli a1, a1, 2 10737; RV64ZVE32F-NEXT: add a0, a0, a1 10738; RV64ZVE32F-NEXT: flw fa5, 0(a0) 10739; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10740; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 10741; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 10742; RV64ZVE32F-NEXT: .LBB90_13: # %else20 10743; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10744; RV64ZVE32F-NEXT: vmv2r.v v8, v10 10745; RV64ZVE32F-NEXT: ret 10746; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4 10747; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10748; RV64ZVE32F-NEXT: slli a2, a2, 2 10749; RV64ZVE32F-NEXT: add a2, a0, a2 10750; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10751; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 10752; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma 10753; 
RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 10754; RV64ZVE32F-NEXT: andi a2, a1, 8 10755; RV64ZVE32F-NEXT: beqz a2, .LBB90_6 10756; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7 10757; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma 10758; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10759; RV64ZVE32F-NEXT: vmv.x.s a2, v8 10760; RV64ZVE32F-NEXT: slli a2, a2, 2 10761; RV64ZVE32F-NEXT: add a2, a0, a2 10762; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10763; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10764; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 10765; RV64ZVE32F-NEXT: andi a2, a1, 16 10766; RV64ZVE32F-NEXT: beqz a2, .LBB90_7 10767; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10 10768; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma 10769; RV64ZVE32F-NEXT: vmv.x.s a2, v12 10770; RV64ZVE32F-NEXT: slli a2, a2, 2 10771; RV64ZVE32F-NEXT: add a2, a0, a2 10772; RV64ZVE32F-NEXT: flw fa5, 0(a2) 10773; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 10774; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 10775; RV64ZVE32F-NEXT: andi a2, a1, 32 10776; RV64ZVE32F-NEXT: bnez a2, .LBB90_8 10777; RV64ZVE32F-NEXT: j .LBB90_9 10778 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs 10779 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) 10780 ret <8 x float> %v 10781} 10782 10783declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>) 10784 10785define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %passthru) { 10786; RV32V-LABEL: mgather_v1f64: 10787; RV32V: # %bb.0: 10788; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu 10789; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 10790; RV32V-NEXT: vmv.v.v v8, v9 10791; RV32V-NEXT: ret 10792; 10793; RV64V-LABEL: mgather_v1f64: 10794; RV64V: # %bb.0: 10795; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu 10796; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 10797; RV64V-NEXT: vmv.v.v v8, v9 10798; RV64V-NEXT: ret 10799; 10800; RV32ZVE32F-LABEL: mgather_v1f64: 10801; RV32ZVE32F: # %bb.0: 10802; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma 10803; RV32ZVE32F-NEXT: vfirst.m a0, v0 10804; RV32ZVE32F-NEXT: bnez a0, .LBB91_2 10805; RV32ZVE32F-NEXT: # %bb.1: # %cond.load 10806; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 10807; RV32ZVE32F-NEXT: vmv.x.s a0, v8 10808; RV32ZVE32F-NEXT: fld fa0, 0(a0) 10809; RV32ZVE32F-NEXT: .LBB91_2: # %else 10810; RV32ZVE32F-NEXT: ret 10811; 10812; RV64ZVE32F-LABEL: mgather_v1f64: 10813; RV64ZVE32F: # %bb.0: 10814; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 10815; RV64ZVE32F-NEXT: vfirst.m a1, v0 10816; RV64ZVE32F-NEXT: bnez a1, .LBB91_2 10817; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 10818; RV64ZVE32F-NEXT: fld fa0, 0(a0) 10819; RV64ZVE32F-NEXT: .LBB91_2: # %else 10820; RV64ZVE32F-NEXT: ret 10821 %v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru) 10822 ret <1 x double> %v 10823} 10824 10825declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>) 10826 10827define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %passthru) { 10828; RV32V-LABEL: mgather_v2f64: 10829; RV32V: # %bb.0: 10830; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu 10831; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t 10832; RV32V-NEXT: vmv.v.v v8, v9 10833; RV32V-NEXT: ret 10834; 10835; RV64V-LABEL: mgather_v2f64: 10836; RV64V: # %bb.0: 10837; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu 10838; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t 10839; RV64V-NEXT: 
vmv.v.v v8, v9 10840; RV64V-NEXT: ret 10841; 10842; RV32ZVE32F-LABEL: mgather_v2f64: 10843; RV32ZVE32F: # %bb.0: 10844; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10845; RV32ZVE32F-NEXT: vmv.x.s a0, v0 10846; RV32ZVE32F-NEXT: andi a1, a0, 1 10847; RV32ZVE32F-NEXT: bnez a1, .LBB92_3 10848; RV32ZVE32F-NEXT: # %bb.1: # %else 10849; RV32ZVE32F-NEXT: andi a0, a0, 2 10850; RV32ZVE32F-NEXT: bnez a0, .LBB92_4 10851; RV32ZVE32F-NEXT: .LBB92_2: # %else2 10852; RV32ZVE32F-NEXT: ret 10853; RV32ZVE32F-NEXT: .LBB92_3: # %cond.load 10854; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 10855; RV32ZVE32F-NEXT: vmv.x.s a1, v8 10856; RV32ZVE32F-NEXT: fld fa0, 0(a1) 10857; RV32ZVE32F-NEXT: andi a0, a0, 2 10858; RV32ZVE32F-NEXT: beqz a0, .LBB92_2 10859; RV32ZVE32F-NEXT: .LBB92_4: # %cond.load1 10860; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 10861; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 10862; RV32ZVE32F-NEXT: vmv.x.s a0, v8 10863; RV32ZVE32F-NEXT: fld fa1, 0(a0) 10864; RV32ZVE32F-NEXT: ret 10865; 10866; RV64ZVE32F-LABEL: mgather_v2f64: 10867; RV64ZVE32F: # %bb.0: 10868; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10869; RV64ZVE32F-NEXT: vmv.x.s a2, v0 10870; RV64ZVE32F-NEXT: andi a3, a2, 1 10871; RV64ZVE32F-NEXT: bnez a3, .LBB92_3 10872; RV64ZVE32F-NEXT: # %bb.1: # %else 10873; RV64ZVE32F-NEXT: andi a2, a2, 2 10874; RV64ZVE32F-NEXT: bnez a2, .LBB92_4 10875; RV64ZVE32F-NEXT: .LBB92_2: # %else2 10876; RV64ZVE32F-NEXT: ret 10877; RV64ZVE32F-NEXT: .LBB92_3: # %cond.load 10878; RV64ZVE32F-NEXT: fld fa0, 0(a0) 10879; RV64ZVE32F-NEXT: andi a2, a2, 2 10880; RV64ZVE32F-NEXT: beqz a2, .LBB92_2 10881; RV64ZVE32F-NEXT: .LBB92_4: # %cond.load1 10882; RV64ZVE32F-NEXT: fld fa1, 0(a1) 10883; RV64ZVE32F-NEXT: ret 10884 %v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru) 10885 ret <2 x double> %v 10886} 10887 10888declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>) 10889 10890define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %passthru) { 10891; RV32V-LABEL: mgather_v4f64: 10892; RV32V: # %bb.0: 10893; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu 10894; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t 10895; RV32V-NEXT: vmv.v.v v8, v10 10896; RV32V-NEXT: ret 10897; 10898; RV64V-LABEL: mgather_v4f64: 10899; RV64V: # %bb.0: 10900; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu 10901; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t 10902; RV64V-NEXT: vmv.v.v v8, v10 10903; RV64V-NEXT: ret 10904; 10905; RV32ZVE32F-LABEL: mgather_v4f64: 10906; RV32ZVE32F: # %bb.0: 10907; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10908; RV32ZVE32F-NEXT: vmv.x.s a1, v0 10909; RV32ZVE32F-NEXT: andi a2, a1, 1 10910; RV32ZVE32F-NEXT: bnez a2, .LBB93_6 10911; RV32ZVE32F-NEXT: # %bb.1: # %else 10912; RV32ZVE32F-NEXT: andi a2, a1, 2 10913; RV32ZVE32F-NEXT: bnez a2, .LBB93_7 10914; RV32ZVE32F-NEXT: .LBB93_2: # %else2 10915; RV32ZVE32F-NEXT: andi a2, a1, 4 10916; RV32ZVE32F-NEXT: bnez a2, .LBB93_8 10917; RV32ZVE32F-NEXT: .LBB93_3: # %else5 10918; RV32ZVE32F-NEXT: andi a1, a1, 8 10919; RV32ZVE32F-NEXT: beqz a1, .LBB93_5 10920; RV32ZVE32F-NEXT: .LBB93_4: # %cond.load7 10921; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 10922; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 10923; RV32ZVE32F-NEXT: vmv.x.s a1, v8 10924; RV32ZVE32F-NEXT: fld fa3, 0(a1) 10925; RV32ZVE32F-NEXT: .LBB93_5: # %else8 10926; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 10927; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 10928; RV32ZVE32F-NEXT: fsd fa2, 
16(a0) 10929; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 10930; RV32ZVE32F-NEXT: ret 10931; RV32ZVE32F-NEXT: .LBB93_6: # %cond.load 10932; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 10933; RV32ZVE32F-NEXT: vmv.x.s a2, v8 10934; RV32ZVE32F-NEXT: fld fa0, 0(a2) 10935; RV32ZVE32F-NEXT: andi a2, a1, 2 10936; RV32ZVE32F-NEXT: beqz a2, .LBB93_2 10937; RV32ZVE32F-NEXT: .LBB93_7: # %cond.load1 10938; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 10939; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 10940; RV32ZVE32F-NEXT: vmv.x.s a2, v9 10941; RV32ZVE32F-NEXT: fld fa1, 0(a2) 10942; RV32ZVE32F-NEXT: andi a2, a1, 4 10943; RV32ZVE32F-NEXT: beqz a2, .LBB93_3 10944; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load4 10945; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 10946; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 10947; RV32ZVE32F-NEXT: vmv.x.s a2, v9 10948; RV32ZVE32F-NEXT: fld fa2, 0(a2) 10949; RV32ZVE32F-NEXT: andi a1, a1, 8 10950; RV32ZVE32F-NEXT: bnez a1, .LBB93_4 10951; RV32ZVE32F-NEXT: j .LBB93_5 10952; 10953; RV64ZVE32F-LABEL: mgather_v4f64: 10954; RV64ZVE32F: # %bb.0: 10955; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 10956; RV64ZVE32F-NEXT: vmv.x.s a2, v0 10957; RV64ZVE32F-NEXT: andi a3, a2, 1 10958; RV64ZVE32F-NEXT: bnez a3, .LBB93_6 10959; RV64ZVE32F-NEXT: # %bb.1: # %else 10960; RV64ZVE32F-NEXT: andi a3, a2, 2 10961; RV64ZVE32F-NEXT: bnez a3, .LBB93_7 10962; RV64ZVE32F-NEXT: .LBB93_2: # %else2 10963; RV64ZVE32F-NEXT: andi a3, a2, 4 10964; RV64ZVE32F-NEXT: bnez a3, .LBB93_8 10965; RV64ZVE32F-NEXT: .LBB93_3: # %else5 10966; RV64ZVE32F-NEXT: andi a2, a2, 8 10967; RV64ZVE32F-NEXT: beqz a2, .LBB93_5 10968; RV64ZVE32F-NEXT: .LBB93_4: # %cond.load7 10969; RV64ZVE32F-NEXT: ld a1, 24(a1) 10970; RV64ZVE32F-NEXT: fld fa3, 0(a1) 10971; RV64ZVE32F-NEXT: .LBB93_5: # %else8 10972; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 10973; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 10974; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 10975; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 10976; RV64ZVE32F-NEXT: ret 10977; RV64ZVE32F-NEXT: .LBB93_6: # %cond.load 10978; RV64ZVE32F-NEXT: ld a3, 0(a1) 10979; RV64ZVE32F-NEXT: fld fa0, 0(a3) 10980; RV64ZVE32F-NEXT: andi a3, a2, 2 10981; RV64ZVE32F-NEXT: beqz a3, .LBB93_2 10982; RV64ZVE32F-NEXT: .LBB93_7: # %cond.load1 10983; RV64ZVE32F-NEXT: ld a3, 8(a1) 10984; RV64ZVE32F-NEXT: fld fa1, 0(a3) 10985; RV64ZVE32F-NEXT: andi a3, a2, 4 10986; RV64ZVE32F-NEXT: beqz a3, .LBB93_3 10987; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load4 10988; RV64ZVE32F-NEXT: ld a3, 16(a1) 10989; RV64ZVE32F-NEXT: fld fa2, 0(a3) 10990; RV64ZVE32F-NEXT: andi a2, a2, 8 10991; RV64ZVE32F-NEXT: bnez a2, .LBB93_4 10992; RV64ZVE32F-NEXT: j .LBB93_5 10993 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru) 10994 ret <4 x double> %v 10995} 10996 10997define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) { 10998; RV32V-LABEL: mgather_truemask_v4f64: 10999; RV32V: # %bb.0: 11000; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 11001; RV32V-NEXT: vluxei32.v v10, (zero), v8 11002; RV32V-NEXT: vmv.v.v v8, v10 11003; RV32V-NEXT: ret 11004; 11005; RV64V-LABEL: mgather_truemask_v4f64: 11006; RV64V: # %bb.0: 11007; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma 11008; RV64V-NEXT: vluxei64.v v8, (zero), v8 11009; RV64V-NEXT: ret 11010; 11011; RV32ZVE32F-LABEL: mgather_truemask_v4f64: 11012; RV32ZVE32F: # %bb.0: 11013; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11014; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11015; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 11016; RV32ZVE32F-NEXT: 
fld fa5, 0(a1) 11017; RV32ZVE32F-NEXT: vmv.x.s a1, v9 11018; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 11019; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 11020; RV32ZVE32F-NEXT: fld fa4, 0(a1) 11021; RV32ZVE32F-NEXT: vmv.x.s a1, v9 11022; RV32ZVE32F-NEXT: fld fa3, 0(a1) 11023; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11024; RV32ZVE32F-NEXT: fld fa2, 0(a1) 11025; RV32ZVE32F-NEXT: fsd fa5, 0(a0) 11026; RV32ZVE32F-NEXT: fsd fa4, 8(a0) 11027; RV32ZVE32F-NEXT: fsd fa3, 16(a0) 11028; RV32ZVE32F-NEXT: fsd fa2, 24(a0) 11029; RV32ZVE32F-NEXT: ret 11030; 11031; RV64ZVE32F-LABEL: mgather_truemask_v4f64: 11032; RV64ZVE32F: # %bb.0: 11033; RV64ZVE32F-NEXT: ld a2, 0(a1) 11034; RV64ZVE32F-NEXT: ld a3, 8(a1) 11035; RV64ZVE32F-NEXT: ld a4, 16(a1) 11036; RV64ZVE32F-NEXT: ld a1, 24(a1) 11037; RV64ZVE32F-NEXT: fld fa5, 0(a2) 11038; RV64ZVE32F-NEXT: fld fa4, 0(a3) 11039; RV64ZVE32F-NEXT: fld fa3, 0(a4) 11040; RV64ZVE32F-NEXT: fld fa2, 0(a1) 11041; RV64ZVE32F-NEXT: fsd fa5, 0(a0) 11042; RV64ZVE32F-NEXT: fsd fa4, 8(a0) 11043; RV64ZVE32F-NEXT: fsd fa3, 16(a0) 11044; RV64ZVE32F-NEXT: fsd fa2, 24(a0) 11045; RV64ZVE32F-NEXT: ret 11046 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru) 11047 ret <4 x double> %v 11048} 11049 11050define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) { 11051; RV32V-LABEL: mgather_falsemask_v4f64: 11052; RV32V: # %bb.0: 11053; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11054; RV32V-NEXT: vmv2r.v v8, v10 11055; RV32V-NEXT: ret 11056; 11057; RV64V-LABEL: mgather_falsemask_v4f64: 11058; RV64V: # %bb.0: 11059; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11060; RV64V-NEXT: vmv2r.v v8, v10 11061; RV64V-NEXT: ret 11062; 11063; RV32ZVE32F-LABEL: mgather_falsemask_v4f64: 11064; RV32ZVE32F: # %bb.0: 11065; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11066; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 11067; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 11068; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 11069; RV32ZVE32F-NEXT: ret 11070; 11071; RV64ZVE32F-LABEL: mgather_falsemask_v4f64: 11072; RV64ZVE32F: # %bb.0: 11073; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 11074; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 11075; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 11076; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 11077; RV64ZVE32F-NEXT: ret 11078 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru) 11079 ret <4 x double> %v 11080} 11081 11082declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>) 11083 11084define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %passthru) { 11085; RV32V-LABEL: mgather_v8f64: 11086; RV32V: # %bb.0: 11087; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 11088; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t 11089; RV32V-NEXT: vmv.v.v v8, v12 11090; RV32V-NEXT: ret 11091; 11092; RV64V-LABEL: mgather_v8f64: 11093; RV64V: # %bb.0: 11094; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 11095; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t 11096; RV64V-NEXT: vmv.v.v v8, v12 11097; RV64V-NEXT: ret 11098; 11099; RV32ZVE32F-LABEL: mgather_v8f64: 11100; RV32ZVE32F: # %bb.0: 11101; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11102; RV32ZVE32F-NEXT: vmv.x.s a1, v0 11103; RV32ZVE32F-NEXT: andi a2, a1, 1 11104; RV32ZVE32F-NEXT: bnez a2, .LBB96_10 11105; RV32ZVE32F-NEXT: # %bb.1: # %else 11106; RV32ZVE32F-NEXT: andi a2, a1, 2 11107; RV32ZVE32F-NEXT: bnez a2, .LBB96_11 11108; RV32ZVE32F-NEXT: .LBB96_2: # %else2 11109; RV32ZVE32F-NEXT: andi a2, a1, 4 
11110; RV32ZVE32F-NEXT: bnez a2, .LBB96_12 11111; RV32ZVE32F-NEXT: .LBB96_3: # %else5 11112; RV32ZVE32F-NEXT: andi a2, a1, 8 11113; RV32ZVE32F-NEXT: bnez a2, .LBB96_13 11114; RV32ZVE32F-NEXT: .LBB96_4: # %else8 11115; RV32ZVE32F-NEXT: andi a2, a1, 16 11116; RV32ZVE32F-NEXT: bnez a2, .LBB96_14 11117; RV32ZVE32F-NEXT: .LBB96_5: # %else11 11118; RV32ZVE32F-NEXT: andi a2, a1, 32 11119; RV32ZVE32F-NEXT: bnez a2, .LBB96_15 11120; RV32ZVE32F-NEXT: .LBB96_6: # %else14 11121; RV32ZVE32F-NEXT: andi a2, a1, 64 11122; RV32ZVE32F-NEXT: bnez a2, .LBB96_16 11123; RV32ZVE32F-NEXT: .LBB96_7: # %else17 11124; RV32ZVE32F-NEXT: andi a1, a1, -128 11125; RV32ZVE32F-NEXT: beqz a1, .LBB96_9 11126; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19 11127; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11128; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11129; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11130; RV32ZVE32F-NEXT: fld fa7, 0(a1) 11131; RV32ZVE32F-NEXT: .LBB96_9: # %else20 11132; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11133; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 11134; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 11135; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 11136; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 11137; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 11138; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 11139; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 11140; RV32ZVE32F-NEXT: ret 11141; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load 11142; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 11143; RV32ZVE32F-NEXT: vmv.x.s a2, v8 11144; RV32ZVE32F-NEXT: fld fa0, 0(a2) 11145; RV32ZVE32F-NEXT: andi a2, a1, 2 11146; RV32ZVE32F-NEXT: beqz a2, .LBB96_2 11147; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1 11148; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11149; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11150; RV32ZVE32F-NEXT: vmv.x.s a2, v10 11151; RV32ZVE32F-NEXT: fld fa1, 0(a2) 11152; RV32ZVE32F-NEXT: andi a2, a1, 4 11153; RV32ZVE32F-NEXT: beqz a2, .LBB96_3 11154; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4 11155; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11156; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 11157; RV32ZVE32F-NEXT: vmv.x.s a2, v10 11158; RV32ZVE32F-NEXT: fld fa2, 0(a2) 11159; RV32ZVE32F-NEXT: andi a2, a1, 8 11160; RV32ZVE32F-NEXT: beqz a2, .LBB96_4 11161; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7 11162; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11163; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 11164; RV32ZVE32F-NEXT: vmv.x.s a2, v10 11165; RV32ZVE32F-NEXT: fld fa3, 0(a2) 11166; RV32ZVE32F-NEXT: andi a2, a1, 16 11167; RV32ZVE32F-NEXT: beqz a2, .LBB96_5 11168; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10 11169; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11170; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11171; RV32ZVE32F-NEXT: vmv.x.s a2, v10 11172; RV32ZVE32F-NEXT: fld fa4, 0(a2) 11173; RV32ZVE32F-NEXT: andi a2, a1, 32 11174; RV32ZVE32F-NEXT: beqz a2, .LBB96_6 11175; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13 11176; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11177; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 11178; RV32ZVE32F-NEXT: vmv.x.s a2, v10 11179; RV32ZVE32F-NEXT: fld fa5, 0(a2) 11180; RV32ZVE32F-NEXT: andi a2, a1, 64 11181; RV32ZVE32F-NEXT: beqz a2, .LBB96_7 11182; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16 11183; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11184; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 11185; RV32ZVE32F-NEXT: vmv.x.s a2, v10 11186; RV32ZVE32F-NEXT: fld fa6, 0(a2) 11187; RV32ZVE32F-NEXT: andi a1, a1, -128 11188; RV32ZVE32F-NEXT: bnez a1, .LBB96_8 11189; RV32ZVE32F-NEXT: j .LBB96_9 11190; 11191; RV64ZVE32F-LABEL: mgather_v8f64: 
11192; RV64ZVE32F: # %bb.0: 11193; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11194; RV64ZVE32F-NEXT: vmv.x.s a2, v0 11195; RV64ZVE32F-NEXT: andi a3, a2, 1 11196; RV64ZVE32F-NEXT: bnez a3, .LBB96_10 11197; RV64ZVE32F-NEXT: # %bb.1: # %else 11198; RV64ZVE32F-NEXT: andi a3, a2, 2 11199; RV64ZVE32F-NEXT: bnez a3, .LBB96_11 11200; RV64ZVE32F-NEXT: .LBB96_2: # %else2 11201; RV64ZVE32F-NEXT: andi a3, a2, 4 11202; RV64ZVE32F-NEXT: bnez a3, .LBB96_12 11203; RV64ZVE32F-NEXT: .LBB96_3: # %else5 11204; RV64ZVE32F-NEXT: andi a3, a2, 8 11205; RV64ZVE32F-NEXT: bnez a3, .LBB96_13 11206; RV64ZVE32F-NEXT: .LBB96_4: # %else8 11207; RV64ZVE32F-NEXT: andi a3, a2, 16 11208; RV64ZVE32F-NEXT: bnez a3, .LBB96_14 11209; RV64ZVE32F-NEXT: .LBB96_5: # %else11 11210; RV64ZVE32F-NEXT: andi a3, a2, 32 11211; RV64ZVE32F-NEXT: bnez a3, .LBB96_15 11212; RV64ZVE32F-NEXT: .LBB96_6: # %else14 11213; RV64ZVE32F-NEXT: andi a3, a2, 64 11214; RV64ZVE32F-NEXT: bnez a3, .LBB96_16 11215; RV64ZVE32F-NEXT: .LBB96_7: # %else17 11216; RV64ZVE32F-NEXT: andi a2, a2, -128 11217; RV64ZVE32F-NEXT: beqz a2, .LBB96_9 11218; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19 11219; RV64ZVE32F-NEXT: ld a1, 56(a1) 11220; RV64ZVE32F-NEXT: fld fa7, 0(a1) 11221; RV64ZVE32F-NEXT: .LBB96_9: # %else20 11222; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 11223; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 11224; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 11225; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 11226; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 11227; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 11228; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 11229; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 11230; RV64ZVE32F-NEXT: ret 11231; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load 11232; RV64ZVE32F-NEXT: ld a3, 0(a1) 11233; RV64ZVE32F-NEXT: fld fa0, 0(a3) 11234; RV64ZVE32F-NEXT: andi a3, a2, 2 11235; RV64ZVE32F-NEXT: beqz a3, .LBB96_2 11236; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1 11237; RV64ZVE32F-NEXT: ld a3, 8(a1) 11238; RV64ZVE32F-NEXT: fld fa1, 0(a3) 11239; RV64ZVE32F-NEXT: andi a3, a2, 4 11240; RV64ZVE32F-NEXT: beqz a3, .LBB96_3 11241; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4 11242; RV64ZVE32F-NEXT: ld a3, 16(a1) 11243; RV64ZVE32F-NEXT: fld fa2, 0(a3) 11244; RV64ZVE32F-NEXT: andi a3, a2, 8 11245; RV64ZVE32F-NEXT: beqz a3, .LBB96_4 11246; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7 11247; RV64ZVE32F-NEXT: ld a3, 24(a1) 11248; RV64ZVE32F-NEXT: fld fa3, 0(a3) 11249; RV64ZVE32F-NEXT: andi a3, a2, 16 11250; RV64ZVE32F-NEXT: beqz a3, .LBB96_5 11251; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10 11252; RV64ZVE32F-NEXT: ld a3, 32(a1) 11253; RV64ZVE32F-NEXT: fld fa4, 0(a3) 11254; RV64ZVE32F-NEXT: andi a3, a2, 32 11255; RV64ZVE32F-NEXT: beqz a3, .LBB96_6 11256; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13 11257; RV64ZVE32F-NEXT: ld a3, 40(a1) 11258; RV64ZVE32F-NEXT: fld fa5, 0(a3) 11259; RV64ZVE32F-NEXT: andi a3, a2, 64 11260; RV64ZVE32F-NEXT: beqz a3, .LBB96_7 11261; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16 11262; RV64ZVE32F-NEXT: ld a3, 48(a1) 11263; RV64ZVE32F-NEXT: fld fa6, 0(a3) 11264; RV64ZVE32F-NEXT: andi a2, a2, -128 11265; RV64ZVE32F-NEXT: bnez a2, .LBB96_8 11266; RV64ZVE32F-NEXT: j .LBB96_9 11267 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 11268 ret <8 x double> %v 11269} 11270 11271define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { 11272; RV32V-LABEL: mgather_baseidx_v8i8_v8f64: 11273; RV32V: # %bb.0: 11274; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11275; RV32V-NEXT: vsext.vf4 v10, v8 11276; RV32V-NEXT: 
vsll.vi v8, v10, 3 11277; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 11278; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 11279; RV32V-NEXT: vmv.v.v v8, v12 11280; RV32V-NEXT: ret 11281; 11282; RV64V-LABEL: mgather_baseidx_v8i8_v8f64: 11283; RV64V: # %bb.0: 11284; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 11285; RV64V-NEXT: vsext.vf8 v16, v8 11286; RV64V-NEXT: vsll.vi v8, v16, 3 11287; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 11288; RV64V-NEXT: vmv.v.v v8, v12 11289; RV64V-NEXT: ret 11290; 11291; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64: 11292; RV32ZVE32F: # %bb.0: 11293; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11294; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 11295; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 11296; RV32ZVE32F-NEXT: vmv.x.s a2, v0 11297; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 11298; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 11299; RV32ZVE32F-NEXT: andi a3, a2, 1 11300; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 11301; RV32ZVE32F-NEXT: bnez a3, .LBB97_10 11302; RV32ZVE32F-NEXT: # %bb.1: # %else 11303; RV32ZVE32F-NEXT: andi a1, a2, 2 11304; RV32ZVE32F-NEXT: bnez a1, .LBB97_11 11305; RV32ZVE32F-NEXT: .LBB97_2: # %else2 11306; RV32ZVE32F-NEXT: andi a1, a2, 4 11307; RV32ZVE32F-NEXT: bnez a1, .LBB97_12 11308; RV32ZVE32F-NEXT: .LBB97_3: # %else5 11309; RV32ZVE32F-NEXT: andi a1, a2, 8 11310; RV32ZVE32F-NEXT: bnez a1, .LBB97_13 11311; RV32ZVE32F-NEXT: .LBB97_4: # %else8 11312; RV32ZVE32F-NEXT: andi a1, a2, 16 11313; RV32ZVE32F-NEXT: bnez a1, .LBB97_14 11314; RV32ZVE32F-NEXT: .LBB97_5: # %else11 11315; RV32ZVE32F-NEXT: andi a1, a2, 32 11316; RV32ZVE32F-NEXT: bnez a1, .LBB97_15 11317; RV32ZVE32F-NEXT: .LBB97_6: # %else14 11318; RV32ZVE32F-NEXT: andi a1, a2, 64 11319; RV32ZVE32F-NEXT: bnez a1, .LBB97_16 11320; RV32ZVE32F-NEXT: .LBB97_7: # %else17 11321; RV32ZVE32F-NEXT: andi a1, a2, -128 11322; RV32ZVE32F-NEXT: beqz a1, .LBB97_9 11323; RV32ZVE32F-NEXT: .LBB97_8: # %cond.load19 11324; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11325; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11326; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11327; RV32ZVE32F-NEXT: fld fa7, 0(a1) 11328; RV32ZVE32F-NEXT: .LBB97_9: # %else20 11329; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11330; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 11331; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 11332; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 11333; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 11334; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 11335; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 11336; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 11337; RV32ZVE32F-NEXT: ret 11338; RV32ZVE32F-NEXT: .LBB97_10: # %cond.load 11339; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11340; RV32ZVE32F-NEXT: fld fa0, 0(a1) 11341; RV32ZVE32F-NEXT: andi a1, a2, 2 11342; RV32ZVE32F-NEXT: beqz a1, .LBB97_2 11343; RV32ZVE32F-NEXT: .LBB97_11: # %cond.load1 11344; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11345; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11346; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11347; RV32ZVE32F-NEXT: fld fa1, 0(a1) 11348; RV32ZVE32F-NEXT: andi a1, a2, 4 11349; RV32ZVE32F-NEXT: beqz a1, .LBB97_3 11350; RV32ZVE32F-NEXT: .LBB97_12: # %cond.load4 11351; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11352; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 11353; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11354; RV32ZVE32F-NEXT: fld fa2, 0(a1) 11355; RV32ZVE32F-NEXT: andi a1, a2, 8 11356; RV32ZVE32F-NEXT: beqz a1, .LBB97_4 11357; RV32ZVE32F-NEXT: .LBB97_13: # %cond.load7 11358; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11359; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 11360; RV32ZVE32F-NEXT: vmv.x.s a1, v10 
11361; RV32ZVE32F-NEXT: fld fa3, 0(a1) 11362; RV32ZVE32F-NEXT: andi a1, a2, 16 11363; RV32ZVE32F-NEXT: beqz a1, .LBB97_5 11364; RV32ZVE32F-NEXT: .LBB97_14: # %cond.load10 11365; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11366; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11367; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11368; RV32ZVE32F-NEXT: fld fa4, 0(a1) 11369; RV32ZVE32F-NEXT: andi a1, a2, 32 11370; RV32ZVE32F-NEXT: beqz a1, .LBB97_6 11371; RV32ZVE32F-NEXT: .LBB97_15: # %cond.load13 11372; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11373; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 11374; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11375; RV32ZVE32F-NEXT: fld fa5, 0(a1) 11376; RV32ZVE32F-NEXT: andi a1, a2, 64 11377; RV32ZVE32F-NEXT: beqz a1, .LBB97_7 11378; RV32ZVE32F-NEXT: .LBB97_16: # %cond.load16 11379; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11380; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 11381; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11382; RV32ZVE32F-NEXT: fld fa6, 0(a1) 11383; RV32ZVE32F-NEXT: andi a1, a2, -128 11384; RV32ZVE32F-NEXT: bnez a1, .LBB97_8 11385; RV32ZVE32F-NEXT: j .LBB97_9 11386; 11387; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64: 11388; RV64ZVE32F: # %bb.0: 11389; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11390; RV64ZVE32F-NEXT: vmv.x.s a2, v0 11391; RV64ZVE32F-NEXT: andi a3, a2, 1 11392; RV64ZVE32F-NEXT: beqz a3, .LBB97_2 11393; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 11394; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11395; RV64ZVE32F-NEXT: slli a3, a3, 3 11396; RV64ZVE32F-NEXT: add a3, a1, a3 11397; RV64ZVE32F-NEXT: fld fa0, 0(a3) 11398; RV64ZVE32F-NEXT: .LBB97_2: # %else 11399; RV64ZVE32F-NEXT: andi a3, a2, 2 11400; RV64ZVE32F-NEXT: beqz a3, .LBB97_4 11401; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 11402; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 11403; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 11404; RV64ZVE32F-NEXT: vmv.x.s a3, v9 11405; RV64ZVE32F-NEXT: slli a3, a3, 3 11406; RV64ZVE32F-NEXT: add a3, a1, a3 11407; RV64ZVE32F-NEXT: fld fa1, 0(a3) 11408; RV64ZVE32F-NEXT: .LBB97_4: # %else2 11409; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 11410; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 11411; RV64ZVE32F-NEXT: andi a3, a2, 4 11412; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 11413; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 11414; RV64ZVE32F-NEXT: bnez a3, .LBB97_14 11415; RV64ZVE32F-NEXT: # %bb.5: # %else5 11416; RV64ZVE32F-NEXT: andi a3, a2, 8 11417; RV64ZVE32F-NEXT: bnez a3, .LBB97_15 11418; RV64ZVE32F-NEXT: .LBB97_6: # %else8 11419; RV64ZVE32F-NEXT: andi a3, a2, 16 11420; RV64ZVE32F-NEXT: bnez a3, .LBB97_16 11421; RV64ZVE32F-NEXT: .LBB97_7: # %else11 11422; RV64ZVE32F-NEXT: andi a3, a2, 32 11423; RV64ZVE32F-NEXT: beqz a3, .LBB97_9 11424; RV64ZVE32F-NEXT: .LBB97_8: # %cond.load13 11425; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 11426; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11427; RV64ZVE32F-NEXT: slli a3, a3, 3 11428; RV64ZVE32F-NEXT: add a3, a1, a3 11429; RV64ZVE32F-NEXT: fld fa5, 0(a3) 11430; RV64ZVE32F-NEXT: .LBB97_9: # %else14 11431; RV64ZVE32F-NEXT: andi a3, a2, 64 11432; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 11433; RV64ZVE32F-NEXT: beqz a3, .LBB97_11 11434; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 11435; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11436; RV64ZVE32F-NEXT: slli a3, a3, 3 11437; RV64ZVE32F-NEXT: add a3, a1, a3 11438; RV64ZVE32F-NEXT: fld fa6, 0(a3) 11439; RV64ZVE32F-NEXT: .LBB97_11: # %else17 11440; RV64ZVE32F-NEXT: andi a2, a2, -128 11441; RV64ZVE32F-NEXT: beqz a2, .LBB97_13 11442; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 11443; 
RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11444; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11445; RV64ZVE32F-NEXT: slli a2, a2, 3 11446; RV64ZVE32F-NEXT: add a1, a1, a2 11447; RV64ZVE32F-NEXT: fld fa7, 0(a1) 11448; RV64ZVE32F-NEXT: .LBB97_13: # %else20 11449; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 11450; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 11451; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 11452; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 11453; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 11454; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 11455; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 11456; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 11457; RV64ZVE32F-NEXT: ret 11458; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load4 11459; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11460; RV64ZVE32F-NEXT: slli a3, a3, 3 11461; RV64ZVE32F-NEXT: add a3, a1, a3 11462; RV64ZVE32F-NEXT: fld fa2, 0(a3) 11463; RV64ZVE32F-NEXT: andi a3, a2, 8 11464; RV64ZVE32F-NEXT: beqz a3, .LBB97_6 11465; RV64ZVE32F-NEXT: .LBB97_15: # %cond.load7 11466; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11467; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11468; RV64ZVE32F-NEXT: slli a3, a3, 3 11469; RV64ZVE32F-NEXT: add a3, a1, a3 11470; RV64ZVE32F-NEXT: fld fa3, 0(a3) 11471; RV64ZVE32F-NEXT: andi a3, a2, 16 11472; RV64ZVE32F-NEXT: beqz a3, .LBB97_7 11473; RV64ZVE32F-NEXT: .LBB97_16: # %cond.load10 11474; RV64ZVE32F-NEXT: vmv.x.s a3, v9 11475; RV64ZVE32F-NEXT: slli a3, a3, 3 11476; RV64ZVE32F-NEXT: add a3, a1, a3 11477; RV64ZVE32F-NEXT: fld fa4, 0(a3) 11478; RV64ZVE32F-NEXT: andi a3, a2, 32 11479; RV64ZVE32F-NEXT: bnez a3, .LBB97_8 11480; RV64ZVE32F-NEXT: j .LBB97_9 11481 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs 11482 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 11483 ret <8 x double> %v 11484} 11485 11486define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { 11487; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8f64: 11488; RV32V: # %bb.0: 11489; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11490; RV32V-NEXT: vsext.vf4 v10, v8 11491; RV32V-NEXT: vsll.vi v8, v10, 3 11492; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 11493; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 11494; RV32V-NEXT: vmv.v.v v8, v12 11495; RV32V-NEXT: ret 11496; 11497; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f64: 11498; RV64V: # %bb.0: 11499; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 11500; RV64V-NEXT: vsext.vf8 v16, v8 11501; RV64V-NEXT: vsll.vi v8, v16, 3 11502; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 11503; RV64V-NEXT: vmv.v.v v8, v12 11504; RV64V-NEXT: ret 11505; 11506; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64: 11507; RV32ZVE32F: # %bb.0: 11508; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11509; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 11510; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 11511; RV32ZVE32F-NEXT: vmv.x.s a2, v0 11512; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 11513; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 11514; RV32ZVE32F-NEXT: andi a3, a2, 1 11515; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 11516; RV32ZVE32F-NEXT: bnez a3, .LBB98_10 11517; RV32ZVE32F-NEXT: # %bb.1: # %else 11518; RV32ZVE32F-NEXT: andi a1, a2, 2 11519; RV32ZVE32F-NEXT: bnez a1, .LBB98_11 11520; RV32ZVE32F-NEXT: .LBB98_2: # %else2 11521; RV32ZVE32F-NEXT: andi a1, a2, 4 11522; RV32ZVE32F-NEXT: bnez a1, .LBB98_12 11523; RV32ZVE32F-NEXT: .LBB98_3: # %else5 11524; RV32ZVE32F-NEXT: andi a1, a2, 8 11525; RV32ZVE32F-NEXT: bnez a1, .LBB98_13 11526; RV32ZVE32F-NEXT: .LBB98_4: # %else8 11527; RV32ZVE32F-NEXT: andi a1, a2, 16 
11528; RV32ZVE32F-NEXT: bnez a1, .LBB98_14 11529; RV32ZVE32F-NEXT: .LBB98_5: # %else11 11530; RV32ZVE32F-NEXT: andi a1, a2, 32 11531; RV32ZVE32F-NEXT: bnez a1, .LBB98_15 11532; RV32ZVE32F-NEXT: .LBB98_6: # %else14 11533; RV32ZVE32F-NEXT: andi a1, a2, 64 11534; RV32ZVE32F-NEXT: bnez a1, .LBB98_16 11535; RV32ZVE32F-NEXT: .LBB98_7: # %else17 11536; RV32ZVE32F-NEXT: andi a1, a2, -128 11537; RV32ZVE32F-NEXT: beqz a1, .LBB98_9 11538; RV32ZVE32F-NEXT: .LBB98_8: # %cond.load19 11539; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11540; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11541; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11542; RV32ZVE32F-NEXT: fld fa7, 0(a1) 11543; RV32ZVE32F-NEXT: .LBB98_9: # %else20 11544; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11545; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 11546; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 11547; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 11548; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 11549; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 11550; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 11551; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 11552; RV32ZVE32F-NEXT: ret 11553; RV32ZVE32F-NEXT: .LBB98_10: # %cond.load 11554; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11555; RV32ZVE32F-NEXT: fld fa0, 0(a1) 11556; RV32ZVE32F-NEXT: andi a1, a2, 2 11557; RV32ZVE32F-NEXT: beqz a1, .LBB98_2 11558; RV32ZVE32F-NEXT: .LBB98_11: # %cond.load1 11559; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11560; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11561; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11562; RV32ZVE32F-NEXT: fld fa1, 0(a1) 11563; RV32ZVE32F-NEXT: andi a1, a2, 4 11564; RV32ZVE32F-NEXT: beqz a1, .LBB98_3 11565; RV32ZVE32F-NEXT: .LBB98_12: # %cond.load4 11566; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11567; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 11568; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11569; RV32ZVE32F-NEXT: fld fa2, 0(a1) 11570; RV32ZVE32F-NEXT: andi a1, a2, 8 11571; RV32ZVE32F-NEXT: beqz a1, .LBB98_4 11572; RV32ZVE32F-NEXT: .LBB98_13: # %cond.load7 11573; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11574; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 11575; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11576; RV32ZVE32F-NEXT: fld fa3, 0(a1) 11577; RV32ZVE32F-NEXT: andi a1, a2, 16 11578; RV32ZVE32F-NEXT: beqz a1, .LBB98_5 11579; RV32ZVE32F-NEXT: .LBB98_14: # %cond.load10 11580; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11581; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11582; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11583; RV32ZVE32F-NEXT: fld fa4, 0(a1) 11584; RV32ZVE32F-NEXT: andi a1, a2, 32 11585; RV32ZVE32F-NEXT: beqz a1, .LBB98_6 11586; RV32ZVE32F-NEXT: .LBB98_15: # %cond.load13 11587; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11588; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 11589; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11590; RV32ZVE32F-NEXT: fld fa5, 0(a1) 11591; RV32ZVE32F-NEXT: andi a1, a2, 64 11592; RV32ZVE32F-NEXT: beqz a1, .LBB98_7 11593; RV32ZVE32F-NEXT: .LBB98_16: # %cond.load16 11594; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11595; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 11596; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11597; RV32ZVE32F-NEXT: fld fa6, 0(a1) 11598; RV32ZVE32F-NEXT: andi a1, a2, -128 11599; RV32ZVE32F-NEXT: bnez a1, .LBB98_8 11600; RV32ZVE32F-NEXT: j .LBB98_9 11601; 11602; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64: 11603; RV64ZVE32F: # %bb.0: 11604; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11605; RV64ZVE32F-NEXT: vmv.x.s a2, v0 11606; RV64ZVE32F-NEXT: andi a3, a2, 1 11607; RV64ZVE32F-NEXT: beqz a3, .LBB98_2 11608; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 11609; RV64ZVE32F-NEXT: vmv.x.s a3, v8 
11610; RV64ZVE32F-NEXT: slli a3, a3, 3 11611; RV64ZVE32F-NEXT: add a3, a1, a3 11612; RV64ZVE32F-NEXT: fld fa0, 0(a3) 11613; RV64ZVE32F-NEXT: .LBB98_2: # %else 11614; RV64ZVE32F-NEXT: andi a3, a2, 2 11615; RV64ZVE32F-NEXT: beqz a3, .LBB98_4 11616; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 11617; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 11618; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 11619; RV64ZVE32F-NEXT: vmv.x.s a3, v9 11620; RV64ZVE32F-NEXT: slli a3, a3, 3 11621; RV64ZVE32F-NEXT: add a3, a1, a3 11622; RV64ZVE32F-NEXT: fld fa1, 0(a3) 11623; RV64ZVE32F-NEXT: .LBB98_4: # %else2 11624; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 11625; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 11626; RV64ZVE32F-NEXT: andi a3, a2, 4 11627; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 11628; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 11629; RV64ZVE32F-NEXT: bnez a3, .LBB98_14 11630; RV64ZVE32F-NEXT: # %bb.5: # %else5 11631; RV64ZVE32F-NEXT: andi a3, a2, 8 11632; RV64ZVE32F-NEXT: bnez a3, .LBB98_15 11633; RV64ZVE32F-NEXT: .LBB98_6: # %else8 11634; RV64ZVE32F-NEXT: andi a3, a2, 16 11635; RV64ZVE32F-NEXT: bnez a3, .LBB98_16 11636; RV64ZVE32F-NEXT: .LBB98_7: # %else11 11637; RV64ZVE32F-NEXT: andi a3, a2, 32 11638; RV64ZVE32F-NEXT: beqz a3, .LBB98_9 11639; RV64ZVE32F-NEXT: .LBB98_8: # %cond.load13 11640; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 11641; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11642; RV64ZVE32F-NEXT: slli a3, a3, 3 11643; RV64ZVE32F-NEXT: add a3, a1, a3 11644; RV64ZVE32F-NEXT: fld fa5, 0(a3) 11645; RV64ZVE32F-NEXT: .LBB98_9: # %else14 11646; RV64ZVE32F-NEXT: andi a3, a2, 64 11647; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 11648; RV64ZVE32F-NEXT: beqz a3, .LBB98_11 11649; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 11650; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11651; RV64ZVE32F-NEXT: slli a3, a3, 3 11652; RV64ZVE32F-NEXT: add a3, a1, a3 11653; RV64ZVE32F-NEXT: fld fa6, 0(a3) 11654; RV64ZVE32F-NEXT: .LBB98_11: # %else17 11655; RV64ZVE32F-NEXT: andi a2, a2, -128 11656; RV64ZVE32F-NEXT: beqz a2, .LBB98_13 11657; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 11658; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11659; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11660; RV64ZVE32F-NEXT: slli a2, a2, 3 11661; RV64ZVE32F-NEXT: add a1, a1, a2 11662; RV64ZVE32F-NEXT: fld fa7, 0(a1) 11663; RV64ZVE32F-NEXT: .LBB98_13: # %else20 11664; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 11665; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 11666; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 11667; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 11668; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 11669; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 11670; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 11671; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 11672; RV64ZVE32F-NEXT: ret 11673; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load4 11674; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11675; RV64ZVE32F-NEXT: slli a3, a3, 3 11676; RV64ZVE32F-NEXT: add a3, a1, a3 11677; RV64ZVE32F-NEXT: fld fa2, 0(a3) 11678; RV64ZVE32F-NEXT: andi a3, a2, 8 11679; RV64ZVE32F-NEXT: beqz a3, .LBB98_6 11680; RV64ZVE32F-NEXT: .LBB98_15: # %cond.load7 11681; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11682; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11683; RV64ZVE32F-NEXT: slli a3, a3, 3 11684; RV64ZVE32F-NEXT: add a3, a1, a3 11685; RV64ZVE32F-NEXT: fld fa3, 0(a3) 11686; RV64ZVE32F-NEXT: andi a3, a2, 16 11687; RV64ZVE32F-NEXT: beqz a3, .LBB98_7 11688; RV64ZVE32F-NEXT: .LBB98_16: # %cond.load10 11689; RV64ZVE32F-NEXT: vmv.x.s a3, v9 11690; RV64ZVE32F-NEXT: slli a3, a3, 3 11691; RV64ZVE32F-NEXT: add a3, a1, a3 11692; RV64ZVE32F-NEXT: fld fa4, 0(a3) 11693; RV64ZVE32F-NEXT: andi a3, a2, 32 11694; 
RV64ZVE32F-NEXT: bnez a3, .LBB98_8 11695; RV64ZVE32F-NEXT: j .LBB98_9 11696 %eidxs = sext <8 x i8> %idxs to <8 x i64> 11697 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 11698 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 11699 ret <8 x double> %v 11700} 11701 11702define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) { 11703; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64: 11704; RV32V: # %bb.0: 11705; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 11706; RV32V-NEXT: vzext.vf2 v9, v8 11707; RV32V-NEXT: vsll.vi v8, v9, 3 11708; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 11709; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t 11710; RV32V-NEXT: vmv.v.v v8, v12 11711; RV32V-NEXT: ret 11712; 11713; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64: 11714; RV64V: # %bb.0: 11715; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 11716; RV64V-NEXT: vzext.vf2 v9, v8 11717; RV64V-NEXT: vsll.vi v8, v9, 3 11718; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 11719; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t 11720; RV64V-NEXT: vmv.v.v v8, v12 11721; RV64V-NEXT: ret 11722; 11723; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64: 11724; RV32ZVE32F: # %bb.0: 11725; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11726; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 11727; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 11728; RV32ZVE32F-NEXT: vmv.x.s a2, v0 11729; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 11730; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 11731; RV32ZVE32F-NEXT: andi a3, a2, 1 11732; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 11733; RV32ZVE32F-NEXT: bnez a3, .LBB99_10 11734; RV32ZVE32F-NEXT: # %bb.1: # %else 11735; RV32ZVE32F-NEXT: andi a1, a2, 2 11736; RV32ZVE32F-NEXT: bnez a1, .LBB99_11 11737; RV32ZVE32F-NEXT: .LBB99_2: # %else2 11738; RV32ZVE32F-NEXT: andi a1, a2, 4 11739; RV32ZVE32F-NEXT: bnez a1, .LBB99_12 11740; RV32ZVE32F-NEXT: .LBB99_3: # %else5 11741; RV32ZVE32F-NEXT: andi a1, a2, 8 11742; RV32ZVE32F-NEXT: bnez a1, .LBB99_13 11743; RV32ZVE32F-NEXT: .LBB99_4: # %else8 11744; RV32ZVE32F-NEXT: andi a1, a2, 16 11745; RV32ZVE32F-NEXT: bnez a1, .LBB99_14 11746; RV32ZVE32F-NEXT: .LBB99_5: # %else11 11747; RV32ZVE32F-NEXT: andi a1, a2, 32 11748; RV32ZVE32F-NEXT: bnez a1, .LBB99_15 11749; RV32ZVE32F-NEXT: .LBB99_6: # %else14 11750; RV32ZVE32F-NEXT: andi a1, a2, 64 11751; RV32ZVE32F-NEXT: bnez a1, .LBB99_16 11752; RV32ZVE32F-NEXT: .LBB99_7: # %else17 11753; RV32ZVE32F-NEXT: andi a1, a2, -128 11754; RV32ZVE32F-NEXT: beqz a1, .LBB99_9 11755; RV32ZVE32F-NEXT: .LBB99_8: # %cond.load19 11756; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11757; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11758; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11759; RV32ZVE32F-NEXT: fld fa7, 0(a1) 11760; RV32ZVE32F-NEXT: .LBB99_9: # %else20 11761; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11762; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 11763; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 11764; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 11765; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 11766; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 11767; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 11768; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 11769; RV32ZVE32F-NEXT: ret 11770; RV32ZVE32F-NEXT: .LBB99_10: # %cond.load 11771; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11772; RV32ZVE32F-NEXT: fld fa0, 0(a1) 11773; RV32ZVE32F-NEXT: andi a1, a2, 2 11774; RV32ZVE32F-NEXT: beqz a1, .LBB99_2 11775; RV32ZVE32F-NEXT: .LBB99_11: # %cond.load1 11776; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 
11777; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 11778; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11779; RV32ZVE32F-NEXT: fld fa1, 0(a1) 11780; RV32ZVE32F-NEXT: andi a1, a2, 4 11781; RV32ZVE32F-NEXT: beqz a1, .LBB99_3 11782; RV32ZVE32F-NEXT: .LBB99_12: # %cond.load4 11783; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11784; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 11785; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11786; RV32ZVE32F-NEXT: fld fa2, 0(a1) 11787; RV32ZVE32F-NEXT: andi a1, a2, 8 11788; RV32ZVE32F-NEXT: beqz a1, .LBB99_4 11789; RV32ZVE32F-NEXT: .LBB99_13: # %cond.load7 11790; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 11791; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 11792; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11793; RV32ZVE32F-NEXT: fld fa3, 0(a1) 11794; RV32ZVE32F-NEXT: andi a1, a2, 16 11795; RV32ZVE32F-NEXT: beqz a1, .LBB99_5 11796; RV32ZVE32F-NEXT: .LBB99_14: # %cond.load10 11797; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11798; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 11799; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11800; RV32ZVE32F-NEXT: fld fa4, 0(a1) 11801; RV32ZVE32F-NEXT: andi a1, a2, 32 11802; RV32ZVE32F-NEXT: beqz a1, .LBB99_6 11803; RV32ZVE32F-NEXT: .LBB99_15: # %cond.load13 11804; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11805; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 11806; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11807; RV32ZVE32F-NEXT: fld fa5, 0(a1) 11808; RV32ZVE32F-NEXT: andi a1, a2, 64 11809; RV32ZVE32F-NEXT: beqz a1, .LBB99_7 11810; RV32ZVE32F-NEXT: .LBB99_16: # %cond.load16 11811; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11812; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 11813; RV32ZVE32F-NEXT: vmv.x.s a1, v10 11814; RV32ZVE32F-NEXT: fld fa6, 0(a1) 11815; RV32ZVE32F-NEXT: andi a1, a2, -128 11816; RV32ZVE32F-NEXT: bnez a1, .LBB99_8 11817; RV32ZVE32F-NEXT: j .LBB99_9 11818; 11819; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64: 11820; RV64ZVE32F: # %bb.0: 11821; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 11822; RV64ZVE32F-NEXT: vmv.x.s a2, v0 11823; RV64ZVE32F-NEXT: andi a3, a2, 1 11824; RV64ZVE32F-NEXT: beqz a3, .LBB99_2 11825; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 11826; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11827; RV64ZVE32F-NEXT: andi a3, a3, 255 11828; RV64ZVE32F-NEXT: slli a3, a3, 3 11829; RV64ZVE32F-NEXT: add a3, a1, a3 11830; RV64ZVE32F-NEXT: fld fa0, 0(a3) 11831; RV64ZVE32F-NEXT: .LBB99_2: # %else 11832; RV64ZVE32F-NEXT: andi a3, a2, 2 11833; RV64ZVE32F-NEXT: beqz a3, .LBB99_4 11834; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 11835; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 11836; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 11837; RV64ZVE32F-NEXT: vmv.x.s a3, v9 11838; RV64ZVE32F-NEXT: andi a3, a3, 255 11839; RV64ZVE32F-NEXT: slli a3, a3, 3 11840; RV64ZVE32F-NEXT: add a3, a1, a3 11841; RV64ZVE32F-NEXT: fld fa1, 0(a3) 11842; RV64ZVE32F-NEXT: .LBB99_4: # %else2 11843; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 11844; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 11845; RV64ZVE32F-NEXT: andi a3, a2, 4 11846; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 11847; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 11848; RV64ZVE32F-NEXT: bnez a3, .LBB99_14 11849; RV64ZVE32F-NEXT: # %bb.5: # %else5 11850; RV64ZVE32F-NEXT: andi a3, a2, 8 11851; RV64ZVE32F-NEXT: bnez a3, .LBB99_15 11852; RV64ZVE32F-NEXT: .LBB99_6: # %else8 11853; RV64ZVE32F-NEXT: andi a3, a2, 16 11854; RV64ZVE32F-NEXT: bnez a3, .LBB99_16 11855; RV64ZVE32F-NEXT: .LBB99_7: # %else11 11856; RV64ZVE32F-NEXT: andi a3, a2, 32 11857; RV64ZVE32F-NEXT: beqz a3, .LBB99_9 11858; 
RV64ZVE32F-NEXT: .LBB99_8: # %cond.load13 11859; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 11860; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11861; RV64ZVE32F-NEXT: andi a3, a3, 255 11862; RV64ZVE32F-NEXT: slli a3, a3, 3 11863; RV64ZVE32F-NEXT: add a3, a1, a3 11864; RV64ZVE32F-NEXT: fld fa5, 0(a3) 11865; RV64ZVE32F-NEXT: .LBB99_9: # %else14 11866; RV64ZVE32F-NEXT: andi a3, a2, 64 11867; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 11868; RV64ZVE32F-NEXT: beqz a3, .LBB99_11 11869; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 11870; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11871; RV64ZVE32F-NEXT: andi a3, a3, 255 11872; RV64ZVE32F-NEXT: slli a3, a3, 3 11873; RV64ZVE32F-NEXT: add a3, a1, a3 11874; RV64ZVE32F-NEXT: fld fa6, 0(a3) 11875; RV64ZVE32F-NEXT: .LBB99_11: # %else17 11876; RV64ZVE32F-NEXT: andi a2, a2, -128 11877; RV64ZVE32F-NEXT: beqz a2, .LBB99_13 11878; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 11879; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11880; RV64ZVE32F-NEXT: vmv.x.s a2, v8 11881; RV64ZVE32F-NEXT: andi a2, a2, 255 11882; RV64ZVE32F-NEXT: slli a2, a2, 3 11883; RV64ZVE32F-NEXT: add a1, a1, a2 11884; RV64ZVE32F-NEXT: fld fa7, 0(a1) 11885; RV64ZVE32F-NEXT: .LBB99_13: # %else20 11886; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 11887; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 11888; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 11889; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 11890; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 11891; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 11892; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 11893; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 11894; RV64ZVE32F-NEXT: ret 11895; RV64ZVE32F-NEXT: .LBB99_14: # %cond.load4 11896; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11897; RV64ZVE32F-NEXT: andi a3, a3, 255 11898; RV64ZVE32F-NEXT: slli a3, a3, 3 11899; RV64ZVE32F-NEXT: add a3, a1, a3 11900; RV64ZVE32F-NEXT: fld fa2, 0(a3) 11901; RV64ZVE32F-NEXT: andi a3, a2, 8 11902; RV64ZVE32F-NEXT: beqz a3, .LBB99_6 11903; RV64ZVE32F-NEXT: .LBB99_15: # %cond.load7 11904; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 11905; RV64ZVE32F-NEXT: vmv.x.s a3, v8 11906; RV64ZVE32F-NEXT: andi a3, a3, 255 11907; RV64ZVE32F-NEXT: slli a3, a3, 3 11908; RV64ZVE32F-NEXT: add a3, a1, a3 11909; RV64ZVE32F-NEXT: fld fa3, 0(a3) 11910; RV64ZVE32F-NEXT: andi a3, a2, 16 11911; RV64ZVE32F-NEXT: beqz a3, .LBB99_7 11912; RV64ZVE32F-NEXT: .LBB99_16: # %cond.load10 11913; RV64ZVE32F-NEXT: vmv.x.s a3, v9 11914; RV64ZVE32F-NEXT: andi a3, a3, 255 11915; RV64ZVE32F-NEXT: slli a3, a3, 3 11916; RV64ZVE32F-NEXT: add a3, a1, a3 11917; RV64ZVE32F-NEXT: fld fa4, 0(a3) 11918; RV64ZVE32F-NEXT: andi a3, a2, 32 11919; RV64ZVE32F-NEXT: bnez a3, .LBB99_8 11920; RV64ZVE32F-NEXT: j .LBB99_9 11921 %eidxs = zext <8 x i8> %idxs to <8 x i64> 11922 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 11923 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 11924 ret <8 x double> %v 11925} 11926 11927define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { 11928; RV32V-LABEL: mgather_baseidx_v8i16_v8f64: 11929; RV32V: # %bb.0: 11930; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11931; RV32V-NEXT: vsext.vf2 v10, v8 11932; RV32V-NEXT: vsll.vi v8, v10, 3 11933; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 11934; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 11935; RV32V-NEXT: vmv.v.v v8, v12 11936; RV32V-NEXT: ret 11937; 11938; RV64V-LABEL: mgather_baseidx_v8i16_v8f64: 11939; RV64V: # %bb.0: 11940; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 11941; RV64V-NEXT: vsext.vf4 v16, v8 11942; RV64V-NEXT: vsll.vi v8, 
v16, 3 11943; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 11944; RV64V-NEXT: vmv.v.v v8, v12 11945; RV64V-NEXT: ret 11946; 11947; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64: 11948; RV32ZVE32F: # %bb.0: 11949; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 11950; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 11951; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 11952; RV32ZVE32F-NEXT: vmv.x.s a2, v0 11953; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 11954; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 11955; RV32ZVE32F-NEXT: andi a3, a2, 1 11956; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 11957; RV32ZVE32F-NEXT: bnez a3, .LBB100_10 11958; RV32ZVE32F-NEXT: # %bb.1: # %else 11959; RV32ZVE32F-NEXT: andi a1, a2, 2 11960; RV32ZVE32F-NEXT: bnez a1, .LBB100_11 11961; RV32ZVE32F-NEXT: .LBB100_2: # %else2 11962; RV32ZVE32F-NEXT: andi a1, a2, 4 11963; RV32ZVE32F-NEXT: bnez a1, .LBB100_12 11964; RV32ZVE32F-NEXT: .LBB100_3: # %else5 11965; RV32ZVE32F-NEXT: andi a1, a2, 8 11966; RV32ZVE32F-NEXT: bnez a1, .LBB100_13 11967; RV32ZVE32F-NEXT: .LBB100_4: # %else8 11968; RV32ZVE32F-NEXT: andi a1, a2, 16 11969; RV32ZVE32F-NEXT: bnez a1, .LBB100_14 11970; RV32ZVE32F-NEXT: .LBB100_5: # %else11 11971; RV32ZVE32F-NEXT: andi a1, a2, 32 11972; RV32ZVE32F-NEXT: bnez a1, .LBB100_15 11973; RV32ZVE32F-NEXT: .LBB100_6: # %else14 11974; RV32ZVE32F-NEXT: andi a1, a2, 64 11975; RV32ZVE32F-NEXT: bnez a1, .LBB100_16 11976; RV32ZVE32F-NEXT: .LBB100_7: # %else17 11977; RV32ZVE32F-NEXT: andi a1, a2, -128 11978; RV32ZVE32F-NEXT: beqz a1, .LBB100_9 11979; RV32ZVE32F-NEXT: .LBB100_8: # %cond.load19 11980; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 11981; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 11982; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11983; RV32ZVE32F-NEXT: fld fa7, 0(a1) 11984; RV32ZVE32F-NEXT: .LBB100_9: # %else20 11985; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 11986; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 11987; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 11988; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 11989; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 11990; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 11991; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 11992; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 11993; RV32ZVE32F-NEXT: ret 11994; RV32ZVE32F-NEXT: .LBB100_10: # %cond.load 11995; RV32ZVE32F-NEXT: vmv.x.s a1, v8 11996; RV32ZVE32F-NEXT: fld fa0, 0(a1) 11997; RV32ZVE32F-NEXT: andi a1, a2, 2 11998; RV32ZVE32F-NEXT: beqz a1, .LBB100_2 11999; RV32ZVE32F-NEXT: .LBB100_11: # %cond.load1 12000; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12001; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12002; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12003; RV32ZVE32F-NEXT: fld fa1, 0(a1) 12004; RV32ZVE32F-NEXT: andi a1, a2, 4 12005; RV32ZVE32F-NEXT: beqz a1, .LBB100_3 12006; RV32ZVE32F-NEXT: .LBB100_12: # %cond.load4 12007; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12008; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 12009; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12010; RV32ZVE32F-NEXT: fld fa2, 0(a1) 12011; RV32ZVE32F-NEXT: andi a1, a2, 8 12012; RV32ZVE32F-NEXT: beqz a1, .LBB100_4 12013; RV32ZVE32F-NEXT: .LBB100_13: # %cond.load7 12014; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12015; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 12016; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12017; RV32ZVE32F-NEXT: fld fa3, 0(a1) 12018; RV32ZVE32F-NEXT: andi a1, a2, 16 12019; RV32ZVE32F-NEXT: beqz a1, .LBB100_5 12020; RV32ZVE32F-NEXT: .LBB100_14: # %cond.load10 12021; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12022; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12023; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12024; RV32ZVE32F-NEXT: fld fa4, 0(a1) 
12025; RV32ZVE32F-NEXT: andi a1, a2, 32 12026; RV32ZVE32F-NEXT: beqz a1, .LBB100_6 12027; RV32ZVE32F-NEXT: .LBB100_15: # %cond.load13 12028; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12029; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 12030; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12031; RV32ZVE32F-NEXT: fld fa5, 0(a1) 12032; RV32ZVE32F-NEXT: andi a1, a2, 64 12033; RV32ZVE32F-NEXT: beqz a1, .LBB100_7 12034; RV32ZVE32F-NEXT: .LBB100_16: # %cond.load16 12035; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12036; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 12037; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12038; RV32ZVE32F-NEXT: fld fa6, 0(a1) 12039; RV32ZVE32F-NEXT: andi a1, a2, -128 12040; RV32ZVE32F-NEXT: bnez a1, .LBB100_8 12041; RV32ZVE32F-NEXT: j .LBB100_9 12042; 12043; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64: 12044; RV64ZVE32F: # %bb.0: 12045; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12046; RV64ZVE32F-NEXT: vmv.x.s a2, v0 12047; RV64ZVE32F-NEXT: andi a3, a2, 1 12048; RV64ZVE32F-NEXT: beqz a3, .LBB100_2 12049; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 12050; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 12051; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12052; RV64ZVE32F-NEXT: slli a3, a3, 3 12053; RV64ZVE32F-NEXT: add a3, a1, a3 12054; RV64ZVE32F-NEXT: fld fa0, 0(a3) 12055; RV64ZVE32F-NEXT: .LBB100_2: # %else 12056; RV64ZVE32F-NEXT: andi a3, a2, 2 12057; RV64ZVE32F-NEXT: beqz a3, .LBB100_4 12058; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 12059; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 12060; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 12061; RV64ZVE32F-NEXT: vmv.x.s a3, v9 12062; RV64ZVE32F-NEXT: slli a3, a3, 3 12063; RV64ZVE32F-NEXT: add a3, a1, a3 12064; RV64ZVE32F-NEXT: fld fa1, 0(a3) 12065; RV64ZVE32F-NEXT: .LBB100_4: # %else2 12066; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 12067; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 12068; RV64ZVE32F-NEXT: andi a3, a2, 4 12069; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 12070; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 12071; RV64ZVE32F-NEXT: bnez a3, .LBB100_14 12072; RV64ZVE32F-NEXT: # %bb.5: # %else5 12073; RV64ZVE32F-NEXT: andi a3, a2, 8 12074; RV64ZVE32F-NEXT: bnez a3, .LBB100_15 12075; RV64ZVE32F-NEXT: .LBB100_6: # %else8 12076; RV64ZVE32F-NEXT: andi a3, a2, 16 12077; RV64ZVE32F-NEXT: bnez a3, .LBB100_16 12078; RV64ZVE32F-NEXT: .LBB100_7: # %else11 12079; RV64ZVE32F-NEXT: andi a3, a2, 32 12080; RV64ZVE32F-NEXT: beqz a3, .LBB100_9 12081; RV64ZVE32F-NEXT: .LBB100_8: # %cond.load13 12082; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 12083; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12084; RV64ZVE32F-NEXT: slli a3, a3, 3 12085; RV64ZVE32F-NEXT: add a3, a1, a3 12086; RV64ZVE32F-NEXT: fld fa5, 0(a3) 12087; RV64ZVE32F-NEXT: .LBB100_9: # %else14 12088; RV64ZVE32F-NEXT: andi a3, a2, 64 12089; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 12090; RV64ZVE32F-NEXT: beqz a3, .LBB100_11 12091; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 12092; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12093; RV64ZVE32F-NEXT: slli a3, a3, 3 12094; RV64ZVE32F-NEXT: add a3, a1, a3 12095; RV64ZVE32F-NEXT: fld fa6, 0(a3) 12096; RV64ZVE32F-NEXT: .LBB100_11: # %else17 12097; RV64ZVE32F-NEXT: andi a2, a2, -128 12098; RV64ZVE32F-NEXT: beqz a2, .LBB100_13 12099; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 12100; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12101; RV64ZVE32F-NEXT: vmv.x.s a2, v8 12102; RV64ZVE32F-NEXT: slli a2, a2, 3 12103; RV64ZVE32F-NEXT: add a1, a1, a2 12104; RV64ZVE32F-NEXT: fld fa7, 0(a1) 12105; RV64ZVE32F-NEXT: .LBB100_13: # %else20 12106; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 
12107; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 12108; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 12109; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 12110; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 12111; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 12112; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 12113; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 12114; RV64ZVE32F-NEXT: ret 12115; RV64ZVE32F-NEXT: .LBB100_14: # %cond.load4 12116; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12117; RV64ZVE32F-NEXT: slli a3, a3, 3 12118; RV64ZVE32F-NEXT: add a3, a1, a3 12119; RV64ZVE32F-NEXT: fld fa2, 0(a3) 12120; RV64ZVE32F-NEXT: andi a3, a2, 8 12121; RV64ZVE32F-NEXT: beqz a3, .LBB100_6 12122; RV64ZVE32F-NEXT: .LBB100_15: # %cond.load7 12123; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12124; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12125; RV64ZVE32F-NEXT: slli a3, a3, 3 12126; RV64ZVE32F-NEXT: add a3, a1, a3 12127; RV64ZVE32F-NEXT: fld fa3, 0(a3) 12128; RV64ZVE32F-NEXT: andi a3, a2, 16 12129; RV64ZVE32F-NEXT: beqz a3, .LBB100_7 12130; RV64ZVE32F-NEXT: .LBB100_16: # %cond.load10 12131; RV64ZVE32F-NEXT: vmv.x.s a3, v9 12132; RV64ZVE32F-NEXT: slli a3, a3, 3 12133; RV64ZVE32F-NEXT: add a3, a1, a3 12134; RV64ZVE32F-NEXT: fld fa4, 0(a3) 12135; RV64ZVE32F-NEXT: andi a3, a2, 32 12136; RV64ZVE32F-NEXT: bnez a3, .LBB100_8 12137; RV64ZVE32F-NEXT: j .LBB100_9 12138 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs 12139 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 12140 ret <8 x double> %v 12141} 12142 12143define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { 12144; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64: 12145; RV32V: # %bb.0: 12146; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12147; RV32V-NEXT: vsext.vf2 v10, v8 12148; RV32V-NEXT: vsll.vi v8, v10, 3 12149; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 12150; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 12151; RV32V-NEXT: vmv.v.v v8, v12 12152; RV32V-NEXT: ret 12153; 12154; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f64: 12155; RV64V: # %bb.0: 12156; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 12157; RV64V-NEXT: vsext.vf4 v16, v8 12158; RV64V-NEXT: vsll.vi v8, v16, 3 12159; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 12160; RV64V-NEXT: vmv.v.v v8, v12 12161; RV64V-NEXT: ret 12162; 12163; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64: 12164; RV32ZVE32F: # %bb.0: 12165; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12166; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 12167; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 12168; RV32ZVE32F-NEXT: vmv.x.s a2, v0 12169; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 12170; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 12171; RV32ZVE32F-NEXT: andi a3, a2, 1 12172; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 12173; RV32ZVE32F-NEXT: bnez a3, .LBB101_10 12174; RV32ZVE32F-NEXT: # %bb.1: # %else 12175; RV32ZVE32F-NEXT: andi a1, a2, 2 12176; RV32ZVE32F-NEXT: bnez a1, .LBB101_11 12177; RV32ZVE32F-NEXT: .LBB101_2: # %else2 12178; RV32ZVE32F-NEXT: andi a1, a2, 4 12179; RV32ZVE32F-NEXT: bnez a1, .LBB101_12 12180; RV32ZVE32F-NEXT: .LBB101_3: # %else5 12181; RV32ZVE32F-NEXT: andi a1, a2, 8 12182; RV32ZVE32F-NEXT: bnez a1, .LBB101_13 12183; RV32ZVE32F-NEXT: .LBB101_4: # %else8 12184; RV32ZVE32F-NEXT: andi a1, a2, 16 12185; RV32ZVE32F-NEXT: bnez a1, .LBB101_14 12186; RV32ZVE32F-NEXT: .LBB101_5: # %else11 12187; RV32ZVE32F-NEXT: andi a1, a2, 32 12188; RV32ZVE32F-NEXT: bnez a1, .LBB101_15 12189; RV32ZVE32F-NEXT: .LBB101_6: # %else14 12190; RV32ZVE32F-NEXT: andi a1, a2, 64 12191; 
RV32ZVE32F-NEXT: bnez a1, .LBB101_16 12192; RV32ZVE32F-NEXT: .LBB101_7: # %else17 12193; RV32ZVE32F-NEXT: andi a1, a2, -128 12194; RV32ZVE32F-NEXT: beqz a1, .LBB101_9 12195; RV32ZVE32F-NEXT: .LBB101_8: # %cond.load19 12196; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12197; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 12198; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12199; RV32ZVE32F-NEXT: fld fa7, 0(a1) 12200; RV32ZVE32F-NEXT: .LBB101_9: # %else20 12201; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 12202; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 12203; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 12204; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 12205; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 12206; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 12207; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 12208; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 12209; RV32ZVE32F-NEXT: ret 12210; RV32ZVE32F-NEXT: .LBB101_10: # %cond.load 12211; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12212; RV32ZVE32F-NEXT: fld fa0, 0(a1) 12213; RV32ZVE32F-NEXT: andi a1, a2, 2 12214; RV32ZVE32F-NEXT: beqz a1, .LBB101_2 12215; RV32ZVE32F-NEXT: .LBB101_11: # %cond.load1 12216; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12217; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12218; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12219; RV32ZVE32F-NEXT: fld fa1, 0(a1) 12220; RV32ZVE32F-NEXT: andi a1, a2, 4 12221; RV32ZVE32F-NEXT: beqz a1, .LBB101_3 12222; RV32ZVE32F-NEXT: .LBB101_12: # %cond.load4 12223; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12224; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 12225; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12226; RV32ZVE32F-NEXT: fld fa2, 0(a1) 12227; RV32ZVE32F-NEXT: andi a1, a2, 8 12228; RV32ZVE32F-NEXT: beqz a1, .LBB101_4 12229; RV32ZVE32F-NEXT: .LBB101_13: # %cond.load7 12230; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12231; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 12232; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12233; RV32ZVE32F-NEXT: fld fa3, 0(a1) 12234; RV32ZVE32F-NEXT: andi a1, a2, 16 12235; RV32ZVE32F-NEXT: beqz a1, .LBB101_5 12236; RV32ZVE32F-NEXT: .LBB101_14: # %cond.load10 12237; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12238; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12239; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12240; RV32ZVE32F-NEXT: fld fa4, 0(a1) 12241; RV32ZVE32F-NEXT: andi a1, a2, 32 12242; RV32ZVE32F-NEXT: beqz a1, .LBB101_6 12243; RV32ZVE32F-NEXT: .LBB101_15: # %cond.load13 12244; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12245; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 12246; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12247; RV32ZVE32F-NEXT: fld fa5, 0(a1) 12248; RV32ZVE32F-NEXT: andi a1, a2, 64 12249; RV32ZVE32F-NEXT: beqz a1, .LBB101_7 12250; RV32ZVE32F-NEXT: .LBB101_16: # %cond.load16 12251; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12252; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 12253; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12254; RV32ZVE32F-NEXT: fld fa6, 0(a1) 12255; RV32ZVE32F-NEXT: andi a1, a2, -128 12256; RV32ZVE32F-NEXT: bnez a1, .LBB101_8 12257; RV32ZVE32F-NEXT: j .LBB101_9 12258; 12259; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64: 12260; RV64ZVE32F: # %bb.0: 12261; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12262; RV64ZVE32F-NEXT: vmv.x.s a2, v0 12263; RV64ZVE32F-NEXT: andi a3, a2, 1 12264; RV64ZVE32F-NEXT: beqz a3, .LBB101_2 12265; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 12266; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 12267; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12268; RV64ZVE32F-NEXT: slli a3, a3, 3 12269; RV64ZVE32F-NEXT: add a3, a1, a3 12270; RV64ZVE32F-NEXT: fld fa0, 0(a3) 12271; RV64ZVE32F-NEXT: .LBB101_2: # %else 12272; RV64ZVE32F-NEXT: 
andi a3, a2, 2 12273; RV64ZVE32F-NEXT: beqz a3, .LBB101_4 12274; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 12275; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 12276; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 12277; RV64ZVE32F-NEXT: vmv.x.s a3, v9 12278; RV64ZVE32F-NEXT: slli a3, a3, 3 12279; RV64ZVE32F-NEXT: add a3, a1, a3 12280; RV64ZVE32F-NEXT: fld fa1, 0(a3) 12281; RV64ZVE32F-NEXT: .LBB101_4: # %else2 12282; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 12283; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 12284; RV64ZVE32F-NEXT: andi a3, a2, 4 12285; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 12286; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 12287; RV64ZVE32F-NEXT: bnez a3, .LBB101_14 12288; RV64ZVE32F-NEXT: # %bb.5: # %else5 12289; RV64ZVE32F-NEXT: andi a3, a2, 8 12290; RV64ZVE32F-NEXT: bnez a3, .LBB101_15 12291; RV64ZVE32F-NEXT: .LBB101_6: # %else8 12292; RV64ZVE32F-NEXT: andi a3, a2, 16 12293; RV64ZVE32F-NEXT: bnez a3, .LBB101_16 12294; RV64ZVE32F-NEXT: .LBB101_7: # %else11 12295; RV64ZVE32F-NEXT: andi a3, a2, 32 12296; RV64ZVE32F-NEXT: beqz a3, .LBB101_9 12297; RV64ZVE32F-NEXT: .LBB101_8: # %cond.load13 12298; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 12299; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12300; RV64ZVE32F-NEXT: slli a3, a3, 3 12301; RV64ZVE32F-NEXT: add a3, a1, a3 12302; RV64ZVE32F-NEXT: fld fa5, 0(a3) 12303; RV64ZVE32F-NEXT: .LBB101_9: # %else14 12304; RV64ZVE32F-NEXT: andi a3, a2, 64 12305; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 12306; RV64ZVE32F-NEXT: beqz a3, .LBB101_11 12307; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 12308; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12309; RV64ZVE32F-NEXT: slli a3, a3, 3 12310; RV64ZVE32F-NEXT: add a3, a1, a3 12311; RV64ZVE32F-NEXT: fld fa6, 0(a3) 12312; RV64ZVE32F-NEXT: .LBB101_11: # %else17 12313; RV64ZVE32F-NEXT: andi a2, a2, -128 12314; RV64ZVE32F-NEXT: beqz a2, .LBB101_13 12315; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 12316; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12317; RV64ZVE32F-NEXT: vmv.x.s a2, v8 12318; RV64ZVE32F-NEXT: slli a2, a2, 3 12319; RV64ZVE32F-NEXT: add a1, a1, a2 12320; RV64ZVE32F-NEXT: fld fa7, 0(a1) 12321; RV64ZVE32F-NEXT: .LBB101_13: # %else20 12322; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 12323; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 12324; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 12325; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 12326; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 12327; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 12328; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 12329; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 12330; RV64ZVE32F-NEXT: ret 12331; RV64ZVE32F-NEXT: .LBB101_14: # %cond.load4 12332; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12333; RV64ZVE32F-NEXT: slli a3, a3, 3 12334; RV64ZVE32F-NEXT: add a3, a1, a3 12335; RV64ZVE32F-NEXT: fld fa2, 0(a3) 12336; RV64ZVE32F-NEXT: andi a3, a2, 8 12337; RV64ZVE32F-NEXT: beqz a3, .LBB101_6 12338; RV64ZVE32F-NEXT: .LBB101_15: # %cond.load7 12339; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12340; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12341; RV64ZVE32F-NEXT: slli a3, a3, 3 12342; RV64ZVE32F-NEXT: add a3, a1, a3 12343; RV64ZVE32F-NEXT: fld fa3, 0(a3) 12344; RV64ZVE32F-NEXT: andi a3, a2, 16 12345; RV64ZVE32F-NEXT: beqz a3, .LBB101_7 12346; RV64ZVE32F-NEXT: .LBB101_16: # %cond.load10 12347; RV64ZVE32F-NEXT: vmv.x.s a3, v9 12348; RV64ZVE32F-NEXT: slli a3, a3, 3 12349; RV64ZVE32F-NEXT: add a3, a1, a3 12350; RV64ZVE32F-NEXT: fld fa4, 0(a3) 12351; RV64ZVE32F-NEXT: andi a3, a2, 32 12352; RV64ZVE32F-NEXT: bnez a3, .LBB101_8 12353; RV64ZVE32F-NEXT: j .LBB101_9 12354 %eidxs = sext <8 x i16> %idxs to <8 x i64> 12355 %ptrs = getelementptr inbounds double, ptr 
%base, <8 x i64> %eidxs 12356 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 12357 ret <8 x double> %v 12358} 12359 12360define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) { 12361; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64: 12362; RV32V: # %bb.0: 12363; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12364; RV32V-NEXT: vzext.vf2 v10, v8 12365; RV32V-NEXT: vsll.vi v8, v10, 3 12366; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 12367; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 12368; RV32V-NEXT: vmv.v.v v8, v12 12369; RV32V-NEXT: ret 12370; 12371; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64: 12372; RV64V: # %bb.0: 12373; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12374; RV64V-NEXT: vzext.vf2 v10, v8 12375; RV64V-NEXT: vsll.vi v8, v10, 3 12376; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 12377; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t 12378; RV64V-NEXT: vmv.v.v v8, v12 12379; RV64V-NEXT: ret 12380; 12381; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64: 12382; RV32ZVE32F: # %bb.0: 12383; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12384; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 12385; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 12386; RV32ZVE32F-NEXT: vmv.x.s a2, v0 12387; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 12388; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 12389; RV32ZVE32F-NEXT: andi a3, a2, 1 12390; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 12391; RV32ZVE32F-NEXT: bnez a3, .LBB102_10 12392; RV32ZVE32F-NEXT: # %bb.1: # %else 12393; RV32ZVE32F-NEXT: andi a1, a2, 2 12394; RV32ZVE32F-NEXT: bnez a1, .LBB102_11 12395; RV32ZVE32F-NEXT: .LBB102_2: # %else2 12396; RV32ZVE32F-NEXT: andi a1, a2, 4 12397; RV32ZVE32F-NEXT: bnez a1, .LBB102_12 12398; RV32ZVE32F-NEXT: .LBB102_3: # %else5 12399; RV32ZVE32F-NEXT: andi a1, a2, 8 12400; RV32ZVE32F-NEXT: bnez a1, .LBB102_13 12401; RV32ZVE32F-NEXT: .LBB102_4: # %else8 12402; RV32ZVE32F-NEXT: andi a1, a2, 16 12403; RV32ZVE32F-NEXT: bnez a1, .LBB102_14 12404; RV32ZVE32F-NEXT: .LBB102_5: # %else11 12405; RV32ZVE32F-NEXT: andi a1, a2, 32 12406; RV32ZVE32F-NEXT: bnez a1, .LBB102_15 12407; RV32ZVE32F-NEXT: .LBB102_6: # %else14 12408; RV32ZVE32F-NEXT: andi a1, a2, 64 12409; RV32ZVE32F-NEXT: bnez a1, .LBB102_16 12410; RV32ZVE32F-NEXT: .LBB102_7: # %else17 12411; RV32ZVE32F-NEXT: andi a1, a2, -128 12412; RV32ZVE32F-NEXT: beqz a1, .LBB102_9 12413; RV32ZVE32F-NEXT: .LBB102_8: # %cond.load19 12414; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12415; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 12416; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12417; RV32ZVE32F-NEXT: fld fa7, 0(a1) 12418; RV32ZVE32F-NEXT: .LBB102_9: # %else20 12419; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 12420; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 12421; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 12422; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 12423; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 12424; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 12425; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 12426; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 12427; RV32ZVE32F-NEXT: ret 12428; RV32ZVE32F-NEXT: .LBB102_10: # %cond.load 12429; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12430; RV32ZVE32F-NEXT: fld fa0, 0(a1) 12431; RV32ZVE32F-NEXT: andi a1, a2, 2 12432; RV32ZVE32F-NEXT: beqz a1, .LBB102_2 12433; RV32ZVE32F-NEXT: .LBB102_11: # %cond.load1 12434; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12435; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12436; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12437; RV32ZVE32F-NEXT: fld fa1, 0(a1) 12438; 
RV32ZVE32F-NEXT: andi a1, a2, 4 12439; RV32ZVE32F-NEXT: beqz a1, .LBB102_3 12440; RV32ZVE32F-NEXT: .LBB102_12: # %cond.load4 12441; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12442; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 12443; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12444; RV32ZVE32F-NEXT: fld fa2, 0(a1) 12445; RV32ZVE32F-NEXT: andi a1, a2, 8 12446; RV32ZVE32F-NEXT: beqz a1, .LBB102_4 12447; RV32ZVE32F-NEXT: .LBB102_13: # %cond.load7 12448; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12449; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 12450; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12451; RV32ZVE32F-NEXT: fld fa3, 0(a1) 12452; RV32ZVE32F-NEXT: andi a1, a2, 16 12453; RV32ZVE32F-NEXT: beqz a1, .LBB102_5 12454; RV32ZVE32F-NEXT: .LBB102_14: # %cond.load10 12455; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12456; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12457; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12458; RV32ZVE32F-NEXT: fld fa4, 0(a1) 12459; RV32ZVE32F-NEXT: andi a1, a2, 32 12460; RV32ZVE32F-NEXT: beqz a1, .LBB102_6 12461; RV32ZVE32F-NEXT: .LBB102_15: # %cond.load13 12462; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12463; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 12464; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12465; RV32ZVE32F-NEXT: fld fa5, 0(a1) 12466; RV32ZVE32F-NEXT: andi a1, a2, 64 12467; RV32ZVE32F-NEXT: beqz a1, .LBB102_7 12468; RV32ZVE32F-NEXT: .LBB102_16: # %cond.load16 12469; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12470; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 12471; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12472; RV32ZVE32F-NEXT: fld fa6, 0(a1) 12473; RV32ZVE32F-NEXT: andi a1, a2, -128 12474; RV32ZVE32F-NEXT: bnez a1, .LBB102_8 12475; RV32ZVE32F-NEXT: j .LBB102_9 12476; 12477; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64: 12478; RV64ZVE32F: # %bb.0: 12479; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12480; RV64ZVE32F-NEXT: vmv.x.s a2, v0 12481; RV64ZVE32F-NEXT: andi a3, a2, 1 12482; RV64ZVE32F-NEXT: beqz a3, .LBB102_2 12483; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 12484; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma 12485; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12486; RV64ZVE32F-NEXT: slli a3, a3, 48 12487; RV64ZVE32F-NEXT: srli a3, a3, 45 12488; RV64ZVE32F-NEXT: add a3, a1, a3 12489; RV64ZVE32F-NEXT: fld fa0, 0(a3) 12490; RV64ZVE32F-NEXT: .LBB102_2: # %else 12491; RV64ZVE32F-NEXT: andi a3, a2, 2 12492; RV64ZVE32F-NEXT: beqz a3, .LBB102_4 12493; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 12494; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma 12495; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 12496; RV64ZVE32F-NEXT: vmv.x.s a3, v9 12497; RV64ZVE32F-NEXT: slli a3, a3, 48 12498; RV64ZVE32F-NEXT: srli a3, a3, 45 12499; RV64ZVE32F-NEXT: add a3, a1, a3 12500; RV64ZVE32F-NEXT: fld fa1, 0(a3) 12501; RV64ZVE32F-NEXT: .LBB102_4: # %else2 12502; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma 12503; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 12504; RV64ZVE32F-NEXT: andi a3, a2, 4 12505; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma 12506; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 12507; RV64ZVE32F-NEXT: bnez a3, .LBB102_14 12508; RV64ZVE32F-NEXT: # %bb.5: # %else5 12509; RV64ZVE32F-NEXT: andi a3, a2, 8 12510; RV64ZVE32F-NEXT: bnez a3, .LBB102_15 12511; RV64ZVE32F-NEXT: .LBB102_6: # %else8 12512; RV64ZVE32F-NEXT: andi a3, a2, 16 12513; RV64ZVE32F-NEXT: bnez a3, .LBB102_16 12514; RV64ZVE32F-NEXT: .LBB102_7: # %else11 12515; RV64ZVE32F-NEXT: andi a3, a2, 32 12516; RV64ZVE32F-NEXT: beqz a3, .LBB102_9 12517; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load13 12518; 
RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 12519; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12520; RV64ZVE32F-NEXT: slli a3, a3, 48 12521; RV64ZVE32F-NEXT: srli a3, a3, 45 12522; RV64ZVE32F-NEXT: add a3, a1, a3 12523; RV64ZVE32F-NEXT: fld fa5, 0(a3) 12524; RV64ZVE32F-NEXT: .LBB102_9: # %else14 12525; RV64ZVE32F-NEXT: andi a3, a2, 64 12526; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 12527; RV64ZVE32F-NEXT: beqz a3, .LBB102_11 12528; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 12529; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12530; RV64ZVE32F-NEXT: slli a3, a3, 48 12531; RV64ZVE32F-NEXT: srli a3, a3, 45 12532; RV64ZVE32F-NEXT: add a3, a1, a3 12533; RV64ZVE32F-NEXT: fld fa6, 0(a3) 12534; RV64ZVE32F-NEXT: .LBB102_11: # %else17 12535; RV64ZVE32F-NEXT: andi a2, a2, -128 12536; RV64ZVE32F-NEXT: beqz a2, .LBB102_13 12537; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 12538; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12539; RV64ZVE32F-NEXT: vmv.x.s a2, v8 12540; RV64ZVE32F-NEXT: slli a2, a2, 48 12541; RV64ZVE32F-NEXT: srli a2, a2, 45 12542; RV64ZVE32F-NEXT: add a1, a1, a2 12543; RV64ZVE32F-NEXT: fld fa7, 0(a1) 12544; RV64ZVE32F-NEXT: .LBB102_13: # %else20 12545; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 12546; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 12547; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 12548; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 12549; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 12550; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 12551; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 12552; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 12553; RV64ZVE32F-NEXT: ret 12554; RV64ZVE32F-NEXT: .LBB102_14: # %cond.load4 12555; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12556; RV64ZVE32F-NEXT: slli a3, a3, 48 12557; RV64ZVE32F-NEXT: srli a3, a3, 45 12558; RV64ZVE32F-NEXT: add a3, a1, a3 12559; RV64ZVE32F-NEXT: fld fa2, 0(a3) 12560; RV64ZVE32F-NEXT: andi a3, a2, 8 12561; RV64ZVE32F-NEXT: beqz a3, .LBB102_6 12562; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7 12563; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12564; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12565; RV64ZVE32F-NEXT: slli a3, a3, 48 12566; RV64ZVE32F-NEXT: srli a3, a3, 45 12567; RV64ZVE32F-NEXT: add a3, a1, a3 12568; RV64ZVE32F-NEXT: fld fa3, 0(a3) 12569; RV64ZVE32F-NEXT: andi a3, a2, 16 12570; RV64ZVE32F-NEXT: beqz a3, .LBB102_7 12571; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10 12572; RV64ZVE32F-NEXT: vmv.x.s a3, v9 12573; RV64ZVE32F-NEXT: slli a3, a3, 48 12574; RV64ZVE32F-NEXT: srli a3, a3, 45 12575; RV64ZVE32F-NEXT: add a3, a1, a3 12576; RV64ZVE32F-NEXT: fld fa4, 0(a3) 12577; RV64ZVE32F-NEXT: andi a3, a2, 32 12578; RV64ZVE32F-NEXT: bnez a3, .LBB102_8 12579; RV64ZVE32F-NEXT: j .LBB102_9 12580 %eidxs = zext <8 x i16> %idxs to <8 x i64> 12581 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 12582 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 12583 ret <8 x double> %v 12584} 12585 12586define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) { 12587; RV32V-LABEL: mgather_baseidx_v8i32_v8f64: 12588; RV32V: # %bb.0: 12589; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12590; RV32V-NEXT: vsll.vi v8, v8, 3 12591; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 12592; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 12593; RV32V-NEXT: vmv.v.v v8, v12 12594; RV32V-NEXT: ret 12595; 12596; RV64V-LABEL: mgather_baseidx_v8i32_v8f64: 12597; RV64V: # %bb.0: 12598; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 12599; RV64V-NEXT: vsext.vf2 v16, v8 12600; RV64V-NEXT: vsll.vi v8, v16, 3 12601; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 12602; 
RV64V-NEXT: vmv.v.v v8, v12 12603; RV64V-NEXT: ret 12604; 12605; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64: 12606; RV32ZVE32F: # %bb.0: 12607; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12608; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 12609; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 12610; RV32ZVE32F-NEXT: vmv.x.s a2, v0 12611; RV32ZVE32F-NEXT: andi a3, a2, 1 12612; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 12613; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 12614; RV32ZVE32F-NEXT: bnez a3, .LBB103_10 12615; RV32ZVE32F-NEXT: # %bb.1: # %else 12616; RV32ZVE32F-NEXT: andi a1, a2, 2 12617; RV32ZVE32F-NEXT: bnez a1, .LBB103_11 12618; RV32ZVE32F-NEXT: .LBB103_2: # %else2 12619; RV32ZVE32F-NEXT: andi a1, a2, 4 12620; RV32ZVE32F-NEXT: bnez a1, .LBB103_12 12621; RV32ZVE32F-NEXT: .LBB103_3: # %else5 12622; RV32ZVE32F-NEXT: andi a1, a2, 8 12623; RV32ZVE32F-NEXT: bnez a1, .LBB103_13 12624; RV32ZVE32F-NEXT: .LBB103_4: # %else8 12625; RV32ZVE32F-NEXT: andi a1, a2, 16 12626; RV32ZVE32F-NEXT: bnez a1, .LBB103_14 12627; RV32ZVE32F-NEXT: .LBB103_5: # %else11 12628; RV32ZVE32F-NEXT: andi a1, a2, 32 12629; RV32ZVE32F-NEXT: bnez a1, .LBB103_15 12630; RV32ZVE32F-NEXT: .LBB103_6: # %else14 12631; RV32ZVE32F-NEXT: andi a1, a2, 64 12632; RV32ZVE32F-NEXT: bnez a1, .LBB103_16 12633; RV32ZVE32F-NEXT: .LBB103_7: # %else17 12634; RV32ZVE32F-NEXT: andi a1, a2, -128 12635; RV32ZVE32F-NEXT: beqz a1, .LBB103_9 12636; RV32ZVE32F-NEXT: .LBB103_8: # %cond.load19 12637; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12638; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 12639; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12640; RV32ZVE32F-NEXT: fld fa7, 0(a1) 12641; RV32ZVE32F-NEXT: .LBB103_9: # %else20 12642; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 12643; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 12644; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 12645; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 12646; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 12647; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 12648; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 12649; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 12650; RV32ZVE32F-NEXT: ret 12651; RV32ZVE32F-NEXT: .LBB103_10: # %cond.load 12652; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12653; RV32ZVE32F-NEXT: fld fa0, 0(a1) 12654; RV32ZVE32F-NEXT: andi a1, a2, 2 12655; RV32ZVE32F-NEXT: beqz a1, .LBB103_2 12656; RV32ZVE32F-NEXT: .LBB103_11: # %cond.load1 12657; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12658; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12659; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12660; RV32ZVE32F-NEXT: fld fa1, 0(a1) 12661; RV32ZVE32F-NEXT: andi a1, a2, 4 12662; RV32ZVE32F-NEXT: beqz a1, .LBB103_3 12663; RV32ZVE32F-NEXT: .LBB103_12: # %cond.load4 12664; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12665; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 12666; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12667; RV32ZVE32F-NEXT: fld fa2, 0(a1) 12668; RV32ZVE32F-NEXT: andi a1, a2, 8 12669; RV32ZVE32F-NEXT: beqz a1, .LBB103_4 12670; RV32ZVE32F-NEXT: .LBB103_13: # %cond.load7 12671; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12672; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 12673; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12674; RV32ZVE32F-NEXT: fld fa3, 0(a1) 12675; RV32ZVE32F-NEXT: andi a1, a2, 16 12676; RV32ZVE32F-NEXT: beqz a1, .LBB103_5 12677; RV32ZVE32F-NEXT: .LBB103_14: # %cond.load10 12678; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12679; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12680; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12681; RV32ZVE32F-NEXT: fld fa4, 0(a1) 12682; RV32ZVE32F-NEXT: andi a1, a2, 32 12683; RV32ZVE32F-NEXT: beqz a1, .LBB103_6 12684; RV32ZVE32F-NEXT: 
.LBB103_15: # %cond.load13 12685; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12686; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 12687; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12688; RV32ZVE32F-NEXT: fld fa5, 0(a1) 12689; RV32ZVE32F-NEXT: andi a1, a2, 64 12690; RV32ZVE32F-NEXT: beqz a1, .LBB103_7 12691; RV32ZVE32F-NEXT: .LBB103_16: # %cond.load16 12692; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12693; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 12694; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12695; RV32ZVE32F-NEXT: fld fa6, 0(a1) 12696; RV32ZVE32F-NEXT: andi a1, a2, -128 12697; RV32ZVE32F-NEXT: bnez a1, .LBB103_8 12698; RV32ZVE32F-NEXT: j .LBB103_9 12699; 12700; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64: 12701; RV64ZVE32F: # %bb.0: 12702; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12703; RV64ZVE32F-NEXT: vmv.x.s a2, v0 12704; RV64ZVE32F-NEXT: andi a3, a2, 1 12705; RV64ZVE32F-NEXT: beqz a3, .LBB103_2 12706; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 12707; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 12708; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12709; RV64ZVE32F-NEXT: slli a3, a3, 3 12710; RV64ZVE32F-NEXT: add a3, a1, a3 12711; RV64ZVE32F-NEXT: fld fa0, 0(a3) 12712; RV64ZVE32F-NEXT: .LBB103_2: # %else 12713; RV64ZVE32F-NEXT: andi a3, a2, 2 12714; RV64ZVE32F-NEXT: beqz a3, .LBB103_4 12715; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 12716; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12717; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12718; RV64ZVE32F-NEXT: vmv.x.s a3, v10 12719; RV64ZVE32F-NEXT: slli a3, a3, 3 12720; RV64ZVE32F-NEXT: add a3, a1, a3 12721; RV64ZVE32F-NEXT: fld fa1, 0(a3) 12722; RV64ZVE32F-NEXT: .LBB103_4: # %else2 12723; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 12724; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12725; RV64ZVE32F-NEXT: andi a3, a2, 4 12726; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 12727; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 12728; RV64ZVE32F-NEXT: bnez a3, .LBB103_14 12729; RV64ZVE32F-NEXT: # %bb.5: # %else5 12730; RV64ZVE32F-NEXT: andi a3, a2, 8 12731; RV64ZVE32F-NEXT: bnez a3, .LBB103_15 12732; RV64ZVE32F-NEXT: .LBB103_6: # %else8 12733; RV64ZVE32F-NEXT: andi a3, a2, 16 12734; RV64ZVE32F-NEXT: bnez a3, .LBB103_16 12735; RV64ZVE32F-NEXT: .LBB103_7: # %else11 12736; RV64ZVE32F-NEXT: andi a3, a2, 32 12737; RV64ZVE32F-NEXT: beqz a3, .LBB103_9 12738; RV64ZVE32F-NEXT: .LBB103_8: # %cond.load13 12739; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 12740; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12741; RV64ZVE32F-NEXT: slli a3, a3, 3 12742; RV64ZVE32F-NEXT: add a3, a1, a3 12743; RV64ZVE32F-NEXT: fld fa5, 0(a3) 12744; RV64ZVE32F-NEXT: .LBB103_9: # %else14 12745; RV64ZVE32F-NEXT: andi a3, a2, 64 12746; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 12747; RV64ZVE32F-NEXT: beqz a3, .LBB103_11 12748; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 12749; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12750; RV64ZVE32F-NEXT: slli a3, a3, 3 12751; RV64ZVE32F-NEXT: add a3, a1, a3 12752; RV64ZVE32F-NEXT: fld fa6, 0(a3) 12753; RV64ZVE32F-NEXT: .LBB103_11: # %else17 12754; RV64ZVE32F-NEXT: andi a2, a2, -128 12755; RV64ZVE32F-NEXT: beqz a2, .LBB103_13 12756; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 12757; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12758; RV64ZVE32F-NEXT: vmv.x.s a2, v8 12759; RV64ZVE32F-NEXT: slli a2, a2, 3 12760; RV64ZVE32F-NEXT: add a1, a1, a2 12761; RV64ZVE32F-NEXT: fld fa7, 0(a1) 12762; RV64ZVE32F-NEXT: .LBB103_13: # %else20 12763; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 12764; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 12765; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 12766; RV64ZVE32F-NEXT: fsd 
fa3, 24(a0) 12767; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 12768; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 12769; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 12770; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 12771; RV64ZVE32F-NEXT: ret 12772; RV64ZVE32F-NEXT: .LBB103_14: # %cond.load4 12773; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12774; RV64ZVE32F-NEXT: slli a3, a3, 3 12775; RV64ZVE32F-NEXT: add a3, a1, a3 12776; RV64ZVE32F-NEXT: fld fa2, 0(a3) 12777; RV64ZVE32F-NEXT: andi a3, a2, 8 12778; RV64ZVE32F-NEXT: beqz a3, .LBB103_6 12779; RV64ZVE32F-NEXT: .LBB103_15: # %cond.load7 12780; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12781; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12782; RV64ZVE32F-NEXT: slli a3, a3, 3 12783; RV64ZVE32F-NEXT: add a3, a1, a3 12784; RV64ZVE32F-NEXT: fld fa3, 0(a3) 12785; RV64ZVE32F-NEXT: andi a3, a2, 16 12786; RV64ZVE32F-NEXT: beqz a3, .LBB103_7 12787; RV64ZVE32F-NEXT: .LBB103_16: # %cond.load10 12788; RV64ZVE32F-NEXT: vmv.x.s a3, v10 12789; RV64ZVE32F-NEXT: slli a3, a3, 3 12790; RV64ZVE32F-NEXT: add a3, a1, a3 12791; RV64ZVE32F-NEXT: fld fa4, 0(a3) 12792; RV64ZVE32F-NEXT: andi a3, a2, 32 12793; RV64ZVE32F-NEXT: bnez a3, .LBB103_8 12794; RV64ZVE32F-NEXT: j .LBB103_9 12795 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs 12796 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 12797 ret <8 x double> %v 12798} 12799 12800define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) { 12801; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8f64: 12802; RV32V: # %bb.0: 12803; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12804; RV32V-NEXT: vsll.vi v8, v8, 3 12805; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 12806; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 12807; RV32V-NEXT: vmv.v.v v8, v12 12808; RV32V-NEXT: ret 12809; 12810; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8f64: 12811; RV64V: # %bb.0: 12812; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 12813; RV64V-NEXT: vsext.vf2 v16, v8 12814; RV64V-NEXT: vsll.vi v8, v16, 3 12815; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 12816; RV64V-NEXT: vmv.v.v v8, v12 12817; RV64V-NEXT: ret 12818; 12819; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64: 12820; RV32ZVE32F: # %bb.0: 12821; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 12822; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 12823; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 12824; RV32ZVE32F-NEXT: vmv.x.s a2, v0 12825; RV32ZVE32F-NEXT: andi a3, a2, 1 12826; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 12827; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 12828; RV32ZVE32F-NEXT: bnez a3, .LBB104_10 12829; RV32ZVE32F-NEXT: # %bb.1: # %else 12830; RV32ZVE32F-NEXT: andi a1, a2, 2 12831; RV32ZVE32F-NEXT: bnez a1, .LBB104_11 12832; RV32ZVE32F-NEXT: .LBB104_2: # %else2 12833; RV32ZVE32F-NEXT: andi a1, a2, 4 12834; RV32ZVE32F-NEXT: bnez a1, .LBB104_12 12835; RV32ZVE32F-NEXT: .LBB104_3: # %else5 12836; RV32ZVE32F-NEXT: andi a1, a2, 8 12837; RV32ZVE32F-NEXT: bnez a1, .LBB104_13 12838; RV32ZVE32F-NEXT: .LBB104_4: # %else8 12839; RV32ZVE32F-NEXT: andi a1, a2, 16 12840; RV32ZVE32F-NEXT: bnez a1, .LBB104_14 12841; RV32ZVE32F-NEXT: .LBB104_5: # %else11 12842; RV32ZVE32F-NEXT: andi a1, a2, 32 12843; RV32ZVE32F-NEXT: bnez a1, .LBB104_15 12844; RV32ZVE32F-NEXT: .LBB104_6: # %else14 12845; RV32ZVE32F-NEXT: andi a1, a2, 64 12846; RV32ZVE32F-NEXT: bnez a1, .LBB104_16 12847; RV32ZVE32F-NEXT: .LBB104_7: # %else17 12848; RV32ZVE32F-NEXT: andi a1, a2, -128 12849; RV32ZVE32F-NEXT: beqz a1, .LBB104_9 12850; 
RV32ZVE32F-NEXT: .LBB104_8: # %cond.load19 12851; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12852; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 12853; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12854; RV32ZVE32F-NEXT: fld fa7, 0(a1) 12855; RV32ZVE32F-NEXT: .LBB104_9: # %else20 12856; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 12857; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 12858; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 12859; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 12860; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 12861; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 12862; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 12863; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 12864; RV32ZVE32F-NEXT: ret 12865; RV32ZVE32F-NEXT: .LBB104_10: # %cond.load 12866; RV32ZVE32F-NEXT: vmv.x.s a1, v8 12867; RV32ZVE32F-NEXT: fld fa0, 0(a1) 12868; RV32ZVE32F-NEXT: andi a1, a2, 2 12869; RV32ZVE32F-NEXT: beqz a1, .LBB104_2 12870; RV32ZVE32F-NEXT: .LBB104_11: # %cond.load1 12871; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12872; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12873; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12874; RV32ZVE32F-NEXT: fld fa1, 0(a1) 12875; RV32ZVE32F-NEXT: andi a1, a2, 4 12876; RV32ZVE32F-NEXT: beqz a1, .LBB104_3 12877; RV32ZVE32F-NEXT: .LBB104_12: # %cond.load4 12878; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12879; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 12880; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12881; RV32ZVE32F-NEXT: fld fa2, 0(a1) 12882; RV32ZVE32F-NEXT: andi a1, a2, 8 12883; RV32ZVE32F-NEXT: beqz a1, .LBB104_4 12884; RV32ZVE32F-NEXT: .LBB104_13: # %cond.load7 12885; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12886; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 12887; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12888; RV32ZVE32F-NEXT: fld fa3, 0(a1) 12889; RV32ZVE32F-NEXT: andi a1, a2, 16 12890; RV32ZVE32F-NEXT: beqz a1, .LBB104_5 12891; RV32ZVE32F-NEXT: .LBB104_14: # %cond.load10 12892; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12893; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12894; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12895; RV32ZVE32F-NEXT: fld fa4, 0(a1) 12896; RV32ZVE32F-NEXT: andi a1, a2, 32 12897; RV32ZVE32F-NEXT: beqz a1, .LBB104_6 12898; RV32ZVE32F-NEXT: .LBB104_15: # %cond.load13 12899; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12900; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 12901; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12902; RV32ZVE32F-NEXT: fld fa5, 0(a1) 12903; RV32ZVE32F-NEXT: andi a1, a2, 64 12904; RV32ZVE32F-NEXT: beqz a1, .LBB104_7 12905; RV32ZVE32F-NEXT: .LBB104_16: # %cond.load16 12906; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 12907; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 12908; RV32ZVE32F-NEXT: vmv.x.s a1, v10 12909; RV32ZVE32F-NEXT: fld fa6, 0(a1) 12910; RV32ZVE32F-NEXT: andi a1, a2, -128 12911; RV32ZVE32F-NEXT: bnez a1, .LBB104_8 12912; RV32ZVE32F-NEXT: j .LBB104_9 12913; 12914; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64: 12915; RV64ZVE32F: # %bb.0: 12916; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 12917; RV64ZVE32F-NEXT: vmv.x.s a2, v0 12918; RV64ZVE32F-NEXT: andi a3, a2, 1 12919; RV64ZVE32F-NEXT: beqz a3, .LBB104_2 12920; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 12921; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 12922; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12923; RV64ZVE32F-NEXT: slli a3, a3, 3 12924; RV64ZVE32F-NEXT: add a3, a1, a3 12925; RV64ZVE32F-NEXT: fld fa0, 0(a3) 12926; RV64ZVE32F-NEXT: .LBB104_2: # %else 12927; RV64ZVE32F-NEXT: andi a3, a2, 2 12928; RV64ZVE32F-NEXT: beqz a3, .LBB104_4 12929; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 12930; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 12931; 
RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 12932; RV64ZVE32F-NEXT: vmv.x.s a3, v10 12933; RV64ZVE32F-NEXT: slli a3, a3, 3 12934; RV64ZVE32F-NEXT: add a3, a1, a3 12935; RV64ZVE32F-NEXT: fld fa1, 0(a3) 12936; RV64ZVE32F-NEXT: .LBB104_4: # %else2 12937; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 12938; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 12939; RV64ZVE32F-NEXT: andi a3, a2, 4 12940; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 12941; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 12942; RV64ZVE32F-NEXT: bnez a3, .LBB104_14 12943; RV64ZVE32F-NEXT: # %bb.5: # %else5 12944; RV64ZVE32F-NEXT: andi a3, a2, 8 12945; RV64ZVE32F-NEXT: bnez a3, .LBB104_15 12946; RV64ZVE32F-NEXT: .LBB104_6: # %else8 12947; RV64ZVE32F-NEXT: andi a3, a2, 16 12948; RV64ZVE32F-NEXT: bnez a3, .LBB104_16 12949; RV64ZVE32F-NEXT: .LBB104_7: # %else11 12950; RV64ZVE32F-NEXT: andi a3, a2, 32 12951; RV64ZVE32F-NEXT: beqz a3, .LBB104_9 12952; RV64ZVE32F-NEXT: .LBB104_8: # %cond.load13 12953; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 12954; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12955; RV64ZVE32F-NEXT: slli a3, a3, 3 12956; RV64ZVE32F-NEXT: add a3, a1, a3 12957; RV64ZVE32F-NEXT: fld fa5, 0(a3) 12958; RV64ZVE32F-NEXT: .LBB104_9: # %else14 12959; RV64ZVE32F-NEXT: andi a3, a2, 64 12960; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 12961; RV64ZVE32F-NEXT: beqz a3, .LBB104_11 12962; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 12963; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12964; RV64ZVE32F-NEXT: slli a3, a3, 3 12965; RV64ZVE32F-NEXT: add a3, a1, a3 12966; RV64ZVE32F-NEXT: fld fa6, 0(a3) 12967; RV64ZVE32F-NEXT: .LBB104_11: # %else17 12968; RV64ZVE32F-NEXT: andi a2, a2, -128 12969; RV64ZVE32F-NEXT: beqz a2, .LBB104_13 12970; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 12971; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12972; RV64ZVE32F-NEXT: vmv.x.s a2, v8 12973; RV64ZVE32F-NEXT: slli a2, a2, 3 12974; RV64ZVE32F-NEXT: add a1, a1, a2 12975; RV64ZVE32F-NEXT: fld fa7, 0(a1) 12976; RV64ZVE32F-NEXT: .LBB104_13: # %else20 12977; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 12978; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 12979; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 12980; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 12981; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 12982; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 12983; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 12984; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 12985; RV64ZVE32F-NEXT: ret 12986; RV64ZVE32F-NEXT: .LBB104_14: # %cond.load4 12987; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12988; RV64ZVE32F-NEXT: slli a3, a3, 3 12989; RV64ZVE32F-NEXT: add a3, a1, a3 12990; RV64ZVE32F-NEXT: fld fa2, 0(a3) 12991; RV64ZVE32F-NEXT: andi a3, a2, 8 12992; RV64ZVE32F-NEXT: beqz a3, .LBB104_6 12993; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load7 12994; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 12995; RV64ZVE32F-NEXT: vmv.x.s a3, v8 12996; RV64ZVE32F-NEXT: slli a3, a3, 3 12997; RV64ZVE32F-NEXT: add a3, a1, a3 12998; RV64ZVE32F-NEXT: fld fa3, 0(a3) 12999; RV64ZVE32F-NEXT: andi a3, a2, 16 13000; RV64ZVE32F-NEXT: beqz a3, .LBB104_7 13001; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load10 13002; RV64ZVE32F-NEXT: vmv.x.s a3, v10 13003; RV64ZVE32F-NEXT: slli a3, a3, 3 13004; RV64ZVE32F-NEXT: add a3, a1, a3 13005; RV64ZVE32F-NEXT: fld fa4, 0(a3) 13006; RV64ZVE32F-NEXT: andi a3, a2, 32 13007; RV64ZVE32F-NEXT: bnez a3, .LBB104_8 13008; RV64ZVE32F-NEXT: j .LBB104_9 13009 %eidxs = sext <8 x i32> %idxs to <8 x i64> 13010 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 13011 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 13012 ret <8 x double> 
%v 13013} 13014 13015define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) { 13016; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8f64: 13017; RV32V: # %bb.0: 13018; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 13019; RV32V-NEXT: vsll.vi v8, v8, 3 13020; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 13021; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 13022; RV32V-NEXT: vmv.v.v v8, v12 13023; RV32V-NEXT: ret 13024; 13025; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8f64: 13026; RV64V: # %bb.0: 13027; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 13028; RV64V-NEXT: vzext.vf2 v16, v8 13029; RV64V-NEXT: vsll.vi v8, v16, 3 13030; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 13031; RV64V-NEXT: vmv.v.v v8, v12 13032; RV64V-NEXT: ret 13033; 13034; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64: 13035; RV32ZVE32F: # %bb.0: 13036; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 13037; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 13038; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 13039; RV32ZVE32F-NEXT: vmv.x.s a2, v0 13040; RV32ZVE32F-NEXT: andi a3, a2, 1 13041; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 13042; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 13043; RV32ZVE32F-NEXT: bnez a3, .LBB105_10 13044; RV32ZVE32F-NEXT: # %bb.1: # %else 13045; RV32ZVE32F-NEXT: andi a1, a2, 2 13046; RV32ZVE32F-NEXT: bnez a1, .LBB105_11 13047; RV32ZVE32F-NEXT: .LBB105_2: # %else2 13048; RV32ZVE32F-NEXT: andi a1, a2, 4 13049; RV32ZVE32F-NEXT: bnez a1, .LBB105_12 13050; RV32ZVE32F-NEXT: .LBB105_3: # %else5 13051; RV32ZVE32F-NEXT: andi a1, a2, 8 13052; RV32ZVE32F-NEXT: bnez a1, .LBB105_13 13053; RV32ZVE32F-NEXT: .LBB105_4: # %else8 13054; RV32ZVE32F-NEXT: andi a1, a2, 16 13055; RV32ZVE32F-NEXT: bnez a1, .LBB105_14 13056; RV32ZVE32F-NEXT: .LBB105_5: # %else11 13057; RV32ZVE32F-NEXT: andi a1, a2, 32 13058; RV32ZVE32F-NEXT: bnez a1, .LBB105_15 13059; RV32ZVE32F-NEXT: .LBB105_6: # %else14 13060; RV32ZVE32F-NEXT: andi a1, a2, 64 13061; RV32ZVE32F-NEXT: bnez a1, .LBB105_16 13062; RV32ZVE32F-NEXT: .LBB105_7: # %else17 13063; RV32ZVE32F-NEXT: andi a1, a2, -128 13064; RV32ZVE32F-NEXT: beqz a1, .LBB105_9 13065; RV32ZVE32F-NEXT: .LBB105_8: # %cond.load19 13066; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13067; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 13068; RV32ZVE32F-NEXT: vmv.x.s a1, v8 13069; RV32ZVE32F-NEXT: fld fa7, 0(a1) 13070; RV32ZVE32F-NEXT: .LBB105_9: # %else20 13071; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 13072; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 13073; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 13074; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 13075; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 13076; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 13077; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 13078; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 13079; RV32ZVE32F-NEXT: ret 13080; RV32ZVE32F-NEXT: .LBB105_10: # %cond.load 13081; RV32ZVE32F-NEXT: vmv.x.s a1, v8 13082; RV32ZVE32F-NEXT: fld fa0, 0(a1) 13083; RV32ZVE32F-NEXT: andi a1, a2, 2 13084; RV32ZVE32F-NEXT: beqz a1, .LBB105_2 13085; RV32ZVE32F-NEXT: .LBB105_11: # %cond.load1 13086; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13087; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 13088; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13089; RV32ZVE32F-NEXT: fld fa1, 0(a1) 13090; RV32ZVE32F-NEXT: andi a1, a2, 4 13091; RV32ZVE32F-NEXT: beqz a1, .LBB105_3 13092; RV32ZVE32F-NEXT: .LBB105_12: # %cond.load4 13093; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13094; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 13095; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13096; RV32ZVE32F-NEXT: fld fa2, 0(a1) 
13097; RV32ZVE32F-NEXT: andi a1, a2, 8 13098; RV32ZVE32F-NEXT: beqz a1, .LBB105_4 13099; RV32ZVE32F-NEXT: .LBB105_13: # %cond.load7 13100; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13101; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 13102; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13103; RV32ZVE32F-NEXT: fld fa3, 0(a1) 13104; RV32ZVE32F-NEXT: andi a1, a2, 16 13105; RV32ZVE32F-NEXT: beqz a1, .LBB105_5 13106; RV32ZVE32F-NEXT: .LBB105_14: # %cond.load10 13107; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13108; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 13109; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13110; RV32ZVE32F-NEXT: fld fa4, 0(a1) 13111; RV32ZVE32F-NEXT: andi a1, a2, 32 13112; RV32ZVE32F-NEXT: beqz a1, .LBB105_6 13113; RV32ZVE32F-NEXT: .LBB105_15: # %cond.load13 13114; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13115; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 13116; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13117; RV32ZVE32F-NEXT: fld fa5, 0(a1) 13118; RV32ZVE32F-NEXT: andi a1, a2, 64 13119; RV32ZVE32F-NEXT: beqz a1, .LBB105_7 13120; RV32ZVE32F-NEXT: .LBB105_16: # %cond.load16 13121; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13122; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 13123; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13124; RV32ZVE32F-NEXT: fld fa6, 0(a1) 13125; RV32ZVE32F-NEXT: andi a1, a2, -128 13126; RV32ZVE32F-NEXT: bnez a1, .LBB105_8 13127; RV32ZVE32F-NEXT: j .LBB105_9 13128; 13129; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64: 13130; RV64ZVE32F: # %bb.0: 13131; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 13132; RV64ZVE32F-NEXT: vmv.x.s a2, v0 13133; RV64ZVE32F-NEXT: andi a3, a2, 1 13134; RV64ZVE32F-NEXT: beqz a3, .LBB105_2 13135; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 13136; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma 13137; RV64ZVE32F-NEXT: vmv.x.s a3, v8 13138; RV64ZVE32F-NEXT: slli a3, a3, 32 13139; RV64ZVE32F-NEXT: srli a3, a3, 29 13140; RV64ZVE32F-NEXT: add a3, a1, a3 13141; RV64ZVE32F-NEXT: fld fa0, 0(a3) 13142; RV64ZVE32F-NEXT: .LBB105_2: # %else 13143; RV64ZVE32F-NEXT: andi a3, a2, 2 13144; RV64ZVE32F-NEXT: beqz a3, .LBB105_4 13145; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 13146; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13147; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 13148; RV64ZVE32F-NEXT: vmv.x.s a3, v10 13149; RV64ZVE32F-NEXT: slli a3, a3, 32 13150; RV64ZVE32F-NEXT: srli a3, a3, 29 13151; RV64ZVE32F-NEXT: add a3, a1, a3 13152; RV64ZVE32F-NEXT: fld fa1, 0(a3) 13153; RV64ZVE32F-NEXT: .LBB105_4: # %else2 13154; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma 13155; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 13156; RV64ZVE32F-NEXT: andi a3, a2, 4 13157; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma 13158; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 13159; RV64ZVE32F-NEXT: bnez a3, .LBB105_14 13160; RV64ZVE32F-NEXT: # %bb.5: # %else5 13161; RV64ZVE32F-NEXT: andi a3, a2, 8 13162; RV64ZVE32F-NEXT: bnez a3, .LBB105_15 13163; RV64ZVE32F-NEXT: .LBB105_6: # %else8 13164; RV64ZVE32F-NEXT: andi a3, a2, 16 13165; RV64ZVE32F-NEXT: bnez a3, .LBB105_16 13166; RV64ZVE32F-NEXT: .LBB105_7: # %else11 13167; RV64ZVE32F-NEXT: andi a3, a2, 32 13168; RV64ZVE32F-NEXT: beqz a3, .LBB105_9 13169; RV64ZVE32F-NEXT: .LBB105_8: # %cond.load13 13170; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 13171; RV64ZVE32F-NEXT: vmv.x.s a3, v8 13172; RV64ZVE32F-NEXT: slli a3, a3, 32 13173; RV64ZVE32F-NEXT: srli a3, a3, 29 13174; RV64ZVE32F-NEXT: add a3, a1, a3 13175; RV64ZVE32F-NEXT: fld fa5, 0(a3) 13176; RV64ZVE32F-NEXT: .LBB105_9: # %else14 13177; RV64ZVE32F-NEXT: andi a3, a2, 64 
13178; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 13179; RV64ZVE32F-NEXT: beqz a3, .LBB105_11 13180; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 13181; RV64ZVE32F-NEXT: vmv.x.s a3, v8 13182; RV64ZVE32F-NEXT: slli a3, a3, 32 13183; RV64ZVE32F-NEXT: srli a3, a3, 29 13184; RV64ZVE32F-NEXT: add a3, a1, a3 13185; RV64ZVE32F-NEXT: fld fa6, 0(a3) 13186; RV64ZVE32F-NEXT: .LBB105_11: # %else17 13187; RV64ZVE32F-NEXT: andi a2, a2, -128 13188; RV64ZVE32F-NEXT: beqz a2, .LBB105_13 13189; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 13190; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 13191; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13192; RV64ZVE32F-NEXT: slli a2, a2, 32 13193; RV64ZVE32F-NEXT: srli a2, a2, 29 13194; RV64ZVE32F-NEXT: add a1, a1, a2 13195; RV64ZVE32F-NEXT: fld fa7, 0(a1) 13196; RV64ZVE32F-NEXT: .LBB105_13: # %else20 13197; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 13198; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 13199; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 13200; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 13201; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 13202; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 13203; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 13204; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 13205; RV64ZVE32F-NEXT: ret 13206; RV64ZVE32F-NEXT: .LBB105_14: # %cond.load4 13207; RV64ZVE32F-NEXT: vmv.x.s a3, v8 13208; RV64ZVE32F-NEXT: slli a3, a3, 32 13209; RV64ZVE32F-NEXT: srli a3, a3, 29 13210; RV64ZVE32F-NEXT: add a3, a1, a3 13211; RV64ZVE32F-NEXT: fld fa2, 0(a3) 13212; RV64ZVE32F-NEXT: andi a3, a2, 8 13213; RV64ZVE32F-NEXT: beqz a3, .LBB105_6 13214; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load7 13215; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 13216; RV64ZVE32F-NEXT: vmv.x.s a3, v8 13217; RV64ZVE32F-NEXT: slli a3, a3, 32 13218; RV64ZVE32F-NEXT: srli a3, a3, 29 13219; RV64ZVE32F-NEXT: add a3, a1, a3 13220; RV64ZVE32F-NEXT: fld fa3, 0(a3) 13221; RV64ZVE32F-NEXT: andi a3, a2, 16 13222; RV64ZVE32F-NEXT: beqz a3, .LBB105_7 13223; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load10 13224; RV64ZVE32F-NEXT: vmv.x.s a3, v10 13225; RV64ZVE32F-NEXT: slli a3, a3, 32 13226; RV64ZVE32F-NEXT: srli a3, a3, 29 13227; RV64ZVE32F-NEXT: add a3, a1, a3 13228; RV64ZVE32F-NEXT: fld fa4, 0(a3) 13229; RV64ZVE32F-NEXT: andi a3, a2, 32 13230; RV64ZVE32F-NEXT: bnez a3, .LBB105_8 13231; RV64ZVE32F-NEXT: j .LBB105_9 13232 %eidxs = zext <8 x i32> %idxs to <8 x i64> 13233 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs 13234 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 13235 ret <8 x double> %v 13236} 13237 13238define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) { 13239; RV32V-LABEL: mgather_baseidx_v8f64: 13240; RV32V: # %bb.0: 13241; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma 13242; RV32V-NEXT: vnsrl.wi v16, v8, 0 13243; RV32V-NEXT: vsll.vi v8, v16, 3 13244; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu 13245; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t 13246; RV32V-NEXT: vmv.v.v v8, v12 13247; RV32V-NEXT: ret 13248; 13249; RV64V-LABEL: mgather_baseidx_v8f64: 13250; RV64V: # %bb.0: 13251; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu 13252; RV64V-NEXT: vsll.vi v8, v8, 3 13253; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t 13254; RV64V-NEXT: vmv.v.v v8, v12 13255; RV64V-NEXT: ret 13256; 13257; RV32ZVE32F-LABEL: mgather_baseidx_v8f64: 13258; RV32ZVE32F: # %bb.0: 13259; RV32ZVE32F-NEXT: lw a3, 32(a2) 13260; RV32ZVE32F-NEXT: lw a4, 40(a2) 13261; RV32ZVE32F-NEXT: lw a5, 48(a2) 13262; RV32ZVE32F-NEXT: lw a6, 56(a2) 13263; RV32ZVE32F-NEXT: lw a7, 0(a2) 13264; RV32ZVE32F-NEXT: 
lw t0, 8(a2) 13265; RV32ZVE32F-NEXT: lw t1, 16(a2) 13266; RV32ZVE32F-NEXT: lw t2, 24(a2) 13267; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma 13268; RV32ZVE32F-NEXT: vmv.v.x v8, a7 13269; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 13270; RV32ZVE32F-NEXT: vmv.x.s a2, v0 13271; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma 13272; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 13273; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 13274; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2 13275; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 13276; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 13277; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 13278; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 13279; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 13280; RV32ZVE32F-NEXT: andi a3, a2, 1 13281; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 13282; RV32ZVE32F-NEXT: bnez a3, .LBB106_10 13283; RV32ZVE32F-NEXT: # %bb.1: # %else 13284; RV32ZVE32F-NEXT: andi a1, a2, 2 13285; RV32ZVE32F-NEXT: bnez a1, .LBB106_11 13286; RV32ZVE32F-NEXT: .LBB106_2: # %else2 13287; RV32ZVE32F-NEXT: andi a1, a2, 4 13288; RV32ZVE32F-NEXT: bnez a1, .LBB106_12 13289; RV32ZVE32F-NEXT: .LBB106_3: # %else5 13290; RV32ZVE32F-NEXT: andi a1, a2, 8 13291; RV32ZVE32F-NEXT: bnez a1, .LBB106_13 13292; RV32ZVE32F-NEXT: .LBB106_4: # %else8 13293; RV32ZVE32F-NEXT: andi a1, a2, 16 13294; RV32ZVE32F-NEXT: bnez a1, .LBB106_14 13295; RV32ZVE32F-NEXT: .LBB106_5: # %else11 13296; RV32ZVE32F-NEXT: andi a1, a2, 32 13297; RV32ZVE32F-NEXT: bnez a1, .LBB106_15 13298; RV32ZVE32F-NEXT: .LBB106_6: # %else14 13299; RV32ZVE32F-NEXT: andi a1, a2, 64 13300; RV32ZVE32F-NEXT: bnez a1, .LBB106_16 13301; RV32ZVE32F-NEXT: .LBB106_7: # %else17 13302; RV32ZVE32F-NEXT: andi a1, a2, -128 13303; RV32ZVE32F-NEXT: beqz a1, .LBB106_9 13304; RV32ZVE32F-NEXT: .LBB106_8: # %cond.load19 13305; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13306; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 13307; RV32ZVE32F-NEXT: vmv.x.s a1, v8 13308; RV32ZVE32F-NEXT: fld fa7, 0(a1) 13309; RV32ZVE32F-NEXT: .LBB106_9: # %else20 13310; RV32ZVE32F-NEXT: fsd fa0, 0(a0) 13311; RV32ZVE32F-NEXT: fsd fa1, 8(a0) 13312; RV32ZVE32F-NEXT: fsd fa2, 16(a0) 13313; RV32ZVE32F-NEXT: fsd fa3, 24(a0) 13314; RV32ZVE32F-NEXT: fsd fa4, 32(a0) 13315; RV32ZVE32F-NEXT: fsd fa5, 40(a0) 13316; RV32ZVE32F-NEXT: fsd fa6, 48(a0) 13317; RV32ZVE32F-NEXT: fsd fa7, 56(a0) 13318; RV32ZVE32F-NEXT: ret 13319; RV32ZVE32F-NEXT: .LBB106_10: # %cond.load 13320; RV32ZVE32F-NEXT: vmv.x.s a1, v8 13321; RV32ZVE32F-NEXT: fld fa0, 0(a1) 13322; RV32ZVE32F-NEXT: andi a1, a2, 2 13323; RV32ZVE32F-NEXT: beqz a1, .LBB106_2 13324; RV32ZVE32F-NEXT: .LBB106_11: # %cond.load1 13325; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13326; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 13327; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13328; RV32ZVE32F-NEXT: fld fa1, 0(a1) 13329; RV32ZVE32F-NEXT: andi a1, a2, 4 13330; RV32ZVE32F-NEXT: beqz a1, .LBB106_3 13331; RV32ZVE32F-NEXT: .LBB106_12: # %cond.load4 13332; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13333; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 13334; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13335; RV32ZVE32F-NEXT: fld fa2, 0(a1) 13336; RV32ZVE32F-NEXT: andi a1, a2, 8 13337; RV32ZVE32F-NEXT: beqz a1, .LBB106_4 13338; RV32ZVE32F-NEXT: .LBB106_13: # %cond.load7 13339; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13340; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 13341; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13342; RV32ZVE32F-NEXT: fld fa3, 0(a1) 13343; RV32ZVE32F-NEXT: andi a1, a2, 16 13344; RV32ZVE32F-NEXT: beqz a1, .LBB106_5 13345; 
RV32ZVE32F-NEXT: .LBB106_14: # %cond.load10 13346; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13347; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 13348; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13349; RV32ZVE32F-NEXT: fld fa4, 0(a1) 13350; RV32ZVE32F-NEXT: andi a1, a2, 32 13351; RV32ZVE32F-NEXT: beqz a1, .LBB106_6 13352; RV32ZVE32F-NEXT: .LBB106_15: # %cond.load13 13353; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13354; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 13355; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13356; RV32ZVE32F-NEXT: fld fa5, 0(a1) 13357; RV32ZVE32F-NEXT: andi a1, a2, 64 13358; RV32ZVE32F-NEXT: beqz a1, .LBB106_7 13359; RV32ZVE32F-NEXT: .LBB106_16: # %cond.load16 13360; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 13361; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 13362; RV32ZVE32F-NEXT: vmv.x.s a1, v10 13363; RV32ZVE32F-NEXT: fld fa6, 0(a1) 13364; RV32ZVE32F-NEXT: andi a1, a2, -128 13365; RV32ZVE32F-NEXT: bnez a1, .LBB106_8 13366; RV32ZVE32F-NEXT: j .LBB106_9 13367; 13368; RV64ZVE32F-LABEL: mgather_baseidx_v8f64: 13369; RV64ZVE32F: # %bb.0: 13370; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 13371; RV64ZVE32F-NEXT: vmv.x.s a3, v0 13372; RV64ZVE32F-NEXT: andi a4, a3, 1 13373; RV64ZVE32F-NEXT: bnez a4, .LBB106_10 13374; RV64ZVE32F-NEXT: # %bb.1: # %else 13375; RV64ZVE32F-NEXT: andi a4, a3, 2 13376; RV64ZVE32F-NEXT: bnez a4, .LBB106_11 13377; RV64ZVE32F-NEXT: .LBB106_2: # %else2 13378; RV64ZVE32F-NEXT: andi a4, a3, 4 13379; RV64ZVE32F-NEXT: bnez a4, .LBB106_12 13380; RV64ZVE32F-NEXT: .LBB106_3: # %else5 13381; RV64ZVE32F-NEXT: andi a4, a3, 8 13382; RV64ZVE32F-NEXT: bnez a4, .LBB106_13 13383; RV64ZVE32F-NEXT: .LBB106_4: # %else8 13384; RV64ZVE32F-NEXT: andi a4, a3, 16 13385; RV64ZVE32F-NEXT: bnez a4, .LBB106_14 13386; RV64ZVE32F-NEXT: .LBB106_5: # %else11 13387; RV64ZVE32F-NEXT: andi a4, a3, 32 13388; RV64ZVE32F-NEXT: bnez a4, .LBB106_15 13389; RV64ZVE32F-NEXT: .LBB106_6: # %else14 13390; RV64ZVE32F-NEXT: andi a4, a3, 64 13391; RV64ZVE32F-NEXT: bnez a4, .LBB106_16 13392; RV64ZVE32F-NEXT: .LBB106_7: # %else17 13393; RV64ZVE32F-NEXT: andi a3, a3, -128 13394; RV64ZVE32F-NEXT: beqz a3, .LBB106_9 13395; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load19 13396; RV64ZVE32F-NEXT: ld a2, 56(a2) 13397; RV64ZVE32F-NEXT: slli a2, a2, 3 13398; RV64ZVE32F-NEXT: add a1, a1, a2 13399; RV64ZVE32F-NEXT: fld fa7, 0(a1) 13400; RV64ZVE32F-NEXT: .LBB106_9: # %else20 13401; RV64ZVE32F-NEXT: fsd fa0, 0(a0) 13402; RV64ZVE32F-NEXT: fsd fa1, 8(a0) 13403; RV64ZVE32F-NEXT: fsd fa2, 16(a0) 13404; RV64ZVE32F-NEXT: fsd fa3, 24(a0) 13405; RV64ZVE32F-NEXT: fsd fa4, 32(a0) 13406; RV64ZVE32F-NEXT: fsd fa5, 40(a0) 13407; RV64ZVE32F-NEXT: fsd fa6, 48(a0) 13408; RV64ZVE32F-NEXT: fsd fa7, 56(a0) 13409; RV64ZVE32F-NEXT: ret 13410; RV64ZVE32F-NEXT: .LBB106_10: # %cond.load 13411; RV64ZVE32F-NEXT: ld a4, 0(a2) 13412; RV64ZVE32F-NEXT: slli a4, a4, 3 13413; RV64ZVE32F-NEXT: add a4, a1, a4 13414; RV64ZVE32F-NEXT: fld fa0, 0(a4) 13415; RV64ZVE32F-NEXT: andi a4, a3, 2 13416; RV64ZVE32F-NEXT: beqz a4, .LBB106_2 13417; RV64ZVE32F-NEXT: .LBB106_11: # %cond.load1 13418; RV64ZVE32F-NEXT: ld a4, 8(a2) 13419; RV64ZVE32F-NEXT: slli a4, a4, 3 13420; RV64ZVE32F-NEXT: add a4, a1, a4 13421; RV64ZVE32F-NEXT: fld fa1, 0(a4) 13422; RV64ZVE32F-NEXT: andi a4, a3, 4 13423; RV64ZVE32F-NEXT: beqz a4, .LBB106_3 13424; RV64ZVE32F-NEXT: .LBB106_12: # %cond.load4 13425; RV64ZVE32F-NEXT: ld a4, 16(a2) 13426; RV64ZVE32F-NEXT: slli a4, a4, 3 13427; RV64ZVE32F-NEXT: add a4, a1, a4 13428; RV64ZVE32F-NEXT: fld fa2, 0(a4) 13429; RV64ZVE32F-NEXT: andi a4, 
a3, 8 13430; RV64ZVE32F-NEXT: beqz a4, .LBB106_4 13431; RV64ZVE32F-NEXT: .LBB106_13: # %cond.load7 13432; RV64ZVE32F-NEXT: ld a4, 24(a2) 13433; RV64ZVE32F-NEXT: slli a4, a4, 3 13434; RV64ZVE32F-NEXT: add a4, a1, a4 13435; RV64ZVE32F-NEXT: fld fa3, 0(a4) 13436; RV64ZVE32F-NEXT: andi a4, a3, 16 13437; RV64ZVE32F-NEXT: beqz a4, .LBB106_5 13438; RV64ZVE32F-NEXT: .LBB106_14: # %cond.load10 13439; RV64ZVE32F-NEXT: ld a4, 32(a2) 13440; RV64ZVE32F-NEXT: slli a4, a4, 3 13441; RV64ZVE32F-NEXT: add a4, a1, a4 13442; RV64ZVE32F-NEXT: fld fa4, 0(a4) 13443; RV64ZVE32F-NEXT: andi a4, a3, 32 13444; RV64ZVE32F-NEXT: beqz a4, .LBB106_6 13445; RV64ZVE32F-NEXT: .LBB106_15: # %cond.load13 13446; RV64ZVE32F-NEXT: ld a4, 40(a2) 13447; RV64ZVE32F-NEXT: slli a4, a4, 3 13448; RV64ZVE32F-NEXT: add a4, a1, a4 13449; RV64ZVE32F-NEXT: fld fa5, 0(a4) 13450; RV64ZVE32F-NEXT: andi a4, a3, 64 13451; RV64ZVE32F-NEXT: beqz a4, .LBB106_7 13452; RV64ZVE32F-NEXT: .LBB106_16: # %cond.load16 13453; RV64ZVE32F-NEXT: ld a4, 48(a2) 13454; RV64ZVE32F-NEXT: slli a4, a4, 3 13455; RV64ZVE32F-NEXT: add a4, a1, a4 13456; RV64ZVE32F-NEXT: fld fa6, 0(a4) 13457; RV64ZVE32F-NEXT: andi a3, a3, -128 13458; RV64ZVE32F-NEXT: bnez a3, .LBB106_8 13459; RV64ZVE32F-NEXT: j .LBB106_9 13460 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs 13461 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru) 13462 ret <8 x double> %v 13463} 13464 13465declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) 13466 13467define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m, <16 x i8> %passthru) { 13468; RV32-LABEL: mgather_baseidx_v16i8: 13469; RV32: # %bb.0: 13470; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 13471; RV32-NEXT: vsext.vf4 v12, v8 13472; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu 13473; RV32-NEXT: vluxei32.v v9, (a0), v12, v0.t 13474; RV32-NEXT: vmv.v.v v8, v9 13475; RV32-NEXT: ret 13476; 13477; RV64V-LABEL: mgather_baseidx_v16i8: 13478; RV64V: # %bb.0: 13479; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma 13480; RV64V-NEXT: vsext.vf8 v16, v8 13481; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu 13482; RV64V-NEXT: vluxei64.v v9, (a0), v16, v0.t 13483; RV64V-NEXT: vmv.v.v v8, v9 13484; RV64V-NEXT: ret 13485; 13486; RV64ZVE32F-LABEL: mgather_baseidx_v16i8: 13487; RV64ZVE32F: # %bb.0: 13488; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma 13489; RV64ZVE32F-NEXT: vmv.x.s a1, v0 13490; RV64ZVE32F-NEXT: andi a2, a1, 1 13491; RV64ZVE32F-NEXT: beqz a2, .LBB107_2 13492; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 13493; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma 13494; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13495; RV64ZVE32F-NEXT: add a2, a0, a2 13496; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13497; RV64ZVE32F-NEXT: vmv.s.x v9, a2 13498; RV64ZVE32F-NEXT: .LBB107_2: # %else 13499; RV64ZVE32F-NEXT: andi a2, a1, 2 13500; RV64ZVE32F-NEXT: beqz a2, .LBB107_4 13501; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 13502; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13503; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 13504; RV64ZVE32F-NEXT: vmv.x.s a2, v10 13505; RV64ZVE32F-NEXT: add a2, a0, a2 13506; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13507; RV64ZVE32F-NEXT: vmv.s.x v10, a2 13508; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma 13509; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 13510; RV64ZVE32F-NEXT: .LBB107_4: # %else2 13511; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 13512; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 13513; 
RV64ZVE32F-NEXT: andi a2, a1, 4 13514; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13515; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 13516; RV64ZVE32F-NEXT: bnez a2, .LBB107_25 13517; RV64ZVE32F-NEXT: # %bb.5: # %else5 13518; RV64ZVE32F-NEXT: andi a2, a1, 8 13519; RV64ZVE32F-NEXT: bnez a2, .LBB107_26 13520; RV64ZVE32F-NEXT: .LBB107_6: # %else8 13521; RV64ZVE32F-NEXT: andi a2, a1, 16 13522; RV64ZVE32F-NEXT: beqz a2, .LBB107_8 13523; RV64ZVE32F-NEXT: .LBB107_7: # %cond.load10 13524; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma 13525; RV64ZVE32F-NEXT: vmv.x.s a2, v10 13526; RV64ZVE32F-NEXT: add a2, a0, a2 13527; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13528; RV64ZVE32F-NEXT: vmv.s.x v11, a2 13529; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4 13530; RV64ZVE32F-NEXT: .LBB107_8: # %else11 13531; RV64ZVE32F-NEXT: andi a2, a1, 32 13532; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma 13533; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 13534; RV64ZVE32F-NEXT: beqz a2, .LBB107_10 13535; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 13536; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13537; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 13538; RV64ZVE32F-NEXT: vmv.x.s a2, v11 13539; RV64ZVE32F-NEXT: add a2, a0, a2 13540; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13541; RV64ZVE32F-NEXT: vmv.s.x v11, a2 13542; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma 13543; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5 13544; RV64ZVE32F-NEXT: .LBB107_10: # %else14 13545; RV64ZVE32F-NEXT: andi a2, a1, 64 13546; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13547; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 13548; RV64ZVE32F-NEXT: bnez a2, .LBB107_27 13549; RV64ZVE32F-NEXT: # %bb.11: # %else17 13550; RV64ZVE32F-NEXT: andi a2, a1, 128 13551; RV64ZVE32F-NEXT: bnez a2, .LBB107_28 13552; RV64ZVE32F-NEXT: .LBB107_12: # %else20 13553; RV64ZVE32F-NEXT: andi a2, a1, 256 13554; RV64ZVE32F-NEXT: bnez a2, .LBB107_29 13555; RV64ZVE32F-NEXT: .LBB107_13: # %else23 13556; RV64ZVE32F-NEXT: andi a2, a1, 512 13557; RV64ZVE32F-NEXT: beqz a2, .LBB107_15 13558; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load25 13559; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13560; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 13561; RV64ZVE32F-NEXT: vmv.x.s a2, v10 13562; RV64ZVE32F-NEXT: add a2, a0, a2 13563; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13564; RV64ZVE32F-NEXT: vmv.s.x v10, a2 13565; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma 13566; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9 13567; RV64ZVE32F-NEXT: .LBB107_15: # %else26 13568; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 13569; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 13570; RV64ZVE32F-NEXT: andi a2, a1, 1024 13571; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13572; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 13573; RV64ZVE32F-NEXT: bnez a2, .LBB107_30 13574; RV64ZVE32F-NEXT: # %bb.16: # %else29 13575; RV64ZVE32F-NEXT: slli a2, a1, 52 13576; RV64ZVE32F-NEXT: bltz a2, .LBB107_31 13577; RV64ZVE32F-NEXT: .LBB107_17: # %else32 13578; RV64ZVE32F-NEXT: slli a2, a1, 51 13579; RV64ZVE32F-NEXT: bltz a2, .LBB107_32 13580; RV64ZVE32F-NEXT: .LBB107_18: # %else35 13581; RV64ZVE32F-NEXT: slli a2, a1, 50 13582; RV64ZVE32F-NEXT: bgez a2, .LBB107_20 13583; RV64ZVE32F-NEXT: .LBB107_19: # %cond.load37 13584; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13585; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 13586; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13587; RV64ZVE32F-NEXT: add a2, a0, a2 13588; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13589; RV64ZVE32F-NEXT: vmv.s.x v8, a2 13590; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma 
13591; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13 13592; RV64ZVE32F-NEXT: .LBB107_20: # %else38 13593; RV64ZVE32F-NEXT: slli a2, a1, 49 13594; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13595; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 13596; RV64ZVE32F-NEXT: bgez a2, .LBB107_22 13597; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40 13598; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13599; RV64ZVE32F-NEXT: add a2, a0, a2 13600; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13601; RV64ZVE32F-NEXT: vmv.s.x v10, a2 13602; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma 13603; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14 13604; RV64ZVE32F-NEXT: .LBB107_22: # %else41 13605; RV64ZVE32F-NEXT: lui a2, 1048568 13606; RV64ZVE32F-NEXT: and a1, a1, a2 13607; RV64ZVE32F-NEXT: beqz a1, .LBB107_24 13608; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43 13609; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13610; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 13611; RV64ZVE32F-NEXT: vmv.x.s a1, v8 13612; RV64ZVE32F-NEXT: add a0, a0, a1 13613; RV64ZVE32F-NEXT: lbu a0, 0(a0) 13614; RV64ZVE32F-NEXT: vmv.s.x v8, a0 13615; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma 13616; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15 13617; RV64ZVE32F-NEXT: .LBB107_24: # %else44 13618; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 13619; RV64ZVE32F-NEXT: vmv1r.v v8, v9 13620; RV64ZVE32F-NEXT: ret 13621; RV64ZVE32F-NEXT: .LBB107_25: # %cond.load4 13622; RV64ZVE32F-NEXT: vmv.x.s a2, v11 13623; RV64ZVE32F-NEXT: add a2, a0, a2 13624; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13625; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13626; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma 13627; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2 13628; RV64ZVE32F-NEXT: andi a2, a1, 8 13629; RV64ZVE32F-NEXT: beqz a2, .LBB107_6 13630; RV64ZVE32F-NEXT: .LBB107_26: # %cond.load7 13631; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13632; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 13633; RV64ZVE32F-NEXT: vmv.x.s a2, v11 13634; RV64ZVE32F-NEXT: add a2, a0, a2 13635; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13636; RV64ZVE32F-NEXT: vmv.s.x v11, a2 13637; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma 13638; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3 13639; RV64ZVE32F-NEXT: andi a2, a1, 16 13640; RV64ZVE32F-NEXT: bnez a2, .LBB107_7 13641; RV64ZVE32F-NEXT: j .LBB107_8 13642; RV64ZVE32F-NEXT: .LBB107_27: # %cond.load16 13643; RV64ZVE32F-NEXT: vmv.x.s a2, v10 13644; RV64ZVE32F-NEXT: add a2, a0, a2 13645; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13646; RV64ZVE32F-NEXT: vmv.s.x v11, a2 13647; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma 13648; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6 13649; RV64ZVE32F-NEXT: andi a2, a1, 128 13650; RV64ZVE32F-NEXT: beqz a2, .LBB107_12 13651; RV64ZVE32F-NEXT: .LBB107_28: # %cond.load19 13652; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13653; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 13654; RV64ZVE32F-NEXT: vmv.x.s a2, v10 13655; RV64ZVE32F-NEXT: add a2, a0, a2 13656; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13657; RV64ZVE32F-NEXT: vmv.s.x v10, a2 13658; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma 13659; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7 13660; RV64ZVE32F-NEXT: andi a2, a1, 256 13661; RV64ZVE32F-NEXT: beqz a2, .LBB107_13 13662; RV64ZVE32F-NEXT: .LBB107_29: # %cond.load22 13663; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma 13664; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13665; RV64ZVE32F-NEXT: add a2, a0, a2 13666; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13667; RV64ZVE32F-NEXT: vmv.s.x v10, a2 13668; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8 13669; RV64ZVE32F-NEXT: andi a2, a1, 512 13670; 
RV64ZVE32F-NEXT: bnez a2, .LBB107_14 13671; RV64ZVE32F-NEXT: j .LBB107_15 13672; RV64ZVE32F-NEXT: .LBB107_30: # %cond.load28 13673; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13674; RV64ZVE32F-NEXT: add a2, a0, a2 13675; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13676; RV64ZVE32F-NEXT: vmv.s.x v11, a2 13677; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma 13678; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10 13679; RV64ZVE32F-NEXT: slli a2, a1, 52 13680; RV64ZVE32F-NEXT: bgez a2, .LBB107_17 13681; RV64ZVE32F-NEXT: .LBB107_31: # %cond.load31 13682; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13683; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 13684; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13685; RV64ZVE32F-NEXT: add a2, a0, a2 13686; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13687; RV64ZVE32F-NEXT: vmv.s.x v8, a2 13688; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma 13689; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11 13690; RV64ZVE32F-NEXT: slli a2, a1, 51 13691; RV64ZVE32F-NEXT: bgez a2, .LBB107_18 13692; RV64ZVE32F-NEXT: .LBB107_32: # %cond.load34 13693; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma 13694; RV64ZVE32F-NEXT: vmv.x.s a2, v10 13695; RV64ZVE32F-NEXT: add a2, a0, a2 13696; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13697; RV64ZVE32F-NEXT: vmv.s.x v8, a2 13698; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12 13699; RV64ZVE32F-NEXT: slli a2, a1, 50 13700; RV64ZVE32F-NEXT: bltz a2, .LBB107_19 13701; RV64ZVE32F-NEXT: j .LBB107_20 13702 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs 13703 %v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru) 13704 ret <16 x i8> %v 13705} 13706 13707declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>) 13708 13709define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, <32 x i8> %passthru) { 13710; RV32-LABEL: mgather_baseidx_v32i8: 13711; RV32: # %bb.0: 13712; RV32-NEXT: li a1, 32 13713; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma 13714; RV32-NEXT: vsext.vf4 v16, v8 13715; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu 13716; RV32-NEXT: vluxei32.v v10, (a0), v16, v0.t 13717; RV32-NEXT: vmv.v.v v8, v10 13718; RV32-NEXT: ret 13719; 13720; RV64V-LABEL: mgather_baseidx_v32i8: 13721; RV64V: # %bb.0: 13722; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma 13723; RV64V-NEXT: vsext.vf8 v16, v8 13724; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma 13725; RV64V-NEXT: vslidedown.vi v12, v10, 16 13726; RV64V-NEXT: vslidedown.vi v14, v8, 16 13727; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13728; RV64V-NEXT: vslidedown.vi v8, v0, 2 13729; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu 13730; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t 13731; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma 13732; RV64V-NEXT: vsext.vf8 v16, v14 13733; RV64V-NEXT: vmv1r.v v0, v8 13734; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu 13735; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t 13736; RV64V-NEXT: li a0, 32 13737; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma 13738; RV64V-NEXT: vslideup.vi v10, v12, 16 13739; RV64V-NEXT: vmv.v.v v8, v10 13740; RV64V-NEXT: ret 13741; 13742; RV64ZVE32F-LABEL: mgather_baseidx_v32i8: 13743; RV64ZVE32F: # %bb.0: 13744; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 13745; RV64ZVE32F-NEXT: vmv.x.s a1, v0 13746; RV64ZVE32F-NEXT: andi a2, a1, 1 13747; RV64ZVE32F-NEXT: beqz a2, .LBB108_2 13748; RV64ZVE32F-NEXT: # %bb.1: # %cond.load 13749; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma 13750; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13751; RV64ZVE32F-NEXT: add a2, a0, a2 13752; 
RV64ZVE32F-NEXT: lbu a2, 0(a2) 13753; RV64ZVE32F-NEXT: vmv.s.x v10, a2 13754; RV64ZVE32F-NEXT: .LBB108_2: # %else 13755; RV64ZVE32F-NEXT: andi a2, a1, 2 13756; RV64ZVE32F-NEXT: beqz a2, .LBB108_4 13757; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 13758; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma 13759; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 13760; RV64ZVE32F-NEXT: vmv.x.s a2, v12 13761; RV64ZVE32F-NEXT: add a2, a0, a2 13762; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13763; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13764; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma 13765; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 13766; RV64ZVE32F-NEXT: .LBB108_4: # %else2 13767; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 13768; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4 13769; RV64ZVE32F-NEXT: andi a2, a1, 4 13770; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13771; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 13772; RV64ZVE32F-NEXT: bnez a2, .LBB108_49 13773; RV64ZVE32F-NEXT: # %bb.5: # %else5 13774; RV64ZVE32F-NEXT: andi a2, a1, 8 13775; RV64ZVE32F-NEXT: bnez a2, .LBB108_50 13776; RV64ZVE32F-NEXT: .LBB108_6: # %else8 13777; RV64ZVE32F-NEXT: andi a2, a1, 16 13778; RV64ZVE32F-NEXT: beqz a2, .LBB108_8 13779; RV64ZVE32F-NEXT: .LBB108_7: # %cond.load10 13780; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma 13781; RV64ZVE32F-NEXT: vmv.x.s a2, v13 13782; RV64ZVE32F-NEXT: add a2, a0, a2 13783; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13784; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13785; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 13786; RV64ZVE32F-NEXT: .LBB108_8: # %else11 13787; RV64ZVE32F-NEXT: andi a2, a1, 32 13788; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma 13789; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8 13790; RV64ZVE32F-NEXT: beqz a2, .LBB108_10 13791; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 13792; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13793; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1 13794; RV64ZVE32F-NEXT: vmv.x.s a2, v14 13795; RV64ZVE32F-NEXT: add a2, a0, a2 13796; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13797; RV64ZVE32F-NEXT: vmv.s.x v14, a2 13798; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma 13799; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 13800; RV64ZVE32F-NEXT: .LBB108_10: # %else14 13801; RV64ZVE32F-NEXT: andi a2, a1, 64 13802; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13803; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 13804; RV64ZVE32F-NEXT: bnez a2, .LBB108_51 13805; RV64ZVE32F-NEXT: # %bb.11: # %else17 13806; RV64ZVE32F-NEXT: andi a2, a1, 128 13807; RV64ZVE32F-NEXT: bnez a2, .LBB108_52 13808; RV64ZVE32F-NEXT: .LBB108_12: # %else20 13809; RV64ZVE32F-NEXT: andi a2, a1, 256 13810; RV64ZVE32F-NEXT: bnez a2, .LBB108_53 13811; RV64ZVE32F-NEXT: .LBB108_13: # %else23 13812; RV64ZVE32F-NEXT: andi a2, a1, 512 13813; RV64ZVE32F-NEXT: beqz a2, .LBB108_15 13814; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load25 13815; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13816; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 13817; RV64ZVE32F-NEXT: vmv.x.s a2, v13 13818; RV64ZVE32F-NEXT: add a2, a0, a2 13819; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13820; RV64ZVE32F-NEXT: vmv.s.x v13, a2 13821; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma 13822; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9 13823; RV64ZVE32F-NEXT: .LBB108_15: # %else26 13824; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 13825; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 13826; RV64ZVE32F-NEXT: andi a2, a1, 1024 13827; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13828; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 13829; RV64ZVE32F-NEXT: 
beqz a2, .LBB108_17 13830; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28 13831; RV64ZVE32F-NEXT: vmv.x.s a2, v12 13832; RV64ZVE32F-NEXT: add a2, a0, a2 13833; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13834; RV64ZVE32F-NEXT: vmv.s.x v14, a2 13835; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma 13836; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10 13837; RV64ZVE32F-NEXT: .LBB108_17: # %else29 13838; RV64ZVE32F-NEXT: slli a2, a1, 52 13839; RV64ZVE32F-NEXT: bgez a2, .LBB108_19 13840; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31 13841; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13842; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 13843; RV64ZVE32F-NEXT: vmv.x.s a2, v12 13844; RV64ZVE32F-NEXT: add a2, a0, a2 13845; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13846; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13847; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma 13848; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11 13849; RV64ZVE32F-NEXT: .LBB108_19: # %else32 13850; RV64ZVE32F-NEXT: slli a2, a1, 51 13851; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma 13852; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16 13853; RV64ZVE32F-NEXT: bgez a2, .LBB108_21 13854; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 13855; RV64ZVE32F-NEXT: vmv.x.s a2, v13 13856; RV64ZVE32F-NEXT: add a2, a0, a2 13857; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13858; RV64ZVE32F-NEXT: vmv.s.x v9, a2 13859; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma 13860; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12 13861; RV64ZVE32F-NEXT: .LBB108_21: # %else35 13862; RV64ZVE32F-NEXT: slli a2, a1, 50 13863; RV64ZVE32F-NEXT: bgez a2, .LBB108_23 13864; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37 13865; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13866; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 1 13867; RV64ZVE32F-NEXT: vmv.x.s a2, v9 13868; RV64ZVE32F-NEXT: add a2, a0, a2 13869; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13870; RV64ZVE32F-NEXT: vmv.s.x v9, a2 13871; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma 13872; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13 13873; RV64ZVE32F-NEXT: .LBB108_23: # %else38 13874; RV64ZVE32F-NEXT: slli a2, a1, 49 13875; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13876; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2 13877; RV64ZVE32F-NEXT: bltz a2, .LBB108_54 13878; RV64ZVE32F-NEXT: # %bb.24: # %else41 13879; RV64ZVE32F-NEXT: slli a2, a1, 48 13880; RV64ZVE32F-NEXT: bltz a2, .LBB108_55 13881; RV64ZVE32F-NEXT: .LBB108_25: # %else44 13882; RV64ZVE32F-NEXT: slli a2, a1, 47 13883; RV64ZVE32F-NEXT: bltz a2, .LBB108_56 13884; RV64ZVE32F-NEXT: .LBB108_26: # %else47 13885; RV64ZVE32F-NEXT: slli a2, a1, 46 13886; RV64ZVE32F-NEXT: bgez a2, .LBB108_28 13887; RV64ZVE32F-NEXT: .LBB108_27: # %cond.load49 13888; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13889; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 13890; RV64ZVE32F-NEXT: vmv.x.s a2, v9 13891; RV64ZVE32F-NEXT: add a2, a0, a2 13892; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13893; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13894; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma 13895; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17 13896; RV64ZVE32F-NEXT: .LBB108_28: # %else50 13897; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 13898; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 13899; RV64ZVE32F-NEXT: slli a2, a1, 45 13900; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13901; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 13902; RV64ZVE32F-NEXT: bltz a2, .LBB108_57 13903; RV64ZVE32F-NEXT: # %bb.29: # %else53 13904; RV64ZVE32F-NEXT: slli a2, a1, 44 13905; RV64ZVE32F-NEXT: bltz a2, .LBB108_58 13906; RV64ZVE32F-NEXT: .LBB108_30: # %else56 13907; 
RV64ZVE32F-NEXT: slli a2, a1, 43 13908; RV64ZVE32F-NEXT: bgez a2, .LBB108_32 13909; RV64ZVE32F-NEXT: .LBB108_31: # %cond.load58 13910; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma 13911; RV64ZVE32F-NEXT: vmv.x.s a2, v9 13912; RV64ZVE32F-NEXT: add a2, a0, a2 13913; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13914; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13915; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20 13916; RV64ZVE32F-NEXT: .LBB108_32: # %else59 13917; RV64ZVE32F-NEXT: slli a2, a1, 42 13918; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma 13919; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 13920; RV64ZVE32F-NEXT: bgez a2, .LBB108_34 13921; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61 13922; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13923; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1 13924; RV64ZVE32F-NEXT: vmv.x.s a2, v12 13925; RV64ZVE32F-NEXT: add a2, a0, a2 13926; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13927; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13928; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma 13929; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21 13930; RV64ZVE32F-NEXT: .LBB108_34: # %else62 13931; RV64ZVE32F-NEXT: slli a2, a1, 41 13932; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13933; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 13934; RV64ZVE32F-NEXT: bltz a2, .LBB108_59 13935; RV64ZVE32F-NEXT: # %bb.35: # %else65 13936; RV64ZVE32F-NEXT: slli a2, a1, 40 13937; RV64ZVE32F-NEXT: bltz a2, .LBB108_60 13938; RV64ZVE32F-NEXT: .LBB108_36: # %else68 13939; RV64ZVE32F-NEXT: slli a2, a1, 39 13940; RV64ZVE32F-NEXT: bltz a2, .LBB108_61 13941; RV64ZVE32F-NEXT: .LBB108_37: # %else71 13942; RV64ZVE32F-NEXT: slli a2, a1, 38 13943; RV64ZVE32F-NEXT: bgez a2, .LBB108_39 13944; RV64ZVE32F-NEXT: .LBB108_38: # %cond.load73 13945; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13946; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 13947; RV64ZVE32F-NEXT: vmv.x.s a2, v9 13948; RV64ZVE32F-NEXT: add a2, a0, a2 13949; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13950; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13951; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma 13952; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25 13953; RV64ZVE32F-NEXT: .LBB108_39: # %else74 13954; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma 13955; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 13956; RV64ZVE32F-NEXT: slli a2, a1, 37 13957; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13958; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 13959; RV64ZVE32F-NEXT: bltz a2, .LBB108_62 13960; RV64ZVE32F-NEXT: # %bb.40: # %else77 13961; RV64ZVE32F-NEXT: slli a2, a1, 36 13962; RV64ZVE32F-NEXT: bltz a2, .LBB108_63 13963; RV64ZVE32F-NEXT: .LBB108_41: # %else80 13964; RV64ZVE32F-NEXT: slli a2, a1, 35 13965; RV64ZVE32F-NEXT: bltz a2, .LBB108_64 13966; RV64ZVE32F-NEXT: .LBB108_42: # %else83 13967; RV64ZVE32F-NEXT: slli a2, a1, 34 13968; RV64ZVE32F-NEXT: bgez a2, .LBB108_44 13969; RV64ZVE32F-NEXT: .LBB108_43: # %cond.load85 13970; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13971; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 13972; RV64ZVE32F-NEXT: vmv.x.s a2, v8 13973; RV64ZVE32F-NEXT: add a2, a0, a2 13974; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13975; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13976; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma 13977; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29 13978; RV64ZVE32F-NEXT: .LBB108_44: # %else86 13979; RV64ZVE32F-NEXT: slli a2, a1, 33 13980; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 13981; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 13982; RV64ZVE32F-NEXT: bgez a2, .LBB108_46 13983; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88 13984; RV64ZVE32F-NEXT: 
vmv.x.s a2, v8 13985; RV64ZVE32F-NEXT: add a2, a0, a2 13986; RV64ZVE32F-NEXT: lbu a2, 0(a2) 13987; RV64ZVE32F-NEXT: vmv.s.x v12, a2 13988; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma 13989; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30 13990; RV64ZVE32F-NEXT: .LBB108_46: # %else89 13991; RV64ZVE32F-NEXT: lui a2, 524288 13992; RV64ZVE32F-NEXT: and a1, a1, a2 13993; RV64ZVE32F-NEXT: beqz a1, .LBB108_48 13994; RV64ZVE32F-NEXT: # %bb.47: # %cond.load91 13995; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 13996; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 13997; RV64ZVE32F-NEXT: vmv.x.s a1, v8 13998; RV64ZVE32F-NEXT: add a0, a0, a1 13999; RV64ZVE32F-NEXT: lbu a0, 0(a0) 14000; RV64ZVE32F-NEXT: li a1, 32 14001; RV64ZVE32F-NEXT: vmv.s.x v8, a0 14002; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma 14003; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31 14004; RV64ZVE32F-NEXT: .LBB108_48: # %else92 14005; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma 14006; RV64ZVE32F-NEXT: vmv2r.v v8, v10 14007; RV64ZVE32F-NEXT: ret 14008; RV64ZVE32F-NEXT: .LBB108_49: # %cond.load4 14009; RV64ZVE32F-NEXT: vmv.x.s a2, v12 14010; RV64ZVE32F-NEXT: add a2, a0, a2 14011; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14012; RV64ZVE32F-NEXT: vmv.s.x v14, a2 14013; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma 14014; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 14015; RV64ZVE32F-NEXT: andi a2, a1, 8 14016; RV64ZVE32F-NEXT: beqz a2, .LBB108_6 14017; RV64ZVE32F-NEXT: .LBB108_50: # %cond.load7 14018; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 14019; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 14020; RV64ZVE32F-NEXT: vmv.x.s a2, v12 14021; RV64ZVE32F-NEXT: add a2, a0, a2 14022; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14023; RV64ZVE32F-NEXT: vmv.s.x v12, a2 14024; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma 14025; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 14026; RV64ZVE32F-NEXT: andi a2, a1, 16 14027; RV64ZVE32F-NEXT: bnez a2, .LBB108_7 14028; RV64ZVE32F-NEXT: j .LBB108_8 14029; RV64ZVE32F-NEXT: .LBB108_51: # %cond.load16 14030; RV64ZVE32F-NEXT: vmv.x.s a2, v13 14031; RV64ZVE32F-NEXT: add a2, a0, a2 14032; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14033; RV64ZVE32F-NEXT: vmv.s.x v14, a2 14034; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma 14035; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6 14036; RV64ZVE32F-NEXT: andi a2, a1, 128 14037; RV64ZVE32F-NEXT: beqz a2, .LBB108_12 14038; RV64ZVE32F-NEXT: .LBB108_52: # %cond.load19 14039; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 14040; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 14041; RV64ZVE32F-NEXT: vmv.x.s a2, v13 14042; RV64ZVE32F-NEXT: add a2, a0, a2 14043; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14044; RV64ZVE32F-NEXT: vmv.s.x v13, a2 14045; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma 14046; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7 14047; RV64ZVE32F-NEXT: andi a2, a1, 256 14048; RV64ZVE32F-NEXT: beqz a2, .LBB108_13 14049; RV64ZVE32F-NEXT: .LBB108_53: # %cond.load22 14050; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma 14051; RV64ZVE32F-NEXT: vmv.x.s a2, v12 14052; RV64ZVE32F-NEXT: add a2, a0, a2 14053; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14054; RV64ZVE32F-NEXT: vmv.s.x v13, a2 14055; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8 14056; RV64ZVE32F-NEXT: andi a2, a1, 512 14057; RV64ZVE32F-NEXT: bnez a2, .LBB108_14 14058; RV64ZVE32F-NEXT: j .LBB108_15 14059; RV64ZVE32F-NEXT: .LBB108_54: # %cond.load40 14060; RV64ZVE32F-NEXT: vmv.x.s a2, v9 14061; RV64ZVE32F-NEXT: add a2, a0, a2 14062; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14063; RV64ZVE32F-NEXT: vmv.s.x v12, a2 14064; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, 
m1, tu, ma 14065; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14 14066; RV64ZVE32F-NEXT: slli a2, a1, 48 14067; RV64ZVE32F-NEXT: bgez a2, .LBB108_25 14068; RV64ZVE32F-NEXT: .LBB108_55: # %cond.load43 14069; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 14070; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 14071; RV64ZVE32F-NEXT: vmv.x.s a2, v9 14072; RV64ZVE32F-NEXT: add a2, a0, a2 14073; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14074; RV64ZVE32F-NEXT: vmv.s.x v9, a2 14075; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma 14076; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15 14077; RV64ZVE32F-NEXT: slli a2, a1, 47 14078; RV64ZVE32F-NEXT: bgez a2, .LBB108_26 14079; RV64ZVE32F-NEXT: .LBB108_56: # %cond.load46 14080; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma 14081; RV64ZVE32F-NEXT: vmv.x.s a2, v8 14082; RV64ZVE32F-NEXT: add a2, a0, a2 14083; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14084; RV64ZVE32F-NEXT: vmv.s.x v12, a2 14085; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16 14086; RV64ZVE32F-NEXT: slli a2, a1, 46 14087; RV64ZVE32F-NEXT: bltz a2, .LBB108_27 14088; RV64ZVE32F-NEXT: j .LBB108_28 14089; RV64ZVE32F-NEXT: .LBB108_57: # %cond.load52 14090; RV64ZVE32F-NEXT: vmv.x.s a2, v12 14091; RV64ZVE32F-NEXT: add a2, a0, a2 14092; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14093; RV64ZVE32F-NEXT: vmv.s.x v14, a2 14094; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma 14095; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18 14096; RV64ZVE32F-NEXT: slli a2, a1, 44 14097; RV64ZVE32F-NEXT: bgez a2, .LBB108_30 14098; RV64ZVE32F-NEXT: .LBB108_58: # %cond.load55 14099; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 14100; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 14101; RV64ZVE32F-NEXT: vmv.x.s a2, v12 14102; RV64ZVE32F-NEXT: add a2, a0, a2 14103; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14104; RV64ZVE32F-NEXT: vmv.s.x v12, a2 14105; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma 14106; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19 14107; RV64ZVE32F-NEXT: slli a2, a1, 43 14108; RV64ZVE32F-NEXT: bltz a2, .LBB108_31 14109; RV64ZVE32F-NEXT: j .LBB108_32 14110; RV64ZVE32F-NEXT: .LBB108_59: # %cond.load64 14111; RV64ZVE32F-NEXT: vmv.x.s a2, v9 14112; RV64ZVE32F-NEXT: add a2, a0, a2 14113; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14114; RV64ZVE32F-NEXT: vmv.s.x v12, a2 14115; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma 14116; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22 14117; RV64ZVE32F-NEXT: slli a2, a1, 40 14118; RV64ZVE32F-NEXT: bgez a2, .LBB108_36 14119; RV64ZVE32F-NEXT: .LBB108_60: # %cond.load67 14120; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma 14121; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 14122; RV64ZVE32F-NEXT: vmv.x.s a2, v9 14123; RV64ZVE32F-NEXT: add a2, a0, a2 14124; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14125; RV64ZVE32F-NEXT: vmv.s.x v12, a2 14126; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma 14127; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23 14128; RV64ZVE32F-NEXT: slli a2, a1, 39 14129; RV64ZVE32F-NEXT: bgez a2, .LBB108_37 14130; RV64ZVE32F-NEXT: .LBB108_61: # %cond.load70 14131; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma 14132; RV64ZVE32F-NEXT: vmv.x.s a2, v8 14133; RV64ZVE32F-NEXT: add a2, a0, a2 14134; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14135; RV64ZVE32F-NEXT: vmv.s.x v12, a2 14136; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24 14137; RV64ZVE32F-NEXT: slli a2, a1, 38 14138; RV64ZVE32F-NEXT: bltz a2, .LBB108_38 14139; RV64ZVE32F-NEXT: j .LBB108_39 14140; RV64ZVE32F-NEXT: .LBB108_62: # %cond.load76 14141; RV64ZVE32F-NEXT: vmv.x.s a2, v8 14142; RV64ZVE32F-NEXT: add a2, a0, a2 14143; RV64ZVE32F-NEXT: lbu a2, 0(a2) 14144; RV64ZVE32F-NEXT: 
vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bgez a2, .LBB108_41
; RV64ZVE32F-NEXT: .LBB108_63: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB108_42
; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load82
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB108_43
; RV64ZVE32F-NEXT: j .LBB108_44
  %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
  %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
  ret <32 x i8> %v
}


define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
; CHECK-LABEL: mgather_broadcast_load_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a0, 0(a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %v
}

; Same as previous, but use an explicit splat instead of splat-via-gep
define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
; CHECK-LABEL: mgather_broadcast_load_unmasked2:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a0, 0(a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: ret
  %ptrhead = insertelement <4 x ptr> poison, ptr %base, i32 0
  %ptrs = shufflevector <4 x ptr> %ptrhead, <4 x ptr> poison, <4 x i32> zeroinitializer
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %v
}

define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
; CHECK-LABEL: mgather_broadcast_load_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), zero, v0.t
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
  ret <4 x i32> %v
}

define <4 x i32> @mgather_unit_stride_load(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %v
}

define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %v
}

define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load_narrow_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 1, i8 2, i8 3>
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %v
}

define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load_wide_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> <i128 0, i128 1, i128 2, i128 3>
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %v
}

; This looks like a strided load (at i8), but isn't at index type.
define <4 x i32> @mgather_narrow_edge_case(ptr %base) {
; RV32-LABEL: mgather_narrow_edge_case:
; RV32: # %bb.0:
; RV32-NEXT: li a1, -512
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.i v0, 5
; RV32-NEXT: vmv.v.x v8, a1
; RV32-NEXT: vmerge.vim v8, v8, 0, v0
; RV32-NEXT: vluxei32.v v8, (a0), v8
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_narrow_edge_case:
; RV64V: # %bb.0:
; RV64V-NEXT: li a1, -512
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64V-NEXT: vmv.v.i v0, 5
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vmerge.vim v10, v8, 0, v0
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_narrow_edge_case:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lw a1, -512(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.v.i v0, 5
; RV64ZVE32F-NEXT: vmv.v.x v8, a1
; RV64ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 128, i8 0, i8 128>
  %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %v
}

define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV32-LABEL: mgather_strided_unaligned:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vadd.vx v8, v8, a0
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 1
; RV32-NEXT: vslidedown.vi v11, v8, 2
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vslidedown.vi v10, v8, 3
; RV32-NEXT: vmv.x.s a2, v11
; RV32-NEXT: vmv.x.s a3, v10
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmv.x.s a4, v10
; RV32-NEXT: vslidedown.vi v10, v8, 5
; RV32-NEXT: vmv.x.s a5, v10
; RV32-NEXT: vslidedown.vi v10, v8, 6
; RV32-NEXT: vslidedown.vi v8, v8, 7
; RV32-NEXT: lbu a6, 0(a0)
; RV32-NEXT: lbu a0, 1(a0)
; RV32-NEXT: vmv.x.s a7, v10
; RV32-NEXT: vmv.x.s t0, v8
; RV32-NEXT: lbu t1, 0(a1)
; RV32-NEXT: lbu a1, 1(a1)
; RV32-NEXT: lbu t2, 0(a2)
; RV32-NEXT: lbu a2, 1(a2)
; RV32-NEXT: slli a0, a0, 8
; RV32-NEXT: or a0, a0, a6
; RV32-NEXT: lbu a6, 0(a3)
; RV32-NEXT: lbu a3, 1(a3)
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: or a1, a1, t1
; RV32-NEXT: lbu t1, 0(a4)
; RV32-NEXT: lbu a4, 1(a4)
; RV32-NEXT: slli a2, a2, 8
; RV32-NEXT: or a2, a2, t2
; RV32-NEXT: lbu t2, 0(a5)
; RV32-NEXT: lbu a5, 1(a5)
; RV32-NEXT: slli a3, a3, 8
; RV32-NEXT: or a3, a3, a6
; RV32-NEXT: lbu a6, 0(a7)
; RV32-NEXT: lbu a7, 1(a7)
; RV32-NEXT: slli a4, a4, 8
; RV32-NEXT: or a4, a4, t1
; RV32-NEXT: lbu t1, 0(t0)
; RV32-NEXT: lbu t0, 1(t0)
; RV32-NEXT: slli a5, a5, 8
; RV32-NEXT: or a5, a5, t2
; RV32-NEXT: slli a7, a7, 8
; RV32-NEXT: or a6, a7, a6
; RV32-NEXT: slli t0, t0, 8
; RV32-NEXT: or a7, t0, t1
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vmv.v.x v9, a4
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v9, v9, a5
; RV32-NEXT: vslide1down.vx v10, v8, a3
; RV32-NEXT: vslide1down.vx v8, v9, a6
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v8, v8, a7
; RV32-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_strided_unaligned:
; RV64V: # %bb.0:
; RV64V-NEXT: addi sp, sp, -128
; RV64V-NEXT: .cfi_def_cfa_offset 128
; RV64V-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64V-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64V-NEXT: .cfi_offset ra, -8
; RV64V-NEXT: .cfi_offset s0, -16
; RV64V-NEXT: addi s0, sp, 128
; RV64V-NEXT: .cfi_def_cfa s0, 0
; RV64V-NEXT: andi sp, sp, -64
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: mv a1, sp
; RV64V-NEXT: vsll.vi v8, v8, 2
; RV64V-NEXT: vadd.vx v8, v8, a0
; RV64V-NEXT: vmv.x.s a0, v8
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64V-NEXT: vslidedown.vi v12, v8, 1
; RV64V-NEXT: vmv.x.s a2, v12
; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64V-NEXT: vslidedown.vi v12, v8, 2
; RV64V-NEXT: vmv.x.s a3, v12
; RV64V-NEXT: vslidedown.vi v12, v8, 3
; RV64V-NEXT: lbu a4, 0(a0)
; RV64V-NEXT: lbu a0, 1(a0)
; RV64V-NEXT: vmv.x.s a5, v12
; RV64V-NEXT: lbu a6, 0(a2)
; RV64V-NEXT: lbu a2, 1(a2)
; RV64V-NEXT: lbu a7, 0(a3)
; RV64V-NEXT: lbu a3, 1(a3)
; RV64V-NEXT: lbu t0, 0(a5)
; RV64V-NEXT: lbu a5, 1(a5)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vse64.v v8, (a1)
; RV64V-NEXT: slli a0, a0, 8
; RV64V-NEXT: or a0, a0, a4
; RV64V-NEXT: slli a2, a2, 8
RV64V-NEXT: slli a3, a3, 8 14404; RV64V-NEXT: or a1, a2, a6 14405; RV64V-NEXT: or a2, a3, a7 14406; RV64V-NEXT: ld a3, 32(sp) 14407; RV64V-NEXT: ld a4, 40(sp) 14408; RV64V-NEXT: ld a6, 48(sp) 14409; RV64V-NEXT: ld a7, 56(sp) 14410; RV64V-NEXT: slli a5, a5, 8 14411; RV64V-NEXT: or a5, a5, t0 14412; RV64V-NEXT: lbu t0, 0(a3) 14413; RV64V-NEXT: lbu a3, 1(a3) 14414; RV64V-NEXT: vmv.v.x v8, a0 14415; RV64V-NEXT: lbu a0, 0(a4) 14416; RV64V-NEXT: lbu a4, 1(a4) 14417; RV64V-NEXT: vslide1down.vx v8, v8, a1 14418; RV64V-NEXT: lbu a1, 0(a6) 14419; RV64V-NEXT: lbu a6, 1(a6) 14420; RV64V-NEXT: vslide1down.vx v8, v8, a2 14421; RV64V-NEXT: lbu a2, 0(a7) 14422; RV64V-NEXT: lbu a7, 1(a7) 14423; RV64V-NEXT: vslide1down.vx v9, v8, a5 14424; RV64V-NEXT: slli a3, a3, 8 14425; RV64V-NEXT: slli a4, a4, 8 14426; RV64V-NEXT: slli a6, a6, 8 14427; RV64V-NEXT: slli a7, a7, 8 14428; RV64V-NEXT: or a3, a3, t0 14429; RV64V-NEXT: or a0, a4, a0 14430; RV64V-NEXT: or a1, a6, a1 14431; RV64V-NEXT: or a2, a7, a2 14432; RV64V-NEXT: vmv.v.x v8, a3 14433; RV64V-NEXT: vslide1down.vx v8, v8, a0 14434; RV64V-NEXT: vslide1down.vx v8, v8, a1 14435; RV64V-NEXT: vmv.v.i v0, 15 14436; RV64V-NEXT: vslide1down.vx v8, v8, a2 14437; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t 14438; RV64V-NEXT: addi sp, s0, -128 14439; RV64V-NEXT: .cfi_def_cfa sp, 128 14440; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload 14441; RV64V-NEXT: ld s0, 112(sp) # 8-byte Folded Reload 14442; RV64V-NEXT: .cfi_restore ra 14443; RV64V-NEXT: .cfi_restore s0 14444; RV64V-NEXT: addi sp, sp, 128 14445; RV64V-NEXT: .cfi_def_cfa_offset 0 14446; RV64V-NEXT: ret 14447; 14448; RV64ZVE32F-LABEL: mgather_strided_unaligned: 14449; RV64ZVE32F: # %bb.0: 14450; RV64ZVE32F-NEXT: lbu a1, 0(a0) 14451; RV64ZVE32F-NEXT: lbu a2, 1(a0) 14452; RV64ZVE32F-NEXT: lbu a3, 4(a0) 14453; RV64ZVE32F-NEXT: lbu a4, 5(a0) 14454; RV64ZVE32F-NEXT: lbu a5, 8(a0) 14455; RV64ZVE32F-NEXT: lbu a6, 9(a0) 14456; RV64ZVE32F-NEXT: lbu a7, 12(a0) 14457; RV64ZVE32F-NEXT: lbu t0, 13(a0) 14458; RV64ZVE32F-NEXT: slli a2, a2, 8 14459; RV64ZVE32F-NEXT: slli a4, a4, 8 14460; RV64ZVE32F-NEXT: or a1, a2, a1 14461; RV64ZVE32F-NEXT: or a3, a4, a3 14462; RV64ZVE32F-NEXT: lbu a2, 16(a0) 14463; RV64ZVE32F-NEXT: lbu a4, 17(a0) 14464; RV64ZVE32F-NEXT: lbu t1, 20(a0) 14465; RV64ZVE32F-NEXT: lbu t2, 21(a0) 14466; RV64ZVE32F-NEXT: slli a6, a6, 8 14467; RV64ZVE32F-NEXT: or a5, a6, a5 14468; RV64ZVE32F-NEXT: slli t0, t0, 8 14469; RV64ZVE32F-NEXT: slli a4, a4, 8 14470; RV64ZVE32F-NEXT: slli t2, t2, 8 14471; RV64ZVE32F-NEXT: or a6, t0, a7 14472; RV64ZVE32F-NEXT: or a2, a4, a2 14473; RV64ZVE32F-NEXT: lbu a4, 24(a0) 14474; RV64ZVE32F-NEXT: lbu a7, 25(a0) 14475; RV64ZVE32F-NEXT: or t0, t2, t1 14476; RV64ZVE32F-NEXT: lbu t1, 28(a0) 14477; RV64ZVE32F-NEXT: lbu a0, 29(a0) 14478; RV64ZVE32F-NEXT: slli a7, a7, 8 14479; RV64ZVE32F-NEXT: or a4, a7, a4 14480; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14481; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14482; RV64ZVE32F-NEXT: slli a0, a0, 8 14483; RV64ZVE32F-NEXT: or a0, a0, t1 14484; RV64ZVE32F-NEXT: vmv.v.x v8, a1 14485; RV64ZVE32F-NEXT: vmv.v.x v9, a2 14486; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14487; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, t0 14488; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 14489; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 14490; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a6 14491; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14492; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14493; RV64ZVE32F-NEXT: ret 14494 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 
0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 14495 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> splat (i1 true), <8 x i16> poison) 14496 ret <8 x i16> %v 14497} 14498 14499; TODO: Recognize as strided load with SEW=32 14500define <8 x i16> @mgather_strided_2xSEW(ptr %base) { 14501; RV32-LABEL: mgather_strided_2xSEW: 14502; RV32: # %bb.0: 14503; RV32-NEXT: li a1, 8 14504; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14505; RV32-NEXT: vlse32.v v8, (a0), a1 14506; RV32-NEXT: ret 14507; 14508; RV64V-LABEL: mgather_strided_2xSEW: 14509; RV64V: # %bb.0: 14510; RV64V-NEXT: li a1, 8 14511; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14512; RV64V-NEXT: vlse32.v v8, (a0), a1 14513; RV64V-NEXT: ret 14514; 14515; RV64ZVE32F-LABEL: mgather_strided_2xSEW: 14516; RV64ZVE32F: # %bb.0: 14517; RV64ZVE32F-NEXT: lh a1, 0(a0) 14518; RV64ZVE32F-NEXT: lh a2, 2(a0) 14519; RV64ZVE32F-NEXT: lh a3, 8(a0) 14520; RV64ZVE32F-NEXT: lh a4, 10(a0) 14521; RV64ZVE32F-NEXT: lh a5, 16(a0) 14522; RV64ZVE32F-NEXT: lh a6, 18(a0) 14523; RV64ZVE32F-NEXT: lh a7, 24(a0) 14524; RV64ZVE32F-NEXT: lh a0, 26(a0) 14525; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14526; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14527; RV64ZVE32F-NEXT: vmv.v.x v8, a1 14528; RV64ZVE32F-NEXT: vmv.v.x v9, a5 14529; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 14530; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6 14531; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14532; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 14533; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 14534; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14535; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14536; RV64ZVE32F-NEXT: ret 14537 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13> 14538 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14539 ret <8 x i16> %v 14540} 14541 14542; TODO: Recognize as strided load with SEW=32 14543define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { 14544; RV32-LABEL: mgather_strided_2xSEW_with_offset: 14545; RV32: # %bb.0: 14546; RV32-NEXT: addi a0, a0, 4 14547; RV32-NEXT: li a1, 8 14548; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14549; RV32-NEXT: vlse32.v v8, (a0), a1 14550; RV32-NEXT: ret 14551; 14552; RV64V-LABEL: mgather_strided_2xSEW_with_offset: 14553; RV64V: # %bb.0: 14554; RV64V-NEXT: addi a0, a0, 4 14555; RV64V-NEXT: li a1, 8 14556; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14557; RV64V-NEXT: vlse32.v v8, (a0), a1 14558; RV64V-NEXT: ret 14559; 14560; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset: 14561; RV64ZVE32F: # %bb.0: 14562; RV64ZVE32F-NEXT: lh a1, 4(a0) 14563; RV64ZVE32F-NEXT: lh a2, 6(a0) 14564; RV64ZVE32F-NEXT: lh a3, 12(a0) 14565; RV64ZVE32F-NEXT: lh a4, 14(a0) 14566; RV64ZVE32F-NEXT: lh a5, 20(a0) 14567; RV64ZVE32F-NEXT: lh a6, 22(a0) 14568; RV64ZVE32F-NEXT: lh a7, 28(a0) 14569; RV64ZVE32F-NEXT: lh a0, 30(a0) 14570; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14571; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14572; RV64ZVE32F-NEXT: vmv.v.x v8, a1 14573; RV64ZVE32F-NEXT: vmv.v.x v9, a5 14574; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 14575; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6 14576; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14577; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 14578; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 14579; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14580; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14581; 
RV64ZVE32F-NEXT: ret 14582 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15> 14583 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14584 ret <8 x i16> %v 14585} 14586 14587; TODO: Recognize as strided load with SEW=32 14588define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { 14589; RV32-LABEL: mgather_reverse_unit_strided_2xSEW: 14590; RV32: # %bb.0: 14591; RV32-NEXT: addi a0, a0, 28 14592; RV32-NEXT: li a1, -4 14593; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14594; RV32-NEXT: vlse32.v v8, (a0), a1 14595; RV32-NEXT: ret 14596; 14597; RV64V-LABEL: mgather_reverse_unit_strided_2xSEW: 14598; RV64V: # %bb.0: 14599; RV64V-NEXT: addi a0, a0, 28 14600; RV64V-NEXT: li a1, -4 14601; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14602; RV64V-NEXT: vlse32.v v8, (a0), a1 14603; RV64V-NEXT: ret 14604; 14605; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW: 14606; RV64ZVE32F: # %bb.0: 14607; RV64ZVE32F-NEXT: lh a1, 24(a0) 14608; RV64ZVE32F-NEXT: lh a2, 26(a0) 14609; RV64ZVE32F-NEXT: lh a3, 28(a0) 14610; RV64ZVE32F-NEXT: lh a4, 30(a0) 14611; RV64ZVE32F-NEXT: lh a5, 16(a0) 14612; RV64ZVE32F-NEXT: lh a6, 18(a0) 14613; RV64ZVE32F-NEXT: lh a7, 20(a0) 14614; RV64ZVE32F-NEXT: lh a0, 22(a0) 14615; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14616; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14617; RV64ZVE32F-NEXT: vmv.v.x v8, a3 14618; RV64ZVE32F-NEXT: vmv.v.x v9, a7 14619; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 14620; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0 14621; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 14622; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 14623; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 14624; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 14625; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14626; RV64ZVE32F-NEXT: ret 14627 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9> 14628 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14629 ret <8 x i16> %v 14630} 14631 14632; TODO: Recognize as strided load with SEW=32 14633define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { 14634; RV32-LABEL: mgather_reverse_strided_2xSEW: 14635; RV32: # %bb.0: 14636; RV32-NEXT: addi a0, a0, 28 14637; RV32-NEXT: li a1, -8 14638; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14639; RV32-NEXT: vlse32.v v8, (a0), a1 14640; RV32-NEXT: ret 14641; 14642; RV64V-LABEL: mgather_reverse_strided_2xSEW: 14643; RV64V: # %bb.0: 14644; RV64V-NEXT: addi a0, a0, 28 14645; RV64V-NEXT: li a1, -8 14646; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14647; RV64V-NEXT: vlse32.v v8, (a0), a1 14648; RV64V-NEXT: ret 14649; 14650; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW: 14651; RV64ZVE32F: # %bb.0: 14652; RV64ZVE32F-NEXT: lh a1, 20(a0) 14653; RV64ZVE32F-NEXT: lh a2, 22(a0) 14654; RV64ZVE32F-NEXT: lh a3, 28(a0) 14655; RV64ZVE32F-NEXT: lh a4, 30(a0) 14656; RV64ZVE32F-NEXT: lh a5, 4(a0) 14657; RV64ZVE32F-NEXT: lh a6, 6(a0) 14658; RV64ZVE32F-NEXT: lh a7, 12(a0) 14659; RV64ZVE32F-NEXT: lh a0, 14(a0) 14660; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14661; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14662; RV64ZVE32F-NEXT: vmv.v.x v8, a3 14663; RV64ZVE32F-NEXT: vmv.v.x v9, a7 14664; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 14665; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0 14666; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 14667; 
RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 14668; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 14669; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 14670; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14671; RV64ZVE32F-NEXT: ret 14672 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3> 14673 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14674 ret <8 x i16> %v 14675} 14676 14677define <8 x i16> @mgather_gather_2xSEW(ptr %base) { 14678; RV32-LABEL: mgather_gather_2xSEW: 14679; RV32: # %bb.0: 14680; RV32-NEXT: lui a1, 16513 14681; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14682; RV32-NEXT: vmv.s.x v9, a1 14683; RV32-NEXT: vluxei8.v v8, (a0), v9 14684; RV32-NEXT: ret 14685; 14686; RV64V-LABEL: mgather_gather_2xSEW: 14687; RV64V: # %bb.0: 14688; RV64V-NEXT: lui a1, 16513 14689; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14690; RV64V-NEXT: vmv.s.x v9, a1 14691; RV64V-NEXT: vluxei8.v v8, (a0), v9 14692; RV64V-NEXT: ret 14693; 14694; RV64ZVE32F-LABEL: mgather_gather_2xSEW: 14695; RV64ZVE32F: # %bb.0: 14696; RV64ZVE32F-NEXT: lh a1, 8(a0) 14697; RV64ZVE32F-NEXT: lh a2, 10(a0) 14698; RV64ZVE32F-NEXT: lh a3, 16(a0) 14699; RV64ZVE32F-NEXT: lh a4, 18(a0) 14700; RV64ZVE32F-NEXT: lh a5, 0(a0) 14701; RV64ZVE32F-NEXT: lh a6, 2(a0) 14702; RV64ZVE32F-NEXT: lh a7, 4(a0) 14703; RV64ZVE32F-NEXT: lh a0, 6(a0) 14704; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14705; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14706; RV64ZVE32F-NEXT: vmv.v.x v8, a5 14707; RV64ZVE32F-NEXT: vmv.v.x v9, a1 14708; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 14709; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 14710; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14711; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 14712; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 14713; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14714; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14715; RV64ZVE32F-NEXT: ret 14716 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3> 14717 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14718 ret <8 x i16> %v 14719} 14720 14721; Base pointer isn't sufficiently aligned to form gather with e32 14722define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { 14723; RV32-LABEL: mgather_gather_2xSEW_unaligned: 14724; RV32: # %bb.0: 14725; RV32-NEXT: lui a1, %hi(.LCPI123_0) 14726; RV32-NEXT: addi a1, a1, %lo(.LCPI123_0) 14727; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14728; RV32-NEXT: vle8.v v9, (a1) 14729; RV32-NEXT: vluxei8.v v8, (a0), v9 14730; RV32-NEXT: ret 14731; 14732; RV64V-LABEL: mgather_gather_2xSEW_unaligned: 14733; RV64V: # %bb.0: 14734; RV64V-NEXT: lui a1, %hi(.LCPI123_0) 14735; RV64V-NEXT: addi a1, a1, %lo(.LCPI123_0) 14736; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14737; RV64V-NEXT: vle8.v v9, (a1) 14738; RV64V-NEXT: vluxei8.v v8, (a0), v9 14739; RV64V-NEXT: ret 14740; 14741; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned: 14742; RV64ZVE32F: # %bb.0: 14743; RV64ZVE32F-NEXT: lh a1, 8(a0) 14744; RV64ZVE32F-NEXT: lh a2, 10(a0) 14745; RV64ZVE32F-NEXT: lh a3, 18(a0) 14746; RV64ZVE32F-NEXT: lh a4, 20(a0) 14747; RV64ZVE32F-NEXT: lh a5, 0(a0) 14748; RV64ZVE32F-NEXT: lh a6, 2(a0) 14749; RV64ZVE32F-NEXT: lh a7, 4(a0) 14750; RV64ZVE32F-NEXT: lh a0, 6(a0) 14751; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14752; RV64ZVE32F-NEXT: vmv.v.i v0, 15 
14753; RV64ZVE32F-NEXT: vmv.v.x v8, a5 14754; RV64ZVE32F-NEXT: vmv.v.x v9, a1 14755; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 14756; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 14757; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14758; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 14759; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 14760; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14761; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14762; RV64ZVE32F-NEXT: ret 14763 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3> 14764 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true), <8 x i16> poison) 14765 ret <8 x i16> %v 14766} 14767 14768; Despite sufficient starting alignment, the index values aren't properly 14769; aligned for e32. 14770define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { 14771; RV32-LABEL: mgather_gather_2xSEW_unaligned2: 14772; RV32: # %bb.0: 14773; RV32-NEXT: lui a1, %hi(.LCPI124_0) 14774; RV32-NEXT: addi a1, a1, %lo(.LCPI124_0) 14775; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14776; RV32-NEXT: vle8.v v9, (a1) 14777; RV32-NEXT: vluxei8.v v8, (a0), v9 14778; RV32-NEXT: ret 14779; 14780; RV64V-LABEL: mgather_gather_2xSEW_unaligned2: 14781; RV64V: # %bb.0: 14782; RV64V-NEXT: lui a1, %hi(.LCPI124_0) 14783; RV64V-NEXT: addi a1, a1, %lo(.LCPI124_0) 14784; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14785; RV64V-NEXT: vle8.v v9, (a1) 14786; RV64V-NEXT: vluxei8.v v8, (a0), v9 14787; RV64V-NEXT: ret 14788; 14789; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2: 14790; RV64ZVE32F: # %bb.0: 14791; RV64ZVE32F-NEXT: lh a1, 2(a0) 14792; RV64ZVE32F-NEXT: lh a2, 4(a0) 14793; RV64ZVE32F-NEXT: lh a3, 6(a0) 14794; RV64ZVE32F-NEXT: lh a4, 8(a0) 14795; RV64ZVE32F-NEXT: lh a5, 10(a0) 14796; RV64ZVE32F-NEXT: lh a6, 18(a0) 14797; RV64ZVE32F-NEXT: lh a0, 20(a0) 14798; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14799; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14800; RV64ZVE32F-NEXT: vmv.v.x v8, a1 14801; RV64ZVE32F-NEXT: vmv.v.x v9, a4 14802; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 14803; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 14804; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 14805; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 14806; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a0 14807; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a3 14808; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14809; RV64ZVE32F-NEXT: ret 14810 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3> 14811 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14812 ret <8 x i16> %v 14813} 14814 14815define <8 x i16> @mgather_gather_4xSEW(ptr %base) { 14816; RV32V-LABEL: mgather_gather_4xSEW: 14817; RV32V: # %bb.0: 14818; RV32V-NEXT: li a1, 16 14819; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma 14820; RV32V-NEXT: vlse64.v v8, (a0), a1 14821; RV32V-NEXT: ret 14822; 14823; RV64V-LABEL: mgather_gather_4xSEW: 14824; RV64V: # %bb.0: 14825; RV64V-NEXT: li a1, 16 14826; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma 14827; RV64V-NEXT: vlse64.v v8, (a0), a1 14828; RV64V-NEXT: ret 14829; 14830; RV32ZVE32F-LABEL: mgather_gather_4xSEW: 14831; RV32ZVE32F: # %bb.0: 14832; RV32ZVE32F-NEXT: lui a1, 82176 14833; RV32ZVE32F-NEXT: addi a1, a1, 1024 14834; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14835; RV32ZVE32F-NEXT: vmv.s.x v9, a1 14836; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9 14837; 
RV32ZVE32F-NEXT: ret 14838; 14839; RV64ZVE32F-LABEL: mgather_gather_4xSEW: 14840; RV64ZVE32F: # %bb.0: 14841; RV64ZVE32F-NEXT: lh a1, 0(a0) 14842; RV64ZVE32F-NEXT: lh a2, 2(a0) 14843; RV64ZVE32F-NEXT: lh a3, 4(a0) 14844; RV64ZVE32F-NEXT: lh a4, 6(a0) 14845; RV64ZVE32F-NEXT: lh a5, 16(a0) 14846; RV64ZVE32F-NEXT: lh a6, 18(a0) 14847; RV64ZVE32F-NEXT: lh a7, 20(a0) 14848; RV64ZVE32F-NEXT: lh a0, 22(a0) 14849; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14850; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14851; RV64ZVE32F-NEXT: vmv.v.x v8, a1 14852; RV64ZVE32F-NEXT: vmv.v.x v9, a5 14853; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 14854; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6 14855; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14856; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 14857; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 14858; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14859; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14860; RV64ZVE32F-NEXT: ret 14861 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 14862 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> splat (i1 true), <8 x i16> poison) 14863 ret <8 x i16> %v 14864} 14865 14866; This is a case where we'd be able to do 4xSEW if we had proper alignment 14867; but we only have sufficient alignment for 2xSEW. 14868define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { 14869; RV32-LABEL: mgather_gather_4xSEW_partial_align: 14870; RV32: # %bb.0: 14871; RV32-NEXT: lui a1, 82176 14872; RV32-NEXT: addi a1, a1, 1024 14873; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14874; RV32-NEXT: vmv.s.x v9, a1 14875; RV32-NEXT: vluxei8.v v8, (a0), v9 14876; RV32-NEXT: ret 14877; 14878; RV64V-LABEL: mgather_gather_4xSEW_partial_align: 14879; RV64V: # %bb.0: 14880; RV64V-NEXT: lui a1, 82176 14881; RV64V-NEXT: addi a1, a1, 1024 14882; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma 14883; RV64V-NEXT: vmv.s.x v9, a1 14884; RV64V-NEXT: vluxei8.v v8, (a0), v9 14885; RV64V-NEXT: ret 14886; 14887; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align: 14888; RV64ZVE32F: # %bb.0: 14889; RV64ZVE32F-NEXT: lh a1, 0(a0) 14890; RV64ZVE32F-NEXT: lh a2, 2(a0) 14891; RV64ZVE32F-NEXT: lh a3, 4(a0) 14892; RV64ZVE32F-NEXT: lh a4, 6(a0) 14893; RV64ZVE32F-NEXT: lh a5, 16(a0) 14894; RV64ZVE32F-NEXT: lh a6, 18(a0) 14895; RV64ZVE32F-NEXT: lh a7, 20(a0) 14896; RV64ZVE32F-NEXT: lh a0, 22(a0) 14897; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14898; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14899; RV64ZVE32F-NEXT: vmv.v.x v8, a1 14900; RV64ZVE32F-NEXT: vmv.v.x v9, a5 14901; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 14902; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6 14903; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14904; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 14905; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 14906; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14907; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14908; RV64ZVE32F-NEXT: ret 14909 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 14910 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14911 ret <8 x i16> %v 14912} 14913 14914define <8 x i16> @mgather_shuffle_reverse(ptr %base) { 14915; CHECK-LABEL: mgather_shuffle_reverse: 14916; CHECK: # %bb.0: 14917; CHECK-NEXT: addi a0, a0, 14 14918; CHECK-NEXT: li a1, -2 14919; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14920; CHECK-NEXT: 
vlse16.v v8, (a0), a1 14921; CHECK-NEXT: ret 14922 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0> 14923 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14924 ret <8 x i16> %v 14925} 14926 14927define <8 x i16> @mgather_shuffle_rotate(ptr %base) { 14928; RV32-LABEL: mgather_shuffle_rotate: 14929; RV32: # %bb.0: 14930; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14931; RV32-NEXT: vle16.v v9, (a0) 14932; RV32-NEXT: vslidedown.vi v8, v9, 4 14933; RV32-NEXT: vslideup.vi v8, v9, 4 14934; RV32-NEXT: ret 14935; 14936; RV64V-LABEL: mgather_shuffle_rotate: 14937; RV64V: # %bb.0: 14938; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14939; RV64V-NEXT: vle16.v v9, (a0) 14940; RV64V-NEXT: vslidedown.vi v8, v9, 4 14941; RV64V-NEXT: vslideup.vi v8, v9, 4 14942; RV64V-NEXT: ret 14943; 14944; RV64ZVE32F-LABEL: mgather_shuffle_rotate: 14945; RV64ZVE32F: # %bb.0: 14946; RV64ZVE32F-NEXT: lh a1, 8(a0) 14947; RV64ZVE32F-NEXT: lh a2, 10(a0) 14948; RV64ZVE32F-NEXT: lh a3, 12(a0) 14949; RV64ZVE32F-NEXT: lh a4, 14(a0) 14950; RV64ZVE32F-NEXT: lh a5, 0(a0) 14951; RV64ZVE32F-NEXT: lh a6, 2(a0) 14952; RV64ZVE32F-NEXT: lh a7, 4(a0) 14953; RV64ZVE32F-NEXT: lh a0, 6(a0) 14954; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 14955; RV64ZVE32F-NEXT: vmv.v.i v0, 15 14956; RV64ZVE32F-NEXT: vmv.v.x v8, a1 14957; RV64ZVE32F-NEXT: vmv.v.x v9, a5 14958; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 14959; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6 14960; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 14961; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 14962; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 14963; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 14964; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 14965; RV64ZVE32F-NEXT: ret 14966 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3> 14967 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 14968 ret <8 x i16> %v 14969} 14970 14971define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { 14972; RV32-LABEL: mgather_shuffle_vrgather: 14973; RV32: # %bb.0: 14974; RV32-NEXT: lui a1, %hi(.LCPI129_0) 14975; RV32-NEXT: addi a1, a1, %lo(.LCPI129_0) 14976; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14977; RV32-NEXT: vle16.v v9, (a1) 14978; RV32-NEXT: vle16.v v10, (a0) 14979; RV32-NEXT: vrgather.vv v8, v10, v9 14980; RV32-NEXT: ret 14981; 14982; RV64V-LABEL: mgather_shuffle_vrgather: 14983; RV64V: # %bb.0: 14984; RV64V-NEXT: lui a1, %hi(.LCPI129_0) 14985; RV64V-NEXT: addi a1, a1, %lo(.LCPI129_0) 14986; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma 14987; RV64V-NEXT: vle16.v v9, (a1) 14988; RV64V-NEXT: vle16.v v10, (a0) 14989; RV64V-NEXT: vrgather.vv v8, v10, v9 14990; RV64V-NEXT: ret 14991; 14992; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: 14993; RV64ZVE32F: # %bb.0: 14994; RV64ZVE32F-NEXT: lh a1, 0(a0) 14995; RV64ZVE32F-NEXT: lh a2, 2(a0) 14996; RV64ZVE32F-NEXT: lh a3, 4(a0) 14997; RV64ZVE32F-NEXT: lh a4, 6(a0) 14998; RV64ZVE32F-NEXT: lh a5, 8(a0) 14999; RV64ZVE32F-NEXT: lh a6, 10(a0) 15000; RV64ZVE32F-NEXT: lh a7, 12(a0) 15001; RV64ZVE32F-NEXT: lh a0, 14(a0) 15002; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu 15003; RV64ZVE32F-NEXT: vmv.v.i v0, 15 15004; RV64ZVE32F-NEXT: vmv.v.x v8, a1 15005; RV64ZVE32F-NEXT: vmv.v.x v9, a5 15006; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 15007; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, 
a6 15008; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 15009; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 15010; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 15011; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 15012; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t 15013; RV64ZVE32F-NEXT: ret 15014 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7> 15015 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) 15016 ret <8 x i16> %v 15017} 15018 15019; v32i64 is not a legal type, so make sure we don't try to combine the mgather 15020; to a vlse intrinsic until it is legalized and split. 15021define <32 x i64> @mgather_strided_split(ptr %base) { 15022; RV32V-LABEL: mgather_strided_split: 15023; RV32V: # %bb.0: 15024; RV32V-NEXT: li a1, 16 15025; RV32V-NEXT: vsetivli zero, 16, e64, m8, ta, ma 15026; RV32V-NEXT: vlse64.v v8, (a0), a1 15027; RV32V-NEXT: addi a0, a0, 256 15028; RV32V-NEXT: vlse64.v v16, (a0), a1 15029; RV32V-NEXT: ret 15030; 15031; RV64V-LABEL: mgather_strided_split: 15032; RV64V: # %bb.0: 15033; RV64V-NEXT: li a1, 16 15034; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma 15035; RV64V-NEXT: vlse64.v v8, (a0), a1 15036; RV64V-NEXT: addi a0, a0, 256 15037; RV64V-NEXT: vlse64.v v16, (a0), a1 15038; RV64V-NEXT: ret 15039; 15040; RV32ZVE32F-LABEL: mgather_strided_split: 15041; RV32ZVE32F: # %bb.0: 15042; RV32ZVE32F-NEXT: addi sp, sp, -512 15043; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 512 15044; RV32ZVE32F-NEXT: sw ra, 508(sp) # 4-byte Folded Spill 15045; RV32ZVE32F-NEXT: sw s0, 504(sp) # 4-byte Folded Spill 15046; RV32ZVE32F-NEXT: sw s2, 500(sp) # 4-byte Folded Spill 15047; RV32ZVE32F-NEXT: sw s3, 496(sp) # 4-byte Folded Spill 15048; RV32ZVE32F-NEXT: sw s4, 492(sp) # 4-byte Folded Spill 15049; RV32ZVE32F-NEXT: sw s5, 488(sp) # 4-byte Folded Spill 15050; RV32ZVE32F-NEXT: sw s6, 484(sp) # 4-byte Folded Spill 15051; RV32ZVE32F-NEXT: sw s7, 480(sp) # 4-byte Folded Spill 15052; RV32ZVE32F-NEXT: sw s8, 476(sp) # 4-byte Folded Spill 15053; RV32ZVE32F-NEXT: sw s9, 472(sp) # 4-byte Folded Spill 15054; RV32ZVE32F-NEXT: sw s10, 468(sp) # 4-byte Folded Spill 15055; RV32ZVE32F-NEXT: sw s11, 464(sp) # 4-byte Folded Spill 15056; RV32ZVE32F-NEXT: .cfi_offset ra, -4 15057; RV32ZVE32F-NEXT: .cfi_offset s0, -8 15058; RV32ZVE32F-NEXT: .cfi_offset s2, -12 15059; RV32ZVE32F-NEXT: .cfi_offset s3, -16 15060; RV32ZVE32F-NEXT: .cfi_offset s4, -20 15061; RV32ZVE32F-NEXT: .cfi_offset s5, -24 15062; RV32ZVE32F-NEXT: .cfi_offset s6, -28 15063; RV32ZVE32F-NEXT: .cfi_offset s7, -32 15064; RV32ZVE32F-NEXT: .cfi_offset s8, -36 15065; RV32ZVE32F-NEXT: .cfi_offset s9, -40 15066; RV32ZVE32F-NEXT: .cfi_offset s10, -44 15067; RV32ZVE32F-NEXT: .cfi_offset s11, -48 15068; RV32ZVE32F-NEXT: addi s0, sp, 512 15069; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 15070; RV32ZVE32F-NEXT: andi sp, sp, -128 15071; RV32ZVE32F-NEXT: li a2, 32 15072; RV32ZVE32F-NEXT: lw a3, 0(a1) 15073; RV32ZVE32F-NEXT: sw a3, 236(sp) # 4-byte Folded Spill 15074; RV32ZVE32F-NEXT: lw a3, 4(a1) 15075; RV32ZVE32F-NEXT: sw a3, 232(sp) # 4-byte Folded Spill 15076; RV32ZVE32F-NEXT: addi a3, sp, 256 15077; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma 15078; RV32ZVE32F-NEXT: vid.v v8 15079; RV32ZVE32F-NEXT: vsll.vi v8, v8, 4 15080; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 15081; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma 15082; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 1 15083; RV32ZVE32F-NEXT: vslidedown.vi v17, v8, 2 15084; RV32ZVE32F-NEXT: 
vmv.x.s a1, v16 15085; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 3 15086; RV32ZVE32F-NEXT: vmv.x.s a4, v17 15087; RV32ZVE32F-NEXT: vmv.x.s a5, v16 15088; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma 15089; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 4 15090; RV32ZVE32F-NEXT: vmv.x.s a6, v16 15091; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 5 15092; RV32ZVE32F-NEXT: vmv.x.s a7, v16 15093; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 6 15094; RV32ZVE32F-NEXT: vmv.x.s t0, v16 15095; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 7 15096; RV32ZVE32F-NEXT: vmv.x.s t1, v16 15097; RV32ZVE32F-NEXT: lw t2, 0(a1) 15098; RV32ZVE32F-NEXT: sw t2, 196(sp) # 4-byte Folded Spill 15099; RV32ZVE32F-NEXT: lw a1, 4(a1) 15100; RV32ZVE32F-NEXT: sw a1, 192(sp) # 4-byte Folded Spill 15101; RV32ZVE32F-NEXT: lw ra, 0(a4) 15102; RV32ZVE32F-NEXT: lw a1, 4(a4) 15103; RV32ZVE32F-NEXT: sw a1, 172(sp) # 4-byte Folded Spill 15104; RV32ZVE32F-NEXT: lw a1, 0(a5) 15105; RV32ZVE32F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill 15106; RV32ZVE32F-NEXT: lw a1, 4(a5) 15107; RV32ZVE32F-NEXT: sw a1, 164(sp) # 4-byte Folded Spill 15108; RV32ZVE32F-NEXT: lw a1, 0(a6) 15109; RV32ZVE32F-NEXT: sw a1, 252(sp) # 4-byte Folded Spill 15110; RV32ZVE32F-NEXT: lw a1, 4(a6) 15111; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill 15112; RV32ZVE32F-NEXT: lw a1, 0(a7) 15113; RV32ZVE32F-NEXT: sw a1, 244(sp) # 4-byte Folded Spill 15114; RV32ZVE32F-NEXT: lw a1, 4(a7) 15115; RV32ZVE32F-NEXT: sw a1, 240(sp) # 4-byte Folded Spill 15116; RV32ZVE32F-NEXT: lw a1, 0(t0) 15117; RV32ZVE32F-NEXT: sw a1, 188(sp) # 4-byte Folded Spill 15118; RV32ZVE32F-NEXT: lw a1, 4(t0) 15119; RV32ZVE32F-NEXT: sw a1, 184(sp) # 4-byte Folded Spill 15120; RV32ZVE32F-NEXT: lw a1, 0(t1) 15121; RV32ZVE32F-NEXT: sw a1, 180(sp) # 4-byte Folded Spill 15122; RV32ZVE32F-NEXT: lw a1, 4(t1) 15123; RV32ZVE32F-NEXT: sw a1, 176(sp) # 4-byte Folded Spill 15124; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma 15125; RV32ZVE32F-NEXT: vse32.v v8, (a3) 15126; RV32ZVE32F-NEXT: lw a1, 288(sp) 15127; RV32ZVE32F-NEXT: lw a2, 292(sp) 15128; RV32ZVE32F-NEXT: lw a3, 296(sp) 15129; RV32ZVE32F-NEXT: lw a4, 300(sp) 15130; RV32ZVE32F-NEXT: lw a5, 0(a1) 15131; RV32ZVE32F-NEXT: sw a5, 228(sp) # 4-byte Folded Spill 15132; RV32ZVE32F-NEXT: lw a1, 4(a1) 15133; RV32ZVE32F-NEXT: sw a1, 224(sp) # 4-byte Folded Spill 15134; RV32ZVE32F-NEXT: lw a1, 0(a2) 15135; RV32ZVE32F-NEXT: sw a1, 220(sp) # 4-byte Folded Spill 15136; RV32ZVE32F-NEXT: lw a1, 4(a2) 15137; RV32ZVE32F-NEXT: sw a1, 216(sp) # 4-byte Folded Spill 15138; RV32ZVE32F-NEXT: lw a1, 0(a3) 15139; RV32ZVE32F-NEXT: sw a1, 212(sp) # 4-byte Folded Spill 15140; RV32ZVE32F-NEXT: lw a1, 4(a3) 15141; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill 15142; RV32ZVE32F-NEXT: lw a1, 0(a4) 15143; RV32ZVE32F-NEXT: sw a1, 204(sp) # 4-byte Folded Spill 15144; RV32ZVE32F-NEXT: lw a1, 4(a4) 15145; RV32ZVE32F-NEXT: sw a1, 200(sp) # 4-byte Folded Spill 15146; RV32ZVE32F-NEXT: lw a1, 304(sp) 15147; RV32ZVE32F-NEXT: lw a2, 308(sp) 15148; RV32ZVE32F-NEXT: lw a3, 312(sp) 15149; RV32ZVE32F-NEXT: lw a4, 316(sp) 15150; RV32ZVE32F-NEXT: lw a5, 0(a1) 15151; RV32ZVE32F-NEXT: sw a5, 160(sp) # 4-byte Folded Spill 15152; RV32ZVE32F-NEXT: lw a1, 4(a1) 15153; RV32ZVE32F-NEXT: sw a1, 156(sp) # 4-byte Folded Spill 15154; RV32ZVE32F-NEXT: lw a1, 0(a2) 15155; RV32ZVE32F-NEXT: sw a1, 152(sp) # 4-byte Folded Spill 15156; RV32ZVE32F-NEXT: lw a1, 4(a2) 15157; RV32ZVE32F-NEXT: sw a1, 148(sp) # 4-byte Folded Spill 15158; RV32ZVE32F-NEXT: lw a1, 0(a3) 15159; RV32ZVE32F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill 
15160; RV32ZVE32F-NEXT: lw a1, 4(a3) 15161; RV32ZVE32F-NEXT: sw a1, 140(sp) # 4-byte Folded Spill 15162; RV32ZVE32F-NEXT: lw a1, 0(a4) 15163; RV32ZVE32F-NEXT: sw a1, 136(sp) # 4-byte Folded Spill 15164; RV32ZVE32F-NEXT: lw a1, 4(a4) 15165; RV32ZVE32F-NEXT: sw a1, 132(sp) # 4-byte Folded Spill 15166; RV32ZVE32F-NEXT: lw a1, 320(sp) 15167; RV32ZVE32F-NEXT: lw a2, 324(sp) 15168; RV32ZVE32F-NEXT: lw a3, 328(sp) 15169; RV32ZVE32F-NEXT: lw a4, 332(sp) 15170; RV32ZVE32F-NEXT: lw a5, 0(a1) 15171; RV32ZVE32F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill 15172; RV32ZVE32F-NEXT: lw a1, 4(a1) 15173; RV32ZVE32F-NEXT: sw a1, 124(sp) # 4-byte Folded Spill 15174; RV32ZVE32F-NEXT: lw a1, 0(a2) 15175; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill 15176; RV32ZVE32F-NEXT: lw a1, 4(a2) 15177; RV32ZVE32F-NEXT: sw a1, 116(sp) # 4-byte Folded Spill 15178; RV32ZVE32F-NEXT: lw s8, 0(a3) 15179; RV32ZVE32F-NEXT: lw s9, 4(a3) 15180; RV32ZVE32F-NEXT: lw s10, 0(a4) 15181; RV32ZVE32F-NEXT: lw s11, 4(a4) 15182; RV32ZVE32F-NEXT: lw a1, 336(sp) 15183; RV32ZVE32F-NEXT: lw a2, 340(sp) 15184; RV32ZVE32F-NEXT: lw a3, 344(sp) 15185; RV32ZVE32F-NEXT: lw a4, 348(sp) 15186; RV32ZVE32F-NEXT: lw t5, 0(a1) 15187; RV32ZVE32F-NEXT: lw t6, 4(a1) 15188; RV32ZVE32F-NEXT: lw s2, 0(a2) 15189; RV32ZVE32F-NEXT: lw s3, 4(a2) 15190; RV32ZVE32F-NEXT: lw a5, 0(a3) 15191; RV32ZVE32F-NEXT: lw a6, 4(a3) 15192; RV32ZVE32F-NEXT: lw a7, 0(a4) 15193; RV32ZVE32F-NEXT: lw t0, 4(a4) 15194; RV32ZVE32F-NEXT: lw a1, 352(sp) 15195; RV32ZVE32F-NEXT: lw a2, 356(sp) 15196; RV32ZVE32F-NEXT: lw a3, 360(sp) 15197; RV32ZVE32F-NEXT: lw a4, 364(sp) 15198; RV32ZVE32F-NEXT: lw t1, 0(a1) 15199; RV32ZVE32F-NEXT: sw t1, 112(sp) # 4-byte Folded Spill 15200; RV32ZVE32F-NEXT: lw a1, 4(a1) 15201; RV32ZVE32F-NEXT: sw a1, 108(sp) # 4-byte Folded Spill 15202; RV32ZVE32F-NEXT: lw a1, 0(a2) 15203; RV32ZVE32F-NEXT: sw a1, 104(sp) # 4-byte Folded Spill 15204; RV32ZVE32F-NEXT: lw a1, 4(a2) 15205; RV32ZVE32F-NEXT: sw a1, 100(sp) # 4-byte Folded Spill 15206; RV32ZVE32F-NEXT: lw s4, 0(a3) 15207; RV32ZVE32F-NEXT: lw s5, 4(a3) 15208; RV32ZVE32F-NEXT: lw s6, 0(a4) 15209; RV32ZVE32F-NEXT: lw s7, 4(a4) 15210; RV32ZVE32F-NEXT: lw a1, 368(sp) 15211; RV32ZVE32F-NEXT: lw a2, 372(sp) 15212; RV32ZVE32F-NEXT: lw a3, 376(sp) 15213; RV32ZVE32F-NEXT: lw a4, 380(sp) 15214; RV32ZVE32F-NEXT: lw t1, 0(a1) 15215; RV32ZVE32F-NEXT: lw t2, 4(a1) 15216; RV32ZVE32F-NEXT: lw t3, 0(a2) 15217; RV32ZVE32F-NEXT: lw t4, 4(a2) 15218; RV32ZVE32F-NEXT: lw a1, 0(a3) 15219; RV32ZVE32F-NEXT: lw a2, 4(a3) 15220; RV32ZVE32F-NEXT: lw a3, 0(a4) 15221; RV32ZVE32F-NEXT: lw a4, 4(a4) 15222; RV32ZVE32F-NEXT: sw ra, 16(a0) 15223; RV32ZVE32F-NEXT: lw ra, 172(sp) # 4-byte Folded Reload 15224; RV32ZVE32F-NEXT: sw ra, 20(a0) 15225; RV32ZVE32F-NEXT: lw ra, 168(sp) # 4-byte Folded Reload 15226; RV32ZVE32F-NEXT: sw ra, 24(a0) 15227; RV32ZVE32F-NEXT: lw ra, 164(sp) # 4-byte Folded Reload 15228; RV32ZVE32F-NEXT: sw ra, 28(a0) 15229; RV32ZVE32F-NEXT: lw ra, 236(sp) # 4-byte Folded Reload 15230; RV32ZVE32F-NEXT: sw ra, 0(a0) 15231; RV32ZVE32F-NEXT: lw ra, 232(sp) # 4-byte Folded Reload 15232; RV32ZVE32F-NEXT: sw ra, 4(a0) 15233; RV32ZVE32F-NEXT: lw ra, 196(sp) # 4-byte Folded Reload 15234; RV32ZVE32F-NEXT: sw ra, 8(a0) 15235; RV32ZVE32F-NEXT: lw ra, 192(sp) # 4-byte Folded Reload 15236; RV32ZVE32F-NEXT: sw ra, 12(a0) 15237; RV32ZVE32F-NEXT: lw ra, 188(sp) # 4-byte Folded Reload 15238; RV32ZVE32F-NEXT: sw ra, 48(a0) 15239; RV32ZVE32F-NEXT: lw ra, 184(sp) # 4-byte Folded Reload 15240; RV32ZVE32F-NEXT: sw ra, 52(a0) 15241; RV32ZVE32F-NEXT: lw 
ra, 180(sp) # 4-byte Folded Reload 15242; RV32ZVE32F-NEXT: sw ra, 56(a0) 15243; RV32ZVE32F-NEXT: lw ra, 176(sp) # 4-byte Folded Reload 15244; RV32ZVE32F-NEXT: sw ra, 60(a0) 15245; RV32ZVE32F-NEXT: sw a5, 176(a0) 15246; RV32ZVE32F-NEXT: sw a6, 180(a0) 15247; RV32ZVE32F-NEXT: sw a7, 184(a0) 15248; RV32ZVE32F-NEXT: sw t0, 188(a0) 15249; RV32ZVE32F-NEXT: sw t5, 160(a0) 15250; RV32ZVE32F-NEXT: sw t6, 164(a0) 15251; RV32ZVE32F-NEXT: sw s2, 168(a0) 15252; RV32ZVE32F-NEXT: sw s3, 172(a0) 15253; RV32ZVE32F-NEXT: sw s8, 144(a0) 15254; RV32ZVE32F-NEXT: sw s9, 148(a0) 15255; RV32ZVE32F-NEXT: sw s10, 152(a0) 15256; RV32ZVE32F-NEXT: sw s11, 156(a0) 15257; RV32ZVE32F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload 15258; RV32ZVE32F-NEXT: sw a5, 128(a0) 15259; RV32ZVE32F-NEXT: lw a5, 124(sp) # 4-byte Folded Reload 15260; RV32ZVE32F-NEXT: sw a5, 132(a0) 15261; RV32ZVE32F-NEXT: lw a5, 120(sp) # 4-byte Folded Reload 15262; RV32ZVE32F-NEXT: sw a5, 136(a0) 15263; RV32ZVE32F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload 15264; RV32ZVE32F-NEXT: sw a5, 140(a0) 15265; RV32ZVE32F-NEXT: lw a5, 144(sp) # 4-byte Folded Reload 15266; RV32ZVE32F-NEXT: sw a5, 112(a0) 15267; RV32ZVE32F-NEXT: lw a5, 140(sp) # 4-byte Folded Reload 15268; RV32ZVE32F-NEXT: sw a5, 116(a0) 15269; RV32ZVE32F-NEXT: lw a5, 136(sp) # 4-byte Folded Reload 15270; RV32ZVE32F-NEXT: sw a5, 120(a0) 15271; RV32ZVE32F-NEXT: lw a5, 132(sp) # 4-byte Folded Reload 15272; RV32ZVE32F-NEXT: sw a5, 124(a0) 15273; RV32ZVE32F-NEXT: lw a5, 160(sp) # 4-byte Folded Reload 15274; RV32ZVE32F-NEXT: sw a5, 96(a0) 15275; RV32ZVE32F-NEXT: lw a5, 156(sp) # 4-byte Folded Reload 15276; RV32ZVE32F-NEXT: sw a5, 100(a0) 15277; RV32ZVE32F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload 15278; RV32ZVE32F-NEXT: sw a5, 104(a0) 15279; RV32ZVE32F-NEXT: lw a5, 148(sp) # 4-byte Folded Reload 15280; RV32ZVE32F-NEXT: sw a5, 108(a0) 15281; RV32ZVE32F-NEXT: lw a5, 212(sp) # 4-byte Folded Reload 15282; RV32ZVE32F-NEXT: sw a5, 80(a0) 15283; RV32ZVE32F-NEXT: lw a5, 208(sp) # 4-byte Folded Reload 15284; RV32ZVE32F-NEXT: sw a5, 84(a0) 15285; RV32ZVE32F-NEXT: lw a5, 204(sp) # 4-byte Folded Reload 15286; RV32ZVE32F-NEXT: sw a5, 88(a0) 15287; RV32ZVE32F-NEXT: lw a5, 200(sp) # 4-byte Folded Reload 15288; RV32ZVE32F-NEXT: sw a5, 92(a0) 15289; RV32ZVE32F-NEXT: lw a5, 228(sp) # 4-byte Folded Reload 15290; RV32ZVE32F-NEXT: sw a5, 64(a0) 15291; RV32ZVE32F-NEXT: lw a5, 224(sp) # 4-byte Folded Reload 15292; RV32ZVE32F-NEXT: sw a5, 68(a0) 15293; RV32ZVE32F-NEXT: lw a5, 220(sp) # 4-byte Folded Reload 15294; RV32ZVE32F-NEXT: sw a5, 72(a0) 15295; RV32ZVE32F-NEXT: lw a5, 216(sp) # 4-byte Folded Reload 15296; RV32ZVE32F-NEXT: sw a5, 76(a0) 15297; RV32ZVE32F-NEXT: sw a1, 240(a0) 15298; RV32ZVE32F-NEXT: sw a2, 244(a0) 15299; RV32ZVE32F-NEXT: sw a3, 248(a0) 15300; RV32ZVE32F-NEXT: sw a4, 252(a0) 15301; RV32ZVE32F-NEXT: sw t1, 224(a0) 15302; RV32ZVE32F-NEXT: sw t2, 228(a0) 15303; RV32ZVE32F-NEXT: sw t3, 232(a0) 15304; RV32ZVE32F-NEXT: sw t4, 236(a0) 15305; RV32ZVE32F-NEXT: sw s4, 208(a0) 15306; RV32ZVE32F-NEXT: sw s5, 212(a0) 15307; RV32ZVE32F-NEXT: sw s6, 216(a0) 15308; RV32ZVE32F-NEXT: sw s7, 220(a0) 15309; RV32ZVE32F-NEXT: lw a1, 112(sp) # 4-byte Folded Reload 15310; RV32ZVE32F-NEXT: sw a1, 192(a0) 15311; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload 15312; RV32ZVE32F-NEXT: sw a1, 196(a0) 15313; RV32ZVE32F-NEXT: lw a1, 104(sp) # 4-byte Folded Reload 15314; RV32ZVE32F-NEXT: sw a1, 200(a0) 15315; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload 15316; RV32ZVE32F-NEXT: sw a1, 204(a0) 15317; RV32ZVE32F-NEXT: lw 
a1, 252(sp) # 4-byte Folded Reload 15318; RV32ZVE32F-NEXT: sw a1, 32(a0) 15319; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload 15320; RV32ZVE32F-NEXT: sw a1, 36(a0) 15321; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload 15322; RV32ZVE32F-NEXT: sw a1, 40(a0) 15323; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload 15324; RV32ZVE32F-NEXT: sw a1, 44(a0) 15325; RV32ZVE32F-NEXT: addi sp, s0, -512 15326; RV32ZVE32F-NEXT: .cfi_def_cfa sp, 512 15327; RV32ZVE32F-NEXT: lw ra, 508(sp) # 4-byte Folded Reload 15328; RV32ZVE32F-NEXT: lw s0, 504(sp) # 4-byte Folded Reload 15329; RV32ZVE32F-NEXT: lw s2, 500(sp) # 4-byte Folded Reload 15330; RV32ZVE32F-NEXT: lw s3, 496(sp) # 4-byte Folded Reload 15331; RV32ZVE32F-NEXT: lw s4, 492(sp) # 4-byte Folded Reload 15332; RV32ZVE32F-NEXT: lw s5, 488(sp) # 4-byte Folded Reload 15333; RV32ZVE32F-NEXT: lw s6, 484(sp) # 4-byte Folded Reload 15334; RV32ZVE32F-NEXT: lw s7, 480(sp) # 4-byte Folded Reload 15335; RV32ZVE32F-NEXT: lw s8, 476(sp) # 4-byte Folded Reload 15336; RV32ZVE32F-NEXT: lw s9, 472(sp) # 4-byte Folded Reload 15337; RV32ZVE32F-NEXT: lw s10, 468(sp) # 4-byte Folded Reload 15338; RV32ZVE32F-NEXT: lw s11, 464(sp) # 4-byte Folded Reload 15339; RV32ZVE32F-NEXT: .cfi_restore ra 15340; RV32ZVE32F-NEXT: .cfi_restore s0 15341; RV32ZVE32F-NEXT: .cfi_restore s2 15342; RV32ZVE32F-NEXT: .cfi_restore s3 15343; RV32ZVE32F-NEXT: .cfi_restore s4 15344; RV32ZVE32F-NEXT: .cfi_restore s5 15345; RV32ZVE32F-NEXT: .cfi_restore s6 15346; RV32ZVE32F-NEXT: .cfi_restore s7 15347; RV32ZVE32F-NEXT: .cfi_restore s8 15348; RV32ZVE32F-NEXT: .cfi_restore s9 15349; RV32ZVE32F-NEXT: .cfi_restore s10 15350; RV32ZVE32F-NEXT: .cfi_restore s11 15351; RV32ZVE32F-NEXT: addi sp, sp, 512 15352; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0 15353; RV32ZVE32F-NEXT: ret 15354; 15355; RV64ZVE32F-LABEL: mgather_strided_split: 15356; RV64ZVE32F: # %bb.0: 15357; RV64ZVE32F-NEXT: addi sp, sp, -144 15358; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 144 15359; RV64ZVE32F-NEXT: sd ra, 136(sp) # 8-byte Folded Spill 15360; RV64ZVE32F-NEXT: sd s0, 128(sp) # 8-byte Folded Spill 15361; RV64ZVE32F-NEXT: sd s1, 120(sp) # 8-byte Folded Spill 15362; RV64ZVE32F-NEXT: sd s2, 112(sp) # 8-byte Folded Spill 15363; RV64ZVE32F-NEXT: sd s3, 104(sp) # 8-byte Folded Spill 15364; RV64ZVE32F-NEXT: sd s4, 96(sp) # 8-byte Folded Spill 15365; RV64ZVE32F-NEXT: sd s5, 88(sp) # 8-byte Folded Spill 15366; RV64ZVE32F-NEXT: sd s6, 80(sp) # 8-byte Folded Spill 15367; RV64ZVE32F-NEXT: sd s7, 72(sp) # 8-byte Folded Spill 15368; RV64ZVE32F-NEXT: sd s8, 64(sp) # 8-byte Folded Spill 15369; RV64ZVE32F-NEXT: sd s9, 56(sp) # 8-byte Folded Spill 15370; RV64ZVE32F-NEXT: sd s10, 48(sp) # 8-byte Folded Spill 15371; RV64ZVE32F-NEXT: sd s11, 40(sp) # 8-byte Folded Spill 15372; RV64ZVE32F-NEXT: .cfi_offset ra, -8 15373; RV64ZVE32F-NEXT: .cfi_offset s0, -16 15374; RV64ZVE32F-NEXT: .cfi_offset s1, -24 15375; RV64ZVE32F-NEXT: .cfi_offset s2, -32 15376; RV64ZVE32F-NEXT: .cfi_offset s3, -40 15377; RV64ZVE32F-NEXT: .cfi_offset s4, -48 15378; RV64ZVE32F-NEXT: .cfi_offset s5, -56 15379; RV64ZVE32F-NEXT: .cfi_offset s6, -64 15380; RV64ZVE32F-NEXT: .cfi_offset s7, -72 15381; RV64ZVE32F-NEXT: .cfi_offset s8, -80 15382; RV64ZVE32F-NEXT: .cfi_offset s9, -88 15383; RV64ZVE32F-NEXT: .cfi_offset s10, -96 15384; RV64ZVE32F-NEXT: .cfi_offset s11, -104 15385; RV64ZVE32F-NEXT: ld a2, 0(a1) 15386; RV64ZVE32F-NEXT: sd a2, 32(sp) # 8-byte Folded Spill 15387; RV64ZVE32F-NEXT: ld a2, 16(a1) 15388; RV64ZVE32F-NEXT: sd a2, 24(sp) # 8-byte Folded Spill 15389; 
RV64ZVE32F-NEXT: ld a2, 32(a1) 15390; RV64ZVE32F-NEXT: sd a2, 16(sp) # 8-byte Folded Spill 15391; RV64ZVE32F-NEXT: ld a2, 48(a1) 15392; RV64ZVE32F-NEXT: sd a2, 8(sp) # 8-byte Folded Spill 15393; RV64ZVE32F-NEXT: ld a2, 64(a1) 15394; RV64ZVE32F-NEXT: sd a2, 0(sp) # 8-byte Folded Spill 15395; RV64ZVE32F-NEXT: ld a7, 80(a1) 15396; RV64ZVE32F-NEXT: ld t0, 96(a1) 15397; RV64ZVE32F-NEXT: ld t1, 112(a1) 15398; RV64ZVE32F-NEXT: ld t2, 128(a1) 15399; RV64ZVE32F-NEXT: ld t3, 144(a1) 15400; RV64ZVE32F-NEXT: ld t4, 160(a1) 15401; RV64ZVE32F-NEXT: ld t5, 176(a1) 15402; RV64ZVE32F-NEXT: ld t6, 192(a1) 15403; RV64ZVE32F-NEXT: ld s0, 208(a1) 15404; RV64ZVE32F-NEXT: ld s1, 224(a1) 15405; RV64ZVE32F-NEXT: ld s2, 240(a1) 15406; RV64ZVE32F-NEXT: ld s3, 256(a1) 15407; RV64ZVE32F-NEXT: ld s4, 272(a1) 15408; RV64ZVE32F-NEXT: ld s5, 288(a1) 15409; RV64ZVE32F-NEXT: ld s6, 304(a1) 15410; RV64ZVE32F-NEXT: ld s7, 320(a1) 15411; RV64ZVE32F-NEXT: ld s8, 336(a1) 15412; RV64ZVE32F-NEXT: ld s9, 352(a1) 15413; RV64ZVE32F-NEXT: ld s10, 368(a1) 15414; RV64ZVE32F-NEXT: ld s11, 384(a1) 15415; RV64ZVE32F-NEXT: ld ra, 400(a1) 15416; RV64ZVE32F-NEXT: ld a6, 416(a1) 15417; RV64ZVE32F-NEXT: ld a5, 432(a1) 15418; RV64ZVE32F-NEXT: ld a2, 448(a1) 15419; RV64ZVE32F-NEXT: ld a3, 464(a1) 15420; RV64ZVE32F-NEXT: ld a4, 480(a1) 15421; RV64ZVE32F-NEXT: ld a1, 496(a1) 15422; RV64ZVE32F-NEXT: sd a2, 224(a0) 15423; RV64ZVE32F-NEXT: sd a3, 232(a0) 15424; RV64ZVE32F-NEXT: sd a4, 240(a0) 15425; RV64ZVE32F-NEXT: sd a1, 248(a0) 15426; RV64ZVE32F-NEXT: sd s11, 192(a0) 15427; RV64ZVE32F-NEXT: sd ra, 200(a0) 15428; RV64ZVE32F-NEXT: sd a6, 208(a0) 15429; RV64ZVE32F-NEXT: sd a5, 216(a0) 15430; RV64ZVE32F-NEXT: sd s7, 160(a0) 15431; RV64ZVE32F-NEXT: sd s8, 168(a0) 15432; RV64ZVE32F-NEXT: sd s9, 176(a0) 15433; RV64ZVE32F-NEXT: sd s10, 184(a0) 15434; RV64ZVE32F-NEXT: sd s3, 128(a0) 15435; RV64ZVE32F-NEXT: sd s4, 136(a0) 15436; RV64ZVE32F-NEXT: sd s5, 144(a0) 15437; RV64ZVE32F-NEXT: sd s6, 152(a0) 15438; RV64ZVE32F-NEXT: sd t6, 96(a0) 15439; RV64ZVE32F-NEXT: sd s0, 104(a0) 15440; RV64ZVE32F-NEXT: sd s1, 112(a0) 15441; RV64ZVE32F-NEXT: sd s2, 120(a0) 15442; RV64ZVE32F-NEXT: sd t2, 64(a0) 15443; RV64ZVE32F-NEXT: sd t3, 72(a0) 15444; RV64ZVE32F-NEXT: sd t4, 80(a0) 15445; RV64ZVE32F-NEXT: sd t5, 88(a0) 15446; RV64ZVE32F-NEXT: ld a1, 0(sp) # 8-byte Folded Reload 15447; RV64ZVE32F-NEXT: sd a1, 32(a0) 15448; RV64ZVE32F-NEXT: sd a7, 40(a0) 15449; RV64ZVE32F-NEXT: sd t0, 48(a0) 15450; RV64ZVE32F-NEXT: sd t1, 56(a0) 15451; RV64ZVE32F-NEXT: ld a1, 32(sp) # 8-byte Folded Reload 15452; RV64ZVE32F-NEXT: sd a1, 0(a0) 15453; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload 15454; RV64ZVE32F-NEXT: sd a1, 8(a0) 15455; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload 15456; RV64ZVE32F-NEXT: sd a1, 16(a0) 15457; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload 15458; RV64ZVE32F-NEXT: sd a1, 24(a0) 15459; RV64ZVE32F-NEXT: ld ra, 136(sp) # 8-byte Folded Reload 15460; RV64ZVE32F-NEXT: ld s0, 128(sp) # 8-byte Folded Reload 15461; RV64ZVE32F-NEXT: ld s1, 120(sp) # 8-byte Folded Reload 15462; RV64ZVE32F-NEXT: ld s2, 112(sp) # 8-byte Folded Reload 15463; RV64ZVE32F-NEXT: ld s3, 104(sp) # 8-byte Folded Reload 15464; RV64ZVE32F-NEXT: ld s4, 96(sp) # 8-byte Folded Reload 15465; RV64ZVE32F-NEXT: ld s5, 88(sp) # 8-byte Folded Reload 15466; RV64ZVE32F-NEXT: ld s6, 80(sp) # 8-byte Folded Reload 15467; RV64ZVE32F-NEXT: ld s7, 72(sp) # 8-byte Folded Reload 15468; RV64ZVE32F-NEXT: ld s8, 64(sp) # 8-byte Folded Reload 15469; RV64ZVE32F-NEXT: ld s9, 56(sp) # 8-byte Folded Reload 
; RV64ZVE32F-NEXT: ld s10, 48(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s11, 40(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: .cfi_restore ra
; RV64ZVE32F-NEXT: .cfi_restore s0
; RV64ZVE32F-NEXT: .cfi_restore s1
; RV64ZVE32F-NEXT: .cfi_restore s2
; RV64ZVE32F-NEXT: .cfi_restore s3
; RV64ZVE32F-NEXT: .cfi_restore s4
; RV64ZVE32F-NEXT: .cfi_restore s5
; RV64ZVE32F-NEXT: .cfi_restore s6
; RV64ZVE32F-NEXT: .cfi_restore s7
; RV64ZVE32F-NEXT: .cfi_restore s8
; RV64ZVE32F-NEXT: .cfi_restore s9
; RV64ZVE32F-NEXT: .cfi_restore s10
; RV64ZVE32F-NEXT: .cfi_restore s11
; RV64ZVE32F-NEXT: addi sp, sp, 144
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38, i64 40, i64 42, i64 44, i64 46, i64 48, i64 50, i64 52, i64 54, i64 56, i64 58, i64 60, i64 62>
  %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> splat (i1 true), <32 x i64> poison)
  ret <32 x i64> %x
}

define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
; RV32V-LABEL: masked_gather_widen_sew_negative_stride:
; RV32V: # %bb.0:
; RV32V-NEXT: addi a0, a0, 136
; RV32V-NEXT: li a1, -136
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), a1
; RV32V-NEXT: ret
;
; RV64V-LABEL: masked_gather_widen_sew_negative_stride:
; RV64V: # %bb.0:
; RV64V-NEXT: addi a0, a0, 136
; RV64V-NEXT: li a1, -136
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vlse64.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lui a1, 16393
; RV32ZVE32F-NEXT: addi a1, a1, -888
; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.s.x v9, a1
; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lw a1, 136(a0)
; RV64ZVE32F-NEXT: lw a2, 140(a0)
; RV64ZVE32F-NEXT: lw a3, 0(a0)
; RV64ZVE32F-NEXT: lw a0, 4(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.v.x v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 34, i64 35, i64 0, i64 1>
  %x = call <4 x i32> @llvm.masked.gather.v4i32.v32p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 true), <4 x i32> poison)
  ret <4 x i32> %x
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32V-ZVFH: {{.*}}
; RV32V-ZVFHMIN: {{.*}}
; RV32ZVE32F-ZVFH: {{.*}}
; RV32ZVE32F-ZVFHMIN: {{.*}}
; RV64: {{.*}}
; RV64V-ZVFH: {{.*}}
; RV64V-ZVFHMIN: {{.*}}