; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32NOM
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64NOM
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64M

; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M

define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
; CHECK-LABEL: extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v8i16(<8 x i16> %a) nounwind {
; CHECK-LABEL: extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v4i32(<4 x i32> %a) nounwind {
; CHECK-LABEL: extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <4 x i32> %a, i32 2
  ret i32 %b
}
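; On RV32, an i64 element is legalized into two i32 extracts: vmv.x.s reads
; the low half and a vsrl.vx by 32 exposes the high half.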
define i64 @extractelt_v2i64(<2 x i64> %a) nounwind {
; RV32-LABEL: extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = extractelement <2 x i64> %a, i32 0
  ret i64 %b
}

define bfloat @extractelt_v8bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: extractelt_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = extractelement <8 x bfloat> %a, i32 7
  ret bfloat %b
}

define half @extractelt_v8f16(<8 x half> %a) nounwind {
; ZVFH-LABEL: extractelt_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFH-NEXT:    vslidedown.vi v8, v8, 7
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 7
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = extractelement <8 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v4f32(<4 x float> %a) nounwind {
; CHECK-LABEL: extractelt_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <4 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v2f64(<2 x double> %a) nounwind {
; CHECK-LABEL: extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <2 x double> %a, i32 0
  ret double %b
}
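; A constant-index extract can run at a smaller LMUL than the source register
; group when the minimum VLEN (128 with +v) guarantees the index lies within
; the narrower group; compare extractelt_v16i16 (index 7, e16,m1) with
; extractelt_v8i32 (index 6, e32,m2).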
define i8 @extractelt_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: extractelt_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <32 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: extractelt_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: extractelt_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 6
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x i32> %a, i32 6
  ret i32 %b
}

define i64 @extractelt_v4i64(<4 x i64> %a) nounwind {
; RV32-LABEL: extractelt_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = extractelement <4 x i64> %a, i32 3
  ret i64 %b
}

define bfloat @extractelt_v16bf16(<16 x bfloat> %a) nounwind {
; CHECK-LABEL: extractelt_v16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = extractelement <16 x bfloat> %a, i32 7
  ret bfloat %b
}

define half @extractelt_v16f16(<16 x half> %a) nounwind {
; ZVFH-LABEL: extractelt_v16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFH-NEXT:    vslidedown.vi v8, v8, 7
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 7
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = extractelement <16 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v8f32(<8 x float> %a) nounwind {
; CHECK-LABEL: extractelt_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v4f64(<4 x double> %a) nounwind {
; CHECK-LABEL: extractelt_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = extractelement <4 x double> %a, i32 0
  ret double %b
}

; This uses a non-power-of-2 type so that it isn't an MVT, to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization bitcasts to vXi32 and does 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64(<3 x i64> %a) nounwind {
; RV32-LABEL: extractelt_v3i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vslidedown.vi v8, v8, 5
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = extractelement <3 x i64> %a, i32 2
  ret i64 %b
}

; An LMUL8 type
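; The vector is spilled with vse32.v to a stack slot realigned to the
; 128-byte size of an m8 register group, and the requested element is
; reloaded with a scalar lw.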
define i32 @extractelt_v32i32(<32 x i32> %a) nounwind {
; RV32-LABEL: extractelt_v32i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v32i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v8, (a1)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %b = extractelement <32 x i32> %a, i32 31
  ret i32 %b
}

; Exercise type legalization for a type beyond LMUL8
define i32 @extractelt_v64i32(<64 x i32> %a) nounwind {
; RV32-LABEL: extractelt_v64i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vse32.v v16, (a1)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT:    vse32.v v16, (a1)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %b = extractelement <64 x i32> %a, i32 63
  ret i32 %b
}
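; The _idx tests below extract at a variable index, which becomes a
; vslidedown.vx with the index taken from a scalar register.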
define i8 @extractelt_v16i8_idx(<16 x i8> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v8i16_idx(<8 x i16> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <8 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v4i32_idx(<4 x i32> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = add <4 x i32> %a, %a
  %c = extractelement <4 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v2i64_idx(<2 x i64> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v2i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vslidedown.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = add <2 x i64> %a, %a
  %c = extractelement <2 x i64> %b, i32 %idx
  ret i64 %c
}
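; Zvfbfmin and Zvfhmin provide only conversions, no arithmetic, so the fadd
; is widened to f32 with vfwcvt(bf16).f.f.v and narrowed back before the
; extract.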
define bfloat @extractelt_v8bf16_idx(<8 x bfloat> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8bf16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vslidedown.vx v8, v10, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = fadd <8 x bfloat> %a, %a
  %c = extractelement <8 x bfloat> %b, i32 %idx
  ret bfloat %c
}

define half @extractelt_v8f16_idx(<8 x half> %a, i32 zeroext %idx) nounwind {
; ZVFH-LABEL: extractelt_v8f16_idx:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v8
; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v8f16_idx:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vslidedown.vx v8, v10, a0
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = fadd <8 x half> %a, %a
  %c = extractelement <8 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v4f32_idx(<4 x float> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <4 x float> %a, %a
  %c = extractelement <4 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v2f64_idx(<2 x double> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v2f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <2 x double> %a, %a
  %c = extractelement <2 x double> %b, i32 %idx
  ret double %c
}

define i8 @extractelt_v32i8_idx(<32 x i8> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v32i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <32 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v16i16_idx(<16 x i16> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v8i32_idx(<8 x i32> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = add <8 x i32> %a, %a
  %c = extractelement <8 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v4i64_idx(<4 x i64> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v4i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vslidedown.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = add <4 x i64> %a, %a
  %c = extractelement <4 x i64> %b, i32 %idx
  ret i64 %c
}

define bfloat @extractelt_v16bf16_idx(<16 x bfloat> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16bf16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v12, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v8
; CHECK-NEXT:    vslidedown.vx v8, v12, a0
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    fmv.h.x fa0, a0
; CHECK-NEXT:    ret
  %b = fadd <16 x bfloat> %a, %a
  %c = extractelement <16 x bfloat> %b, i32 %idx
  ret bfloat %c
}

define half @extractelt_v16f16_idx(<16 x half> %a, i32 zeroext %idx) nounwind {
; ZVFH-LABEL: extractelt_v16f16_idx:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v8
; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
; ZVFH-NEXT:    vfmv.f.s fa0, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: extractelt_v16f16_idx:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
; ZVFHMIN-NEXT:    vslidedown.vx v8, v12, a0
; ZVFHMIN-NEXT:    vmv.x.s a0, v8
; ZVFHMIN-NEXT:    fmv.h.x fa0, a0
; ZVFHMIN-NEXT:    ret
  %b = fadd <16 x half> %a, %a
  %c = extractelement <16 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v8f32_idx(<8 x float> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <8 x float> %a, %a
  %c = extractelement <8 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v4f64_idx(<4 x double> %a, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %b = fadd <4 x double> %a, %a
  %c = extractelement <4 x double> %b, i32 %idx
  ret double %c
}

; This uses a non-power-of-2 type so that it isn't an MVT, to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization bitcasts to vXi32 and does 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64_idx(<3 x i64> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v3i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    add a0, a0, a0
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vx v10, v8, a0
; RV32-NEXT:    addi a1, a0, 1
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a0
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %b = add <3 x i64> %a, %a
  %c = extractelement <3 x i64> %b, i32 %idx
  ret i64 %c
}
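; Without +m there is no scalar multiply, so scaling the index to a byte
; offset takes a __mulsi3/__muldi3 libcall; with +m it is a slli folded
; into the aligned stack address with an or.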
define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32NOM-LABEL: extractelt_v32i32_idx:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    addi sp, sp, -256
; RV32NOM-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s2, 244(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    addi s0, sp, 256
; RV32NOM-NEXT:    andi sp, sp, -128
; RV32NOM-NEXT:    mv s2, a0
; RV32NOM-NEXT:    andi a0, a1, 31
; RV32NOM-NEXT:    li a1, 4
; RV32NOM-NEXT:    call __mulsi3
; RV32NOM-NEXT:    li a1, 32
; RV32NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32NOM-NEXT:    vle32.v v8, (s2)
; RV32NOM-NEXT:    mv a1, sp
; RV32NOM-NEXT:    add a0, a1, a0
; RV32NOM-NEXT:    vadd.vv v8, v8, v8
; RV32NOM-NEXT:    vse32.v v8, (a1)
; RV32NOM-NEXT:    lw a0, 0(a0)
; RV32NOM-NEXT:    addi sp, s0, -256
; RV32NOM-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s2, 244(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    addi sp, sp, 256
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_v32i32_idx:
; RV32M:       # %bb.0:
; RV32M-NEXT:    addi sp, sp, -256
; RV32M-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32M-NEXT:    addi s0, sp, 256
; RV32M-NEXT:    andi sp, sp, -128
; RV32M-NEXT:    andi a1, a1, 31
; RV32M-NEXT:    li a2, 32
; RV32M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32M-NEXT:    vle32.v v8, (a0)
; RV32M-NEXT:    slli a1, a1, 2
; RV32M-NEXT:    mv a0, sp
; RV32M-NEXT:    or a1, a0, a1
; RV32M-NEXT:    vadd.vv v8, v8, v8
; RV32M-NEXT:    vse32.v v8, (a0)
; RV32M-NEXT:    lw a0, 0(a1)
; RV32M-NEXT:    addi sp, s0, -256
; RV32M-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32M-NEXT:    addi sp, sp, 256
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_v32i32_idx:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    addi sp, sp, -256
; RV64NOM-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    addi s0, sp, 256
; RV64NOM-NEXT:    andi sp, sp, -128
; RV64NOM-NEXT:    mv s2, a0
; RV64NOM-NEXT:    andi a0, a1, 31
; RV64NOM-NEXT:    li a1, 4
; RV64NOM-NEXT:    call __muldi3
; RV64NOM-NEXT:    li a1, 32
; RV64NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64NOM-NEXT:    vle32.v v8, (s2)
; RV64NOM-NEXT:    mv a1, sp
; RV64NOM-NEXT:    add a0, a1, a0
; RV64NOM-NEXT:    vadd.vv v8, v8, v8
; RV64NOM-NEXT:    vse32.v v8, (a1)
; RV64NOM-NEXT:    lw a0, 0(a0)
; RV64NOM-NEXT:    addi sp, s0, -256
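; A <64 x i32> value is split into two m8 halves; both are stored to one
; contiguous 256-byte stack area so a single indexed load can reach any
; element.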
; RV64NOM-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    addi sp, sp, 256
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_v32i32_idx:
; RV64M:       # %bb.0:
; RV64M-NEXT:    addi sp, sp, -256
; RV64M-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64M-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64M-NEXT:    addi s0, sp, 256
; RV64M-NEXT:    andi sp, sp, -128
; RV64M-NEXT:    andi a1, a1, 31
; RV64M-NEXT:    li a2, 32
; RV64M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64M-NEXT:    vle32.v v8, (a0)
; RV64M-NEXT:    slli a1, a1, 2
; RV64M-NEXT:    mv a0, sp
; RV64M-NEXT:    or a1, a0, a1
; RV64M-NEXT:    vadd.vv v8, v8, v8
; RV64M-NEXT:    vse32.v v8, (a0)
; RV64M-NEXT:    lw a0, 0(a1)
; RV64M-NEXT:    addi sp, s0, -256
; RV64M-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64M-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64M-NEXT:    addi sp, sp, 256
; RV64M-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = add <32 x i32> %a, %a
  %c = extractelement <32 x i32> %b, i32 %idx
  ret i32 %c
}

define i32 @extractelt_v64i32_idx(<64 x i32> %a, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v64i32_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -384
; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 384
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    andi a0, a0, 63
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    addi a3, sp, 128
; RV32-NEXT:    slli a0, a0, 2
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vadd.vv v16, v16, v16
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    vse32.v v16, (a3)
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    lw a0, 0(a0)
; RV32-NEXT:    addi sp, s0, -384
; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 384
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -384
; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 384
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    andi a0, a0, 63
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    addi a3, sp, 128
; RV64-NEXT:    slli a0, a0, 2
; RV64-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    vse32.v v16, (a3)
; RV64-NEXT:    vse32.v v8, (a1)
; RV64-NEXT:    lw a0, 0(a0)
; RV64-NEXT:    addi sp, s0, -384
; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 384
; RV64-NEXT:    ret
  %b = add <64 x i32> %a, %a
  %c = extractelement <64 x i32> %b, i32 %idx
  ret i32 %c
}
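; An extract that feeds a store is lowered to a slidedown followed by a
; unit-stride vector store of the first element, so the value never passes
; through a scalar register.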
define void @store_extractelt_v16i8(<16 x i8> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <16 x i8> %a, i32 7
  store i8 %b, ptr %p
  ret void
}

define void @store_extractelt_v8i16(<8 x i16> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <8 x i16> %a, i32 7
  store i16 %b, ptr %p
  ret void
}

define void @store_extractelt_v4i32(<4 x i32> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <4 x i32> %a, i32 2
  store i32 %b, ptr %p
  ret void
}

; FIXME: Use vse64.v on RV32 to avoid two scalar extracts and two scalar stores.
define void @store_extractelt_v2i64(<2 x i64> %a, ptr %p) nounwind {
; RV32-LABEL: store_extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsrl.vx v9, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: store_extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %b = extractelement <2 x i64> %a, i64 1
  store i64 %b, ptr %p
  ret void
}

define void @store_extractelt_v2f64(<2 x double> %a, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %b = extractelement <2 x double> %a, i64 1
  store double %b, ptr %p
  ret void
}
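; When only one lane of a binop result is used, the binop is scalarized:
; the lane is extracted first and the operation is done on the scalar,
; provided the target has a cheap scalar form (note the NOM/M splits below).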
define i32 @extractelt_add_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_add_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    addi a0, a0, 13
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_add_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    addiw a0, a0, 13
; RV64-NEXT:    ret
  %bo = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sub_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_sub_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 13
; RV32-NEXT:    sub a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_sub_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    li a1, 13
; RV64-NEXT:    subw a0, a1, a0
; RV64-NEXT:    ret
  %bo = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %x
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_mul_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    li a0, 13
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vmul.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_mul_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    li a1, 13
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_mul_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    li a0, 13
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vmul.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_mul_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a0, v8
; RV64M-NEXT:    li a1, 13
; RV64M-NEXT:    mulw a0, a0, a1
; RV64M-NEXT:    ret
  %bo = mul <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_sdiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    lui a0, %hi(.LCPI46_0)
; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI46_0)
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vle32.v v9, (a0)
; RV32NOM-NEXT:    lui a0, 1044480
; RV32NOM-NEXT:    vmv.s.x v10, a0
; RV32NOM-NEXT:    lui a0, 12320
; RV32NOM-NEXT:    addi a0, a0, 257
; RV32NOM-NEXT:    vsext.vf4 v11, v10
; RV32NOM-NEXT:    vand.vv v10, v8, v11
; RV32NOM-NEXT:    vmulh.vv v8, v8, v9
; RV32NOM-NEXT:    vmv.s.x v9, a0
; RV32NOM-NEXT:    vsext.vf4 v11, v9
; RV32NOM-NEXT:    vadd.vv v8, v8, v10
; RV32NOM-NEXT:    vsra.vv v9, v8, v11
; RV32NOM-NEXT:    vsrl.vi v8, v8, 31
; RV32NOM-NEXT:    vadd.vv v8, v9, v8
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_sdiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    lui a0, 322639
; RV32M-NEXT:    vmv.x.s a1, v8
; RV32M-NEXT:    addi a0, a0, -945
; RV32M-NEXT:    mulh a0, a1, a0
; RV32M-NEXT:    srli a1, a0, 31
; RV32M-NEXT:    srai a0, a0, 2
; RV32M-NEXT:    add a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_sdiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    lui a0, %hi(.LCPI46_0)
; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI46_0)
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vle32.v v9, (a0)
; RV64NOM-NEXT:    lui a0, 1044480
; RV64NOM-NEXT:    vmv.s.x v10, a0
; RV64NOM-NEXT:    lui a0, 12320
; RV64NOM-NEXT:    addi a0, a0, 257
; RV64NOM-NEXT:    vsext.vf4 v11, v10
; RV64NOM-NEXT:    vand.vv v10, v8, v11
; RV64NOM-NEXT:    vmulh.vv v8, v8, v9
; RV64NOM-NEXT:    vmv.s.x v9, a0
; RV64NOM-NEXT:    vadd.vv v8, v8, v10
; RV64NOM-NEXT:    vsext.vf4 v10, v9
; RV64NOM-NEXT:    vsra.vv v8, v8, v10
; RV64NOM-NEXT:    vsrl.vi v9, v8, 31
; RV64NOM-NEXT:    vadd.vv v8, v8, v9
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_sdiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    lui a0, 322639
; RV64M-NEXT:    vmv.x.s a1, v8
; RV64M-NEXT:    addiw a0, a0, -945
; RV64M-NEXT:    mul a0, a1, a0
; RV64M-NEXT:    srli a1, a0, 63
; RV64M-NEXT:    srai a0, a0, 34
; RV64M-NEXT:    add a0, a0, a1
; RV64M-NEXT:    ret
  %bo = sdiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_udiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vsrl.vi v8, v8, 0
; RV32NOM-NEXT:    lui a0, 322639
; RV32NOM-NEXT:    addi a0, a0, -945
; RV32NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    srli a0, a0, 2
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_udiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    lui a0, 322639
; RV32M-NEXT:    vmv.x.s a1, v8
; RV32M-NEXT:    addi a0, a0, -945
; RV32M-NEXT:    mulhu a0, a1, a0
; RV32M-NEXT:    srli a0, a0, 2
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_udiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vsrl.vi v8, v8, 0
; RV64NOM-NEXT:    lui a0, 322639
; RV64NOM-NEXT:    addi a0, a0, -945
; RV64NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    slli a0, a0, 33
; RV64NOM-NEXT:    srli a0, a0, 35
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_udiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a0, 322639
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    addi a0, a0, -945
; RV64M-NEXT:    vmv.x.s a1, v8
; RV64M-NEXT:    slli a0, a0, 32
; RV64M-NEXT:    slli a1, a1, 32
; RV64M-NEXT:    mulhu a0, a1, a0
; RV64M-NEXT:    srli a0, a0, 34
; RV64M-NEXT:    ret
  %bo = udiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}
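; FP binops scalarize the same way: 13.0f (0x41500000) is materialized with
; lui+fmv.w.x and the operation is done on scalar FP registers.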
define float @extractelt_fadd_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fadd.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fadd <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fsub_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fsub_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fsub.s fa0, fa4, fa5
; CHECK-NEXT:    ret
  %bo = fsub <4 x float> <float 11.0, float 12.0, float 13.0, float 14.0>, %x
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fmul_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fmul.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fmul <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fdiv_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fdiv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fdiv <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}
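; With an exact VLEN of 128 (vscale_range(2,2)), the register holding the
; element is known at compile time, so the extract addresses the m1
; subregister (v9, v11) directly instead of sliding the whole m4 group.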
define i32 @extractelt_v16i32_idx7_exact_vlen(<16 x i32> %a) nounwind vscale_range(2,2) {
; CHECK-LABEL: extractelt_v16i32_idx7_exact_vlen:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v9, 3
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i32> %a, i32 7
  ret i32 %b
}

define i32 @extractelt_v16i32_idx15_exact_vlen(<16 x i32> %a) nounwind vscale_range(2,2) {
; CHECK-LABEL: extractelt_v16i32_idx15_exact_vlen:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v11, 3
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %b = extractelement <16 x i32> %a, i32 15
  ret i32 %b
}