; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = add i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = add i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}


define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = add i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; RV32-LABEL: explode_16xi8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 4
; RV32-NEXT:    vmv.x.s a4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 5
; RV32-NEXT:    vmv.x.s a5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 6
; RV32-NEXT:    vmv.x.s a6, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 7
; RV32-NEXT:    vmv.x.s a7, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 8
; RV32-NEXT:    vmv.x.s t0, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 9
; RV32-NEXT:    vmv.x.s t1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 10
; RV32-NEXT:    vmv.x.s t2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 11
; RV32-NEXT:    vmv.x.s t3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 12
; RV32-NEXT:    vmv.x.s t4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 13
; RV32-NEXT:    vmv.x.s t5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 14
; RV32-NEXT:    vmv.x.s t6, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 3
; RV64-NEXT:    vmv.x.s a3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 4
; RV64-NEXT:    vmv.x.s a4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 5
; RV64-NEXT:    vmv.x.s a5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 6
; RV64-NEXT:    vmv.x.s a6, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 7
; RV64-NEXT:    vmv.x.s a7, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 8
; RV64-NEXT:    vmv.x.s t0, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 9
; RV64-NEXT:    vmv.x.s t1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 10
; RV64-NEXT:    vmv.x.s t2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 11
; RV64-NEXT:    vmv.x.s t3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 12
; RV64-NEXT:    vmv.x.s t4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 13
; RV64-NEXT:    vmv.x.s t5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 14
; RV64-NEXT:    vmv.x.s t6, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = add i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = add i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = add i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}


define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = add i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; RV32-LABEL: explode_16xi16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 7
; RV32-NEXT:    vmv.x.s a7, v10
; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 8
; RV32-NEXT:    vmv.x.s t0, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 9
; RV32-NEXT:    vmv.x.s t1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 10
; RV32-NEXT:    vmv.x.s t2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 11
; RV32-NEXT:    vmv.x.s t3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 12
; RV32-NEXT:    vmv.x.s t4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 13
; RV32-NEXT:    vmv.x.s t5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 14
; RV32-NEXT:    vmv.x.s t6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi16:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 7
; RV64-NEXT:    vmv.x.s a7, v10
; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 8
; RV64-NEXT:    vmv.x.s t0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 9
; RV64-NEXT:    vmv.x.s t1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 10
; RV64-NEXT:    vmv.x.s t2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 11
; RV64-NEXT:    vmv.x.s t3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 12
; RV64-NEXT:    vmv.x.s t4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 13
; RV64-NEXT:    vmv.x.s t5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 14
; RV64-NEXT:    vmv.x.s t6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = add i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; RV32-LABEL: explode_2xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    addw a0, a0, a1
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = add i32 %e0, %e1
  ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    addw a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}


define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 7
; RV64-NEXT:    vmv.x.s a7, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a0, a0, a7
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a7, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 8
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 9
; RV32-NEXT:    vmv.x.s t1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 10
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 11
; RV32-NEXT:    vmv.x.s t3, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 12
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 13
; RV32-NEXT:    vmv.x.s t5, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 14
; RV32-NEXT:    vmv.x.s t6, v12
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a6, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a7, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 8
; RV64-NEXT:    vmv.x.s t0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 9
; RV64-NEXT:    vmv.x.s t1, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 10
; RV64-NEXT:    vmv.x.s t2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 11
; RV64-NEXT:    vmv.x.s t3, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 12
; RV64-NEXT:    vmv.x.s t4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 13
; RV64-NEXT:    vmv.x.s t5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 14
; RV64-NEXT:    vmv.x.s t6, v12
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    addw a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, a2, a3
; RV32-NEXT:    sltu a2, a0, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = add i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    add a1, a1, a3
; RV32-NEXT:    add a4, a2, a4
; RV32-NEXT:    sltu a2, a4, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a6, a4, a6
; RV32-NEXT:    sltu a2, a6, a4
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, a6, a7
; RV32-NEXT:    sltu a2, a0, a6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = add i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}


define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vmv.x.s t6, v12
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a1, a1, a3
; RV32-NEXT:    add a4, a2, a4
; RV32-NEXT:    sltu a2, a4, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a6, a4, a6
; RV32-NEXT:    sltu a2, a6, a4
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a2, a2, a7
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a2, t0, a6
; RV32-NEXT:    add a2, a2, t1
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t2, t0, t2
; RV32-NEXT:    sltu a2, t2, t0
; RV32-NEXT:    add a2, a2, t3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t4, t2, t4
; RV32-NEXT:    sltu a2, t4, t2
; RV32-NEXT:    add a2, a2, t5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t6, t4, t6
; RV32-NEXT:    sltu a2, t6, t4
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, t6, s0
; RV32-NEXT:    sltu a2, a0, t6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a6, v12
; RV64-NEXT:    vslidedown.vi v8, v8, 7
; RV64-NEXT:    vmv.x.s a7, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = add i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    .cfi_offset s7, -36
; RV32-NEXT:    .cfi_offset s8, -40
; RV32-NEXT:    .cfi_offset s9, -44
; RV32-NEXT:    .cfi_offset s10, -48
; RV32-NEXT:    .cfi_offset s11, -52
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vmv.x.s a0, v16
; RV32-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v16, v8, 1
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s a5, v24
; RV32-NEXT:    vmv.x.s a6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 2
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s a3, v24
; RV32-NEXT:    vmv.x.s a4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 3
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s2, v24
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 4
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s3, v24
; RV32-NEXT:    vmv.x.s t0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 5
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s4, v24
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 6
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s5, v24
; RV32-NEXT:    vmv.x.s t2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 7
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s6, v24
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 8
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s7, v24
; RV32-NEXT:    vmv.x.s t4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 9
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s8, v24
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 10
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s9, v24
; RV32-NEXT:    vmv.x.s t6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 11
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s10, v24
; RV32-NEXT:    vmv.x.s s0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 12
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s11, v24
; RV32-NEXT:    vmv.x.s s1, v16
; RV32-NEXT:    vslidedown.vi v0, v8, 13
; RV32-NEXT:    vsrl.vx v16, v0, a1
; RV32-NEXT:    vmv.x.s ra, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 14
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s a2, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    add a5, a1, a5
; RV32-NEXT:    add a6, a0, a6
; RV32-NEXT:    sltu a0, a6, a0
; RV32-NEXT:    add a0, a5, a0
; RV32-NEXT:    add a0, a0, a3
; RV32-NEXT:    add a4, a6, a4
; RV32-NEXT:    sltu a1, a4, a6
; RV32-NEXT:    add a1, a1, s2
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a7, a4, a7
; RV32-NEXT:    sltu a1, a7, a4
; RV32-NEXT:    add a1, a1, s3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t0, a7, t0
; RV32-NEXT:    sltu a1, t0, a7
; RV32-NEXT:    add a1, a1, s4
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t1, t0, t1
; RV32-NEXT:    sltu a1, t1, t0
; RV32-NEXT:    add a1, a1, s5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t2, t1, t2
; RV32-NEXT:    sltu a1, t2, t1
; RV32-NEXT:    add a1, a1, s6
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t3, t2, t3
; RV32-NEXT:    sltu a1, t3, t2
; RV32-NEXT:    add a1, a1, s7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t4, t3, t4
; RV32-NEXT:    sltu a1, t4, t3
; RV32-NEXT:    add a1, a1, s8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t5, t4, t5
; RV32-NEXT:    sltu a1, t5, t4
; RV32-NEXT:    add a1, a1, s9
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t6, t5, t6
; RV32-NEXT:    sltu a1, t6, t5
; RV32-NEXT:    add a1, a1, s10
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s0, t6, s0
; RV32-NEXT:    sltu a1, s0, t6
; RV32-NEXT:    add a1, a1, s11
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s1, s0, s1
; RV32-NEXT:    sltu a1, s1, s0
; RV32-NEXT:    add a1, a1, ra
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    vmv.x.s a1, v24
; RV32-NEXT:    add a2, s1, a2
; RV32-NEXT:    sltu a3, a2, s1
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    add a3, a2, a3
; RV32-NEXT:    sltu a2, a3, a2
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    add a0, a3, a0
; RV32-NEXT:    sltu a2, a0, a3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v16, v8, 1
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 2
; RV64-NEXT:    vmv.x.s a2, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 3
; RV64-NEXT:    vmv.x.s a3, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 4
; RV64-NEXT:    vmv.x.s a4, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 5
; RV64-NEXT:    vmv.x.s a5, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 6
; RV64-NEXT:    vmv.x.s a6, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 7
; RV64-NEXT:    vmv.x.s a7, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 8
; RV64-NEXT:    vmv.x.s t0, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 9
; RV64-NEXT:    vmv.x.s t1, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 10
; RV64-NEXT:    vmv.x.s t2, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 11
; RV64-NEXT:    vmv.x.s t3, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 12
; RV64-NEXT:    vmv.x.s t4, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 13
; RV64-NEXT:    vmv.x.s t5, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 14
; RV64-NEXT:    vmv.x.s t6, v16
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = add i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}