; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = xor i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}


define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; RV32-LABEL: explode_16xi8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 4
; RV32-NEXT:    vmv.x.s a4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 5
; RV32-NEXT:    vmv.x.s a5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 6
; RV32-NEXT:    vmv.x.s a6, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 7
; RV32-NEXT:    vmv.x.s a7, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 8
; RV32-NEXT:    vmv.x.s t0, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 9
; RV32-NEXT:    vmv.x.s t1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 10
; RV32-NEXT:    vmv.x.s t2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 11
; RV32-NEXT:    vmv.x.s t3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 12
; RV32-NEXT:    vmv.x.s t4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 13
; RV32-NEXT:    vmv.x.s t5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 14
; RV32-NEXT:    vmv.x.s t6, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 3
; RV64-NEXT:    vmv.x.s a3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 4
; RV64-NEXT:    vmv.x.s a4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 5
; RV64-NEXT:    vmv.x.s a5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 6
; RV64-NEXT:    vmv.x.s a6, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 7
; RV64-NEXT:    vmv.x.s a7, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 8
; RV64-NEXT:    vmv.x.s t0, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 9
; RV64-NEXT:    vmv.x.s t1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 10
; RV64-NEXT:    vmv.x.s t2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 11
; RV64-NEXT:    vmv.x.s t3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 12
; RV64-NEXT:    vmv.x.s t4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 13
; RV64-NEXT:    vmv.x.s t5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 14
; RV64-NEXT:    vmv.x.s t6, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = xor i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}


define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; RV32-LABEL: explode_16xi16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 7
; RV32-NEXT:    vmv.x.s a7, v10
; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 8
; RV32-NEXT:    vmv.x.s t0, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 9
; RV32-NEXT:    vmv.x.s t1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 10
; RV32-NEXT:    vmv.x.s t2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 11
; RV32-NEXT:    vmv.x.s t3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 12
; RV32-NEXT:    vmv.x.s t4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 13
; RV32-NEXT:    vmv.x.s t5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 14
; RV32-NEXT:    vmv.x.s t6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi16:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 7
; RV64-NEXT:    vmv.x.s a7, v10
; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 8
; RV64-NEXT:    vmv.x.s t0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 9
; RV64-NEXT:    vmv.x.s t1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 10
; RV64-NEXT:    vmv.x.s t2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 11
; RV64-NEXT:    vmv.x.s t3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 12
; RV64-NEXT:    vmv.x.s t4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 13
; RV64-NEXT:    vmv.x.s t5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 14
; RV64-NEXT:    vmv.x.s t6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; CHECK-LABEL: explode_2xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = xor i32 %e0, %e1
  ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    addw a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}


define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 7
; RV64-NEXT:    vmv.x.s a7, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a0, a0, a7
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 116(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s2, -12
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a7, v12
; RV32-NEXT:    mv t0, sp
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vse32.v v8, (t0)
; RV32-NEXT:    lw t0, 32(sp)
; RV32-NEXT:    lw t1, 36(sp)
; RV32-NEXT:    lw t2, 40(sp)
; RV32-NEXT:    lw t3, 44(sp)
; RV32-NEXT:    lw t4, 48(sp)
; RV32-NEXT:    lw t5, 52(sp)
; RV32-NEXT:    lw t6, 56(sp)
; RV32-NEXT:    lw s2, 60(sp)
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t1, t1, t2
; RV32-NEXT:    add t1, t1, t3
; RV32-NEXT:    add a0, a0, t1
; RV32-NEXT:    add t4, t4, t5
; RV32-NEXT:    add t4, t4, t6
; RV32-NEXT:    add t4, t4, s2
; RV32-NEXT:    add a0, a0, t4
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 116(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 104(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s2, -24
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a6, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a7, v12
; RV64-NEXT:    mv t0, sp
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vse32.v v8, (t0)
; RV64-NEXT:    lw t0, 32(sp)
; RV64-NEXT:    lw t1, 36(sp)
; RV64-NEXT:    lw t2, 40(sp)
; RV64-NEXT:    lw t3, 44(sp)
; RV64-NEXT:    lw t4, 48(sp)
; RV64-NEXT:    lw t5, 52(sp)
; RV64-NEXT:    lw t6, 56(sp)
; RV64-NEXT:    lw s2, 60(sp)
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t1, t1, t2
; RV64-NEXT:    add t1, t1, t3
; RV64-NEXT:    add a0, a0, t1
; RV64-NEXT:    add t4, t4, t5
; RV64-NEXT:    add t4, t4, t6
; RV64-NEXT:    add t4, t4, s2
; RV64-NEXT:    addw a0, a0, t4
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 104(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    xor a1, a1, a0
; RV32-NEXT:    xor a0, a2, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = xor i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    xor a1, a1, a3
; RV32-NEXT:    xor a2, a2, a4
; RV32-NEXT:    add a6, a2, a6
; RV32-NEXT:    sltu a2, a6, a2
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, a6, a7
; RV32-NEXT:    sltu a2, a0, a6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}


define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vmv.x.s t6, v12
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    xor a1, a1, a3
; RV32-NEXT:    xor a2, a2, a4
; RV32-NEXT:    add a6, a2, a6
; RV32-NEXT:    sltu a2, a6, a2
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a1, a1, a7
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a2, t0, a6
; RV32-NEXT:    add a2, a2, t1
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t2, t0, t2
; RV32-NEXT:    sltu a2, t2, t0
; RV32-NEXT:    add a2, a2, t3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t4, t2, t4
; RV32-NEXT:    sltu a2, t4, t2
; RV32-NEXT:    add a2, a2, t5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t6, t4, t6
; RV32-NEXT:    sltu a2, t6, t4
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, t6, s0
; RV32-NEXT:    sltu a2, a0, t6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    mv a4, sp
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vse64.v v8, (a4)
; RV64-NEXT:    ld a4, 32(sp)
; RV64-NEXT:    ld a5, 40(sp)
; RV64-NEXT:    ld a6, 48(sp)
; RV64-NEXT:    ld a7, 56(sp)
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    .cfi_offset s7, -36
; RV32-NEXT:    .cfi_offset s8, -40
; RV32-NEXT:    .cfi_offset s9, -44
; RV32-NEXT:    .cfi_offset s10, -48
; RV32-NEXT:    .cfi_offset s11, -52
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a0
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v16, v8, 1
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a3, v24
; RV32-NEXT:    vmv.x.s a4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 2
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a5, v24
; RV32-NEXT:    vmv.x.s a6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 3
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a7, v24
; RV32-NEXT:    vmv.x.s t0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 4
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s3, v24
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 5
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s4, v24
; RV32-NEXT:    vmv.x.s t2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 6
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s5, v24
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 7
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s6, v24
; RV32-NEXT:    vmv.x.s t4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 8
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s7, v24
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 9
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s8, v24
; RV32-NEXT:    vmv.x.s t6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 10
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s9, v24
; RV32-NEXT:    vmv.x.s s0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 11
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s10, v24
; RV32-NEXT:    vmv.x.s s1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 12
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s11, v24
; RV32-NEXT:    vmv.x.s s2, v16
; RV32-NEXT:    vslidedown.vi v24, v8, 13
; RV32-NEXT:    vsrl.vx v16, v24, a0
; RV32-NEXT:    vmv.x.s ra, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 14
; RV32-NEXT:    vsrl.vx v0, v16, a0
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s a1, v24
; RV32-NEXT:    vsrl.vx v24, v8, a0
; RV32-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    xor a0, a0, a3
; RV32-NEXT:    xor a2, a2, a4
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    add a6, a2, a6
; RV32-NEXT:    sltu a2, a6, a2
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a2, t0, a6
; RV32-NEXT:    add a2, a2, s3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t1, t0, t1
; RV32-NEXT:    sltu a2, t1, t0
; RV32-NEXT:    add a2, a2, s4
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t2, t1, t2
; RV32-NEXT:    sltu a2, t2, t1
; RV32-NEXT:    add a2, a2, s5
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t3, t2, t3
; RV32-NEXT:    sltu a2, t3, t2
; RV32-NEXT:    add a2, a2, s6
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t4, t3, t4
; RV32-NEXT:    sltu a2, t4, t3
; RV32-NEXT:    add a2, a2, s7
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t5, t4, t5
; RV32-NEXT:    sltu a2, t5, t4
; RV32-NEXT:    add a2, a2, s8
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t6, t5, t6
; RV32-NEXT:    sltu a2, t6, t5
; RV32-NEXT:    add a2, a2, s9
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add s0, t6, s0
; RV32-NEXT:    sltu a2, s0, t6
; RV32-NEXT:    add a2, a2, s10
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add s1, s0, s1
; RV32-NEXT:    sltu a2, s1, s0
; RV32-NEXT:    add a2, a2, s11
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add s2, s1, s2
; RV32-NEXT:    sltu a2, s2, s1
; RV32-NEXT:    add a2, a2, ra
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    vmv.x.s a2, v0
; RV32-NEXT:    add a1, s2, a1
; RV32-NEXT:    sltu a3, a1, s2
; RV32-NEXT:    add a2, a3, a2
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    vmv.x.s a2, v24
; RV32-NEXT:    add a3, a1, a3
; RV32-NEXT:    sltu a1, a3, a1
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    add a0, a3, a0
; RV32-NEXT:    sltu a2, a0, a3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s2, -24
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v16, v8, 1
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 2
; RV64-NEXT:    vmv.x.s a2, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 3
; RV64-NEXT:    vmv.x.s a3, v16
; RV64-NEXT:    mv a4, sp
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vse64.v v8, (a4)
; RV64-NEXT:    ld a4, 32(sp)
; RV64-NEXT:    ld a5, 40(sp)
; RV64-NEXT:    ld a6, 48(sp)
; RV64-NEXT:    ld a7, 56(sp)
; RV64-NEXT:    ld t0, 64(sp)
; RV64-NEXT:    ld t1, 72(sp)
; RV64-NEXT:    ld t2, 80(sp)
; RV64-NEXT:    ld t3, 88(sp)
; RV64-NEXT:    ld t4, 96(sp)
; RV64-NEXT:    ld t5, 104(sp)
; RV64-NEXT:    ld t6, 112(sp)
; RV64-NEXT:    ld s2, 120(sp)
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t2, t2, t3
; RV64-NEXT:    add t2, t2, t4
; RV64-NEXT:    add t2, t2, t5
; RV64-NEXT:    add a0, a0, t2
; RV64-NEXT:    add t6, t6, s2
; RV64-NEXT:    add a0, a0, t6
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}