; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = xor i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a2, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a2, a0
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}


define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; CHECK-LABEL: explode_16xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 9
; CHECK-NEXT:    vmv.x.s a7, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 10
; CHECK-NEXT:    vmv.x.s t0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 11
; CHECK-NEXT:    vmv.x.s t1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 12
; CHECK-NEXT:    vmv.x.s t2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 13
; CHECK-NEXT:    vmv.x.s t3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 14
; CHECK-NEXT:    vmv.x.s t4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 15
; CHECK-NEXT:    vmv.x.s t5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s t6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, t6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a5, a5, a6
; CHECK-NEXT:    add a5, a5, a7
; CHECK-NEXT:    add a5, a5, t0
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    add t1, t1, t2
; CHECK-NEXT:    add t1, t1, t3
; CHECK-NEXT:    add t1, t1, t4
; CHECK-NEXT:    add t1, t1, t5
; CHECK-NEXT:    add a0, a0, t1
; CHECK-NEXT:    ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = xor i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a2, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a2, a0
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}


define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; CHECK-LABEL: explode_16xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v10
; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 8
; CHECK-NEXT:    vmv.x.s a6, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 9
; CHECK-NEXT:    vmv.x.s a7, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 10
; CHECK-NEXT:    vmv.x.s t0, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 11
; CHECK-NEXT:    vmv.x.s t1, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 12
; CHECK-NEXT:    vmv.x.s t2, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 13
; CHECK-NEXT:    vmv.x.s t3, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 14
; CHECK-NEXT:    vmv.x.s t4, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 15
; CHECK-NEXT:    vmv.x.s t5, v10
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s t6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, t6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a5, a5, a6
; CHECK-NEXT:    add a5, a5, a7
; CHECK-NEXT:    add a5, a5, t0
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    add t1, t1, t2
; CHECK-NEXT:    add t1, t1, t3
; CHECK-NEXT:    add t1, t1, t4
; CHECK-NEXT:    add t1, t1, t5
; CHECK-NEXT:    add a0, a0, t1
; CHECK-NEXT:    ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; CHECK-LABEL: explode_2xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = xor i32 %e0, %e1
  ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a0, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 3
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a2, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    addw a0, a2, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}


define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 7
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a6, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a6, a0
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 7
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, a6, a0
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    addw a0, a0, a5
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    mv a6, sp
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vse32.v v8, (a6)
; RV32-NEXT:    lw a6, 32(sp)
; RV32-NEXT:    lw a7, 36(sp)
; RV32-NEXT:    lw t0, 40(sp)
; RV32-NEXT:    lw t1, 44(sp)
; RV32-NEXT:    lw t2, 48(sp)
; RV32-NEXT:    lw t3, 52(sp)
; RV32-NEXT:    lw t4, 56(sp)
; RV32-NEXT:    lw t5, 60(sp)
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s t6, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, t6, a0
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a5, a5, a6
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t2, t2, t3
; RV32-NEXT:    add t2, t2, t4
; RV32-NEXT:    add t2, t2, t5
; RV32-NEXT:    add a0, a0, t2
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    mv a6, sp
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vse32.v v8, (a6)
; RV64-NEXT:    lw a6, 32(sp)
; RV64-NEXT:    lw a7, 36(sp)
; RV64-NEXT:    lw t0, 40(sp)
; RV64-NEXT:    lw t1, 44(sp)
; RV64-NEXT:    lw t2, 48(sp)
; RV64-NEXT:    lw t3, 52(sp)
; RV64-NEXT:    lw t4, 56(sp)
; RV64-NEXT:    lw t5, 60(sp)
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s t6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, t6, a0
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t2, t2, t3
; RV64-NEXT:    add t2, t2, t4
; RV64-NEXT:    add t2, t2, t5
; RV64-NEXT:    addw a0, a0, t2
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = xor i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a5, v8
; RV32-NEXT:    add a2, a5, a2
; RV32-NEXT:    sltu a5, a2, a5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    add a1, a0, a3
; RV32-NEXT:    add a0, a2, a4
; RV32-NEXT:    sltu a2, a0, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a2, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, a2, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}


define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s t5, v8
; RV32-NEXT:    add a2, t5, a2
; RV32-NEXT:    sltu t5, a2, t5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a0, t5
; RV32-NEXT:    add a0, a0, a3
; RV32-NEXT:    add a4, a2, a4
; RV32-NEXT:    sltu a1, a4, a2
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a6, a4, a6
; RV32-NEXT:    sltu a1, a6, a4
; RV32-NEXT:    add a1, a1, a7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a1, t0, a6
; RV32-NEXT:    add a1, a1, t1
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t2, t0, t2
; RV32-NEXT:    sltu a1, t2, t0
; RV32-NEXT:    add a1, a1, t3
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    add a0, t2, t4
; RV32-NEXT:    sltu a2, a0, t2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ld a2, 32(sp)
; RV64-NEXT:    ld a3, 40(sp)
; RV64-NEXT:    ld a4, 48(sp)
; RV64-NEXT:    ld a5, 56(sp)
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, a6, a0
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    .cfi_offset s7, -36
; RV32-NEXT:    .cfi_offset s8, -40
; RV32-NEXT:    .cfi_offset s9, -44
; RV32-NEXT:    .cfi_offset s10, -48
; RV32-NEXT:    .cfi_offset s11, -52
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v16, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s t6, v24
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    vslidedown.vi v16, v8, 3
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a1, v24
; RV32-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    vmv.x.s a2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 4
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s0, v24
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 5
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s1, v24
; RV32-NEXT:    vmv.x.s a4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 6
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s2, v24
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 7
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s3, v24
; RV32-NEXT:    vmv.x.s a6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 8
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s4, v24
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 9
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s5, v24
; RV32-NEXT:    vmv.x.s t0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 10
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s6, v24
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 11
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s7, v24
; RV32-NEXT:    vmv.x.s t2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 12
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s8, v24
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 13
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s9, v24
; RV32-NEXT:    vmv.x.s t4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 14
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s10, v24
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 15
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s11, v24
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vmv.x.s ra, v16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    add a1, a0, t6
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    lw t6, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    add t6, a0, t6
; RV32-NEXT:    sltu a0, t6, a0
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, t6, a2
; RV32-NEXT:    sltu a1, a2, t6
; RV32-NEXT:    add a1, a1, s0
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a3, a2, a3
; RV32-NEXT:    sltu a1, a3, a2
; RV32-NEXT:    add a1, a1, s1
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a4, a3, a4
; RV32-NEXT:    sltu a1, a4, a3
; RV32-NEXT:    add a1, a1, s2
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a5, a4, a5
; RV32-NEXT:    sltu a1, a5, a4
; RV32-NEXT:    add a1, a1, s3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a6, a5, a6
; RV32-NEXT:    sltu a1, a6, a5
; RV32-NEXT:    add a1, a1, s4
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a7, a6, a7
; RV32-NEXT:    sltu a1, a7, a6
; RV32-NEXT:    add a1, a1, s5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t0, a7, t0
; RV32-NEXT:    sltu a1, t0, a7
; RV32-NEXT:    add a1, a1, s6
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t1, t0, t1
; RV32-NEXT:    sltu a1, t1, t0
; RV32-NEXT:    add a1, a1, s7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t2, t1, t2
; RV32-NEXT:    sltu a1, t2, t1
; RV32-NEXT:    add a1, a1, s8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t3, t2, t3
; RV32-NEXT:    sltu a1, t3, t2
; RV32-NEXT:    add a1, a1, s9
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t4, t3, t4
; RV32-NEXT:    sltu a1, t4, t3
; RV32-NEXT:    add a1, a1, s10
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t5, t4, t5
; RV32-NEXT:    sltu a1, t5, t4
; RV32-NEXT:    add a1, a1, s11
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    add a0, t5, ra
; RV32-NEXT:    sltu a2, a0, t5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 2
; RV64-NEXT:    vmv.x.s a0, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 3
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ld a2, 32(sp)
; RV64-NEXT:    ld a3, 40(sp)
; RV64-NEXT:    ld a4, 48(sp)
; RV64-NEXT:    ld a5, 56(sp)
; RV64-NEXT:    ld a6, 64(sp)
; RV64-NEXT:    ld a7, 72(sp)
; RV64-NEXT:    ld t0, 80(sp)
; RV64-NEXT:    ld t1, 88(sp)
; RV64-NEXT:    ld t2, 96(sp)
; RV64-NEXT:    ld t3, 104(sp)
; RV64-NEXT:    ld t4, 112(sp)
; RV64-NEXT:    ld t5, 120(sp)
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s t6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, t6, a0
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a5, a5, a7
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add t0, t0, t1
; RV64-NEXT:    add t0, t0, t2
; RV64-NEXT:    add t0, t0, t3
; RV64-NEXT:    add a0, a0, t0
; RV64-NEXT:    add t4, t4, t5
; RV64-NEXT:    add a0, a0, t4
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}