; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Every function in this file has the same shape: build a lane mask with
; (%b == 0), optionally zext/sext %x to a wider element type, replace the
; unselected lanes with zero, and feed the result to llvm.vector.reduce.add.
; The CHECK lines record the code llc emits for each shape. They are generated
; output: regenerate them with the script named in the NOTE line above instead
; of editing them by hand.

; <4 x i32> -> i32. Expected: one predicated vaddvt.
define arm_aapcs_vfpcc i32 @add_v4i32_v4i32(<4 x i32> %x, <4 x i32> %b) {
; CHECK-LABEL: add_v4i32_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %b, zeroinitializer
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
  ret i32 %z
}

; <4 x i32> zext to <4 x i64> -> i64. Expected: one predicated vaddlvt.u32.
define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %b) {
; CHECK-LABEL: add_v4i32_v4i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %b, zeroinitializer
  %xx = zext <4 x i32> %x to <4 x i64>
  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
  ret i64 %z
}

; <4 x i32> sext to <4 x i64> -> i64. Expected: one predicated vaddlvt.s32.
define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %b) {
; CHECK-LABEL: add_v4i32_v4i64_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %b, zeroinitializer
  %xx = sext <4 x i32> %x to <4 x i64>
  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
  ret i64 %z
}

; <2 x i32> zext to <2 x i64> -> i64. Expected: the predicate is assembled in a
; GPR (csetm/bfi/vmsr) and the two lanes are summed with adds/adcs.
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
; CHECK-LABEL: add_v2i32_v2i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i32> %b, zeroinitializer
  %xx = zext <2 x i32> %x to <2 x i64>
  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
  ret i64 %z
}

; <2 x i32> sext to <2 x i64> -> i64. Expected: as the zext variant, with the
; high words produced by asrs #31.
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %b) {
; CHECK-LABEL: add_v2i32_v2i64_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i32> %b, zeroinitializer
  %xx = sext <2 x i32> %x to <2 x i64>
  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
  ret i64 %z
}

; <8 x i16> zext to <8 x i32> -> i32. Expected: one predicated vaddvt.u16.
define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-LABEL: add_v8i16_v8i32_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %b, zeroinitializer
  %xx = zext <8 x i16> %x to <8 x i32>
  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
  ret i32 %z
}

; <8 x i16> sext to <8 x i32> -> i32. Expected: one predicated vaddvt.s16.
define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-LABEL: add_v8i16_v8i32_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvt.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %b, zeroinitializer
  %xx = sext <8 x i16> %x to <8 x i32>
  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
  ret i32 %z
}

; <4 x i16> zext to <4 x i32> -> i32. Expected: both operands widened with
; vmovlb, then a predicated vaddvt.u32.
define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %b) {
; CHECK-LABEL: add_v4i16_v4i32_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i16> %b, zeroinitializer
  %xx = zext <4 x i16> %x to <4 x i32>
  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
  ret i32 %z
}

; <4 x i16> sext to <4 x i32> -> i32. Expected: %x widened with vmovlb.s16,
; then a predicated vaddvt.u32.
define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %b) {
; CHECK-LABEL: add_v4i16_v4i32_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i16> %b, zeroinitializer
  %xx = sext <4 x i16> %x to <4 x i32>
  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
  ret i32 %z
}

; <8 x i16> -> zeroext i16. Expected: predicated vaddvt.u16 followed by uxth
; for the zeroext return.
define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %b) {
; CHECK-LABEL: add_v8i16_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %b, zeroinitializer
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
  ret i16 %z
}

; <8 x i16> zext to <8 x i64> -> i64. Expected: no single reduction
; instruction; the predicate and data are expanded two lanes at a time and
; accumulated in r0:r1 with adds/adcs.
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-LABEL: add_v8i16_v8i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vmov.i8 q3, #0x0
; CHECK-NEXT:    vmov.i8 q4, #0xff
; CHECK-NEXT:    vcmp.i16 eq, q1, zr
; CHECK-NEXT:    vpsel q5, q4, q3
; CHECK-NEXT:    vmov.u16 r0, q5[2]
; CHECK-NEXT:    vmov.u16 r1, q5[0]
; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q5[3]
; CHECK-NEXT:    vmov.u16 r1, q5[1]
; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vpsel q6, q4, q3
; CHECK-NEXT:    vmov r0, r1, d12
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
; CHECK-NEXT:    vmov.u16 r0, q0[1]
; CHECK-NEXT:    vmov.u16 r1, q0[0]
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vmov q2[2], q2[0], r1, r0
; CHECK-NEXT:    vmov.i64 q1, #0xffff
; CHECK-NEXT:    vand q7, q2, q1
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vpsel q7, q7, q2
; CHECK-NEXT:    vmov r0, r1, d15
; CHECK-NEXT:    vmov r2, r3, d14
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    vmov r2, r3, d13
; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
; CHECK-NEXT:    vmov.u16 r2, q0[3]
; CHECK-NEXT:    vmov.u16 r3, q0[2]
; CHECK-NEXT:    vcmp.i32 ne, q6, zr
; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
; CHECK-NEXT:    vand q6, q6, q1
; CHECK-NEXT:    vpsel q6, q6, q2
; CHECK-NEXT:    vmov r2, r3, d12
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d13
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    vmov.u16 r2, q5[6]
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov.u16 r3, q5[4]
; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
; CHECK-NEXT:    vmov.u16 r2, q5[7]
; CHECK-NEXT:    vmov.u16 r3, q5[5]
; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
; CHECK-NEXT:    vcmp.i32 ne, q6, zr
; CHECK-NEXT:    vpsel q3, q4, q3
; CHECK-NEXT:    vmov r2, r3, d6
; CHECK-NEXT:    vmov q4[2], q4[0], r2, r3
; CHECK-NEXT:    vmov q4[3], q4[1], r2, r3
; CHECK-NEXT:    vmov.u16 r2, q0[5]
; CHECK-NEXT:    vmov.u16 r3, q0[4]
; CHECK-NEXT:    vcmp.i32 ne, q4, zr
; CHECK-NEXT:    vmov q4[2], q4[0], r3, r2
; CHECK-NEXT:    vand q4, q4, q1
; CHECK-NEXT:    vpsel q4, q4, q2
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d7
; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
; CHECK-NEXT:    vmov.u16 r2, q0[7]
; CHECK-NEXT:    vmov.u16 r3, q0[6]
; CHECK-NEXT:    vcmp.i32 ne, q3, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vpsel q0, q0, q2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %b, zeroinitializer
  %xx = zext <8 x i16> %x to <8 x i64>
  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
  ret i64 %z
}

; <8 x i16> sext to <8 x i64> -> i64. Expected: same pairwise expansion as the
; zext variant, with lanes read via vmov.s16 and high words from asrs #31.
define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %b) {
; CHECK-LABEL: add_v8i16_v8i64_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vmov.i8 q2, #0x0
; CHECK-NEXT:    vmov.i8 q3, #0xff
; CHECK-NEXT:    vcmp.i16 eq, q1, zr
; CHECK-NEXT:    vpsel q4, q3, q2
; CHECK-NEXT:    vmov.u16 r0, q4[2]
; CHECK-NEXT:    vmov.u16 r1, q4[0]
; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q4[3]
; CHECK-NEXT:    vmov.u16 r1, q4[1]
; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vpsel q5, q3, q2
; CHECK-NEXT:    vmov r0, r1, d10
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
; CHECK-NEXT:    vmov.s16 r0, q0[1]
; CHECK-NEXT:    vmov.s16 r1, q0[0]
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vmov q6[2], q6[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    vmov q6[3], q6[1], r1, r0
; CHECK-NEXT:    vpsel q6, q6, q1
; CHECK-NEXT:    vmov r0, r1, d13
; CHECK-NEXT:    vmov r2, r3, d12
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
; CHECK-NEXT:    vmov.s16 r2, q0[3]
; CHECK-NEXT:    vmov.s16 r3, q0[2]
; CHECK-NEXT:    vcmp.i32 ne, q5, zr
; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
; CHECK-NEXT:    vpsel q5, q5, q1
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    vmov.u16 r2, q4[6]
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov.u16 r3, q4[4]
; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
; CHECK-NEXT:    vmov.u16 r2, q4[7]
; CHECK-NEXT:    vmov.u16 r3, q4[5]
; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
; CHECK-NEXT:    vcmp.i32 ne, q5, zr
; CHECK-NEXT:    vpsel q2, q3, q2
; CHECK-NEXT:    vmov r2, r3, d4
; CHECK-NEXT:    vmov q3[2], q3[0], r2, r3
; CHECK-NEXT:    vmov q3[3], q3[1], r2, r3
; CHECK-NEXT:    vmov.s16 r2, q0[5]
; CHECK-NEXT:    vmov.s16 r3, q0[4]
; CHECK-NEXT:    vcmp.i32 ne, q3, zr
; CHECK-NEXT:    vmov q3[2], q3[0], r3, r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
; CHECK-NEXT:    vpsel q3, q3, q1
; CHECK-NEXT:    vmov r2, r3, d6
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d7
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d5
; CHECK-NEXT:    vmov q2[2], q2[0], r2, r3
; CHECK-NEXT:    vmov q2[3], q2[1], r2, r3
; CHECK-NEXT:    vmov.s16 r2, q0[7]
; CHECK-NEXT:    vmov.s16 r3, q0[6]
; CHECK-NEXT:    vcmp.i32 ne, q2, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %b, zeroinitializer
  %xx = sext <8 x i16> %x to <8 x i64>
  %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s)
  ret i64 %z
}

; <4 x i16> zext to <4 x i64> -> i64. Expected: widen with vmovlb.u16, then a
; predicated vaddlvt.u32.
define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %b) {
; CHECK-LABEL: add_v4i16_v4i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i16> %b, zeroinitializer
  %xx = zext <4 x i16> %x to <4 x i64>
  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
  ret i64 %z
}

; <4 x i16> sext to <4 x i64> -> i64. Expected: widen %x with vmovlb.s16, then
; a predicated vaddlvt.s32.
define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %b) {
; CHECK-LABEL: add_v4i16_v4i64_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i16> %b, zeroinitializer
  %xx = sext <4 x i16> %x to <4 x i64>
  %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s)
  ret i64 %z
}

; <2 x i16> zext to <2 x i64> -> i64. Expected: operands masked to 16 bits,
; predicate built in a GPR, lanes combined with add/orrs.
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
; CHECK-LABEL: add_v2i16_v2i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i64 q2, #0xffff
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i16> %b, zeroinitializer
  %xx = zext <2 x i16> %x to <2 x i64>
  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
  ret i64 %z
}

; <2 x i16> sext to <2 x i64> -> i64. Expected: lanes sign-extended with
; sxth/asrs in GPRs, then summed with adds/adcs.
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %b) {
; CHECK-LABEL: add_v2i16_v2i64_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0xffff
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    sxth r1, r1
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i16> %b, zeroinitializer
  %xx = sext <2 x i16> %x to <2 x i64>
  %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s)
  ret i64 %z
}

; <16 x i8> zext to <16 x i32> -> i32. Expected: one predicated vaddvt.u8.
define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i32_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q1, zr
; CHECK-NEXT:    vaddvt.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %b, zeroinitializer
  %xx = zext <16 x i8> %x to <16 x i32>
  %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
  ret i32 %z
}

; <16 x i8> sext to <16 x i32> -> i32. Expected: one predicated vaddvt.s8.
define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i32_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q1, zr
; CHECK-NEXT:    vaddvt.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %b, zeroinitializer
  %xx = sext <16 x i8> %x to <16 x i32>
  %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s)
  ret i32 %z
}

; <8 x i8> zext to <8 x i32> -> i32. Expected: widen with vmovlb.u8, then a
; predicated vaddvt.u16.
define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %b) {
; CHECK-LABEL: add_v8i8_v8i32_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q1
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i8> %b, zeroinitializer
  %xx = zext <8 x i8> %x to <8 x i32>
  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
  ret i32 %z
}

; <8 x i8> sext to <8 x i32> -> i32. Expected: widen %x with vmovlb.s8, then a
; predicated vaddvt.s16.
define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %b) {
; CHECK-LABEL: add_v8i8_v8i32_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q1
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvt.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i8> %b, zeroinitializer
  %xx = sext <8 x i8> %x to <8 x i32>
  %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s)
  ret i32 %z
}

; <4 x i8> zext to <4 x i32> -> i32. Expected: both operands masked with 0xff,
; then a predicated vaddvt.u32.
define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %b) {
; CHECK-LABEL: add_v4i8_v4i32_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0xff
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i8> %b, zeroinitializer
  %xx = zext <4 x i8> %x to <4 x i32>
  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
  ret i32 %z
}

; <4 x i8> sext to <4 x i32> -> i32. Expected: %x widened with
; vmovlb.s8 + vmovlb.s16, then a predicated vaddvt.u32.
define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %b) {
; CHECK-LABEL: add_v4i8_v4i32_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0xff
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i8> %b, zeroinitializer
  %xx = sext <4 x i8> %x to <4 x i32>
  %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer
  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
  ret i32 %z
}

; <16 x i8> zext to <16 x i16> -> zeroext i16. Expected: predicated vaddvt.u8
; followed by uxth.
define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i16_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q1, zr
; CHECK-NEXT:    vaddvt.u8 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %b, zeroinitializer
  %xx = zext <16 x i8> %x to <16 x i16>
  %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
  ret i16 %z
}

; <16 x i8> sext to <16 x i16> -> signext i16. Expected: predicated vaddvt.s8
; followed by sxth.
define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q1, zr
; CHECK-NEXT:    vaddvt.s8 r0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %b, zeroinitializer
  %xx = sext <16 x i8> %x to <16 x i16>
  %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer
  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s)
  ret i16 %z
}

; <8 x i8> zext to <8 x i16> -> zeroext i16. Expected: widen with vmovlb.u8,
; predicated vaddvt.u16, then uxth.
define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %b) {
; CHECK-LABEL: add_v8i8_v8i16_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q1
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i8> %b, zeroinitializer
  %xx = zext <8 x i8> %x to <8 x i16>
  %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
  ret i16 %z
}

; <8 x i8> sext to <8 x i16> -> signext i16. Expected: widen %x with
; vmovlb.s8, predicated vaddvt.u16, then sxth.
define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %b) {
; CHECK-LABEL: add_v8i8_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q1
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i8> %b, zeroinitializer
  %xx = sext <8 x i8> %x to <8 x i16>
  %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer
  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s)
  ret i16 %z
}

; <16 x i8> -> zeroext i8. Expected: predicated vaddvt.u8 followed by uxtb.
define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q1, zr
; CHECK-NEXT:    vaddvt.u8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %b, zeroinitializer
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s)
  ret i8 %z
}

; <16 x i8> zext to <16 x i64> -> i64. Expected: no single reduction
; instruction; the predicate is widened i8 -> i16 -> i32 in stages, data is
; handled two lanes at a time, and one vector constant is spilled to the stack.
define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vmov q2, q0
; CHECK-NEXT:    vcmp.i8 eq, q1, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vpsel q5, q1, q0
; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT:    vmov.u8 r0, q5[0]
; CHECK-NEXT:    vmov.16 q3[0], r0
; CHECK-NEXT:    vmov.u8 r0, q5[1]
; CHECK-NEXT:    vmov.16 q3[1], r0
; CHECK-NEXT:    vmov.u8 r0, q5[2]
; CHECK-NEXT:    vmov.16 q3[2], r0
; CHECK-NEXT:    vmov.u8 r0, q5[3]
; CHECK-NEXT:    vmov.16 q3[3], r0
; CHECK-NEXT:    vmov.u8 r0, q5[4]
; CHECK-NEXT:    vmov.16 q3[4], r0
; CHECK-NEXT:    vmov.u8 r0, q5[5]
; CHECK-NEXT:    vmov.16 q3[5], r0
; CHECK-NEXT:    vmov.u8 r0, q5[6]
; CHECK-NEXT:    vmov.16 q3[6], r0
; CHECK-NEXT:    vmov.u8 r0, q5[7]
; CHECK-NEXT:    vmov.16 q3[7], r0
; CHECK-NEXT:    vcmp.i16 ne, q3, zr
; CHECK-NEXT:    vpsel q6, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q6[2]
; CHECK-NEXT:    vmov.u16 r1, q6[0]
; CHECK-NEXT:    vmov q3[2], q3[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q6[3]
; CHECK-NEXT:    vmov.u16 r1, q6[1]
; CHECK-NEXT:    vmov q3[3], q3[1], r1, r0
; CHECK-NEXT:    vcmp.i32 ne, q3, zr
; CHECK-NEXT:    vpsel q7, q1, q0
; CHECK-NEXT:    vmov r0, r1, d14
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r1
; CHECK-NEXT:    vmov q3[3], q3[1], r0, r1
; CHECK-NEXT:    vmov.u8 r0, q2[1]
; CHECK-NEXT:    vmov.u8 r1, q2[0]
; CHECK-NEXT:    vcmp.i32 ne, q3, zr
; CHECK-NEXT:    vmov q4[2], q4[0], r1, r0
; CHECK-NEXT:    vmov.i64 q3, #0xff
; CHECK-NEXT:    vand q0, q4, q3
; CHECK-NEXT:    vmov.i32 q4, #0x0
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    vmov r2, r3, d15
; CHECK-NEXT:    vldrw.u32 q7, [sp] @ 16-byte Reload
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    vmov.u8 r2, q2[3]
; CHECK-NEXT:    vmov.u8 r3, q2[2]
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q3
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    vmov.u16 r2, q6[6]
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov.u16 r3, q6[4]
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vmov.u16 r2, q6[7]
; CHECK-NEXT:    vmov.u16 r3, q6[5]
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vpsel q6, q1, q7
; CHECK-NEXT:    vmov r2, r3, d12
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    vmov.u8 r2, q2[5]
; CHECK-NEXT:    vmov.u8 r3, q2[4]
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q3
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d13
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    vmov.u8 r2, q2[7]
; CHECK-NEXT:    vmov.u8 r3, q2[6]
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q3
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    vmov.u8 r2, q5[8]
; CHECK-NEXT:    vmov.16 q6[0], r2
; CHECK-NEXT:    vmov.u8 r2, q5[9]
; CHECK-NEXT:    vmov.16 q6[1], r2
; CHECK-NEXT:    vmov.u8 r2, q5[10]
; CHECK-NEXT:    vmov.16 q6[2], r2
; CHECK-NEXT:    vmov.u8 r2, q5[11]
; CHECK-NEXT:    vmov.16 q6[3], r2
; CHECK-NEXT:    vmov.u8 r2, q5[12]
; CHECK-NEXT:    vmov.16 q6[4], r2
; CHECK-NEXT:    vmov.u8 r2, q5[13]
; CHECK-NEXT:    vmov.16 q6[5], r2
; CHECK-NEXT:    vmov.u8 r2, q5[14]
; CHECK-NEXT:    vmov.16 q6[6], r2
; CHECK-NEXT:    vmov.u8 r2, q5[15]
; CHECK-NEXT:    vmov.16 q6[7], r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vcmp.i16 ne, q6, zr
; CHECK-NEXT:    vpsel q5, q1, q7
; CHECK-NEXT:    vmov.u16 r2, q5[2]
; CHECK-NEXT:    vmov.u16 r3, q5[0]
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vmov.u16 r2, q5[3]
; CHECK-NEXT:    vmov.u16 r3, q5[1]
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vpsel q6, q1, q7
; CHECK-NEXT:    vmov r2, r3, d12
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    vmov.u8 r2, q2[9]
; CHECK-NEXT:    vmov.u8 r3, q2[8]
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q3
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d13
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    vmov.u8 r2, q2[11]
; CHECK-NEXT:    vmov.u8 r3, q2[10]
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q3
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    vmov.u16 r2, q5[6]
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov.u16 r3, q5[4]
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vmov.u16 r2, q5[7]
; CHECK-NEXT:    vmov.u16 r3, q5[5]
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vpsel q1, q1, q7
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    vmov.u8 r2, q2[13]
; CHECK-NEXT:    vmov.u8 r3, q2[12]
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q3
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r3
; CHECK-NEXT:    vmov.u8 r2, q2[15]
; CHECK-NEXT:    vmov.u8 r3, q2[14]
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    vand q0, q0, q3
; CHECK-NEXT:    vpsel q0, q0, q4
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %b, zeroinitializer
  %xx = zext <16 x i8> %x to <16 x i64>
  %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer
  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %b) {
; CHECK-LABEL: add_v16i8_v16i64_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vcmp.i8 eq, q1, zr
; CHECK-NEXT:    vmov.i8 q1, #0x0
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    vpsel q4, q2, q1
; CHECK-NEXT:    vmov.u8 r0, q4[0]
; CHECK-NEXT:    vmov.16 q3[0], r0
; CHECK-NEXT:    vmov.u8 r0, q4[1]
; CHECK-NEXT:    vmov.16 q3[1], r0
; CHECK-NEXT:    vmov.u8 r0, q4[2]
; CHECK-NEXT:    vmov.16 q3[2], r0
; CHECK-NEXT:    vmov.u8 r0, q4[3]
; CHECK-NEXT:    vmov.16 q3[3], r0
; CHECK-NEXT:    vmov.u8 r0, q4[4]
; CHECK-NEXT:    vmov.16 q3[4], r0
; CHECK-NEXT:    vmov.u8 r0, q4[5]
; CHECK-NEXT:    vmov.16 q3[5], r0
; CHECK-NEXT:    vmov.u8 r0, q4[6]
; CHECK-NEXT:    vmov.16 q3[6], r0
; CHECK-NEXT:    vmov.u8 r0, q4[7]
; CHECK-NEXT:    vmov.16 q3[7], r0
; CHECK-NEXT:    vcmp.i16 ne, q3, zr
; CHECK-NEXT:    vpsel q5, q2, q1
; CHECK-NEXT:    vmov.u16 r0, q5[2]
; CHECK-NEXT:    vmov.u16 r1, q5[0]
; CHECK-NEXT:    vmov q3[2], q3[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q5[3]
; CHECK-NEXT:    vmov.u16 r1, q5[1]
; CHECK-NEXT:    vmov q3[3], q3[1], r1, r0
; CHECK-NEXT:    vcmp.i32 ne, q3, zr
; CHECK-NEXT:    vpsel q6, q2, q1
; CHECK-NEXT:    vmov r0, r1, d12
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r1
; CHECK-NEXT:    vmov q3[3], q3[1], r0, r1
; CHECK-NEXT:    vmov.s8 r0, q0[1]
; CHECK-NEXT:    vmov.s8 r1, q0[0]
; CHECK-NEXT:    vcmp.i32 ne, q3, zr
; CHECK-NEXT:    vmov q7[2], q7[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov.i32 q3, #0x0
; CHECK-NEXT:    vmov q7[3], q7[1], r1, r0
; CHECK-NEXT:    vpsel q7, q7, q3
; CHECK-NEXT:    vmov r0, r1, d15
; CHECK-NEXT:    vmov r2, r3, d14
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d13
; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
; CHECK-NEXT:    vmov.s8 r2, q0[3]
; CHECK-NEXT:    vmov.s8 r3, q0[2]
; CHECK-NEXT:    vcmp.i32 ne, q6, zr
; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
; CHECK-NEXT:    vpsel q6, q6, q3
; CHECK-NEXT:    vmov r2, r3, d12
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d13
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    vmov.u16 r2, q5[6]
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov.u16 r3, q5[4]
; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
; CHECK-NEXT:    vmov.u16 r2, q5[7]
; CHECK-NEXT:    vmov.u16 r3, q5[5]
; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
; CHECK-NEXT:    vcmp.i32 ne, q6, zr
; CHECK-NEXT:    vpsel q5, q2, q1
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
; CHECK-NEXT:    vmov q6[3], q6[1], r2, r3
; CHECK-NEXT:    vmov.s8 r2, q0[5]
; CHECK-NEXT:    vmov.s8 r3, q0[4]
; CHECK-NEXT:    vcmp.i32 ne, q6, zr
; CHECK-NEXT:    vmov q6[2], q6[0], r3, r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    vmov q6[3], q6[1], r3, r2
; CHECK-NEXT:    vpsel q6, q6, q3
; CHECK-NEXT:    vmov r2, r3, d12
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d13
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    vmov q5[2], q5[0], r2, r3
; CHECK-NEXT:    vmov q5[3], q5[1], r2, r3
; CHECK-NEXT:    vmov.s8 r2, q0[7]
; CHECK-NEXT:    vmov.s8 r3, q0[6]
; CHECK-NEXT:    vcmp.i32 ne, q5, zr
; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
; CHECK-NEXT:    vpsel q5, q5, q3
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    vmov.u8 r2, q4[8]
; CHECK-NEXT:    vmov.16 q5[0], r2
; CHECK-NEXT:    vmov.u8 r2, q4[9]
; CHECK-NEXT:    vmov.16 q5[1], r2
; CHECK-NEXT:    vmov.u8 r2, q4[10]
; CHECK-NEXT:    vmov.16 q5[2], r2
; CHECK-NEXT:    vmov.u8 r2, q4[11]
; CHECK-NEXT:    vmov.16 q5[3], r2
; CHECK-NEXT:    vmov.u8 r2, q4[12]
; CHECK-NEXT:    vmov.16 q5[4], r2
; CHECK-NEXT:    vmov.u8 r2, q4[13]
; CHECK-NEXT:    vmov.16 q5[5], r2
; CHECK-NEXT:    vmov.u8 r2, q4[14]
; CHECK-NEXT:    vmov.16 q5[6], r2
; CHECK-NEXT:    vmov.u8 r2, q4[15]
; CHECK-NEXT:    vmov.16 q5[7], r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    vcmp.i16 ne, q5, zr
; CHECK-NEXT:    vpsel q4, q2, q1
; CHECK-NEXT:    vmov.u16 r2, q4[2]
; CHECK-NEXT:    vmov.u16 r3, q4[0]
; CHECK-NEXT:    vmov q5[2], q5[0], r3, r2
; CHECK-NEXT:    vmov.u16 r2, q4[3]
; CHECK-NEXT:    vmov.u16 r3, q4[1]
; CHECK-NEXT:    vmov q5[3], q5[1], r3, r2
; CHECK-NEXT:    vcmp.i32 ne, q5, zr
; CHECK-NEXT:    vpsel q5, q2, q1
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov q6[2], q6[0], r2, r3
; CHECK-NEXT:    vmov q6[3], q6[1], r2,
r3 1006; CHECK-NEXT: vmov.s8 r2, q0[9] 1007; CHECK-NEXT: vmov.s8 r3, q0[8] 1008; CHECK-NEXT: vcmp.i32 ne, q6, zr 1009; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 1010; CHECK-NEXT: asrs r2, r2, #31 1011; CHECK-NEXT: asrs r3, r3, #31 1012; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 1013; CHECK-NEXT: vpsel q6, q6, q3 1014; CHECK-NEXT: vmov r2, r3, d12 1015; CHECK-NEXT: adds r0, r0, r2 1016; CHECK-NEXT: adcs r1, r3 1017; CHECK-NEXT: vmov r2, r3, d13 1018; CHECK-NEXT: adds r0, r0, r2 1019; CHECK-NEXT: adcs r1, r3 1020; CHECK-NEXT: vmov r2, r3, d11 1021; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 1022; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 1023; CHECK-NEXT: vmov.s8 r2, q0[11] 1024; CHECK-NEXT: vmov.s8 r3, q0[10] 1025; CHECK-NEXT: vcmp.i32 ne, q5, zr 1026; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 1027; CHECK-NEXT: asrs r2, r2, #31 1028; CHECK-NEXT: asrs r3, r3, #31 1029; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 1030; CHECK-NEXT: vpsel q5, q5, q3 1031; CHECK-NEXT: vmov r2, r3, d10 1032; CHECK-NEXT: adds r0, r0, r2 1033; CHECK-NEXT: adcs r1, r3 1034; CHECK-NEXT: vmov r2, r3, d11 1035; CHECK-NEXT: adds r0, r0, r2 1036; CHECK-NEXT: vmov.u16 r2, q4[6] 1037; CHECK-NEXT: adcs r1, r3 1038; CHECK-NEXT: vmov.u16 r3, q4[4] 1039; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 1040; CHECK-NEXT: vmov.u16 r2, q4[7] 1041; CHECK-NEXT: vmov.u16 r3, q4[5] 1042; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 1043; CHECK-NEXT: vcmp.i32 ne, q5, zr 1044; CHECK-NEXT: vpsel q1, q2, q1 1045; CHECK-NEXT: vmov r2, r3, d2 1046; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 1047; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 1048; CHECK-NEXT: vmov.s8 r2, q0[13] 1049; CHECK-NEXT: vmov.s8 r3, q0[12] 1050; CHECK-NEXT: vcmp.i32 ne, q2, zr 1051; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 1052; CHECK-NEXT: asrs r2, r2, #31 1053; CHECK-NEXT: asrs r3, r3, #31 1054; CHECK-NEXT: vmov q2[3], q2[1], r3, r2 1055; CHECK-NEXT: vpsel q2, q2, q3 1056; CHECK-NEXT: vmov r2, r3, d4 1057; CHECK-NEXT: adds r0, r0, r2 1058; CHECK-NEXT: adcs r1, r3 1059; CHECK-NEXT: vmov r2, r3, d5 
1060; CHECK-NEXT: adds r0, r0, r2 1061; CHECK-NEXT: adcs r1, r3 1062; CHECK-NEXT: vmov r2, r3, d3 1063; CHECK-NEXT: vmov q1[2], q1[0], r2, r3 1064; CHECK-NEXT: vmov q1[3], q1[1], r2, r3 1065; CHECK-NEXT: vmov.s8 r2, q0[15] 1066; CHECK-NEXT: vmov.s8 r3, q0[14] 1067; CHECK-NEXT: vcmp.i32 ne, q1, zr 1068; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 1069; CHECK-NEXT: asrs r2, r2, #31 1070; CHECK-NEXT: asrs r3, r3, #31 1071; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 1072; CHECK-NEXT: vpsel q0, q0, q3 1073; CHECK-NEXT: vmov r2, r3, d0 1074; CHECK-NEXT: adds r0, r0, r2 1075; CHECK-NEXT: adcs r1, r3 1076; CHECK-NEXT: vmov r2, r3, d1 1077; CHECK-NEXT: adds r0, r0, r2 1078; CHECK-NEXT: adcs r1, r3 1079; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} 1080; CHECK-NEXT: bx lr 1081entry: 1082 %c = icmp eq <16 x i8> %b, zeroinitializer 1083 %xx = sext <16 x i8> %x to <16 x i64> 1084 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer 1085 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) 1086 ret i64 %z 1087} 1088 1089define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %b) { 1090; CHECK-LABEL: add_v8i8_v8i64_zext: 1091; CHECK: @ %bb.0: @ %entry 1092; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} 1093; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} 1094; CHECK-NEXT: vmovlb.u8 q1, q1 1095; CHECK-NEXT: vmov.i8 q3, #0x0 1096; CHECK-NEXT: vcmp.i16 eq, q1, zr 1097; CHECK-NEXT: vmov.i8 q4, #0xff 1098; CHECK-NEXT: vpsel q5, q4, q3 1099; CHECK-NEXT: vmovlb.u8 q0, q0 1100; CHECK-NEXT: vmov.u16 r0, q5[2] 1101; CHECK-NEXT: vmov.u16 r1, q5[0] 1102; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 1103; CHECK-NEXT: vmov.u16 r0, q5[3] 1104; CHECK-NEXT: vmov.u16 r1, q5[1] 1105; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 1106; CHECK-NEXT: vcmp.i32 ne, q1, zr 1107; CHECK-NEXT: vpsel q6, q4, q3 1108; CHECK-NEXT: vmov r0, r1, d12 1109; CHECK-NEXT: vmov q1[2], q1[0], r0, r1 1110; CHECK-NEXT: vmov q1[3], q1[1], r0, r1 1111; CHECK-NEXT: vmov.u16 r0, 
q0[1] 1112; CHECK-NEXT: vmov.u16 r1, q0[0] 1113; CHECK-NEXT: vcmp.i32 ne, q1, zr 1114; CHECK-NEXT: vmov q2[2], q2[0], r1, r0 1115; CHECK-NEXT: vmov.i64 q1, #0xffff 1116; CHECK-NEXT: vand q7, q2, q1 1117; CHECK-NEXT: vmov.i32 q2, #0x0 1118; CHECK-NEXT: vpsel q7, q7, q2 1119; CHECK-NEXT: vmov r0, r1, d15 1120; CHECK-NEXT: vmov r2, r3, d14 1121; CHECK-NEXT: orrs r1, r3 1122; CHECK-NEXT: add r0, r2 1123; CHECK-NEXT: vmov r2, r3, d13 1124; CHECK-NEXT: vmov q6[2], q6[0], r2, r3 1125; CHECK-NEXT: vmov q6[3], q6[1], r2, r3 1126; CHECK-NEXT: vmov.u16 r2, q0[3] 1127; CHECK-NEXT: vmov.u16 r3, q0[2] 1128; CHECK-NEXT: vcmp.i32 ne, q6, zr 1129; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 1130; CHECK-NEXT: vand q6, q6, q1 1131; CHECK-NEXT: vpsel q6, q6, q2 1132; CHECK-NEXT: vmov r2, r3, d12 1133; CHECK-NEXT: adds r0, r0, r2 1134; CHECK-NEXT: adcs r1, r3 1135; CHECK-NEXT: vmov r2, r3, d13 1136; CHECK-NEXT: adds r0, r0, r2 1137; CHECK-NEXT: vmov.u16 r2, q5[6] 1138; CHECK-NEXT: adcs r1, r3 1139; CHECK-NEXT: vmov.u16 r3, q5[4] 1140; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 1141; CHECK-NEXT: vmov.u16 r2, q5[7] 1142; CHECK-NEXT: vmov.u16 r3, q5[5] 1143; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 1144; CHECK-NEXT: vcmp.i32 ne, q6, zr 1145; CHECK-NEXT: vpsel q3, q4, q3 1146; CHECK-NEXT: vmov r2, r3, d6 1147; CHECK-NEXT: vmov q4[2], q4[0], r2, r3 1148; CHECK-NEXT: vmov q4[3], q4[1], r2, r3 1149; CHECK-NEXT: vmov.u16 r2, q0[5] 1150; CHECK-NEXT: vmov.u16 r3, q0[4] 1151; CHECK-NEXT: vcmp.i32 ne, q4, zr 1152; CHECK-NEXT: vmov q4[2], q4[0], r3, r2 1153; CHECK-NEXT: vand q4, q4, q1 1154; CHECK-NEXT: vpsel q4, q4, q2 1155; CHECK-NEXT: vmov r2, r3, d8 1156; CHECK-NEXT: adds r0, r0, r2 1157; CHECK-NEXT: adcs r1, r3 1158; CHECK-NEXT: vmov r2, r3, d9 1159; CHECK-NEXT: adds r0, r0, r2 1160; CHECK-NEXT: adcs r1, r3 1161; CHECK-NEXT: vmov r2, r3, d7 1162; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 1163; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 1164; CHECK-NEXT: vmov.u16 r2, q0[7] 1165; CHECK-NEXT: vmov.u16 r3, q0[6] 1166; 
CHECK-NEXT: vcmp.i32 ne, q3, zr 1167; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 1168; CHECK-NEXT: vand q0, q0, q1 1169; CHECK-NEXT: vpsel q0, q0, q2 1170; CHECK-NEXT: vmov r2, r3, d0 1171; CHECK-NEXT: adds r0, r0, r2 1172; CHECK-NEXT: adcs r1, r3 1173; CHECK-NEXT: vmov r2, r3, d1 1174; CHECK-NEXT: adds r0, r0, r2 1175; CHECK-NEXT: adcs r1, r3 1176; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} 1177; CHECK-NEXT: bx lr 1178entry: 1179 %c = icmp eq <8 x i8> %b, zeroinitializer 1180 %xx = zext <8 x i8> %x to <8 x i64> 1181 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer 1182 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) 1183 ret i64 %z 1184} 1185 1186define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %b) { 1187; CHECK-LABEL: add_v8i8_v8i64_sext: 1188; CHECK: @ %bb.0: @ %entry 1189; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} 1190; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} 1191; CHECK-NEXT: vmovlb.u8 q1, q1 1192; CHECK-NEXT: vmov.i8 q2, #0x0 1193; CHECK-NEXT: vcmp.i16 eq, q1, zr 1194; CHECK-NEXT: vmov.i8 q3, #0xff 1195; CHECK-NEXT: vpsel q4, q3, q2 1196; CHECK-NEXT: vmov.u16 r0, q4[2] 1197; CHECK-NEXT: vmov.u16 r1, q4[0] 1198; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 1199; CHECK-NEXT: vmov.u16 r0, q4[3] 1200; CHECK-NEXT: vmov.u16 r1, q4[1] 1201; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 1202; CHECK-NEXT: vcmp.i32 ne, q1, zr 1203; CHECK-NEXT: vpsel q5, q3, q2 1204; CHECK-NEXT: vmov r0, r1, d10 1205; CHECK-NEXT: vmov q1[2], q1[0], r0, r1 1206; CHECK-NEXT: vmov q1[3], q1[1], r0, r1 1207; CHECK-NEXT: vmov.u16 r0, q0[1] 1208; CHECK-NEXT: vmov.u16 r1, q0[0] 1209; CHECK-NEXT: sxtb r0, r0 1210; CHECK-NEXT: sxtb r1, r1 1211; CHECK-NEXT: vcmp.i32 ne, q1, zr 1212; CHECK-NEXT: vmov q6[2], q6[0], r1, r0 1213; CHECK-NEXT: asrs r0, r0, #31 1214; CHECK-NEXT: asrs r1, r1, #31 1215; CHECK-NEXT: vmov.i32 q1, #0x0 1216; CHECK-NEXT: vmov q6[3], q6[1], r1, r0 1217; CHECK-NEXT: vpsel q6, q6, q1 1218; CHECK-NEXT: vmov r0, r1, d13 1219; 
CHECK-NEXT: vmov r2, r3, d12 1220; CHECK-NEXT: adds r0, r0, r2 1221; CHECK-NEXT: adcs r1, r3 1222; CHECK-NEXT: vmov r2, r3, d11 1223; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 1224; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 1225; CHECK-NEXT: vmov.u16 r2, q0[3] 1226; CHECK-NEXT: vmov.u16 r3, q0[2] 1227; CHECK-NEXT: sxtb r2, r2 1228; CHECK-NEXT: sxtb r3, r3 1229; CHECK-NEXT: vcmp.i32 ne, q5, zr 1230; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 1231; CHECK-NEXT: asrs r2, r2, #31 1232; CHECK-NEXT: asrs r3, r3, #31 1233; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 1234; CHECK-NEXT: vpsel q5, q5, q1 1235; CHECK-NEXT: vmov r2, r3, d10 1236; CHECK-NEXT: adds r0, r0, r2 1237; CHECK-NEXT: adcs r1, r3 1238; CHECK-NEXT: vmov r2, r3, d11 1239; CHECK-NEXT: adds r0, r0, r2 1240; CHECK-NEXT: vmov.u16 r2, q4[6] 1241; CHECK-NEXT: adcs r1, r3 1242; CHECK-NEXT: vmov.u16 r3, q4[4] 1243; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 1244; CHECK-NEXT: vmov.u16 r2, q4[7] 1245; CHECK-NEXT: vmov.u16 r3, q4[5] 1246; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 1247; CHECK-NEXT: vcmp.i32 ne, q5, zr 1248; CHECK-NEXT: vpsel q2, q3, q2 1249; CHECK-NEXT: vmov r2, r3, d4 1250; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 1251; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 1252; CHECK-NEXT: vmov.u16 r2, q0[5] 1253; CHECK-NEXT: vmov.u16 r3, q0[4] 1254; CHECK-NEXT: sxtb r2, r2 1255; CHECK-NEXT: sxtb r3, r3 1256; CHECK-NEXT: vcmp.i32 ne, q3, zr 1257; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 1258; CHECK-NEXT: asrs r2, r2, #31 1259; CHECK-NEXT: asrs r3, r3, #31 1260; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 1261; CHECK-NEXT: vpsel q3, q3, q1 1262; CHECK-NEXT: vmov r2, r3, d6 1263; CHECK-NEXT: adds r0, r0, r2 1264; CHECK-NEXT: adcs r1, r3 1265; CHECK-NEXT: vmov r2, r3, d7 1266; CHECK-NEXT: adds r0, r0, r2 1267; CHECK-NEXT: adcs r1, r3 1268; CHECK-NEXT: vmov r2, r3, d5 1269; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 1270; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 1271; CHECK-NEXT: vmov.u16 r2, q0[7] 1272; CHECK-NEXT: vmov.u16 r3, q0[6] 1273; CHECK-NEXT: sxtb r2, r2 
1274; CHECK-NEXT: sxtb r3, r3 1275; CHECK-NEXT: vcmp.i32 ne, q2, zr 1276; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 1277; CHECK-NEXT: asrs r2, r2, #31 1278; CHECK-NEXT: asrs r3, r3, #31 1279; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 1280; CHECK-NEXT: vpsel q0, q0, q1 1281; CHECK-NEXT: vmov r2, r3, d0 1282; CHECK-NEXT: adds r0, r0, r2 1283; CHECK-NEXT: adcs r1, r3 1284; CHECK-NEXT: vmov r2, r3, d1 1285; CHECK-NEXT: adds r0, r0, r2 1286; CHECK-NEXT: adcs r1, r3 1287; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} 1288; CHECK-NEXT: bx lr 1289entry: 1290 %c = icmp eq <8 x i8> %b, zeroinitializer 1291 %xx = sext <8 x i8> %x to <8 x i64> 1292 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer 1293 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) 1294 ret i64 %z 1295}

; Masked widening reduction: each i8 lane of %x is zero-extended to i64 and
; summed, but only lanes whose matching lane of %b equals zero contribute;
; every other lane adds 0.
define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %b) {
; CHECK-LABEL: add_v4i8_v4i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0xff
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <4 x i8> %b, zeroinitializer
  %widened = zext <4 x i8> %x to <4 x i64>
  %masked = select <4 x i1> %lane_is_zero, <4 x i64> %widened, <4 x i64> zeroinitializer
  %total = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %masked)
  ret i64 %total
}

1314define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %b) { 1315; CHECK-LABEL: add_v4i8_v4i64_sext: 1316; CHECK: @ %bb.0: @ %entry 1317; CHECK-NEXT: vmov.i32 q2, #0xff 1318; CHECK-NEXT: vmovlb.s8 q0, q0 1319; CHECK-NEXT: vand q1, q1, q2 1320; CHECK-NEXT: vmovlb.s16 q0, q0 1321; CHECK-NEXT: vpt.i32 eq, q1, zr 1322; CHECK-NEXT: vaddlvt.s32 r0, r1, q0 1323; CHECK-NEXT: bx lr 1324entry: 1325 %c = icmp eq <4 x i8> %b, zeroinitializer 1326 %xx = sext <4 x i8> %x to <4 x i64> 1327 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer 1328 %z = call i64
@llvm.vector.reduce.add.v4i64(<4 x i64> %s) 1329 ret i64 %z 1330}

; Two-lane variant: i8 lanes of %x are zero-extended to i64, lanes where %b is
; non-zero are replaced by 0, and the two i64 lanes are added together.
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
; CHECK-LABEL: add_v2i8_v2i64_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i64 q2, #0xff
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    add r0, r2
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <2 x i8> %b, zeroinitializer
  %widened = zext <2 x i8> %x to <2 x i64>
  %masked = select <2 x i1> %lane_is_zero, <2 x i64> %widened, <2 x i64> zeroinitializer
  %total = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %masked)
  ret i64 %total
}

1363define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %b) { 1364; CHECK-LABEL: add_v2i8_v2i64_sext: 1365; CHECK: @ %bb.0: @ %entry 1366; CHECK-NEXT: vmov.i32 q2, #0xff 1367; CHECK-NEXT: movs r1, #0 1368; CHECK-NEXT: vand q1, q1, q2 1369; CHECK-NEXT: vmov r0, s4 1370; CHECK-NEXT: cmp r0, #0 1371; CHECK-NEXT: csetm r0, eq 1372; CHECK-NEXT: bfi r1, r0, #0, #8 1373; CHECK-NEXT: vmov r0, s6 1374; CHECK-NEXT: vmov.i32 q1, #0x0 1375; CHECK-NEXT: cmp r0, #0 1376; CHECK-NEXT: csetm r0, eq 1377; CHECK-NEXT: bfi r1, r0, #8, #8 1378; CHECK-NEXT: vmov r0, s2 1379; CHECK-NEXT: vmsr p0, r1 1380; CHECK-NEXT: vmov r1, s0 1381; CHECK-NEXT: sxtb r0, r0 1382; CHECK-NEXT: sxtb r1, r1 1383; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 1384; CHECK-NEXT: asrs r0, r0, #31 1385; CHECK-NEXT: asrs r1, r1, #31 1386; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 1387; CHECK-NEXT: vpsel
q0, q0, q1 1388; CHECK-NEXT: vmov r0, r1, d1 1389; CHECK-NEXT: vmov r2, r3, d0 1390; CHECK-NEXT: adds r0, r0, r2 1391; CHECK-NEXT: adcs r1, r3 1392; CHECK-NEXT: bx lr 1393entry: 1394 %c = icmp eq <2 x i8> %b, zeroinitializer 1395 %xx = sext <2 x i8> %x to <2 x i64> 1396 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer 1397 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) 1398 ret i64 %z 1399}

; Native i64 lanes, no extension: lanes of %x whose matching lane of %b is
; zero are summed; the remaining lanes contribute 0.
define arm_aapcs_vfpcc i64 @add_v2i64_v2i64(<2 x i64> %x, <2 x i64> %b) {
; CHECK-LABEL: add_v2i64_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r2, d3
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <2 x i64> %b, zeroinitializer
  %masked = select <2 x i1> %lane_is_zero, <2 x i64> %x, <2 x i64> zeroinitializer
  %total = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %masked)
  ret i64 %total
}

; Accumulating form: the masked i32 reduction of %x (lanes kept where %b is
; zero) is added to the scalar %a.
define arm_aapcs_vfpcc i32 @add_v4i32_v4i32_acc(<4 x i32> %x, <4 x i32> %b, i32 %a) {
; CHECK-LABEL: add_v4i32_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvat.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <4 x i32> %b, zeroinitializer
  %masked = select <4 x i1> %lane_is_zero, <4 x i32> %x, <4 x i32> zeroinitializer
  %total = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %masked)
  %with_acc = add i32 %total, %a
  ret i32 %with_acc
}

1442define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, <4 x i32> %b, i64 %a) { 1443; CHECK-LABEL: add_v4i32_v4i64_acc_zext: 1444; CHECK:
@ %bb.0: @ %entry 1445; CHECK-NEXT: vpt.i32 eq, q1, zr 1446; CHECK-NEXT: vaddlvat.u32 r0, r1, q0 1447; CHECK-NEXT: bx lr 1448entry: 1449 %c = icmp eq <4 x i32> %b, zeroinitializer 1450 %xx = zext <4 x i32> %x to <4 x i64> 1451 %s = select <4 x i1> %c, <4 x i64> %xx, <4 x i64> zeroinitializer 1452 %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %s) 1453 %r = add i64 %z, %a 1454 ret i64 %r 1455}

; Accumulating, sign-extending form: i32 lanes of %x are sign-extended to i64,
; lanes where %b is non-zero are zeroed, the lanes are summed and %a is added.
define arm_aapcs_vfpcc i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, <4 x i32> %b, i64 %a) {
; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddlvat.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <4 x i32> %b, zeroinitializer
  %widened = sext <4 x i32> %x to <4 x i64>
  %masked = select <4 x i1> %lane_is_zero, <4 x i64> %widened, <4 x i64> zeroinitializer
  %total = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %masked)
  %with_acc = add i64 %total, %a
  ret i64 %with_acc
}

1472define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b, i64 %a) { 1473; CHECK-LABEL: add_v2i32_v2i64_acc_zext: 1474; CHECK: @ %bb.0: @ %entry 1475; CHECK-NEXT: .save {r7, lr} 1476; CHECK-NEXT: push {r7, lr} 1477; CHECK-NEXT: vmov r2, s4 1478; CHECK-NEXT: movs r3, #0 1479; CHECK-NEXT: vmov.i64 q2, #0xffffffff 1480; CHECK-NEXT: vand q0, q0, q2 1481; CHECK-NEXT: cmp r2, #0 1482; CHECK-NEXT: csetm r2, eq 1483; CHECK-NEXT: bfi r3, r2, #0, #8 1484; CHECK-NEXT: vmov r2, s6 1485; CHECK-NEXT: vmov.i32 q1, #0x0 1486; CHECK-NEXT: cmp r2, #0 1487; CHECK-NEXT: csetm r2, eq 1488; CHECK-NEXT: bfi r3, r2, #8, #8 1489; CHECK-NEXT: vmsr p0, r3 1490; CHECK-NEXT: vpsel q0, q0, q1 1491; CHECK-NEXT: vmov lr, r12, d1 1492; CHECK-NEXT: vmov r3, r2, d0 1493; CHECK-NEXT: adds.w r3, r3, lr 1494; CHECK-NEXT: adc.w r2, r2, r12 1495; CHECK-NEXT: adds r0, r0, r3 1496; CHECK-NEXT: adcs r1, r2 1497; CHECK-NEXT: pop {r7, pc} 1498entry: 1499 %c = icmp eq <2 x i32> %b, zeroinitializer 1500 %xx = zext <2 x i32> %x to <2 x
i64> 1501 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer 1502 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) 1503 %r = add i64 %z, %a 1504 ret i64 %r 1505}

; Two-lane accumulating form: i32 lanes of %x are sign-extended to i64, lanes
; where %b is non-zero are zeroed, both lanes are added and %a is added on top.
define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, <2 x i32> %b, i64 %a) {
; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    asrs r2, r2, #31
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    csetm r2, eq
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    csetm r2, eq
; CHECK-NEXT:    bfi r3, r2, #8, #8
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov lr, r12, d1
; CHECK-NEXT:    vmov r3, r2, d0
; CHECK-NEXT:    adds.w r3, r3, lr
; CHECK-NEXT:    adc.w r2, r2, r12
; CHECK-NEXT:    adds r0, r0, r3
; CHECK-NEXT:    adcs r1, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  %lane_is_zero = icmp eq <2 x i32> %b, zeroinitializer
  %widened = sext <2 x i32> %x to <2 x i64>
  %masked = select <2 x i1> %lane_is_zero, <2 x i64> %widened, <2 x i64> zeroinitializer
  %total = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %masked)
  %with_acc = add i64 %total, %a
  ret i64 %with_acc
}

1546define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, <8 x i16> %b, i32 %a) { 1547; CHECK-LABEL: add_v8i16_v8i32_acc_zext: 1548; CHECK: @ %bb.0: @ %entry 1549; CHECK-NEXT: vpt.i16 eq, q1, zr 1550; CHECK-NEXT: vaddvat.u16 r0, q0 1551; CHECK-NEXT: bx lr 1552entry: 1553 %c = icmp eq <8 x i16> %b, zeroinitializer 1554 %xx = zext <8 x i16> %x to <8 x i32> 1555 %s = select <8 x i1> %c, <8 x i32> %xx, <8 x i32> zeroinitializer 1556
%z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %s) 1557 %r = add i32 %z, %a 1558 ret i32 %r 1559}

; Accumulating form: i16 lanes of %x are sign-extended to i32, lanes where %b
; is non-zero are zeroed, the eight lanes are summed and %a is added.
define arm_aapcs_vfpcc i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, <8 x i16> %b, i32 %a) {
; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vaddvat.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <8 x i16> %b, zeroinitializer
  %widened = sext <8 x i16> %x to <8 x i32>
  %masked = select <8 x i1> %lane_is_zero, <8 x i32> %widened, <8 x i32> zeroinitializer
  %total = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %masked)
  %with_acc = add i32 %total, %a
  ret i32 %with_acc
}

; Four-lane accumulating form with zero-extension from i16 to i32; only lanes
; whose matching lane of %b is zero are summed, then %a is added.
define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvat.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <4 x i16> %b, zeroinitializer
  %widened = zext <4 x i16> %x to <4 x i32>
  %masked = select <4 x i1> %lane_is_zero, <4 x i32> %widened, <4 x i32> zeroinitializer
  %total = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %masked)
  %with_acc = add i32 %total, %a
  ret i32 %with_acc
}

; Four-lane accumulating form with sign-extension from i16 to i32; only lanes
; whose matching lane of %b is zero are summed, then %a is added.
define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, <4 x i16> %b, i32 %a) {
; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vaddvat.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %lane_is_zero = icmp eq <4 x i16> %b, zeroinitializer
  %widened = sext <4 x i16> %x to <4 x i32>
  %masked = select <4 x i1> %lane_is_zero, <4 x i32> %widened, <4 x i32> zeroinitializer
  %total = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %masked)
  %with_acc = add i32 %total, %a
  ret i32 %with_acc
}

1610define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8
x i16> %b, i16 %a) { 1611; CHECK-LABEL: add_v8i16_v8i16_acc: 1612; CHECK: @ %bb.0: @ %entry 1613; CHECK-NEXT: vpt.i16 eq, q1, zr 1614; CHECK-NEXT: vaddvat.u16 r0, q0 1615; CHECK-NEXT: uxth r0, r0 1616; CHECK-NEXT: bx lr 1617entry: 1618 %c = icmp eq <8 x i16> %b, zeroinitializer 1619 %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer 1620 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) 1621 %r = add i16 %z, %a 1622 ret i16 %r 1623} 1624 1625define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, <8 x i16> %b, i64 %a) { 1626; CHECK-LABEL: add_v8i16_v8i64_acc_zext: 1627; CHECK: @ %bb.0: @ %entry 1628; CHECK-NEXT: .save {r7, lr} 1629; CHECK-NEXT: push {r7, lr} 1630; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} 1631; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} 1632; CHECK-NEXT: vmov.i8 q3, #0x0 1633; CHECK-NEXT: vmov.i8 q4, #0xff 1634; CHECK-NEXT: vcmp.i16 eq, q1, zr 1635; CHECK-NEXT: vpsel q5, q4, q3 1636; CHECK-NEXT: vmov.u16 r2, q5[2] 1637; CHECK-NEXT: vmov.u16 r3, q5[0] 1638; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 1639; CHECK-NEXT: vmov.u16 r2, q5[3] 1640; CHECK-NEXT: vmov.u16 r3, q5[1] 1641; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 1642; CHECK-NEXT: vcmp.i32 ne, q1, zr 1643; CHECK-NEXT: vpsel q6, q4, q3 1644; CHECK-NEXT: vmov r2, r3, d12 1645; CHECK-NEXT: vmov q1[2], q1[0], r2, r3 1646; CHECK-NEXT: vmov q1[3], q1[1], r2, r3 1647; CHECK-NEXT: vmov.u16 r2, q0[1] 1648; CHECK-NEXT: vmov.u16 r3, q0[0] 1649; CHECK-NEXT: vcmp.i32 ne, q1, zr 1650; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 1651; CHECK-NEXT: vmov.i64 q1, #0xffff 1652; CHECK-NEXT: vand q7, q2, q1 1653; CHECK-NEXT: vmov.i32 q2, #0x0 1654; CHECK-NEXT: vpsel q7, q7, q2 1655; CHECK-NEXT: vmov r12, lr, d15 1656; CHECK-NEXT: vmov r2, r3, d14 1657; CHECK-NEXT: orr.w lr, lr, r3 1658; CHECK-NEXT: add r12, r2 1659; CHECK-NEXT: vmov r3, r2, d13 1660; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 1661; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 1662; CHECK-NEXT: vmov.u16 r2, q0[3] 
1663; CHECK-NEXT: vmov.u16 r3, q0[2] 1664; CHECK-NEXT: vcmp.i32 ne, q6, zr 1665; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 1666; CHECK-NEXT: vand q6, q6, q1 1667; CHECK-NEXT: vpsel q6, q6, q2 1668; CHECK-NEXT: vmov r2, r3, d12 1669; CHECK-NEXT: adds.w r12, r12, r2 1670; CHECK-NEXT: adc.w lr, lr, r3 1671; CHECK-NEXT: vmov r2, r3, d13 1672; CHECK-NEXT: adds.w r12, r12, r2 1673; CHECK-NEXT: vmov.u16 r2, q5[6] 1674; CHECK-NEXT: adc.w lr, lr, r3 1675; CHECK-NEXT: vmov.u16 r3, q5[4] 1676; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 1677; CHECK-NEXT: vmov.u16 r2, q5[7] 1678; CHECK-NEXT: vmov.u16 r3, q5[5] 1679; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 1680; CHECK-NEXT: vcmp.i32 ne, q6, zr 1681; CHECK-NEXT: vpsel q3, q4, q3 1682; CHECK-NEXT: vmov r2, r3, d6 1683; CHECK-NEXT: vmov q4[2], q4[0], r2, r3 1684; CHECK-NEXT: vmov q4[3], q4[1], r2, r3 1685; CHECK-NEXT: vmov.u16 r2, q0[5] 1686; CHECK-NEXT: vmov.u16 r3, q0[4] 1687; CHECK-NEXT: vcmp.i32 ne, q4, zr 1688; CHECK-NEXT: vmov q4[2], q4[0], r3, r2 1689; CHECK-NEXT: vand q4, q4, q1 1690; CHECK-NEXT: vpsel q4, q4, q2 1691; CHECK-NEXT: vmov r2, r3, d8 1692; CHECK-NEXT: adds.w r12, r12, r2 1693; CHECK-NEXT: adc.w lr, lr, r3 1694; CHECK-NEXT: vmov r2, r3, d9 1695; CHECK-NEXT: adds.w r12, r12, r2 1696; CHECK-NEXT: adc.w lr, lr, r3 1697; CHECK-NEXT: vmov r2, r3, d7 1698; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 1699; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 1700; CHECK-NEXT: vmov.u16 r2, q0[7] 1701; CHECK-NEXT: vmov.u16 r3, q0[6] 1702; CHECK-NEXT: vcmp.i32 ne, q3, zr 1703; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 1704; CHECK-NEXT: vand q0, q0, q1 1705; CHECK-NEXT: vpsel q0, q0, q2 1706; CHECK-NEXT: vmov r2, r3, d0 1707; CHECK-NEXT: adds.w r12, r12, r2 1708; CHECK-NEXT: adc.w lr, lr, r3 1709; CHECK-NEXT: vmov r2, r3, d1 1710; CHECK-NEXT: adds.w r2, r2, r12 1711; CHECK-NEXT: adc.w r3, r3, lr 1712; CHECK-NEXT: adds r0, r0, r2 1713; CHECK-NEXT: adcs r1, r3 1714; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} 1715; CHECK-NEXT: pop {r7, pc} 
1716entry: 1717 %c = icmp eq <8 x i16> %b, zeroinitializer 1718 %xx = zext <8 x i16> %x to <8 x i64> 1719 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer 1720 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) 1721 %r = add i64 %z, %a 1722 ret i64 %r 1723} 1724 1725define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, <8 x i16> %b, i64 %a) { 1726; CHECK-LABEL: add_v8i16_v8i64_acc_sext: 1727; CHECK: @ %bb.0: @ %entry 1728; CHECK-NEXT: .save {r7, lr} 1729; CHECK-NEXT: push {r7, lr} 1730; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} 1731; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} 1732; CHECK-NEXT: vmov.i8 q2, #0x0 1733; CHECK-NEXT: vmov.i8 q3, #0xff 1734; CHECK-NEXT: vcmp.i16 eq, q1, zr 1735; CHECK-NEXT: vpsel q4, q3, q2 1736; CHECK-NEXT: vmov.u16 r2, q4[2] 1737; CHECK-NEXT: vmov.u16 r3, q4[0] 1738; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 1739; CHECK-NEXT: vmov.u16 r2, q4[3] 1740; CHECK-NEXT: vmov.u16 r3, q4[1] 1741; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 1742; CHECK-NEXT: vcmp.i32 ne, q1, zr 1743; CHECK-NEXT: vpsel q5, q3, q2 1744; CHECK-NEXT: vmov r2, r3, d10 1745; CHECK-NEXT: vmov q1[2], q1[0], r2, r3 1746; CHECK-NEXT: vmov q1[3], q1[1], r2, r3 1747; CHECK-NEXT: vmov.s16 r2, q0[1] 1748; CHECK-NEXT: vmov.s16 r3, q0[0] 1749; CHECK-NEXT: vcmp.i32 ne, q1, zr 1750; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 1751; CHECK-NEXT: asrs r2, r2, #31 1752; CHECK-NEXT: asrs r3, r3, #31 1753; CHECK-NEXT: vmov.i32 q1, #0x0 1754; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 1755; CHECK-NEXT: vpsel q6, q6, q1 1756; CHECK-NEXT: vmov lr, r12, d13 1757; CHECK-NEXT: vmov r3, r2, d12 1758; CHECK-NEXT: adds.w lr, lr, r3 1759; CHECK-NEXT: adc.w r12, r12, r2 1760; CHECK-NEXT: vmov r2, r3, d11 1761; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 1762; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 1763; CHECK-NEXT: vmov.s16 r2, q0[3] 1764; CHECK-NEXT: vmov.s16 r3, q0[2] 1765; CHECK-NEXT: vcmp.i32 ne, q5, zr 1766; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 1767; CHECK-NEXT: asrs r2, 
r2, #31 1768; CHECK-NEXT: asrs r3, r3, #31 1769; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 1770; CHECK-NEXT: vpsel q5, q5, q1 1771; CHECK-NEXT: vmov r2, r3, d10 1772; CHECK-NEXT: adds.w lr, lr, r2 1773; CHECK-NEXT: adc.w r12, r12, r3 1774; CHECK-NEXT: vmov r2, r3, d11 1775; CHECK-NEXT: adds.w lr, lr, r2 1776; CHECK-NEXT: vmov.u16 r2, q4[6] 1777; CHECK-NEXT: adc.w r12, r12, r3 1778; CHECK-NEXT: vmov.u16 r3, q4[4] 1779; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 1780; CHECK-NEXT: vmov.u16 r2, q4[7] 1781; CHECK-NEXT: vmov.u16 r3, q4[5] 1782; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 1783; CHECK-NEXT: vcmp.i32 ne, q5, zr 1784; CHECK-NEXT: vpsel q2, q3, q2 1785; CHECK-NEXT: vmov r2, r3, d4 1786; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 1787; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 1788; CHECK-NEXT: vmov.s16 r2, q0[5] 1789; CHECK-NEXT: vmov.s16 r3, q0[4] 1790; CHECK-NEXT: vcmp.i32 ne, q3, zr 1791; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 1792; CHECK-NEXT: asrs r2, r2, #31 1793; CHECK-NEXT: asrs r3, r3, #31 1794; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 1795; CHECK-NEXT: vpsel q3, q3, q1 1796; CHECK-NEXT: vmov r2, r3, d6 1797; CHECK-NEXT: adds.w lr, lr, r2 1798; CHECK-NEXT: adc.w r12, r12, r3 1799; CHECK-NEXT: vmov r2, r3, d7 1800; CHECK-NEXT: adds.w lr, lr, r2 1801; CHECK-NEXT: adc.w r12, r12, r3 1802; CHECK-NEXT: vmov r2, r3, d5 1803; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 1804; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 1805; CHECK-NEXT: vmov.s16 r2, q0[7] 1806; CHECK-NEXT: vmov.s16 r3, q0[6] 1807; CHECK-NEXT: vcmp.i32 ne, q2, zr 1808; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 1809; CHECK-NEXT: asrs r2, r2, #31 1810; CHECK-NEXT: asrs r3, r3, #31 1811; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 1812; CHECK-NEXT: vpsel q0, q0, q1 1813; CHECK-NEXT: vmov r2, r3, d0 1814; CHECK-NEXT: adds.w lr, lr, r2 1815; CHECK-NEXT: adc.w r12, r12, r3 1816; CHECK-NEXT: vmov r2, r3, d1 1817; CHECK-NEXT: adds.w r2, r2, lr 1818; CHECK-NEXT: adc.w r3, r3, r12 1819; CHECK-NEXT: adds r0, r0, r2 1820; CHECK-NEXT: adcs r1, r3 1821; 
CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} 1822; CHECK-NEXT: pop {r7, pc} 1823entry: 1824 %c = icmp eq <8 x i16> %b, zeroinitializer 1825 %xx = sext <8 x i16> %x to <8 x i64> 1826 %s = select <8 x i1> %c, <8 x i64> %xx, <8 x i64> zeroinitializer 1827 %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %s) 1828 %r = add i64 %z, %a 1829 ret i64 %r 1830}

; Two-lane accumulating form: i16 lanes of %x are zero-extended to i64, lanes
; where %b is non-zero are zeroed, both lanes are added and %a is added on top.
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov.i64 q2, #0xffff
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vmov r2, s4
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    csetm r2, eq
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    csetm r2, eq
; CHECK-NEXT:    bfi r3, r2, #8, #8
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    vmov r12, lr, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    add r2, r12
; CHECK-NEXT:    orr.w r3, r3, lr
; CHECK-NEXT:    adds r0, r0, r2
; CHECK-NEXT:    adcs r1, r3
; CHECK-NEXT:    pop {r7, pc}
entry:
  %lane_is_zero = icmp eq <2 x i16> %b, zeroinitializer
  %widened = zext <2 x i16> %x to <2 x i64>
  %masked = select <2 x i1> %lane_is_zero, <2 x i64> %widened, <2 x i64> zeroinitializer
  %total = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %masked)
  %with_acc = add i64 %total, %a
  ret i64 %with_acc
}

1868define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, <2 x i16> %b, i64 %a) { 1869; CHECK-LABEL: add_v2i16_v2i64_acc_sext: 1870; CHECK: @ %bb.0: @ %entry 1871; CHECK-NEXT: .save {r7, lr} 1872; CHECK-NEXT: push {r7, lr} 1873; CHECK-NEXT: vmov.i32 q2, #0xffff 1874; CHECK-NEXT: movs r3, #0 1875; CHECK-NEXT: vand q1, q1, q2 1876; CHECK-NEXT: vmov r2, s4 1877; CHECK-NEXT: cmp
r2, #0 1878; CHECK-NEXT: csetm r2, eq 1879; CHECK-NEXT: bfi r3, r2, #0, #8 1880; CHECK-NEXT: vmov r2, s6 1881; CHECK-NEXT: vmov.i32 q1, #0x0 1882; CHECK-NEXT: cmp r2, #0 1883; CHECK-NEXT: csetm r2, eq 1884; CHECK-NEXT: bfi r3, r2, #8, #8 1885; CHECK-NEXT: vmov r2, s2 1886; CHECK-NEXT: vmsr p0, r3 1887; CHECK-NEXT: vmov r3, s0 1888; CHECK-NEXT: sxth r2, r2 1889; CHECK-NEXT: sxth r3, r3 1890; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 1891; CHECK-NEXT: asrs r2, r2, #31 1892; CHECK-NEXT: asrs r3, r3, #31 1893; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 1894; CHECK-NEXT: vpsel q0, q0, q1 1895; CHECK-NEXT: vmov lr, r12, d1 1896; CHECK-NEXT: vmov r3, r2, d0 1897; CHECK-NEXT: adds.w r3, r3, lr 1898; CHECK-NEXT: adc.w r2, r2, r12 1899; CHECK-NEXT: adds r0, r0, r3 1900; CHECK-NEXT: adcs r1, r2 1901; CHECK-NEXT: pop {r7, pc} 1902entry: 1903 %c = icmp eq <2 x i16> %b, zeroinitializer 1904 %xx = sext <2 x i16> %x to <2 x i64> 1905 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer 1906 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) 1907 %r = add i64 %z, %a 1908 ret i64 %r 1909} 1910 1911define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, <16 x i8> %b, i32 %a) { 1912; CHECK-LABEL: add_v16i8_v16i32_acc_zext: 1913; CHECK: @ %bb.0: @ %entry 1914; CHECK-NEXT: vpt.i8 eq, q1, zr 1915; CHECK-NEXT: vaddvat.u8 r0, q0 1916; CHECK-NEXT: bx lr 1917entry: 1918 %c = icmp eq <16 x i8> %b, zeroinitializer 1919 %xx = zext <16 x i8> %x to <16 x i32> 1920 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer 1921 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) 1922 %r = add i32 %z, %a 1923 ret i32 %r 1924} 1925 1926define arm_aapcs_vfpcc i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, <16 x i8> %b, i32 %a) { 1927; CHECK-LABEL: add_v16i8_v16i32_acc_sext: 1928; CHECK: @ %bb.0: @ %entry 1929; CHECK-NEXT: vpt.i8 eq, q1, zr 1930; CHECK-NEXT: vaddvat.s8 r0, q0 1931; CHECK-NEXT: bx lr 1932entry: 1933 %c = icmp eq <16 x i8> %b, zeroinitializer 1934 
%xx = sext <16 x i8> %x to <16 x i32> 1935 %s = select <16 x i1> %c, <16 x i32> %xx, <16 x i32> zeroinitializer 1936 %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %s) 1937 %r = add i32 %z, %a 1938 ret i32 %r 1939} 1940 1941define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %b, i32 %a) { 1942; CHECK-LABEL: add_v4i8_v4i32_acc_zext: 1943; CHECK: @ %bb.0: @ %entry 1944; CHECK-NEXT: vmov.i32 q2, #0xff 1945; CHECK-NEXT: vand q1, q1, q2 1946; CHECK-NEXT: vand q0, q0, q2 1947; CHECK-NEXT: vpt.i32 eq, q1, zr 1948; CHECK-NEXT: vaddvat.u32 r0, q0 1949; CHECK-NEXT: bx lr 1950entry: 1951 %c = icmp eq <4 x i8> %b, zeroinitializer 1952 %xx = zext <4 x i8> %x to <4 x i32> 1953 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer 1954 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) 1955 %r = add i32 %z, %a 1956 ret i32 %r 1957} 1958 1959define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, <4 x i8> %b, i32 %a) { 1960; CHECK-LABEL: add_v4i8_v4i32_acc_sext: 1961; CHECK: @ %bb.0: @ %entry 1962; CHECK-NEXT: vmov.i32 q2, #0xff 1963; CHECK-NEXT: vmovlb.s8 q0, q0 1964; CHECK-NEXT: vand q1, q1, q2 1965; CHECK-NEXT: vmovlb.s16 q0, q0 1966; CHECK-NEXT: vpt.i32 eq, q1, zr 1967; CHECK-NEXT: vaddvat.u32 r0, q0 1968; CHECK-NEXT: bx lr 1969entry: 1970 %c = icmp eq <4 x i8> %b, zeroinitializer 1971 %xx = sext <4 x i8> %x to <4 x i32> 1972 %s = select <4 x i1> %c, <4 x i32> %xx, <4 x i32> zeroinitializer 1973 %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s) 1974 %r = add i32 %z, %a 1975 ret i32 %r 1976} 1977 1978define arm_aapcs_vfpcc zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, <16 x i8> %b, i16 %a) { 1979; CHECK-LABEL: add_v16i8_v16i16_acc_zext: 1980; CHECK: @ %bb.0: @ %entry 1981; CHECK-NEXT: vpt.i8 eq, q1, zr 1982; CHECK-NEXT: vaddvat.u8 r0, q0 1983; CHECK-NEXT: uxth r0, r0 1984; CHECK-NEXT: bx lr 1985entry: 1986 %c = icmp eq <16 x i8> %b, zeroinitializer 1987 %xx = zext <16 x i8> %x to <16 x i16> 1988 %s = 
select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer 1989 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) 1990 %r = add i16 %z, %a 1991 ret i16 %r 1992} 1993 1994define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, <16 x i8> %b, i16 %a) { 1995; CHECK-LABEL: add_v16i8_v16i16_acc_sext: 1996; CHECK: @ %bb.0: @ %entry 1997; CHECK-NEXT: vpt.i8 eq, q1, zr 1998; CHECK-NEXT: vaddvat.s8 r0, q0 1999; CHECK-NEXT: sxth r0, r0 2000; CHECK-NEXT: bx lr 2001entry: 2002 %c = icmp eq <16 x i8> %b, zeroinitializer 2003 %xx = sext <16 x i8> %x to <16 x i16> 2004 %s = select <16 x i1> %c, <16 x i16> %xx, <16 x i16> zeroinitializer 2005 %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %s) 2006 %r = add i16 %z, %a 2007 ret i16 %r 2008} 2009 2010define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %b, i16 %a) { 2011; CHECK-LABEL: add_v8i8_v8i16_acc_zext: 2012; CHECK: @ %bb.0: @ %entry 2013; CHECK-NEXT: vmovlb.u8 q1, q1 2014; CHECK-NEXT: vmovlb.u8 q0, q0 2015; CHECK-NEXT: vpt.i16 eq, q1, zr 2016; CHECK-NEXT: vaddvat.u16 r0, q0 2017; CHECK-NEXT: uxth r0, r0 2018; CHECK-NEXT: bx lr 2019entry: 2020 %c = icmp eq <8 x i8> %b, zeroinitializer 2021 %xx = zext <8 x i8> %x to <8 x i16> 2022 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer 2023 %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) 2024 %r = add i16 %z, %a 2025 ret i16 %r 2026} 2027 2028define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %b, i16 %a) { 2029; CHECK-LABEL: add_v8i8_v8i16_acc_sext: 2030; CHECK: @ %bb.0: @ %entry 2031; CHECK-NEXT: vmovlb.u8 q1, q1 2032; CHECK-NEXT: vmovlb.s8 q0, q0 2033; CHECK-NEXT: vpt.i16 eq, q1, zr 2034; CHECK-NEXT: vaddvat.u16 r0, q0 2035; CHECK-NEXT: sxth r0, r0 2036; CHECK-NEXT: bx lr 2037entry: 2038 %c = icmp eq <8 x i8> %b, zeroinitializer 2039 %xx = sext <8 x i8> %x to <8 x i16> 2040 %s = select <8 x i1> %c, <8 x i16> %xx, <8 x i16> zeroinitializer 2041 %z = call 
i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %s) 2042 %r = add i16 %z, %a 2043 ret i16 %r 2044} 2045 2046define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> %b, i8 %a) { 2047; CHECK-LABEL: add_v16i8_v16i8_acc: 2048; CHECK: @ %bb.0: @ %entry 2049; CHECK-NEXT: vpt.i8 eq, q1, zr 2050; CHECK-NEXT: vaddvat.u8 r0, q0 2051; CHECK-NEXT: uxtb r0, r0 2052; CHECK-NEXT: bx lr 2053entry: 2054 %c = icmp eq <16 x i8> %b, zeroinitializer 2055 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer 2056 %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %s) 2057 %r = add i8 %z, %a 2058 ret i8 %r 2059} 2060 2061define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %b, i64 %a) { 2062; CHECK-LABEL: add_v16i8_v16i64_acc_zext: 2063; CHECK: @ %bb.0: @ %entry 2064; CHECK-NEXT: .save {r7, lr} 2065; CHECK-NEXT: push {r7, lr} 2066; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} 2067; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} 2068; CHECK-NEXT: .pad #16 2069; CHECK-NEXT: sub sp, #16 2070; CHECK-NEXT: vmov q2, q0 2071; CHECK-NEXT: vcmp.i8 eq, q1, zr 2072; CHECK-NEXT: vmov.i8 q0, #0x0 2073; CHECK-NEXT: vmov.i8 q1, #0xff 2074; CHECK-NEXT: vpsel q5, q1, q0 2075; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill 2076; CHECK-NEXT: vmov.u8 r2, q5[0] 2077; CHECK-NEXT: vmov.16 q3[0], r2 2078; CHECK-NEXT: vmov.u8 r2, q5[1] 2079; CHECK-NEXT: vmov.16 q3[1], r2 2080; CHECK-NEXT: vmov.u8 r2, q5[2] 2081; CHECK-NEXT: vmov.16 q3[2], r2 2082; CHECK-NEXT: vmov.u8 r2, q5[3] 2083; CHECK-NEXT: vmov.16 q3[3], r2 2084; CHECK-NEXT: vmov.u8 r2, q5[4] 2085; CHECK-NEXT: vmov.16 q3[4], r2 2086; CHECK-NEXT: vmov.u8 r2, q5[5] 2087; CHECK-NEXT: vmov.16 q3[5], r2 2088; CHECK-NEXT: vmov.u8 r2, q5[6] 2089; CHECK-NEXT: vmov.16 q3[6], r2 2090; CHECK-NEXT: vmov.u8 r2, q5[7] 2091; CHECK-NEXT: vmov.16 q3[7], r2 2092; CHECK-NEXT: vcmp.i16 ne, q3, zr 2093; CHECK-NEXT: vpsel q6, q1, q0 2094; CHECK-NEXT: vmov.u16 r2, q6[2] 2095; CHECK-NEXT: vmov.u16 r3, 
q6[0] 2096; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 2097; CHECK-NEXT: vmov.u16 r2, q6[3] 2098; CHECK-NEXT: vmov.u16 r3, q6[1] 2099; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 2100; CHECK-NEXT: vcmp.i32 ne, q3, zr 2101; CHECK-NEXT: vpsel q7, q1, q0 2102; CHECK-NEXT: vmov r2, r3, d14 2103; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 2104; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 2105; CHECK-NEXT: vmov.u8 r2, q2[1] 2106; CHECK-NEXT: vmov.u8 r3, q2[0] 2107; CHECK-NEXT: vcmp.i32 ne, q3, zr 2108; CHECK-NEXT: vmov q4[2], q4[0], r3, r2 2109; CHECK-NEXT: vmov.i64 q3, #0xff 2110; CHECK-NEXT: vand q0, q4, q3 2111; CHECK-NEXT: vmov.i32 q4, #0x0 2112; CHECK-NEXT: vpsel q0, q0, q4 2113; CHECK-NEXT: vmov r12, lr, d1 2114; CHECK-NEXT: vmov r2, r3, d0 2115; CHECK-NEXT: orr.w lr, lr, r3 2116; CHECK-NEXT: add r12, r2 2117; CHECK-NEXT: vmov r3, r2, d15 2118; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload 2119; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2120; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 2121; CHECK-NEXT: vmov.u8 r2, q2[3] 2122; CHECK-NEXT: vmov.u8 r3, q2[2] 2123; CHECK-NEXT: vcmp.i32 ne, q0, zr 2124; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2125; CHECK-NEXT: vand q0, q0, q3 2126; CHECK-NEXT: vpsel q0, q0, q4 2127; CHECK-NEXT: vmov r2, r3, d0 2128; CHECK-NEXT: adds.w r12, r12, r2 2129; CHECK-NEXT: adc.w lr, lr, r3 2130; CHECK-NEXT: vmov r2, r3, d1 2131; CHECK-NEXT: adds.w r12, r12, r2 2132; CHECK-NEXT: vmov.u16 r2, q6[6] 2133; CHECK-NEXT: adc.w lr, lr, r3 2134; CHECK-NEXT: vmov.u16 r3, q6[4] 2135; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2136; CHECK-NEXT: vmov.u16 r2, q6[7] 2137; CHECK-NEXT: vmov.u16 r3, q6[5] 2138; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 2139; CHECK-NEXT: vcmp.i32 ne, q0, zr 2140; CHECK-NEXT: vpsel q6, q1, q7 2141; CHECK-NEXT: vmov r2, r3, d12 2142; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 2143; CHECK-NEXT: vmov q0[3], q0[1], r2, r3 2144; CHECK-NEXT: vmov.u8 r2, q2[5] 2145; CHECK-NEXT: vmov.u8 r3, q2[4] 2146; CHECK-NEXT: vcmp.i32 ne, q0, zr 2147; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2148; 
CHECK-NEXT: vand q0, q0, q3 2149; CHECK-NEXT: vpsel q0, q0, q4 2150; CHECK-NEXT: vmov r2, r3, d0 2151; CHECK-NEXT: adds.w r12, r12, r2 2152; CHECK-NEXT: adc.w lr, lr, r3 2153; CHECK-NEXT: vmov r2, r3, d1 2154; CHECK-NEXT: adds.w r12, r12, r2 2155; CHECK-NEXT: adc.w lr, lr, r3 2156; CHECK-NEXT: vmov r2, r3, d13 2157; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 2158; CHECK-NEXT: vmov q0[3], q0[1], r2, r3 2159; CHECK-NEXT: vmov.u8 r2, q2[7] 2160; CHECK-NEXT: vmov.u8 r3, q2[6] 2161; CHECK-NEXT: vcmp.i32 ne, q0, zr 2162; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2163; CHECK-NEXT: vand q0, q0, q3 2164; CHECK-NEXT: vpsel q0, q0, q4 2165; CHECK-NEXT: vmov r2, r3, d0 2166; CHECK-NEXT: adds.w r12, r12, r2 2167; CHECK-NEXT: adc.w lr, lr, r3 2168; CHECK-NEXT: vmov r2, r3, d1 2169; CHECK-NEXT: adds.w r12, r12, r2 2170; CHECK-NEXT: vmov.u8 r2, q5[8] 2171; CHECK-NEXT: vmov.16 q6[0], r2 2172; CHECK-NEXT: vmov.u8 r2, q5[9] 2173; CHECK-NEXT: vmov.16 q6[1], r2 2174; CHECK-NEXT: vmov.u8 r2, q5[10] 2175; CHECK-NEXT: vmov.16 q6[2], r2 2176; CHECK-NEXT: vmov.u8 r2, q5[11] 2177; CHECK-NEXT: vmov.16 q6[3], r2 2178; CHECK-NEXT: vmov.u8 r2, q5[12] 2179; CHECK-NEXT: vmov.16 q6[4], r2 2180; CHECK-NEXT: vmov.u8 r2, q5[13] 2181; CHECK-NEXT: vmov.16 q6[5], r2 2182; CHECK-NEXT: vmov.u8 r2, q5[14] 2183; CHECK-NEXT: vmov.16 q6[6], r2 2184; CHECK-NEXT: vmov.u8 r2, q5[15] 2185; CHECK-NEXT: vmov.16 q6[7], r2 2186; CHECK-NEXT: adc.w lr, lr, r3 2187; CHECK-NEXT: vcmp.i16 ne, q6, zr 2188; CHECK-NEXT: vpsel q5, q1, q7 2189; CHECK-NEXT: vmov.u16 r2, q5[2] 2190; CHECK-NEXT: vmov.u16 r3, q5[0] 2191; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2192; CHECK-NEXT: vmov.u16 r2, q5[3] 2193; CHECK-NEXT: vmov.u16 r3, q5[1] 2194; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 2195; CHECK-NEXT: vcmp.i32 ne, q0, zr 2196; CHECK-NEXT: vpsel q6, q1, q7 2197; CHECK-NEXT: vmov r2, r3, d12 2198; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 2199; CHECK-NEXT: vmov q0[3], q0[1], r2, r3 2200; CHECK-NEXT: vmov.u8 r2, q2[9] 2201; CHECK-NEXT: vmov.u8 r3, q2[8] 
2202; CHECK-NEXT: vcmp.i32 ne, q0, zr 2203; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2204; CHECK-NEXT: vand q0, q0, q3 2205; CHECK-NEXT: vpsel q0, q0, q4 2206; CHECK-NEXT: vmov r2, r3, d0 2207; CHECK-NEXT: adds.w r12, r12, r2 2208; CHECK-NEXT: adc.w lr, lr, r3 2209; CHECK-NEXT: vmov r2, r3, d1 2210; CHECK-NEXT: adds.w r12, r12, r2 2211; CHECK-NEXT: adc.w lr, lr, r3 2212; CHECK-NEXT: vmov r2, r3, d13 2213; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 2214; CHECK-NEXT: vmov q0[3], q0[1], r2, r3 2215; CHECK-NEXT: vmov.u8 r2, q2[11] 2216; CHECK-NEXT: vmov.u8 r3, q2[10] 2217; CHECK-NEXT: vcmp.i32 ne, q0, zr 2218; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2219; CHECK-NEXT: vand q0, q0, q3 2220; CHECK-NEXT: vpsel q0, q0, q4 2221; CHECK-NEXT: vmov r2, r3, d0 2222; CHECK-NEXT: adds.w r12, r12, r2 2223; CHECK-NEXT: adc.w lr, lr, r3 2224; CHECK-NEXT: vmov r2, r3, d1 2225; CHECK-NEXT: adds.w r12, r12, r2 2226; CHECK-NEXT: vmov.u16 r2, q5[6] 2227; CHECK-NEXT: adc.w lr, lr, r3 2228; CHECK-NEXT: vmov.u16 r3, q5[4] 2229; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2230; CHECK-NEXT: vmov.u16 r2, q5[7] 2231; CHECK-NEXT: vmov.u16 r3, q5[5] 2232; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 2233; CHECK-NEXT: vcmp.i32 ne, q0, zr 2234; CHECK-NEXT: vpsel q1, q1, q7 2235; CHECK-NEXT: vmov r2, r3, d2 2236; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 2237; CHECK-NEXT: vmov q0[3], q0[1], r2, r3 2238; CHECK-NEXT: vmov.u8 r2, q2[13] 2239; CHECK-NEXT: vmov.u8 r3, q2[12] 2240; CHECK-NEXT: vcmp.i32 ne, q0, zr 2241; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2242; CHECK-NEXT: vand q0, q0, q3 2243; CHECK-NEXT: vpsel q0, q0, q4 2244; CHECK-NEXT: vmov r2, r3, d0 2245; CHECK-NEXT: adds.w r12, r12, r2 2246; CHECK-NEXT: adc.w lr, lr, r3 2247; CHECK-NEXT: vmov r2, r3, d1 2248; CHECK-NEXT: adds.w r12, r12, r2 2249; CHECK-NEXT: adc.w lr, lr, r3 2250; CHECK-NEXT: vmov r2, r3, d3 2251; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 2252; CHECK-NEXT: vmov q0[3], q0[1], r2, r3 2253; CHECK-NEXT: vmov.u8 r2, q2[15] 2254; CHECK-NEXT: vmov.u8 r3, q2[14] 2255; 
CHECK-NEXT: vcmp.i32 ne, q0, zr 2256; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2257; CHECK-NEXT: vand q0, q0, q3 2258; CHECK-NEXT: vpsel q0, q0, q4 2259; CHECK-NEXT: vmov r2, r3, d0 2260; CHECK-NEXT: adds.w r12, r12, r2 2261; CHECK-NEXT: adc.w lr, lr, r3 2262; CHECK-NEXT: vmov r2, r3, d1 2263; CHECK-NEXT: adds.w r2, r2, r12 2264; CHECK-NEXT: adc.w r3, r3, lr 2265; CHECK-NEXT: adds r0, r0, r2 2266; CHECK-NEXT: adcs r1, r3 2267; CHECK-NEXT: add sp, #16 2268; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} 2269; CHECK-NEXT: pop {r7, pc} 2270entry: 2271 %c = icmp eq <16 x i8> %b, zeroinitializer 2272 %xx = zext <16 x i8> %x to <16 x i64> 2273 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer 2274 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) 2275 %r = add i64 %z, %a 2276 ret i64 %r 2277} 2278 2279define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, <16 x i8> %b, i64 %a) { 2280; CHECK-LABEL: add_v16i8_v16i64_acc_sext: 2281; CHECK: @ %bb.0: @ %entry 2282; CHECK-NEXT: .save {r7, lr} 2283; CHECK-NEXT: push {r7, lr} 2284; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} 2285; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} 2286; CHECK-NEXT: vcmp.i8 eq, q1, zr 2287; CHECK-NEXT: vmov.i8 q1, #0x0 2288; CHECK-NEXT: vmov.i8 q2, #0xff 2289; CHECK-NEXT: vpsel q4, q2, q1 2290; CHECK-NEXT: vmov.u8 r2, q4[0] 2291; CHECK-NEXT: vmov.16 q3[0], r2 2292; CHECK-NEXT: vmov.u8 r2, q4[1] 2293; CHECK-NEXT: vmov.16 q3[1], r2 2294; CHECK-NEXT: vmov.u8 r2, q4[2] 2295; CHECK-NEXT: vmov.16 q3[2], r2 2296; CHECK-NEXT: vmov.u8 r2, q4[3] 2297; CHECK-NEXT: vmov.16 q3[3], r2 2298; CHECK-NEXT: vmov.u8 r2, q4[4] 2299; CHECK-NEXT: vmov.16 q3[4], r2 2300; CHECK-NEXT: vmov.u8 r2, q4[5] 2301; CHECK-NEXT: vmov.16 q3[5], r2 2302; CHECK-NEXT: vmov.u8 r2, q4[6] 2303; CHECK-NEXT: vmov.16 q3[6], r2 2304; CHECK-NEXT: vmov.u8 r2, q4[7] 2305; CHECK-NEXT: vmov.16 q3[7], r2 2306; CHECK-NEXT: vcmp.i16 ne, q3, zr 2307; CHECK-NEXT: vpsel q5, q2, q1 
2308; CHECK-NEXT: vmov.u16 r2, q5[2] 2309; CHECK-NEXT: vmov.u16 r3, q5[0] 2310; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 2311; CHECK-NEXT: vmov.u16 r2, q5[3] 2312; CHECK-NEXT: vmov.u16 r3, q5[1] 2313; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 2314; CHECK-NEXT: vcmp.i32 ne, q3, zr 2315; CHECK-NEXT: vpsel q6, q2, q1 2316; CHECK-NEXT: vmov r2, r3, d12 2317; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 2318; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 2319; CHECK-NEXT: vmov.s8 r2, q0[1] 2320; CHECK-NEXT: vmov.s8 r3, q0[0] 2321; CHECK-NEXT: vcmp.i32 ne, q3, zr 2322; CHECK-NEXT: vmov q7[2], q7[0], r3, r2 2323; CHECK-NEXT: asrs r2, r2, #31 2324; CHECK-NEXT: asrs r3, r3, #31 2325; CHECK-NEXT: vmov.i32 q3, #0x0 2326; CHECK-NEXT: vmov q7[3], q7[1], r3, r2 2327; CHECK-NEXT: vpsel q7, q7, q3 2328; CHECK-NEXT: vmov lr, r12, d15 2329; CHECK-NEXT: vmov r3, r2, d14 2330; CHECK-NEXT: adds.w lr, lr, r3 2331; CHECK-NEXT: adc.w r12, r12, r2 2332; CHECK-NEXT: vmov r2, r3, d13 2333; CHECK-NEXT: vmov q6[2], q6[0], r2, r3 2334; CHECK-NEXT: vmov q6[3], q6[1], r2, r3 2335; CHECK-NEXT: vmov.s8 r2, q0[3] 2336; CHECK-NEXT: vmov.s8 r3, q0[2] 2337; CHECK-NEXT: vcmp.i32 ne, q6, zr 2338; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 2339; CHECK-NEXT: asrs r2, r2, #31 2340; CHECK-NEXT: asrs r3, r3, #31 2341; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 2342; CHECK-NEXT: vpsel q6, q6, q3 2343; CHECK-NEXT: vmov r2, r3, d12 2344; CHECK-NEXT: adds.w lr, lr, r2 2345; CHECK-NEXT: adc.w r12, r12, r3 2346; CHECK-NEXT: vmov r2, r3, d13 2347; CHECK-NEXT: adds.w lr, lr, r2 2348; CHECK-NEXT: vmov.u16 r2, q5[6] 2349; CHECK-NEXT: adc.w r12, r12, r3 2350; CHECK-NEXT: vmov.u16 r3, q5[4] 2351; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 2352; CHECK-NEXT: vmov.u16 r2, q5[7] 2353; CHECK-NEXT: vmov.u16 r3, q5[5] 2354; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 2355; CHECK-NEXT: vcmp.i32 ne, q6, zr 2356; CHECK-NEXT: vpsel q5, q2, q1 2357; CHECK-NEXT: vmov r2, r3, d10 2358; CHECK-NEXT: vmov q6[2], q6[0], r2, r3 2359; CHECK-NEXT: vmov q6[3], q6[1], r2, r3 2360; 
CHECK-NEXT: vmov.s8 r2, q0[5] 2361; CHECK-NEXT: vmov.s8 r3, q0[4] 2362; CHECK-NEXT: vcmp.i32 ne, q6, zr 2363; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 2364; CHECK-NEXT: asrs r2, r2, #31 2365; CHECK-NEXT: asrs r3, r3, #31 2366; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 2367; CHECK-NEXT: vpsel q6, q6, q3 2368; CHECK-NEXT: vmov r2, r3, d12 2369; CHECK-NEXT: adds.w lr, lr, r2 2370; CHECK-NEXT: adc.w r12, r12, r3 2371; CHECK-NEXT: vmov r2, r3, d13 2372; CHECK-NEXT: adds.w lr, lr, r2 2373; CHECK-NEXT: adc.w r12, r12, r3 2374; CHECK-NEXT: vmov r2, r3, d11 2375; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 2376; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 2377; CHECK-NEXT: vmov.s8 r2, q0[7] 2378; CHECK-NEXT: vmov.s8 r3, q0[6] 2379; CHECK-NEXT: vcmp.i32 ne, q5, zr 2380; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 2381; CHECK-NEXT: asrs r2, r2, #31 2382; CHECK-NEXT: asrs r3, r3, #31 2383; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 2384; CHECK-NEXT: vpsel q5, q5, q3 2385; CHECK-NEXT: vmov r2, r3, d10 2386; CHECK-NEXT: adds.w lr, lr, r2 2387; CHECK-NEXT: adc.w r12, r12, r3 2388; CHECK-NEXT: vmov r2, r3, d11 2389; CHECK-NEXT: adds.w lr, lr, r2 2390; CHECK-NEXT: vmov.u8 r2, q4[8] 2391; CHECK-NEXT: vmov.16 q5[0], r2 2392; CHECK-NEXT: vmov.u8 r2, q4[9] 2393; CHECK-NEXT: vmov.16 q5[1], r2 2394; CHECK-NEXT: vmov.u8 r2, q4[10] 2395; CHECK-NEXT: vmov.16 q5[2], r2 2396; CHECK-NEXT: vmov.u8 r2, q4[11] 2397; CHECK-NEXT: vmov.16 q5[3], r2 2398; CHECK-NEXT: vmov.u8 r2, q4[12] 2399; CHECK-NEXT: vmov.16 q5[4], r2 2400; CHECK-NEXT: vmov.u8 r2, q4[13] 2401; CHECK-NEXT: vmov.16 q5[5], r2 2402; CHECK-NEXT: vmov.u8 r2, q4[14] 2403; CHECK-NEXT: vmov.16 q5[6], r2 2404; CHECK-NEXT: vmov.u8 r2, q4[15] 2405; CHECK-NEXT: vmov.16 q5[7], r2 2406; CHECK-NEXT: adc.w r12, r12, r3 2407; CHECK-NEXT: vcmp.i16 ne, q5, zr 2408; CHECK-NEXT: vpsel q4, q2, q1 2409; CHECK-NEXT: vmov.u16 r2, q4[2] 2410; CHECK-NEXT: vmov.u16 r3, q4[0] 2411; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 2412; CHECK-NEXT: vmov.u16 r2, q4[3] 2413; CHECK-NEXT: vmov.u16 r3, 
q4[1] 2414; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 2415; CHECK-NEXT: vcmp.i32 ne, q5, zr 2416; CHECK-NEXT: vpsel q5, q2, q1 2417; CHECK-NEXT: vmov r2, r3, d10 2418; CHECK-NEXT: vmov q6[2], q6[0], r2, r3 2419; CHECK-NEXT: vmov q6[3], q6[1], r2, r3 2420; CHECK-NEXT: vmov.s8 r2, q0[9] 2421; CHECK-NEXT: vmov.s8 r3, q0[8] 2422; CHECK-NEXT: vcmp.i32 ne, q6, zr 2423; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 2424; CHECK-NEXT: asrs r2, r2, #31 2425; CHECK-NEXT: asrs r3, r3, #31 2426; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 2427; CHECK-NEXT: vpsel q6, q6, q3 2428; CHECK-NEXT: vmov r2, r3, d12 2429; CHECK-NEXT: adds.w lr, lr, r2 2430; CHECK-NEXT: adc.w r12, r12, r3 2431; CHECK-NEXT: vmov r2, r3, d13 2432; CHECK-NEXT: adds.w lr, lr, r2 2433; CHECK-NEXT: adc.w r12, r12, r3 2434; CHECK-NEXT: vmov r2, r3, d11 2435; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 2436; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 2437; CHECK-NEXT: vmov.s8 r2, q0[11] 2438; CHECK-NEXT: vmov.s8 r3, q0[10] 2439; CHECK-NEXT: vcmp.i32 ne, q5, zr 2440; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 2441; CHECK-NEXT: asrs r2, r2, #31 2442; CHECK-NEXT: asrs r3, r3, #31 2443; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 2444; CHECK-NEXT: vpsel q5, q5, q3 2445; CHECK-NEXT: vmov r2, r3, d10 2446; CHECK-NEXT: adds.w lr, lr, r2 2447; CHECK-NEXT: adc.w r12, r12, r3 2448; CHECK-NEXT: vmov r2, r3, d11 2449; CHECK-NEXT: adds.w lr, lr, r2 2450; CHECK-NEXT: vmov.u16 r2, q4[6] 2451; CHECK-NEXT: adc.w r12, r12, r3 2452; CHECK-NEXT: vmov.u16 r3, q4[4] 2453; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 2454; CHECK-NEXT: vmov.u16 r2, q4[7] 2455; CHECK-NEXT: vmov.u16 r3, q4[5] 2456; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 2457; CHECK-NEXT: vcmp.i32 ne, q5, zr 2458; CHECK-NEXT: vpsel q1, q2, q1 2459; CHECK-NEXT: vmov r2, r3, d2 2460; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 2461; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 2462; CHECK-NEXT: vmov.s8 r2, q0[13] 2463; CHECK-NEXT: vmov.s8 r3, q0[12] 2464; CHECK-NEXT: vcmp.i32 ne, q2, zr 2465; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 
2466; CHECK-NEXT: asrs r2, r2, #31 2467; CHECK-NEXT: asrs r3, r3, #31 2468; CHECK-NEXT: vmov q2[3], q2[1], r3, r2 2469; CHECK-NEXT: vpsel q2, q2, q3 2470; CHECK-NEXT: vmov r2, r3, d4 2471; CHECK-NEXT: adds.w lr, lr, r2 2472; CHECK-NEXT: adc.w r12, r12, r3 2473; CHECK-NEXT: vmov r2, r3, d5 2474; CHECK-NEXT: adds.w lr, lr, r2 2475; CHECK-NEXT: adc.w r12, r12, r3 2476; CHECK-NEXT: vmov r2, r3, d3 2477; CHECK-NEXT: vmov q1[2], q1[0], r2, r3 2478; CHECK-NEXT: vmov q1[3], q1[1], r2, r3 2479; CHECK-NEXT: vmov.s8 r2, q0[15] 2480; CHECK-NEXT: vmov.s8 r3, q0[14] 2481; CHECK-NEXT: vcmp.i32 ne, q1, zr 2482; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2483; CHECK-NEXT: asrs r2, r2, #31 2484; CHECK-NEXT: asrs r3, r3, #31 2485; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 2486; CHECK-NEXT: vpsel q0, q0, q3 2487; CHECK-NEXT: vmov r2, r3, d0 2488; CHECK-NEXT: adds.w lr, lr, r2 2489; CHECK-NEXT: adc.w r12, r12, r3 2490; CHECK-NEXT: vmov r2, r3, d1 2491; CHECK-NEXT: adds.w r2, r2, lr 2492; CHECK-NEXT: adc.w r3, r3, r12 2493; CHECK-NEXT: adds r0, r0, r2 2494; CHECK-NEXT: adcs r1, r3 2495; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} 2496; CHECK-NEXT: pop {r7, pc} 2497entry: 2498 %c = icmp eq <16 x i8> %b, zeroinitializer 2499 %xx = sext <16 x i8> %x to <16 x i64> 2500 %s = select <16 x i1> %c, <16 x i64> %xx, <16 x i64> zeroinitializer 2501 %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %s) 2502 %r = add i64 %z, %a 2503 ret i64 %r 2504} 2505 2506define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i64 %a) { 2507; CHECK-LABEL: add_v2i8_v2i64_acc_zext: 2508; CHECK: @ %bb.0: @ %entry 2509; CHECK-NEXT: .save {r7, lr} 2510; CHECK-NEXT: push {r7, lr} 2511; CHECK-NEXT: vmov.i64 q2, #0xff 2512; CHECK-NEXT: movs r3, #0 2513; CHECK-NEXT: vand q1, q1, q2 2514; CHECK-NEXT: vand q0, q0, q2 2515; CHECK-NEXT: vmov r2, s4 2516; CHECK-NEXT: cmp r2, #0 2517; CHECK-NEXT: csetm r2, eq 2518; CHECK-NEXT: bfi r3, r2, #0, #8 2519; CHECK-NEXT: vmov r2, s6 2520; CHECK-NEXT: 
vmov.i32 q1, #0x0 2521; CHECK-NEXT: cmp r2, #0 2522; CHECK-NEXT: csetm r2, eq 2523; CHECK-NEXT: bfi r3, r2, #8, #8 2524; CHECK-NEXT: vmsr p0, r3 2525; CHECK-NEXT: vpsel q0, q0, q1 2526; CHECK-NEXT: vmov r12, lr, d1 2527; CHECK-NEXT: vmov r2, r3, d0 2528; CHECK-NEXT: add r2, r12 2529; CHECK-NEXT: orr.w r3, r3, lr 2530; CHECK-NEXT: adds r0, r0, r2 2531; CHECK-NEXT: adcs r1, r3 2532; CHECK-NEXT: pop {r7, pc} 2533entry: 2534 %c = icmp eq <2 x i8> %b, zeroinitializer 2535 %xx = zext <2 x i8> %x to <2 x i64> 2536 %s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer 2537 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) 2538 %r = add i64 %z, %a 2539 ret i64 %r 2540} 2541 2542define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, <2 x i8> %b, i64 %a) { 2543; CHECK-LABEL: add_v2i8_v2i64_acc_sext: 2544; CHECK: @ %bb.0: @ %entry 2545; CHECK-NEXT: .save {r7, lr} 2546; CHECK-NEXT: push {r7, lr} 2547; CHECK-NEXT: vmov.i32 q2, #0xff 2548; CHECK-NEXT: movs r3, #0 2549; CHECK-NEXT: vand q1, q1, q2 2550; CHECK-NEXT: vmov r2, s4 2551; CHECK-NEXT: cmp r2, #0 2552; CHECK-NEXT: csetm r2, eq 2553; CHECK-NEXT: bfi r3, r2, #0, #8 2554; CHECK-NEXT: vmov r2, s6 2555; CHECK-NEXT: vmov.i32 q1, #0x0 2556; CHECK-NEXT: cmp r2, #0 2557; CHECK-NEXT: csetm r2, eq 2558; CHECK-NEXT: bfi r3, r2, #8, #8 2559; CHECK-NEXT: vmov r2, s2 2560; CHECK-NEXT: vmsr p0, r3 2561; CHECK-NEXT: vmov r3, s0 2562; CHECK-NEXT: sxtb r2, r2 2563; CHECK-NEXT: sxtb r3, r3 2564; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 2565; CHECK-NEXT: asrs r2, r2, #31 2566; CHECK-NEXT: asrs r3, r3, #31 2567; CHECK-NEXT: vmov q0[3], q0[1], r3, r2 2568; CHECK-NEXT: vpsel q0, q0, q1 2569; CHECK-NEXT: vmov lr, r12, d1 2570; CHECK-NEXT: vmov r3, r2, d0 2571; CHECK-NEXT: adds.w r3, r3, lr 2572; CHECK-NEXT: adc.w r2, r2, r12 2573; CHECK-NEXT: adds r0, r0, r3 2574; CHECK-NEXT: adcs r1, r2 2575; CHECK-NEXT: pop {r7, pc} 2576entry: 2577 %c = icmp eq <2 x i8> %b, zeroinitializer 2578 %xx = sext <2 x i8> %x to <2 x i64> 2579 
%s = select <2 x i1> %c, <2 x i64> %xx, <2 x i64> zeroinitializer 2580 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) 2581 %r = add i64 %z, %a 2582 ret i64 %r 2583} 2584 2585define arm_aapcs_vfpcc i64 @add_v2i64_v2i64_acc(<2 x i64> %x, <2 x i64> %b, i64 %a) { 2586; CHECK-LABEL: add_v2i64_v2i64_acc: 2587; CHECK: @ %bb.0: @ %entry 2588; CHECK-NEXT: .save {r7, lr} 2589; CHECK-NEXT: push {r7, lr} 2590; CHECK-NEXT: vmov r2, r3, d2 2591; CHECK-NEXT: mov.w r12, #0 2592; CHECK-NEXT: orrs r2, r3 2593; CHECK-NEXT: csetm r2, eq 2594; CHECK-NEXT: bfi r12, r2, #0, #8 2595; CHECK-NEXT: vmov r2, r3, d3 2596; CHECK-NEXT: vmov.i32 q1, #0x0 2597; CHECK-NEXT: orrs r2, r3 2598; CHECK-NEXT: csetm r2, eq 2599; CHECK-NEXT: bfi r12, r2, #8, #8 2600; CHECK-NEXT: vmsr p0, r12 2601; CHECK-NEXT: vpsel q0, q0, q1 2602; CHECK-NEXT: vmov lr, r12, d1 2603; CHECK-NEXT: vmov r3, r2, d0 2604; CHECK-NEXT: adds.w r3, r3, lr 2605; CHECK-NEXT: adc.w r2, r2, r12 2606; CHECK-NEXT: adds r0, r0, r3 2607; CHECK-NEXT: adcs r1, r2 2608; CHECK-NEXT: pop {r7, pc} 2609entry: 2610 %c = icmp eq <2 x i64> %b, zeroinitializer 2611 %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> zeroinitializer 2612 %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %s) 2613 %r = add i64 %z, %a 2614 ret i64 %r 2615} 2616 2617declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) 2618declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) 2619declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) 2620declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) 2621declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) 2622declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) 2623declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) 2624declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) 2625declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) 2626declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) 2627