; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Tests for lowering of bitwise (and/or/xor) vector reductions
; (llvm.vector.reduce.{and,or,xor}) on MVE, with and without an extra
; scalar accumuland (_acc variants).

define arm_aapcs_vfpcc i32 @and_v2i32(<2 x i32> %x) {
; CHECK-LABEL: and_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @and_v4i32(<4 x i32> %x) {
; CHECK-LABEL: and_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @and_v8i32(<8 x i32> %x) {
; CHECK-LABEL: and_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i16 @and_v4i16(<4 x i16> %x) {
; CHECK-LABEL: and_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @and_v8i16(<8 x i16> %x) {
; CHECK-LABEL: and_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @and_v16i16(<16 x i16> %x) {
; CHECK-LABEL: and_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i8 @and_v8i8(<8 x i8> %x) {
; CHECK-LABEL: and_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @and_v16i8(<16 x i8> %x) {
; CHECK-LABEL: and_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @and_v32i8(<32 x i8> %x) {
; CHECK-LABEL: and_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i64 @and_v1i64(<1 x i64> %x) {
; CHECK-LABEL: and_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @and_v2i64(<2 x i64> %x) {
; CHECK-LABEL: and_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    ands r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @and_v4i64(<4 x i64> %x) {
; CHECK-LABEL: and_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    ands r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i32 @and_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @and_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @and_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @and_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @and_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @and_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @and_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @and_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @and_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i64 @and_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    ands r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @and_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, r12, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    and.w r2, lr, r12
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @and_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r2, r12, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    and.w r2, lr, r12
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i32 @or_v2i32(<2 x i32> %x) {
; CHECK-LABEL: or_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @or_v4i32(<4 x i32> %x) {
; CHECK-LABEL: or_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @or_v8i32(<8 x i32> %x) {
; CHECK-LABEL: or_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i16 @or_v4i16(<4 x i16> %x) {
; CHECK-LABEL: or_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @or_v8i16(<8 x i16> %x) {
; CHECK-LABEL: or_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @or_v16i16(<16 x i16> %x) {
; CHECK-LABEL: or_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i8 @or_v8i8(<8 x i8> %x) {
; CHECK-LABEL: or_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @or_v16i8(<16 x i8> %x) {
; CHECK-LABEL: or_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @or_v32i8(<32 x i8> %x) {
; CHECK-LABEL: or_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i64 @or_v1i64(<1 x i64> %x) {
; CHECK-LABEL: or_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @or_v2i64(<2 x i64> %x) {
; CHECK-LABEL: or_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @or_v4i64(<4 x i64> %x) {
; CHECK-LABEL: or_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i32 @or_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @or_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @or_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @or_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @or_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @or_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @or_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @or_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @or_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i64 @or_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @or_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r2, r12, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    orr.w r2, lr, r12
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @or_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r2, r12, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    orr.w r2, lr, r12
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i32 @xor_v2i32(<2 x i32> %x) {
; CHECK-LABEL: xor_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @xor_v4i32(<4 x i32> %x) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @xor_v8i32(<8 x i32> %x) {
; CHECK-LABEL: xor_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i16 @xor_v4i16(<4 x i16> %x) {
; CHECK-LABEL: xor_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @xor_v8i16(<8 x i16> %x) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @xor_v16i16(<16 x i16> %x) {
; CHECK-LABEL: xor_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i8 @xor_v8i8(<8 x i8> %x) {
; CHECK-LABEL: xor_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @xor_v16i8(<16 x i8> %x) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @xor_v32i8(<32 x i8> %x) {
; CHECK-LABEL: xor_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i64 @xor_v1i64(<1 x i64> %x) {
; CHECK-LABEL: xor_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @xor_v2i64(<2 x i64> %x) {
; CHECK-LABEL: xor_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    eors r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @xor_v4i64(<4 x i64> %x) {
; CHECK-LABEL: xor_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    eors r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i32 @xor_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @xor_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @xor_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @xor_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @xor_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @xor_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @xor_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @xor_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @xor_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
%z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x) 1166 %r = xor i8 %y, %z 1167 ret i8 %r 1168} 1169 1170define arm_aapcs_vfpcc i64 @xor_v1i64_acc(<1 x i64> %x, i64 %y) { 1171; CHECK-LABEL: xor_v1i64_acc: 1172; CHECK: @ %bb.0: @ %entry 1173; CHECK-NEXT: eors r0, r2 1174; CHECK-NEXT: eors r1, r3 1175; CHECK-NEXT: bx lr 1176entry: 1177 %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x) 1178 %r = xor i64 %y, %z 1179 ret i64 %r 1180} 1181 1182define arm_aapcs_vfpcc i64 @xor_v2i64_acc(<2 x i64> %x, i64 %y) { 1183; CHECK-LABEL: xor_v2i64_acc: 1184; CHECK: @ %bb.0: @ %entry 1185; CHECK-NEXT: .save {r7, lr} 1186; CHECK-NEXT: push {r7, lr} 1187; CHECK-NEXT: vmov r2, r12, d1 1188; CHECK-NEXT: vmov r3, lr, d0 1189; CHECK-NEXT: eors r2, r3 1190; CHECK-NEXT: eors r0, r2 1191; CHECK-NEXT: eor.w r2, lr, r12 1192; CHECK-NEXT: eors r1, r2 1193; CHECK-NEXT: pop {r7, pc} 1194entry: 1195 %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x) 1196 %r = xor i64 %y, %z 1197 ret i64 %r 1198} 1199 1200define arm_aapcs_vfpcc i64 @xor_v4i64_acc(<4 x i64> %x, i64 %y) { 1201; CHECK-LABEL: xor_v4i64_acc: 1202; CHECK: @ %bb.0: @ %entry 1203; CHECK-NEXT: .save {r7, lr} 1204; CHECK-NEXT: push {r7, lr} 1205; CHECK-NEXT: veor q0, q0, q1 1206; CHECK-NEXT: vmov r2, r12, d1 1207; CHECK-NEXT: vmov r3, lr, d0 1208; CHECK-NEXT: eors r2, r3 1209; CHECK-NEXT: eors r0, r2 1210; CHECK-NEXT: eor.w r2, lr, r12 1211; CHECK-NEXT: eors r1, r2 1212; CHECK-NEXT: pop {r7, pc} 1213entry: 1214 %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x) 1215 %r = xor i64 %y, %z 1216 ret i64 %r 1217} 1218 1219declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>) 1220declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>) 1221declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>) 1222declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>) 1223declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>) 1224declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>) 1225declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>) 
1226declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>) 1227declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>) 1228declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>) 1229declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) 1230declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>) 1231declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) 1232declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) 1233declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) 1234declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>) 1235declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) 1236declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) 1237declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>) 1238declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) 1239declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>) 1240declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>) 1241declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) 1242declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>) 1243declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>) 1244declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>) 1245declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>) 1246declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>) 1247declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>) 1248declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>) 1249declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>) 1250declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>) 1251declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>) 1252declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>) 1253declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>) 1254declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>) 1255