; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Every function below compares its source vector(s) against zero, runs the
; resulting <N x i1> value through a shufflevector, and uses the shuffled value
; as the condition of a vector select between %a and %b. The functions differ
; only in element type and in the shuffle mask:
;   shuffle1_*    - lanes in reverse order
;   shuffle2_*    - identity mask
;   shuffle3_*    - lane 0 in every position
;   shuffle4_*    - v2i64: lane 1 in both positions;
;                   others: lane 0 everywhere except the last position (lane 1)
;   shuffle5_b_*  - low half of a compare result with twice as many lanes
;   shuffle5_t_*  - high half of a compare result with twice as many lanes
;   shuffle6_*    - concatenation of two compare results
;   shuffle2src_* - even result lanes from %c2, odd result lanes from %c1
;                   (both taken from even source lanes)
; The expected assembly is autogenerated; regenerate it with
; utils/update_llc_test_checks.py rather than editing it by hand.

; shuffle1: the compare result is used with its lanes reversed.

define <2 x i64> @shuffle1_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shuffle1_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    csetm r2, eq
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r3, r0, #8, #8
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i64> %src, zeroinitializer
  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 1, i32 0>
  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

define <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle1_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle1_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle1_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    rbit r0, r0
; CHECK-NEXT:    lsrs r0, r0, #16
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle2: identity mask, the compare result is used unchanged.

define <2 x i64> @shuffle2_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shuffle2_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    orrs.w r0, r2, r3
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i64> %src, zeroinitializer
  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 0, i32 1>
  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

define <4 x i32> @shuffle2_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle2_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle2_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle2_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle2_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle2_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle3: lane 0 of the compare result is replicated into every lane.

define <2 x i64> @shuffle3_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shuffle3_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i64> %src, zeroinitializer
  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 0, i32 0>
  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

define <4 x i32> @shuffle3_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle3_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vdup.32 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle3_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle3_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle3_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle3_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vdup.8 q0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i8 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle4: the v2i64 variant replicates lane 1 into both lanes; the other
; variants use lane 0 in every position except the last, which takes lane 1.

define <2 x i64> @shuffle4_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shuffle4_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs.w r0, r2, r3
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <2 x i64> %src, zeroinitializer
  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

define <4 x i32> @shuffle4_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle4_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.f32 s4, s0
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle4_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle4_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i16 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle4_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle4_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vdup.8 q1, r0
; CHECK-NEXT:    vmov.u8 r0, q0[1]
; CHECK-NEXT:    vmov.8 q1[15], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i8 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle5: the compare is done on a vector with twice as many lanes as the
; select; the _b variants keep the low half of the compare result and the _t
; variants keep the high half.

define <2 x i64> @shuffle5_b_v2i64(<4 x i32> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shuffle5_b_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <2 x i32> <i32 0, i32 1>
  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

define <2 x i64> @shuffle5_t_v2i64(<4 x i32> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shuffle5_t_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %src, zeroinitializer
  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <2 x i32> <i32 2, i32 3>
  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

define <4 x i32> @shuffle5_b_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle5_b_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[2]
; CHECK-NEXT:    vmov.u16 r1, q0[0]
; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q0[3]
; CHECK-NEXT:    vmov.u16 r1, q0[1]
; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <4 x i32> @shuffle5_t_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle5_t_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    vmov.u16 r1, q0[5]
; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %src, zeroinitializer
  %sh = shufflevector <8 x i1> %c, <8 x i1> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle5_b_v8i16(<16 x i8> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle5_b_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q1, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <8 x i16> @shuffle5_t_v8i16(<16 x i8> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle5_t_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vpsel q1, q1, q0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.16 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.16 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.16 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.16 q0[7], r0
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %src, zeroinitializer
  %sh = shufflevector <16 x i1> %c, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

; shuffle6: the compare results of %src1 and %src2 are concatenated (%c1 in the
; low lanes, %c2 in the high lanes) to form a select condition with twice as
; many lanes as either compare.

define <4 x i32> @shuffle6_v2i64(<2 x i64> %src1, <2 x i64> %src2, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle6_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #0, #4
; CHECK-NEXT:    orrs.w r0, r2, r3
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #4, #4
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r0, r2, d0
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #8, #4
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r1, r0, #12, #4
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <2 x i64> %src1, zeroinitializer
  %c2 = icmp eq <2 x i64> %src2, zeroinitializer
  %sh = shufflevector <2 x i1> %c1, <2 x i1> %c2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <8 x i16> @shuffle6_v4i32(<4 x i32> %src1, <4 x i32> %src2, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle6_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    add.w r12, sp, #24
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vldrw.u32 q2, [r12]
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    vcmp.i32 eq, q2, zr
; CHECK-NEXT:    vpsel q2, q1, q0
; CHECK-NEXT:    vstrh.32 q2, [r4, #8]
; CHECK-NEXT:    vmov d4, r0, r1
; CHECK-NEXT:    vmov d5, r2, r3
; CHECK-NEXT:    add r0, sp, #56
; CHECK-NEXT:    vcmp.i32 eq, q2, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vstrh.32 q0, [r4]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #40
; CHECK-NEXT:    vldrw.u32 q2, [r4]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vcmp.i16 ne, q2, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r4, pc}
entry:
  %c1 = icmp eq <4 x i32> %src1, zeroinitializer
  %c2 = icmp eq <4 x i32> %src2, zeroinitializer
  %sh = shufflevector <4 x i1> %c1, <4 x i1> %c2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <16 x i8> @shuffle6_v8i16(<8 x i16> %src1, <8 x i16> %src2, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle6_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    add.w r12, sp, #24
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vldrw.u32 q2, [r12]
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    vcmp.i16 eq, q2, zr
; CHECK-NEXT:    vpsel q2, q1, q0
; CHECK-NEXT:    vstrb.16 q2, [r4, #8]
; CHECK-NEXT:    vmov d4, r0, r1
; CHECK-NEXT:    vmov d5, r2, r3
; CHECK-NEXT:    add r0, sp, #56
; CHECK-NEXT:    vcmp.i16 eq, q2, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vstrb.16 q0, [r4]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #40
; CHECK-NEXT:    vldrw.u32 q2, [r4]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vcmp.i8 ne, q2, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r4, pc}
entry:
  %c1 = icmp eq <8 x i16> %src1, zeroinitializer
  %c2 = icmp eq <8 x i16> %src2, zeroinitializer
  %sh = shufflevector <8 x i1> %c1, <8 x i1> %c2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; shuffle2src: a two-source mask of the same width as the inputs. Result lane
; 2k takes lane 2k of %c2 and result lane 2k+1 takes lane 2k of %c1.

define <16 x i8> @shuffle2src_v16i8(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shuffle2src_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mov r12, sp
; CHECK-NEXT:    vmov d6, r0, r1
; CHECK-NEXT:    vldrw.u32 q2, [r12]
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d7, r2, r3
; CHECK-NEXT:    vcmp.i8 eq, q2, zr
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vpsel q2, q1, q0
; CHECK-NEXT:    vcmp.i8 eq, q3, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmovnt.i16 q2, q0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i8 ne, q2, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <16 x i8> %src1, zeroinitializer
  %c2 = icmp eq <16 x i8> %src2, zeroinitializer
  %sh = shufflevector <16 x i1> %c1, <16 x i1> %c2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  %s = select <16 x i1> %sh, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

define <8 x i16> @shuffle2src_v8i16(<8 x i16> %src1, <8 x i16> %src2, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shuffle2src_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mov r12, sp
; CHECK-NEXT:    vmov d6, r0, r1
; CHECK-NEXT:    vldrw.u32 q2, [r12]
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d7, r2, r3
; CHECK-NEXT:    vcmp.i16 eq, q2, zr
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vpsel q2, q1, q0
; CHECK-NEXT:    vcmp.i16 eq, q3, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmovnt.i32 q2, q0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i16 ne, q2, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %src1, zeroinitializer
  %c2 = icmp eq <8 x i16> %src2, zeroinitializer
  %sh = shufflevector <8 x i1> %c1, <8 x i1> %c2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  %s = select <8 x i1> %sh, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define <4 x i32> @shuffle2src_v4i32(<4 x i32> %src1, <4 x i32> %src2, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shuffle2src_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    mov r12, sp
; CHECK-NEXT:    vmov d6, r0, r1
; CHECK-NEXT:    vldrw.u32 q2, [r12]
; CHECK-NEXT:    vmov.i8 q0, #0x0
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmov d7, r2, r3
; CHECK-NEXT:    vcmp.i32 eq, q2, zr
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vpsel q2, q1, q0
; CHECK-NEXT:    vcmp.i32 eq, q3, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov.f32 s9, s0
; CHECK-NEXT:    vmov.f32 s11, s2
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vcmp.i32 ne, q2, zr
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %src1, zeroinitializer
  %c2 = icmp eq <4 x i32> %src2, zeroinitializer
  %sh = shufflevector <4 x i1> %c1, <4 x i1> %c2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define <2 x i64> @shuffle2src_v2i64(<2 x i64> %src1, <2 x i64> %src2, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shuffle2src_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrd r2, r3, [sp]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    csetm r2, eq
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    csetm r0, eq
; CHECK-NEXT:    bfi r3, r0, #8, #8
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <2 x i64> %src1, zeroinitializer
  %c2 = icmp eq <2 x i64> %src2, zeroinitializer
  %sh = shufflevector <2 x i1> %c1, <2 x i1> %c2, <2 x i32> <i32 2, i32 0>
  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}