; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Every function in this file follows the same shape:
;   %c1 = compare of %a against zero
;   %c2 = compare of %b against zero
;   %c3 = compare of %c against zero
;   %c4 = select %c3, %c1, %c2      ; a select whose operands are i1 vectors
;   %s  = select %c4, %a, %b        ; the chosen mask then picks lanes of %a or %b
; The expected-output lines are generated; after changing any IR below,
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.

; --- icmp eq, vector condition: %c3 picks between %c1 and %c2 lane by lane ---

define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpeqz_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q1, zr
; CHECK-NEXT:    vcmpt.i32 ne, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vcmpt.i32 eq, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp eq <4 x i32> %b, zeroinitializer
  %c3 = icmp eq <4 x i32> %c, zeroinitializer
  %c4 = select <4 x i1> %c3, <4 x i1> %c1, <4 x i1> %c2
  %s = select <4 x i1> %c4, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: cmpeqz_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q1, zr
; CHECK-NEXT:    vcmpt.i16 ne, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vcmpt.i16 eq, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %a, zeroinitializer
  %c2 = icmp eq <8 x i16> %b, zeroinitializer
  %c3 = icmp eq <8 x i16> %c, zeroinitializer
  %c4 = select <8 x i1> %c3, <8 x i1> %c1, <8 x i1> %c2
  %s = select <8 x i1> %c4, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: cmpeqz_v16i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q1, zr
; CHECK-NEXT:    vcmpt.i8 ne, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vcmpt.i8 eq, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <16 x i8> %a, zeroinitializer
  %c2 = icmp eq <16 x i8> %b, zeroinitializer
  %c3 = icmp eq <16 x i8> %c, zeroinitializer
  %c4 = select <16 x i1> %c3, <16 x i1> %c1, <16 x i1> %c2
  %s = select <16 x i1> %c4, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

; The <2 x i64> variant: the expected output below uses scalar
; orrs/cset/csel sequences and builds the predicate with bfi + vmsr.
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, r2, d2
; CHECK-NEXT:    cset r0, eq
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, r3, d4
; CHECK-NEXT:    cset r1, eq
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    csel r0, r0, r1, eq
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    cset r12, eq
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, r0, d5
; CHECK-NEXT:    cset r2, eq
; CHECK-NEXT:    orrs r0, r3
; CHECK-NEXT:    csel r0, r12, r2, eq
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <2 x i64> %a, zeroinitializer
  %c2 = icmp eq <2 x i64> %b, zeroinitializer
  %c3 = icmp eq <2 x i64> %c, zeroinitializer
  %c4 = select <2 x i1> %c3, <2 x i1> %c1, <2 x i1> %c2
  %s = select <2 x i1> %c4, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}

; --- icmp ne, vector condition: same shape as above with the predicate flipped ---

define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpnez_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vcmpt.i32 eq, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vcmpt.i32 ne, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp ne <4 x i32> %a, zeroinitializer
  %c2 = icmp ne <4 x i32> %b, zeroinitializer
  %c3 = icmp ne <4 x i32> %c, zeroinitializer
  %c4 = select <4 x i1> %c3, <4 x i1> %c1, <4 x i1> %c2
  %s = select <4 x i1> %c4, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define arm_aapcs_vfpcc <8 x i16> @cmpnez_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: cmpnez_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vcmpt.i16 eq, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vcmpt.i16 ne, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp ne <8 x i16> %a, zeroinitializer
  %c2 = icmp ne <8 x i16> %b, zeroinitializer
  %c3 = icmp ne <8 x i16> %c, zeroinitializer
  %c4 = select <8 x i1> %c3, <8 x i1> %c1, <8 x i1> %c2
  %s = select <8 x i1> %c4, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define arm_aapcs_vfpcc <16 x i8> @cmpnez_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: cmpnez_v16i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vcmpt.i8 eq, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.i8 ne, q0, zr
; CHECK-NEXT:    vcmpt.i8 ne, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp ne <16 x i8> %a, zeroinitializer
  %c2 = icmp ne <16 x i8> %b, zeroinitializer
  %c3 = icmp ne <16 x i8> %c, zeroinitializer
  %c4 = select <16 x i1> %c3, <16 x i1> %c1, <16 x i1> %c2
  %s = select <16 x i1> %c4, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

define arm_aapcs_vfpcc <2 x i64> @cmpnez_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpnez_v2i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, r2, d2
; CHECK-NEXT:    cset r0, ne
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, r3, d4
; CHECK-NEXT:    cset r1, ne
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    csel r0, r0, r1, ne
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    cset r12, ne
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, r0, d5
; CHECK-NEXT:    cset r2, ne
; CHECK-NEXT:    orrs r0, r3
; CHECK-NEXT:    csel r0, r12, r2, ne
; CHECK-NEXT:    rsbs r0, r0, #0
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp ne <2 x i64> %a, zeroinitializer
  %c2 = icmp ne <2 x i64> %b, zeroinitializer
  %c3 = icmp ne <2 x i64> %c, zeroinitializer
  %c4 = select <2 x i1> %c3, <2 x i1> %c1, <2 x i1> %c2
  %s = select <2 x i1> %c4, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}



; --- icmp slt (signed less-than zero), vector condition ---

define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpsltz_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.s32 lt, q1, zr
; CHECK-NEXT:    vcmpt.s32 ge, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.s32 lt, q0, zr
; CHECK-NEXT:    vcmpt.s32 lt, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp slt <4 x i32> %a, zeroinitializer
  %c2 = icmp slt <4 x i32> %b, zeroinitializer
  %c3 = icmp slt <4 x i32> %c, zeroinitializer
  %c4 = select <4 x i1> %c3, <4 x i1> %c1, <4 x i1> %c2
  %s = select <4 x i1> %c4, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define arm_aapcs_vfpcc <8 x i16> @cmpsltz_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: cmpsltz_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.s16 lt, q1, zr
; CHECK-NEXT:    vcmpt.s16 ge, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.s16 lt, q0, zr
; CHECK-NEXT:    vcmpt.s16 lt, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp slt <8 x i16> %a, zeroinitializer
  %c2 = icmp slt <8 x i16> %b, zeroinitializer
  %c3 = icmp slt <8 x i16> %c, zeroinitializer
  %c4 = select <8 x i1> %c3, <8 x i1> %c1, <8 x i1> %c2
  %s = select <8 x i1> %c4, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define arm_aapcs_vfpcc <16 x i8> @cmpsltz_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: cmpsltz_v16i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.s8 lt, q1, zr
; CHECK-NEXT:    vcmpt.s8 ge, q2, zr
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    vpt.s8 lt, q0, zr
; CHECK-NEXT:    vcmpt.s8 lt, q2, zr
; CHECK-NEXT:    vmrs r1, p0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp slt <16 x i8> %a, zeroinitializer
  %c2 = icmp slt <16 x i8> %b, zeroinitializer
  %c3 = icmp slt <16 x i8> %c, zeroinitializer
  %c4 = select <16 x i1> %c3, <16 x i1> %c1, <16 x i1> %c2
  %s = select <16 x i1> %c4, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

define arm_aapcs_vfpcc <2 x i64> @cmpsltz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpsltz_v2i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s9
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    vmov r0, s1
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    cmp.w r3, r2, lsr #31
; CHECK-NEXT:    vmov r2, s7
; CHECK-NEXT:    csel r0, r0, r1, ne
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    asr.w r12, r0, #31
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    cmp.w r3, r0, lsr #31
; CHECK-NEXT:    bfi r3, r12, #0, #8
; CHECK-NEXT:    csel r0, r1, r2, ne
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    bfi r3, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp slt <2 x i64> %a, zeroinitializer
  %c2 = icmp slt <2 x i64> %b, zeroinitializer
  %c3 = icmp slt <2 x i64> %c, zeroinitializer
  %c4 = select <2 x i1> %c3, <2 x i1> %c1, <2 x i1> %c2
  %s = select <2 x i1> %c4, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}



; --- icmp eq, scalar condition: %c is a scalar integer, so %c3 is a single i1
; and %c4 is either all of %c1 or all of %c2. The expected output below
; branches on the scalar (blocks %select.false / %select.end). ---

define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1_i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: cmpeqz_v4i1_i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    cbz r0, .LBB12_2
; CHECK-NEXT:  @ %bb.1: @ %select.false
; CHECK-NEXT:    vcmp.i32 eq, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:  .LBB12_2:
; CHECK-NEXT:    vcmp.i32 eq, q0, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <4 x i32> %a, zeroinitializer
  %c2 = icmp eq <4 x i32> %b, zeroinitializer
  %c3 = icmp eq i32 %c, 0
  %c4 = select i1 %c3, <4 x i1> %c1, <4 x i1> %c2
  %s = select <4 x i1> %c4, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}

define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1_i1(<8 x i16> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: cmpeqz_v8i1_i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    lsls r0, r0, #16
; CHECK-NEXT:    beq .LBB13_2
; CHECK-NEXT:  @ %bb.1: @ %select.false
; CHECK-NEXT:    vcmp.i16 eq, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:  .LBB13_2:
; CHECK-NEXT:    vcmp.i16 eq, q0, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <8 x i16> %a, zeroinitializer
  %c2 = icmp eq <8 x i16> %b, zeroinitializer
  %c3 = icmp eq i16 %c, 0
  %c4 = select i1 %c3, <8 x i1> %c1, <8 x i1> %c2
  %s = select <8 x i1> %c4, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %s
}

define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1_i1(<16 x i8> %a, <16 x i8> %b, i8 %c) {
; CHECK-LABEL: cmpeqz_v16i1_i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    lsls r0, r0, #24
; CHECK-NEXT:    beq .LBB14_2
; CHECK-NEXT:  @ %bb.1: @ %select.false
; CHECK-NEXT:    vcmp.i8 eq, q1, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:  .LBB14_2:
; CHECK-NEXT:    vcmp.i8 eq, q0, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c1 = icmp eq <16 x i8> %a, zeroinitializer
  %c2 = icmp eq <16 x i8> %b, zeroinitializer
  %c3 = icmp eq i8 %c, 0
  %c4 = select i1 %c3, <16 x i1> %c1, <16 x i1> %c2
  %s = select <16 x i1> %c4, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %s
}

define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64 %c) {
; CHECK-LABEL: cmpeqz_v2i1_i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, r4, d3
; CHECK-NEXT:    csetm r12, eq
; CHECK-NEXT:    movs r2, #0
; CHECK-NEXT:    orrs r3, r4
; CHECK-NEXT:    vmov r4, r3, d0
; CHECK-NEXT:    csetm r5, eq
; CHECK-NEXT:    orrs r3, r4
; CHECK-NEXT:    vmov r3, r4, d1
; CHECK-NEXT:    csetm lr, eq
; CHECK-NEXT:    orrs r3, r4
; CHECK-NEXT:    csetm r4, eq
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    beq .LBB15_2
; CHECK-NEXT:  @ %bb.1: @ %select.false
; CHECK-NEXT:    bfi r2, r12, #0, #8
; CHECK-NEXT:    bfi r2, r5, #8, #8
; CHECK-NEXT:    b .LBB15_3
; CHECK-NEXT:  .LBB15_2:
; CHECK-NEXT:    bfi r2, lr, #0, #8
; CHECK-NEXT:    bfi r2, r4, #8, #8
; CHECK-NEXT:  .LBB15_3: @ %select.end
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %c1 = icmp eq <2 x i64> %a, zeroinitializer
  %c2 = icmp eq <2 x i64> %b, zeroinitializer
  %c3 = icmp eq i64 %c, zeroinitializer
  %c4 = select i1 %c3, <2 x i1> %c1, <2 x i1> %c2
  %s = select <2 x i1> %c4, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %s
}