; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Code-generation test for concatenating i1 predicate vectors with MVE enabled
; (target and features are given on the RUN line above). Each function builds
; a wider i1 vector from narrower ones using a shufflevector whose mask is the
; identity sequence 0..N-1 over both inputs, then uses the result as the
; condition of a select between %c and zeroinitializer.
;
; The assertion lines inside each function are autogenerated (see the NOTE on
; the first line); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.

; Two <2 x i1> values, each produced from an i32 argument by the
; llvm.arm.mve.pred.i2v.v2i1 intrinsic, are concatenated into a <4 x i1> that
; selects between the lanes of %c and zero.
define arm_aapcs_vfpcc <4 x i32> @concat_v2i1(i32 %a, i32 %b, <4 x i32> %c) {
; CHECK-LABEL: concat_v2i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vmov.i8 q1, #0x0
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    vpsel q3, q2, q1
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q1, q2, q1
; CHECK-NEXT:    vmov r1, s12
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r1
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    vmov q2[3], q2[1], r1, r0
; CHECK-NEXT:    vcmp.i32 ne, q2, zr
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %ai = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %a)
  %bi = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %b)
  %s = shufflevector <2 x i1> %ai, <2 x i1> %bi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ci = select <4 x i1> %s, <4 x i32> %c, <4 x i32> zeroinitializer
  ret <4 x i32> %ci
}

; Intrinsic used by concat_v2i1: takes an i32 and returns a <2 x i1>.
declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)


; Two <4 x i1> compare results (signed "less than zero" on %a and %b) are
; concatenated into an <8 x i1> that selects between the lanes of %c and zero.
define arm_aapcs_vfpcc <8 x i16> @concat_v4i1(<4 x i32> %a, <4 x i32> %b, <8 x i16> %c) {
; CHECK-LABEL: concat_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vmov.i8 q3, #0x0
; CHECK-NEXT:    vmov.i8 q4, #0xff
; CHECK-NEXT:    vcmp.s32 lt, q1, zr
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vpsel q1, q4, q3
; CHECK-NEXT:    vcmp.s32 lt, q0, zr
; CHECK-NEXT:    vpsel q0, q4, q3
; CHECK-NEXT:    vstrh.32 q1, [r0, #8]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vcmp.i16 ne, q1, zr
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  %ai = icmp slt <4 x i32> %a, zeroinitializer
  %bi = icmp slt <4 x i32> %b, zeroinitializer
  %s = shufflevector <4 x i1> %ai, <4 x i1> %bi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %ci = select <8 x i1> %s, <8 x i16> %c, <8 x i16> zeroinitializer
  ret <8 x i16> %ci
}

; Two <8 x i1> compare results (signed "less than zero" on %a and %b) are
; concatenated into a <16 x i1> that selects between the lanes of %c and zero.
define arm_aapcs_vfpcc <16 x i8> @concat_v8i1(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c) {
; CHECK-LABEL: concat_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vmov.i8 q3, #0x0
; CHECK-NEXT:    vmov.i8 q4, #0xff
; CHECK-NEXT:    vcmp.s16 lt, q1, zr
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vpsel q1, q4, q3
; CHECK-NEXT:    vcmp.s16 lt, q0, zr
; CHECK-NEXT:    vpsel q0, q4, q3
; CHECK-NEXT:    vstrb.16 q1, [r0, #8]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vcmp.i8 ne, q1, zr
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  %ai = icmp slt <8 x i16> %a, zeroinitializer
  %bi = icmp slt <8 x i16> %b, zeroinitializer
  %s = shufflevector <8 x i1> %ai, <8 x i1> %bi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ci = select <16 x i1> %s, <16 x i8> %c, <16 x i8> zeroinitializer
  ret <16 x i8> %ci
}


; Two-level concatenation: four <4 x i1> compare results are first joined
; pairwise (%a with %b, %d with %e) into two <8 x i1> values, which are then
; joined into a single <16 x i1> that selects between the lanes of %c and zero.
define arm_aapcs_vfpcc <16 x i8> @concat_v48i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d, <4 x i32> %e, <16 x i8> %c) {
; CHECK-LABEL: concat_v48i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    .pad #48
; CHECK-NEXT:    sub sp, #48
; CHECK-NEXT:    vmov.i8 q4, #0x0
; CHECK-NEXT:    vmov.i8 q5, #0xff
; CHECK-NEXT:    vcmp.s32 lt, q3, zr
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vpsel q3, q5, q4
; CHECK-NEXT:    vcmp.s32 lt, q2, zr
; CHECK-NEXT:    vpsel q2, q5, q4
; CHECK-NEXT:    vcmp.s32 lt, q1, zr
; CHECK-NEXT:    vpsel q1, q5, q4
; CHECK-NEXT:    vcmp.s32 lt, q0, zr
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    vpsel q0, q5, q4
; CHECK-NEXT:    vstrh.32 q3, [r0, #8]
; CHECK-NEXT:    vstrh.32 q2, [r0]
; CHECK-NEXT:    vstrh.32 q1, [r1, #8]
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vpsel q0, q5, q4
; CHECK-NEXT:    vstrb.16 q0, [r0, #8]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    add r1, sp, #80
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vcmp.i16 ne, q0, zr
; CHECK-NEXT:    vpsel q0, q5, q4
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vldrw.u32 q2, [r0]
; CHECK-NEXT:    vcmp.i8 ne, q2, zr
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    add sp, #48
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    bx lr
entry:
  %ai = icmp slt <4 x i32> %a, zeroinitializer
  %bi = icmp slt <4 x i32> %b, zeroinitializer
  %di = icmp slt <4 x i32> %d, zeroinitializer
  %ei = icmp slt <4 x i32> %e, zeroinitializer
  %s1 = shufflevector <4 x i1> %ai, <4 x i1> %bi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s2 = shufflevector <4 x i1> %di, <4 x i1> %ei, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s = shufflevector <8 x i1> %s1, <8 x i1> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ci = select <16 x i1> %s, <16 x i8> %c, <16 x i8> zeroinitializer
  ret <16 x i8> %ci
}