; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s

; Code generation checks for the @llvm.arm.mve.vqdmull intrinsics.
;
; Each test calls either the plain or the ".predicated" form of the intrinsic
; and checks the single instruction llc is expected to select for it:
;   * third (i32) operand 0 -> vqdmullb, operand 1 -> vqdmullt
;   * "_m"  tests build the predicate with @llvm.arm.mve.pred.i2v from an i16
;     argument and expect a vmsr p0 / vpst pair in front of the instruction
;   * "_n"  tests pass a scalar splatted with insertelement + shufflevector
;     and expect the instruction to take that scalar straight from a GPR

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)

declare <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32)
declare <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32>, <4 x i32>, i32)
declare <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, <4 x i1>, <4 x i32>)
declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32>, <4 x i32>, i32, <2 x i1>, <2 x i64>)

; ---------------------------------------------------------------------------
; vqdmullb: intrinsic called with i32 0 as its third operand.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmullbq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0)
  ret <4 x i32> %0
}

; The .s32 form is expected to write a fresh register (q2) and copy it back
; to q0 with a vmov, unlike the .s16 form above which writes q0 directly.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmullbq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s32 q2, q0, q1
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
  ret <2 x i64> %0
}

; Predicated: %inactive is passed as the last operand of the intrinsic and
; arrives in q0, which is also the destination of the checked instruction.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %b, i32 0, <2 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

; Scalar operand: %b is splatted in IR; the checked instruction reads it
; directly from r0.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_n_s16(<8 x i16> %a, i16 signext %b) {
; CHECK-LABEL: test_vqdmullbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %.splat, i32 0)
  ret <4 x i32> %0
}

; NOTE(review): this is the only function that references attribute group #0,
; and no "attributes #0" definition is visible in this file - confirm whether
; the reference is intentional or a leftover.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_n_s32(<4 x i32> %a, i32 %b) #0 {
; CHECK-LABEL: test_vqdmullbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullb.s32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %.splat, i32 0)
  ret <2 x i64> %0
}

; Predicated + scalar operand: the scalar is expected in r0 and the predicate
; bits in r1.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmullbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 signext %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %.splat, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_n_s32(<2 x i64> %inactive, <4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmullbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmullbt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %.splat, i32 0, <2 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

; ---------------------------------------------------------------------------
; vqdmullt: same eight shapes as above, intrinsic called with i32 1.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmulltq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmulltq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s32 q2, q0, q1
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %b, i32 1, <2 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_n_s16(<8 x i16> %a, i16 signext %b) {
; CHECK-LABEL: test_vqdmulltq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %.splat, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqdmulltq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmullt.s32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %.splat, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmulltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 signext %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <8 x i16> undef, i16 %b, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %.splat, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmulltq_m_n_s32(<2 x i64> %inactive, <4 x i32> %a, i32 %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmulltq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmulltt.s32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %.splatinsert = insertelement <4 x i32> undef, i32 %b, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> %a, <4 x i32> %.splat, i32 1, <2 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}