; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Code generation tests for the 128-bit vector forms of the Arm CDE intrinsics
; (llvm.arm.cde.vcx{1,2,3}q and their accumulating "a" variants), followed by
; the predicated variants of the same intrinsics.
;
; In every test the leading immarg selects the coprocessor that appears as pN
; in the expected assembly, and the trailing immarg is emitted as the #imm
; operand. Only coprocessors 0 and 1 are enabled on the llc command line above.

; Unpredicated intrinsics: all vector operands and results are <16 x i8>, so
; the tests bitcast other 128-bit vector types to and from that type.
declare <16 x i8> @llvm.arm.cde.vcx1q(i32 immarg, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx1qa(i32 immarg, <16 x i8>, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx2q(i32 immarg, <16 x i8>, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx2qa(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx3q(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx3qa(i32 immarg, <16 x i8>, <16 x i8>, <16 x i8>, i32 immarg)

; vcx1q takes no vector input; a single vcx1 writing q0 is expected.
define arm_aapcs_vfpcc <16 x i8> @test_vcx1q_u8() {
; CHECK-LABEL: test_vcx1q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx1 p0, q0, #1111
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.cde.vcx1q(i32 0, i32 1111)
  ret <16 x i8> %0
}

; vcx1qa with a <16 x i8> accumulator on coprocessor 1; q0 is both the
; accumulator input and the result in the expected code.
define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_1(<16 x i8> %acc) {
; CHECK-LABEL: test_vcx1qa_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx1a p1, q0, #1112
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 1, <16 x i8> %acc, i32 1112)
  ret <16 x i8> %0
}

; Same as above with a <4 x i32> accumulator: the bitcasts on either side of
; the call are expected to produce no extra instructions.
define arm_aapcs_vfpcc <4 x i32> @test_vcx1qa_2(<4 x i32> %acc) {
; CHECK-LABEL: test_vcx1qa_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx1a p0, q0, #1113
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <4 x i32> %acc to <16 x i8>
  %1 = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 0, <16 x i8> %0, i32 1113)
  %2 = bitcast <16 x i8> %1 to <4 x i32>
  ret <4 x i32> %2
}

; vcx2q with an <8 x half> input returned as <16 x i8>; q0 is expected as both
; source and destination.
define arm_aapcs_vfpcc <16 x i8> @test_vcx2q_u8(<8 x half> %n) {
; CHECK-LABEL: test_vcx2q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx2 p1, q0, q0, #111
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %0, i32 111)
  ret <16 x i8> %1
}

; vcx2q with <4 x float> as both the input and the result type.
define arm_aapcs_vfpcc <4 x float> @test_vcx2q(<4 x float> %n) {
; CHECK-LABEL: test_vcx2q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx2 p1, q0, q0, #112
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %0, i32 112)
  %2 = bitcast <16 x i8> %1 to <4 x float>
  ret <4 x float> %2
}

; vcx2qa: the accumulator (%acc) is expected in q0 and the input (%n) in q1.
define arm_aapcs_vfpcc <4 x float> @test_vcx2qa(<4 x float> %acc, <2 x i64> %n) {
; CHECK-LABEL: test_vcx2qa:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx2a p0, q0, q1, #113
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <4 x float> %acc to <16 x i8>
  %1 = bitcast <2 x i64> %n to <16 x i8>
  %2 = call <16 x i8> @llvm.arm.cde.vcx2qa(i32 0, <16 x i8> %0, <16 x i8> %1, i32 113)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

; vcx3q with two inputs of different element types, returned as <16 x i8>.
define arm_aapcs_vfpcc <16 x i8> @test_vcx3q_u8(<8 x i16> %n, <4 x i32> %m) {
; CHECK-LABEL: test_vcx3q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx3 p0, q0, q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <8 x i16> %n to <16 x i8>
  %1 = bitcast <4 x i32> %m to <16 x i8>
  %2 = call <16 x i8> @llvm.arm.cde.vcx3q(i32 0, <16 x i8> %0, <16 x i8> %1, i32 11)
  ret <16 x i8> %2
}

; vcx3q on coprocessor 1 with a <2 x i64> result.
define arm_aapcs_vfpcc <2 x i64> @test_vcx3q(<2 x i64> %n, <4 x float> %m) {
; CHECK-LABEL: test_vcx3q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx3 p1, q0, q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <2 x i64> %n to <16 x i8>
  %1 = bitcast <4 x float> %m to <16 x i8>
  %2 = call <16 x i8> @llvm.arm.cde.vcx3q(i32 1, <16 x i8> %0, <16 x i8> %1, i32 12)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

; vcx3qa: accumulator in q0, inputs in q1 and q2 in the expected code.
define arm_aapcs_vfpcc <16 x i8> @test_vcx3qa(<16 x i8> %acc, <8 x i16> %n, <4 x float> %m) {
; CHECK-LABEL: test_vcx3qa:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx3a p1, q0, q1, q2, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <8 x i16> %n to <16 x i8>
  %1 = bitcast <4 x float> %m to <16 x i8>
  %2 = call <16 x i8> @llvm.arm.cde.vcx3qa(i32 1, <16 x i8> %acc, <16 x i8> %0, <16 x i8> %1, i32 13)
  ret <16 x i8> %2
}

; Predicated intrinsics. Each is overloaded on the type of its first vector
; operand / result and on the predicate type; the remaining vector inputs stay
; <16 x i8>. Every test below zero-extends the i16 %p, converts it to a vector
; predicate with llvm.arm.mve.pred.i2v, and passes it as the last operand. The
; expected code moves r0 into p0 with vmsr, issues vpst, and then uses the
; T-suffixed form of the instruction.
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
declare <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 immarg, <8 x i16>, i32 immarg, <8 x i1>)
declare <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 immarg, <16 x i8>, i32 immarg, <16 x i1>)
declare <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 immarg, <4 x i32>, <16 x i8>, i32 immarg, <4 x i1>)
declare <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <2 x i1>)
declare <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)

; Predicated vcx1q on <8 x i16> with an <8 x i1> predicate; %inactive is the
; first vector operand and is expected in q0, which is also the destination.
define arm_aapcs_vfpcc <8 x i16> @test_vcx1q_m(<8 x i16> %inactive, i16 zeroext %p) {
; CHECK-LABEL: test_vcx1q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx1t p0, q0, #1111
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 0, <8 x i16> %inactive, i32 1111, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vcx1qa on <16 x i8> with a <16 x i1> predicate, coprocessor 1.
define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_m(<16 x i8> %acc, i16 zeroext %p) {
; CHECK-LABEL: test_vcx1qa_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx1at p1, q0, #1112
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 1, <16 x i8> %acc, i32 1112, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vcx2q: %inactive expected in q0 (also the destination), %n in q1.
define arm_aapcs_vfpcc <4 x i32> @test_vcx2q_m(<4 x i32> %inactive, <4 x float> %n, i16 zeroext %p) {
; CHECK-LABEL: test_vcx2q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx2t p0, q0, q1, #111
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 0, <4 x i32> %inactive, <16 x i8> %0, i32 111, <4 x i1> %2)
  ret <4 x i32> %3
}

; Predicated vcx2qa with a <4 x float> accumulator and an <8 x half> input.
define arm_aapcs_vfpcc <4 x float> @test_vcx2qa_m(<4 x float> %acc, <8 x half> %n, i16 zeroext %p) {
; CHECK-LABEL: test_vcx2qa_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx2at p0, q0, q1, #112
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %0, i32 112, <4 x i1> %2)
  ret <4 x float> %3
}

; Predicated vcx3q on <2 x i64> with a <2 x i1> predicate; %m is already
; <16 x i8> and is passed without a bitcast.
define arm_aapcs_vfpcc <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
; CHECK-LABEL: test_vcx3q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx3t p0, q0, q1, q2, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %m, i32 11, <2 x i1> %2)
  ret <2 x i64> %3
}

; Predicated vcx3qa: the <4 x float> result is bitcast to <8 x half> for the
; return. The parameter named %inactive is the operand passed in the
; accumulator position of the intrinsic.
define arm_aapcs_vfpcc <8 x half> @test_vcx3qa_m(<4 x float> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
; CHECK-LABEL: test_vcx3qa_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx3at p0, q0, q1, q2, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = bitcast <4 x i32> %m to <16 x i8>
  %2 = zext i16 %p to i32
  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = call <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <16 x i8> %0, <16 x i8> %1, i32 12, <4 x i1> %3)
  %5 = bitcast <4 x float> %4 to <8 x half>
  ret <8 x half> %5
}