; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -passes=instcombine -mtriple=thumbv8.1m.main %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - | FileCheck %s

; Tests for the MVE VCTP (create tail predicate) intrinsics and for
; predicated vector select (VPSEL) lowering via i16 predicate round-trips.

declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <2 x i1> @llvm.arm.mve.vctp64(i32)

declare i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1>)
declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)

declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

define arm_aapcs_vfpcc zeroext i16 @test_vctp8q(i32 %a) {
; CHECK-LABEL: test_vctp8q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp8q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp8q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vctpt.8 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
  %3 = and <16 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp16q(i32 %a) {
; CHECK-LABEL: test_vctp16q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp16q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp16q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vctpt.16 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
  %3 = and <8 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp32q(i32 %a) {
; CHECK-LABEL: test_vctp32q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp32q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp32q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vctpt.32 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
  %3 = and <4 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
; CHECK-LABEL: test_vctp64q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.64 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp64q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.64 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    ands r1, r0
; CHECK-NEXT:    and r0, r1, #1
; CHECK-NEXT:    ubfx r1, r1, #8, #1
; CHECK-NEXT:    rsbs r2, r0, #0
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    rsbs r1, r1, #0
; CHECK-NEXT:    bfi r0, r2, #0, #8
; CHECK-NEXT:    bfi r0, r1, #8, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = call <2 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %3 = and <2 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

define arm_aapcs_vfpcc <16 x i8> @test_vpselq_i8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vpselq_i16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vpselq_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = select <8 x i1> %1, <8 x half> %a, <8 x half> %b
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vpselq_i32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vpselq_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = select <4 x i1> %1, <4 x float> %a, <4 x float> %b
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vpselq_i64(<2 x i64> %a, <2 x i64> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x double> @test_vpselq_f64(<2 x double> %a, <2 x double> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  %2 = select <2 x i1> %1, <2 x double> %a, <2 x double> %b
  ret <2 x double> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vpselq_i64_2(<2 x i64> %a, <2 x i64> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i64_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = bitcast <2 x i64> %a to <4 x i32>
  %3 = bitcast <2 x i64> %b to <4 x i32>
  %4 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %3
  %5 = bitcast <4 x i32> %4 to <2 x i64>
  ret <2 x i64> %5
}

; NOTE(review): attribute group #2 is referenced by the test_vpselq_* functions
; above but its definition was missing from the garbled source; a reference to
; an undefined group is an LLVM parse error, so a minimal definition is
; restored here -- confirm the exact attribute set against the upstream file.
attributes #2 = { nounwind }