; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -o - %s | FileCheck %s
;
; Every function below calls a 64-bit MVE/CDE predicated intrinsic through a
; legacy overload that takes a <4 x i1> predicate.  The CHECK lines show that
; after a plain "opt -S" round-trip each call instead uses the <2 x i1>
; overload, with the predicate value converted through its i32 representation
; (llvm.arm.mve.pred.v2i.v4i1 followed by llvm.arm.mve.pred.i2v.v2i1, or the
; reverse pair for vctp64's result).  No passes are requested on the RUN line,
; so this rewriting happens while the IR is ingested — presumably in the
; intrinsic auto-upgrader.

; Legacy <4 x i1> declarations of the 64-bit MVE predicate/arithmetic
; intrinsics under test.
declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, <4 x i1>, <2 x i64>)
declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <2 x i64>)

; Legacy <4 x i1> declarations of the 64-bit gather/scatter intrinsics.
declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr, <2 x i64>, i32, i32, i32, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)

; Legacy <4 x i1> declarations of the CDE predicated intrinsics operating on
; <2 x i64> vectors.
declare <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)

; vctp64's <4 x i1> result becomes a <2 x i1> vctp64 whose result is converted
; back to <4 x i1> through i32 before the original v2i use.
define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
; CHECK-LABEL: @test_vctp64q(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT:    ret i16 [[TMP4]]
;
entry:
  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

; Masked vctp64: only the vctp64 call is retyped; the `and` with the incoming
; predicate still happens on <4 x i1> after the i32 round-trip.
define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: @test_vctp64q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP1]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; CHECK-NEXT:    ret i16 [[TMP7]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %3 = and <4 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

; The <4 x i1> predicate operand of the v2i64 mull intrinsic is converted to
; <2 x i1> (via i32) and the call retargeted to the .v2i1 overload.
define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vmullbq_int_m_s32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

; Same predicate retyping for the v2i64 vqdmull intrinsic.
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vqdmullbq_m_s32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

; 64-bit predicated gather (vector base): predicate retyped to <2 x i1>.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
  ret <2 x i64> %2
}

; Write-back gather: the struct-returning .wb intrinsic gets the same
; predicate conversion; extractvalue/store handling is untouched.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 664, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 1
; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[ADDR]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 0
; CHECK-NEXT:    ret <2 x i64> [[TMP7]]
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

; 64-bit predicated gather with pointer base + vector offsets.
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_offset_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

; 64-bit predicated scatter (vector base), void result.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
  ret void
}

; Write-back scatter: returns the updated base vector, which is stored back.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP4]])
; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[ADDR]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

; 64-bit predicated scatter with pointer base + vector offsets.
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
  ret void
}

; CDE vcx1q on a <2 x i64> vector: predicate operand retyped to <2 x i1>.
define <8 x i16> @test_vcx1q_m(<2 x i64> %inactive, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], i32 1111, <2 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, i32 1111, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <8 x i16>
  ret <8 x i16> %3
}

; CDE vcx1qa (accumulating form), same predicate retyping.
define <16 x i8> @test_vcx1qa_m(<2 x i64> %acc, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], i32 1112, <2 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, i32 1112, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  ret <16 x i8> %3
}

; CDE vcx2q with one extra vector operand (passed as <16 x i8>).
define <4 x i32> @test_vcx2q_m(<2 x i64> %inactive, <4 x float> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], i32 111, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP6]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, i32 111, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x i32>
  ret <4 x i32> %4
}

; CDE vcx2qa (accumulating form).
define <4 x float> @test_vcx2qa_m(<2 x i64> %acc, <8 x half> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], <16 x i8> [[TMP0]], i32 112, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x float>
; CHECK-NEXT:    ret <4 x float> [[TMP6]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, <16 x i8> %0, i32 112, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x float>
  ret <4 x float> %4
}

; CDE vcx3q with two extra vector operands.
define <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[M:%.*]], i32 11, <2 x i1> [[TMP4]])
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %m, i32 11, <4 x i1> %2)
  ret <2 x i64> %3
}

; CDE vcx3qa (accumulating form).
define <8 x half> @test_vcx3qa_m(<2 x i64> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[M:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, <2 x i1> [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <8 x half>
; CHECK-NEXT:    ret <8 x half> [[TMP7]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = bitcast <4 x i32> %m to <16 x i8>
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %1, i32 12, <4 x i1> %3)
  %5 = bitcast <2 x i64> %4 to <8 x half>
  ret <8 x half> %5
}