; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -o - %s | FileCheck %s
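
; This file tests the automatic upgrade of the 64-bit (v2i64) MVE and CDE
; intrinsics from the old <4 x i1> predicate type to <2 x i1>: each call is
; rewritten to the v2i1 variant, with predicate values converted between the
; two types via an llvm.arm.mve.pred.v2i / pred.i2v round trip so that the
; surrounding IR keeps its original types.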

declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, <4 x i1>, <2 x i64>)
declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <2 x i64>)

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr, <2 x i64>, i32, i32, i32, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)

declare <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)

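; vctp64 is upgraded to return <2 x i1>; the original <4 x i1> value is
; recreated from it with a pred.v2i/pred.i2v round trip.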
define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
; CHECK-LABEL: @test_vctp64q(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT:    ret i16 [[TMP4]]
;
entry:
  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

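; The upgraded <2 x i1> vctp64 result is widened back to <4 x i1> so that
; the 'and' with the incoming predicate keeps its original type.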
define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: @test_vctp64q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP1]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; CHECK-NEXT:    ret i16 [[TMP7]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %3 = and <4 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

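; Predicated multiplies producing v2i64: the <4 x i1> predicate is narrowed
; to <2 x i1> before the upgraded call.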
define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vmullbq_int_m_s32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

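; Same predicate conversion for vqdmull.predicated.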
define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vqdmullbq_m_s32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

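; Gather load from a vector of base addresses, upgraded to the v2i1 form.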
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
  ret <2 x i64> %2
}

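; Write-back gather: the upgraded call still returns the {result, updated
; base} pair; only the predicate type changes.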
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 664, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 1
; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[ADDR]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 0
; CHECK-NEXT:    ret <2 x i64> [[TMP7]]
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

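; Gather load from a base pointer plus a vector of offsets.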
define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_offset_z_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

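; Scatter store to a vector of base addresses.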
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
  ret void
}

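; Write-back scatter: the upgraded call returns the updated base vector.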
define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP4]])
; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[ADDR]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

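; Scatter store to a base pointer plus a vector of offsets.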
define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT:    ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
  ret void
}

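; The CDE vcx*q.predicated intrinsics on v2i64 get the same treatment: only
; the predicate operand changes from <4 x i1> to <2 x i1>.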
define <8 x i16> @test_vcx1q_m(<2 x i64> %inactive, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], i32 1111, <2 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, i32 1111, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <8 x i16>
  ret <8 x i16> %3
}

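; vcx1qa: the accumulating form (note the %acc operand).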
define <16 x i8> @test_vcx1qa_m(<2 x i64> %acc, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], i32 1112, <2 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, i32 1112, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  ret <16 x i8> %3
}

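; vcx2q takes an additional <16 x i8> operand, here bitcast from <4 x float>.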
define <4 x i32> @test_vcx2q_m(<2 x i64> %inactive, <4 x float> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], i32 111, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP6]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, i32 111, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x i32>
  ret <4 x i32> %4
}

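; vcx2qa: accumulating form of vcx2q.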
define <4 x float> @test_vcx2qa_m(<2 x i64> %acc, <8 x half> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], <16 x i8> [[TMP0]], i32 112, <2 x i1> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x float>
; CHECK-NEXT:    ret <4 x float> [[TMP6]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, <16 x i8> %0, i32 112, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x float>
  ret <4 x float> %4
}

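; vcx3q takes two additional <16 x i8> operands.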
define <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3q_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[M:%.*]], i32 11, <2 x i1> [[TMP4]])
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %m, i32 11, <4 x i1> %2)
  ret <2 x i64> %3
}

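; vcx3qa: accumulating form of vcx3q.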
define <8 x half> @test_vcx3qa_m(<2 x i64> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3qa_m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[M:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, <2 x i1> [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <8 x half>
; CHECK-NEXT:    ret <8 x half> [[TMP7]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = bitcast <4 x i32> %m to <16 x i8>
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %1, i32 12, <4 x i1> %3)
  %5 = bitcast <2 x i64> %4 to <8 x half>
  ret <8 x half> %5
}