; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll (revision 90b83a6d6caa651ea32987c94955ed18fffcb40c)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; INCP

; An all-active cntp, truncated to i32 and added to %x, is selected as a single
; INCP on the 64-bit register (with kill markers for the w0/x0 sub-register).
define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

; Same combine as above for the nxv8i1 (halfword-element) predicate: INCP .h.
define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

; Same combine for the nxv4i1 (word-element) predicate: INCP .s.
define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

; Same combine for the nxv2i1 (doubleword-element) predicate: INCP .d.
define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

; The governing predicate is an all-active nxv16i1 ptrue reinterpreted to
; nxv8i1 via convert.from.svbool; it is still recognised as all active, so the
; INCP combine fires.
define i32 @cntp_add_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
  %4 = trunc i64 %3 to i32
  %add = add i32 %4, %x
  ret i32 %add
}

; Negative test: the i64 cntp result has a second (untruncated) use in the mul,
; so the INCP combine must not fire and an explicit cntp is kept.
define i64 @cntp_add_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    add w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  %add.ext = zext i32 %add to i64
  %mul = mul i64 %2, %y
  %res = add i64 %add.ext, %mul
  ret i64 %res
}

; cntp with the same predicate for both operands (counting a predicate under
; itself) also folds to INCP.
define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

; Same-predicate cntp fold for nxv8i1: INCP .h.
define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

; Same-predicate cntp fold for nxv4i1: INCP .s.
define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

; Same-predicate cntp fold for nxv2i1: INCP .d.
define i32 @cntp_add_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

; Negative test: the same-predicate cntp result is also used by the mul, so no
; INCP fold — an explicit cntp is kept.
define i64 @cntp_add_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    add w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  %add.ext = zext i32 %add to i64
  %mul = mul i64 %1, %y
  %res = add i64 %add.ext, %mul
  ret i64 %res
}

; DECP

; An all-active cntp, truncated to i32 and subtracted from %x, is selected as a
; single DECP on the 64-bit register.
define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

; Same DECP combine for the nxv8i1 predicate: DECP .h.
define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

; Same DECP combine for the nxv4i1 predicate: DECP .s.
define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

; Same DECP combine for the nxv2i1 predicate: DECP .d.
define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

; All-active nxv16i1 ptrue reinterpreted to nxv8i1 via convert.from.svbool is
; still recognised as all active, so the DECP combine fires.
define i32 @cntp_sub_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
  %4 = trunc i64 %3 to i32
  %sub = sub i32 %x, %4
  ret i32 %sub
}

; Negative test: the i64 cntp result is also used by the mul (and the sub takes
; the count as its LHS), so no DECP fold — an explicit cntp is kept.
define i64 @cntp_sub_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    sub w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %3, %x
  %sub.ext = zext i32 %sub to i64
  %mul = mul i64 %2, %y
  %res = add i64 %sub.ext, %mul
  ret i64 %res
}

; cntp with the same predicate for both operands, subtracted from %x, folds to
; DECP.
define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

; Same-predicate cntp→DECP fold for nxv8i1: DECP .h.
define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

; Same-predicate cntp→DECP fold for nxv4i1: DECP .s.
define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

; Same-predicate cntp→DECP fold for nxv2i1: DECP .d.
define i32 @cntp_sub_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

; Negative test: the same-predicate cntp result is also used by the mul, so no
; DECP fold — an explicit cntp is kept.
define i64 @cntp_sub_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    sub w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %2, %x
  %sub.ext = zext i32 %sub to i64
  %mul = mul i64 %1, %y
  %res = add i64 %sub.ext, %mul
  ret i64 %res
}

; Intrinsic declarations used by the tests above.
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)

; +sve is required so llc can select the SVE cntp/incp/decp instructions.
attributes #0 = { "target-features"="+sve" }
