; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

; This tests that various and, sext, and zext operations (among others)
; acting on vscale or on the results of the SVE count instructions can be
; eliminated (via demanded bits) due to their known limited range.

; On AArch64, vscale can be at most 16 (for a 2048-bit vector).
; The count instructions (without a multiplier) return at most 256
; (for a 2048-bit vector of i8s).

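; With vscale_range(1,16), vscale fits in bits 0-4, so the 'and' with 31
; (0x1f) below is redundant. The 'and' with 0xfffffffc still clears bits
; 0-1, but only bits 2-4 can ever be set, which is why it is narrowed to
; the immediate 0x1c.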
define i32 @vscale_and_elimination() vscale_range(1,16) {
; CHECK-LABEL: vscale_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    and w9, w8, #0x1c
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %and_redundant = and i32 %vscale, 31
  %and_required = and i32 %vscale, 4294967292
  %result = add i32 %and_redundant, %and_required
  ret i32 %result
}

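; With the all-true pattern (31) and vscale <= 16, the counts are bounded:
; cntb <= 256, cnth <= 128, cntw <= 64, cntd <= 32. An 'and' whose mask
; covers that whole range is redundant, while an 'and' with 0x3fffffffc
; (17179869180) is narrowed to just the bits the count can actually set.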
define i64 @cntb_and_elimination() {
; CHECK-LABEL: cntb_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x8
; CHECK-NEXT:    and x9, x8, #0x1fc
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cntb = call i64 @llvm.aarch64.sve.cntb(i32 31)
  %and_redundant = and i64 %cntb, 511
  %and_required = and i64 %cntb, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

define i64 @cnth_and_elimination() {
; CHECK-LABEL: cnth_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    and x9, x8, #0xfc
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cnth = call i64 @llvm.aarch64.sve.cnth(i32 31)
  %and_redundant = and i64 %cnth, 1023
  %and_required = and i64 %cnth, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

define i64 @cntw_and_elimination() {
; CHECK-LABEL: cntw_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    and x9, x8, #0x7c
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cntw = call i64 @llvm.aarch64.sve.cntw(i32 31)
  %and_redundant = and i64 %cntw, 127
  %and_required = and i64 %cntw, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

define i64 @cntd_and_elimination() {
; CHECK-LABEL: cntd_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    and x9, x8, #0x3c
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cntd = call i64 @llvm.aarch64.sve.cntd(i32 31)
  %and_redundant = and i64 %cntd, 63
  %and_required = and i64 %cntd, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

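; vscale and the count results are far below 2^31, so truncating to i32
; and zero-extending back to i64 changes nothing and both casts fold away.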
define i64 @vscale_trunc_zext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    lsr x0, x8, #4
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %zext = zext i32 %vscale to i64
  ret i64 %zext
}

define i64 @vscale_trunc_sext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    lsr x0, x8, #4
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %sext = sext i32 %vscale to i64
  ret i64 %sext
}

define i64 @count_bytes_trunc_zext() {
; CHECK-LABEL: count_bytes_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

define i64 @count_halfs_trunc_zext() {
; CHECK-LABEL: count_halfs_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

define i64 @count_words_trunc_zext() {
; CHECK-LABEL: count_words_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

define i64 @count_doubles_trunc_zext() {
; CHECK-LABEL: count_doubles_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

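; The same holds for sign extension: bit 31 of the truncated value is
; known zero, so the sext behaves like a zext and also folds away.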
define i64 @count_bytes_trunc_sext() {
; CHECK-LABEL: count_bytes_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

define i64 @count_halfs_trunc_sext() {
; CHECK-LABEL: count_halfs_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

define i64 @count_words_trunc_sext() {
; CHECK-LABEL: count_words_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

define i64 @count_doubles_trunc_sext() {
; CHECK-LABEL: count_doubles_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

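; A multiplier scales the known range: vscale * 5 <= 80, so the 'and' with
; 127 below is redundant, while the 'and' with 63 can still clear bit 6 and
; must be kept. For vscale * -5 the product lies in [-80, -5], so bits 7
; and up are known ones and the 'or' with 0xffffff80 is redundant.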
define i32 @vscale_with_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_multiplier:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov w9, #5 // =0x5
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    mul x8, x8, x9
; CHECK-NEXT:    and w9, w8, #0x3f
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, 5
  %and_redundant = and i32 %mul, 127
  %and_required = and i32 %mul, 63
  %result = add i32 %and_redundant, %and_required
  ret i32 %result
}

define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_negative_multiplier:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov x9, #-5 // =0xfffffffffffffffb
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    mul x8, x8, x9
; CHECK-NEXT:    and w9, w8, #0xffffffc0
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, -5
  %or_redundant = or i32 %mul, 4294967168
  %and_required = and i32 %mul, 4294967232
  %result = add i32 %or_redundant, %and_required
  ret i32 %result
}

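; For a power-of-two multiplier the multiply can be folded into a count
; instruction: vscale * -2 is simply -(cntd) here. The product lies in
; [-32, -2], so the 'or' with 0xffffffe0 is redundant, while the 'or' with
; 0xfffffff0 can still set bit 4 and is kept.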
define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    neg x9, x8
; CHECK-NEXT:    orr w9, w9, #0xfffffff0
; CHECK-NEXT:    sub w0, w9, w8
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, -2
  %or_redundant = or i32 %mul, 4294967264
  %or_required = or i32 %mul, 4294967280
  %result = add i32 %or_redundant, %or_required
  ret i32 %result
}

declare i32 @llvm.vscale.i32()
declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
declare i64 @llvm.aarch64.sve.cntd(i32 %pattern)