; xref: /llvm-project/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll (revision d78fe84d49b3db675ae4c502ead38ce9e5c2539f)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32I
; RUN: llc -mtriple=riscv32 -global-isel -mattr=+zbb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32ZBB
6
; ctlz of i32 (zero is defined, i1 false): RV32I expands to bit-smear + popcount
; with a zero-input guard; Zbb selects a single clz.
declare i32 @llvm.ctlz.i32(i32, i1)

define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I-LABEL: ctlz_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    beqz a0, .LBB0_2
; RV32I-NEXT:  # %bb.1: # %cond.false
; RV32I-NEXT:    srli a1, a0, 1
; RV32I-NEXT:    lui a2, 349525
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    addi a1, a2, 1365
; RV32I-NEXT:    srli a2, a0, 2
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    srli a2, a0, 4
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    srli a2, a0, 8
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    srli a2, a0, 16
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    srli a2, a0, 1
; RV32I-NEXT:    and a1, a2, a1
; RV32I-NEXT:    lui a2, 209715
; RV32I-NEXT:    addi a2, a2, 819
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    lui a2, 61681
; RV32I-NEXT:    addi a2, a2, -241
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    slli a1, a0, 8
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    sub a0, a1, a0
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB0_2:
; RV32I-NEXT:    li a0, 32
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctlz_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    clz a0, a0
; RV32ZBB-NEXT:    ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  ret i32 %1
}
59
; ctlz of i64 on RV32: split on whether the high word is nonzero; the low-word
; path adds 32. Zbb reduces each half to clz plus the same branch.
declare i64 @llvm.ctlz.i64(i64, i1)

define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 349525
; RV32I-NEXT:    lui a3, 209715
; RV32I-NEXT:    lui a6, 61681
; RV32I-NEXT:    addi a5, a2, 1365
; RV32I-NEXT:    addi a4, a3, 819
; RV32I-NEXT:    addi a3, a6, -241
; RV32I-NEXT:    li a2, 32
; RV32I-NEXT:    beqz a1, .LBB1_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    srli a0, a1, 1
; RV32I-NEXT:    or a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 8
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 16
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 1
; RV32I-NEXT:    and a1, a1, a5
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    and a0, a0, a3
; RV32I-NEXT:    slli a1, a0, 8
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    sub a0, a2, a0
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB1_2:
; RV32I-NEXT:    srli a1, a0, 1
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 8
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 16
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 1
; RV32I-NEXT:    and a1, a1, a5
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    and a0, a0, a3
; RV32I-NEXT:    slli a1, a0, 8
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    sub a2, a2, a0
; RV32I-NEXT:    addi a0, a2, 32
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctlz_i64:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    beqz a1, .LBB1_2
; RV32ZBB-NEXT:  # %bb.1:
; RV32ZBB-NEXT:    clz a0, a1
; RV32ZBB-NEXT:    li a1, 0
; RV32ZBB-NEXT:    ret
; RV32ZBB-NEXT:  .LBB1_2:
; RV32ZBB-NEXT:    clz a0, a0
; RV32ZBB-NEXT:    addi a0, a0, 32
; RV32ZBB-NEXT:    li a1, 0
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
  ret i64 %1
}
148
; cttz of i32 (zero is defined): RV32I isolates the lowest set bit via
; ~x & (x-1) then popcounts; Zbb selects a single ctz.
declare i32 @llvm.cttz.i32(i32, i1)

define i32 @cttz_i32(i32 %a) nounwind {
; RV32I-LABEL: cttz_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    beqz a0, .LBB2_2
; RV32I-NEXT:  # %bb.1: # %cond.false
; RV32I-NEXT:    not a1, a0
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    lui a2, 349525
; RV32I-NEXT:    and a0, a1, a0
; RV32I-NEXT:    addi a1, a2, 1365
; RV32I-NEXT:    srli a2, a0, 1
; RV32I-NEXT:    and a1, a2, a1
; RV32I-NEXT:    lui a2, 209715
; RV32I-NEXT:    addi a2, a2, 819
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    lui a2, 61681
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    addi a1, a2, -241
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 8
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB2_2:
; RV32I-NEXT:    li a0, 32
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: cttz_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    ctz a0, a0
; RV32ZBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  ret i32 %1
}
192
; cttz of i64 on RV32: branch on whether the low word is nonzero; the high-word
; path adds 32. Zbb uses ctz on the selected half.
declare i64 @llvm.cttz.i64(i64, i1)

define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-LABEL: cttz_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 349525
; RV32I-NEXT:    lui a3, 209715
; RV32I-NEXT:    lui a5, 61681
; RV32I-NEXT:    addi a4, a2, 1365
; RV32I-NEXT:    addi a3, a3, 819
; RV32I-NEXT:    addi a2, a5, -241
; RV32I-NEXT:    beqz a0, .LBB3_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    not a1, a0
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 1
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    and a1, a1, a3
; RV32I-NEXT:    and a0, a0, a3
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    slli a1, a0, 8
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB3_2:
; RV32I-NEXT:    not a0, a1
; RV32I-NEXT:    addi a1, a1, -1
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 1
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    and a1, a1, a3
; RV32I-NEXT:    and a0, a0, a3
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    slli a1, a0, 8
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    addi a0, a0, 32
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: cttz_i64:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    beqz a0, .LBB3_2
; RV32ZBB-NEXT:  # %bb.1:
; RV32ZBB-NEXT:    ctz a0, a0
; RV32ZBB-NEXT:    li a1, 0
; RV32ZBB-NEXT:    ret
; RV32ZBB-NEXT:  .LBB3_2:
; RV32ZBB-NEXT:    ctz a0, a1
; RV32ZBB-NEXT:    addi a0, a0, 32
; RV32ZBB-NEXT:    li a1, 0
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
  ret i64 %1
}
264
; ctpop of i32: RV32I expands to the classic parallel bit-count; Zbb selects cpop.
declare i32 @llvm.ctpop.i32(i32)

define i32 @ctpop_i32(i32 %a) nounwind {
; RV32I-LABEL: ctpop_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srli a1, a0, 1
; RV32I-NEXT:    lui a2, 349525
; RV32I-NEXT:    addi a2, a2, 1365
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    lui a2, 209715
; RV32I-NEXT:    addi a2, a2, 819
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    srli a1, a0, 2
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    lui a2, 61681
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    srli a1, a0, 4
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    addi a1, a2, -241
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 8
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctpop_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    cpop a0, a0
; RV32ZBB-NEXT:    ret
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  ret i32 %1
}
300
; ctpop of i64 on RV32: popcount each 32-bit half and sum; high result word is 0.
declare i64 @llvm.ctpop.i64(i64)

define i64 @ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srli a2, a0, 1
; RV32I-NEXT:    lui a3, 349525
; RV32I-NEXT:    lui a4, 209715
; RV32I-NEXT:    srli a5, a1, 1
; RV32I-NEXT:    addi a3, a3, 1365
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    and a3, a5, a3
; RV32I-NEXT:    lui a5, 61681
; RV32I-NEXT:    addi a4, a4, 819
; RV32I-NEXT:    addi a5, a5, -241
; RV32I-NEXT:    sub a0, a0, a2
; RV32I-NEXT:    sub a1, a1, a3
; RV32I-NEXT:    srli a2, a0, 2
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    srli a3, a1, 2
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a2, a2, a4
; RV32I-NEXT:    and a3, a3, a4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    srli a2, a0, 4
; RV32I-NEXT:    srli a3, a1, 4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    and a0, a0, a5
; RV32I-NEXT:    and a1, a1, a5
; RV32I-NEXT:    slli a2, a0, 8
; RV32I-NEXT:    slli a3, a1, 8
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    slli a2, a0, 16
; RV32I-NEXT:    slli a3, a1, 16
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    srli a1, a1, 24
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctpop_i64:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    cpop a0, a0
; RV32ZBB-NEXT:    cpop a1, a1
; RV32ZBB-NEXT:    add a0, a1, a0
; RV32ZBB-NEXT:    li a1, 0
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  ret i64 %1
}
356
; popcount(x) < 2 compare; current GlobalISel output keeps a dead beqz-zero branch.
define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_two:
; RV32I:       # %bb.0:
; RV32I-NEXT:    beqz zero, .LBB6_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    sltiu a0, zero, 0
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB6_2:
; RV32I-NEXT:    srli a2, a0, 1
; RV32I-NEXT:    lui a3, 349525
; RV32I-NEXT:    lui a4, 209715
; RV32I-NEXT:    srli a5, a1, 1
; RV32I-NEXT:    addi a3, a3, 1365
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    and a3, a5, a3
; RV32I-NEXT:    lui a5, 61681
; RV32I-NEXT:    addi a4, a4, 819
; RV32I-NEXT:    addi a5, a5, -241
; RV32I-NEXT:    sub a0, a0, a2
; RV32I-NEXT:    sub a1, a1, a3
; RV32I-NEXT:    srli a2, a0, 2
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    srli a3, a1, 2
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a2, a2, a4
; RV32I-NEXT:    and a3, a3, a4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    srli a2, a0, 4
; RV32I-NEXT:    srli a3, a1, 4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    and a0, a0, a5
; RV32I-NEXT:    and a1, a1, a5
; RV32I-NEXT:    slli a2, a0, 8
; RV32I-NEXT:    slli a3, a1, 8
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    slli a2, a0, 16
; RV32I-NEXT:    slli a3, a1, 16
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    srli a1, a1, 24
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    sltiu a0, a0, 2
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    beqz zero, .LBB6_2
; RV32ZBB-NEXT:  # %bb.1:
; RV32ZBB-NEXT:    sltiu a0, zero, 0
; RV32ZBB-NEXT:    ret
; RV32ZBB-NEXT:  .LBB6_2:
; RV32ZBB-NEXT:    cpop a0, a0
; RV32ZBB-NEXT:    cpop a1, a1
; RV32ZBB-NEXT:    add a0, a1, a0
; RV32ZBB-NEXT:    sltiu a0, a0, 2
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ult i64 %1, 2
  ret i1 %2
}
421
; popcount(x) > 1 compare, lowered as !(popcount < 2); same dead branch artifact.
define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_one:
; RV32I:       # %bb.0:
; RV32I-NEXT:    beqz zero, .LBB7_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    snez a0, zero
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB7_2:
; RV32I-NEXT:    srli a2, a0, 1
; RV32I-NEXT:    lui a3, 349525
; RV32I-NEXT:    lui a4, 209715
; RV32I-NEXT:    srli a5, a1, 1
; RV32I-NEXT:    addi a3, a3, 1365
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    and a3, a5, a3
; RV32I-NEXT:    lui a5, 61681
; RV32I-NEXT:    addi a4, a4, 819
; RV32I-NEXT:    addi a5, a5, -241
; RV32I-NEXT:    sub a0, a0, a2
; RV32I-NEXT:    sub a1, a1, a3
; RV32I-NEXT:    srli a2, a0, 2
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    srli a3, a1, 2
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a2, a2, a4
; RV32I-NEXT:    and a3, a3, a4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    srli a2, a0, 4
; RV32I-NEXT:    srli a3, a1, 4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    and a0, a0, a5
; RV32I-NEXT:    and a1, a1, a5
; RV32I-NEXT:    slli a2, a0, 8
; RV32I-NEXT:    slli a3, a1, 8
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    slli a2, a0, 16
; RV32I-NEXT:    slli a3, a1, 16
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    srli a1, a1, 24
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    sltiu a0, a0, 2
; RV32I-NEXT:    xori a0, a0, 1
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    beqz zero, .LBB7_2
; RV32ZBB-NEXT:  # %bb.1:
; RV32ZBB-NEXT:    snez a0, zero
; RV32ZBB-NEXT:    ret
; RV32ZBB-NEXT:  .LBB7_2:
; RV32ZBB-NEXT:    cpop a0, a0
; RV32ZBB-NEXT:    cpop a1, a1
; RV32ZBB-NEXT:    add a0, a1, a0
; RV32ZBB-NEXT:    sltiu a0, a0, 2
; RV32ZBB-NEXT:    xori a0, a0, 1
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ugt i64 %1, 1
  ret i1 %2
}
488
; popcount(x) == 1 compare: sum of per-half popcounts, then xori/seqz.
define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_eq_one:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srli a2, a0, 1
; RV32I-NEXT:    lui a3, 349525
; RV32I-NEXT:    lui a4, 209715
; RV32I-NEXT:    srli a5, a1, 1
; RV32I-NEXT:    addi a3, a3, 1365
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    and a3, a5, a3
; RV32I-NEXT:    lui a5, 61681
; RV32I-NEXT:    addi a4, a4, 819
; RV32I-NEXT:    addi a5, a5, -241
; RV32I-NEXT:    sub a0, a0, a2
; RV32I-NEXT:    sub a1, a1, a3
; RV32I-NEXT:    srli a2, a0, 2
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    srli a3, a1, 2
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a2, a2, a4
; RV32I-NEXT:    and a3, a3, a4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    srli a2, a0, 4
; RV32I-NEXT:    srli a3, a1, 4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    and a0, a0, a5
; RV32I-NEXT:    and a1, a1, a5
; RV32I-NEXT:    slli a2, a0, 8
; RV32I-NEXT:    slli a3, a1, 8
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    slli a2, a0, 16
; RV32I-NEXT:    slli a3, a1, 16
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    srli a1, a1, 24
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    xori a0, a0, 1
; RV32I-NEXT:    seqz a0, a0
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctpop_i64_eq_one:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    cpop a0, a0
; RV32ZBB-NEXT:    cpop a1, a1
; RV32ZBB-NEXT:    add a0, a1, a0
; RV32ZBB-NEXT:    xori a0, a0, 1
; RV32ZBB-NEXT:    seqz a0, a0
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp eq i64 %1, 1
  ret i1 %2
}
545
; popcount(x) != 1 compare: same expansion as eq-one but ends in snez.
define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ne_one:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srli a2, a0, 1
; RV32I-NEXT:    lui a3, 349525
; RV32I-NEXT:    lui a4, 209715
; RV32I-NEXT:    srli a5, a1, 1
; RV32I-NEXT:    addi a3, a3, 1365
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    and a3, a5, a3
; RV32I-NEXT:    lui a5, 61681
; RV32I-NEXT:    addi a4, a4, 819
; RV32I-NEXT:    addi a5, a5, -241
; RV32I-NEXT:    sub a0, a0, a2
; RV32I-NEXT:    sub a1, a1, a3
; RV32I-NEXT:    srli a2, a0, 2
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    srli a3, a1, 2
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a2, a2, a4
; RV32I-NEXT:    and a3, a3, a4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    srli a2, a0, 4
; RV32I-NEXT:    srli a3, a1, 4
; RV32I-NEXT:    add a0, a2, a0
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    and a0, a0, a5
; RV32I-NEXT:    and a1, a1, a5
; RV32I-NEXT:    slli a2, a0, 8
; RV32I-NEXT:    slli a3, a1, 8
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    slli a2, a0, 16
; RV32I-NEXT:    slli a3, a1, 16
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    add a1, a1, a3
; RV32I-NEXT:    srli a0, a0, 24
; RV32I-NEXT:    srli a1, a1, 24
; RV32I-NEXT:    add a0, a1, a0
; RV32I-NEXT:    xori a0, a0, 1
; RV32I-NEXT:    snez a0, a0
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: ctpop_i64_ne_one:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    cpop a0, a0
; RV32ZBB-NEXT:    cpop a1, a1
; RV32ZBB-NEXT:    add a0, a1, a0
; RV32ZBB-NEXT:    xori a0, a0, 1
; RV32ZBB-NEXT:    snez a0, a0
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ne i64 %1, 1
  ret i1 %2
}
602
; shl 24 + ashr 24 sign-extension of the low byte; Zbb matches sext.b.
define i32 @sextb_i32(i32 %a) nounwind {
; RV32I-LABEL: sextb_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a0, a0, 24
; RV32I-NEXT:    srai a0, a0, 24
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: sextb_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    sext.b a0, a0
; RV32ZBB-NEXT:    ret
  %shl = shl i32 %a, 24
  %shr = ashr exact i32 %shl, 24
  ret i32 %shr
}
618
; FIXME: Combine back to back srai.
; i64 byte sign-extension on RV32: low word via sext.b/srai pair, high word = sign.
define i64 @sextb_i64(i64 %a) nounwind {
; RV32I-LABEL: sextb_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a1, a0, 24
; RV32I-NEXT:    srai a0, a1, 24
; RV32I-NEXT:    srai a1, a1, 31
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: sextb_i64:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    sext.b a0, a0
; RV32ZBB-NEXT:    srai a1, a0, 31
; RV32ZBB-NEXT:    ret
  %shl = shl i64 %a, 56
  %shr = ashr exact i64 %shl, 56
  ret i64 %shr
}
637
; shl 16 + ashr 16 sign-extension of the low half-word; Zbb matches sext.h.
define i32 @sexth_i32(i32 %a) nounwind {
; RV32I-LABEL: sexth_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a0, a0, 16
; RV32I-NEXT:    srai a0, a0, 16
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: sexth_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    sext.h a0, a0
; RV32ZBB-NEXT:    ret
  %shl = shl i32 %a, 16
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}
653
; FIXME: Combine back to back srai.
; i64 half-word sign-extension on RV32: low word via sext.h/srai pair, high word = sign.
define i64 @sexth_i64(i64 %a) nounwind {
; RV32I-LABEL: sexth_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a1, a0, 16
; RV32I-NEXT:    srai a0, a1, 16
; RV32I-NEXT:    srai a1, a1, 31
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: sexth_i64:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    sext.h a0, a0
; RV32ZBB-NEXT:    srai a1, a0, 31
; RV32ZBB-NEXT:    ret
  %shl = shl i64 %a, 48
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}
672
; signed min via slt/select; Zbb matches the min instruction.
define i32 @min_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: min_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    blt a0, a1, .LBB14_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:  .LBB14_2:
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: min_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    min a0, a0, a1
; RV32ZBB-NEXT:    ret
  %cmp = icmp slt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}
690
; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

; i64 signed min on RV32: two-word compare/branch sequence, identical with and without Zbb.
define i64 @min_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: min_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    beq a1, a3, .LBB15_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    slt a4, a1, a3
; CHECK-NEXT:    beqz a4, .LBB15_3
; CHECK-NEXT:    j .LBB15_4
; CHECK-NEXT:  .LBB15_2:
; CHECK-NEXT:    sltu a4, a0, a2
; CHECK-NEXT:    bnez a4, .LBB15_4
; CHECK-NEXT:  .LBB15_3:
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    mv a1, a3
; CHECK-NEXT:  .LBB15_4:
; CHECK-NEXT:    ret
  %cmp = icmp slt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}
716
; signed max via slt/select; Zbb matches the max instruction.
define i32 @max_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: max_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    blt a1, a0, .LBB16_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:  .LBB16_2:
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: max_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    max a0, a0, a1
; RV32ZBB-NEXT:    ret
  %cmp = icmp sgt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}
734
; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

; i64 signed max on RV32: two-word compare/branch sequence, identical with and without Zbb.
define i64 @max_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: max_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    beq a1, a3, .LBB17_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    slt a4, a3, a1
; CHECK-NEXT:    beqz a4, .LBB17_3
; CHECK-NEXT:    j .LBB17_4
; CHECK-NEXT:  .LBB17_2:
; CHECK-NEXT:    sltu a4, a2, a0
; CHECK-NEXT:    bnez a4, .LBB17_4
; CHECK-NEXT:  .LBB17_3:
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    mv a1, a3
; CHECK-NEXT:  .LBB17_4:
; CHECK-NEXT:    ret
  %cmp = icmp sgt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}
760
; unsigned min via ult/select; Zbb matches the minu instruction.
define i32 @minu_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: minu_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    bltu a0, a1, .LBB18_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:  .LBB18_2:
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: minu_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    minu a0, a0, a1
; RV32ZBB-NEXT:    ret
  %cmp = icmp ult i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}
778
; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

; i64 unsigned min on RV32: two-word compare/branch sequence, identical with and without Zbb.
define i64 @minu_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: minu_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    beq a1, a3, .LBB19_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sltu a4, a1, a3
; CHECK-NEXT:    beqz a4, .LBB19_3
; CHECK-NEXT:    j .LBB19_4
; CHECK-NEXT:  .LBB19_2:
; CHECK-NEXT:    sltu a4, a0, a2
; CHECK-NEXT:    bnez a4, .LBB19_4
; CHECK-NEXT:  .LBB19_3:
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    mv a1, a3
; CHECK-NEXT:  .LBB19_4:
; CHECK-NEXT:    ret
  %cmp = icmp ult i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}
804
; unsigned max via ugt/select; Zbb matches the maxu instruction.
define i32 @maxu_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: maxu_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    bltu a1, a0, .LBB20_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:  .LBB20_2:
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: maxu_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    maxu a0, a0, a1
; RV32ZBB-NEXT:    ret
  %cmp = icmp ugt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}
822
; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

; i64 unsigned max on RV32: two-word compare/branch sequence, identical with and without Zbb.
define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: maxu_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    beq a1, a3, .LBB21_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    sltu a4, a3, a1
; CHECK-NEXT:    beqz a4, .LBB21_3
; CHECK-NEXT:    j .LBB21_4
; CHECK-NEXT:  .LBB21_2:
; CHECK-NEXT:    sltu a4, a2, a0
; CHECK-NEXT:    bnez a4, .LBB21_4
; CHECK-NEXT:  .LBB21_3:
; CHECK-NEXT:    mv a0, a2
; CHECK-NEXT:    mv a1, a3
; CHECK-NEXT:  .LBB21_4:
; CHECK-NEXT:    ret
  %cmp = icmp ugt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}
848
; abs with poison-on-INT_MIN: RV32I uses the srai/add/xor trick; Zbb uses neg + max.
declare i32 @llvm.abs.i32(i32, i1 immarg)

define i32 @abs_i32(i32 %x) {
; RV32I-LABEL: abs_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srai a1, a0, 31
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: abs_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    neg a1, a0
; RV32ZBB-NEXT:    max a0, a0, a1
; RV32ZBB-NEXT:    ret
  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
  ret i32 %abs
}
867
; i64 abs on RV32: sign-word propagate, add-with-carry, xor; same with and without Zbb.
declare i64 @llvm.abs.i64(i64, i1 immarg)

define i64 @abs_i64(i64 %x) {
; CHECK-LABEL: abs_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    srai a2, a1, 31
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a1, a1, a2
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    add a1, a1, a3
; CHECK-NEXT:    xor a0, a0, a2
; CHECK-NEXT:    xor a1, a1, a2
; CHECK-NEXT:    ret
  %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
  ret i64 %abs
}
884
; and with 0xFFFF: shift-pair on RV32I, zext.h with Zbb.
define i32 @zexth_i32(i32 %a) nounwind {
; RV32I-LABEL: zexth_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a0, a0, 16
; RV32I-NEXT:    srli a0, a0, 16
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: zexth_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    zext.h a0, a0
; RV32ZBB-NEXT:    ret
  %and = and i32 %a, 65535
  ret i32 %and
}
899
; i64 and with 0xFFFF on RV32: low word as zexth_i32, high word zeroed.
define i64 @zexth_i64(i64 %a) nounwind {
; RV32I-LABEL: zexth_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a0, a0, 16
; RV32I-NEXT:    srli a0, a0, 16
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: zexth_i64:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    zext.h a0, a0
; RV32ZBB-NEXT:    li a1, 0
; RV32ZBB-NEXT:    ret
  %and = and i64 %a, 65535
  ret i64 %and
}
916
; bswap of i32: shift/mask/or expansion on RV32I; Zbb matches rev8.
declare i32 @llvm.bswap.i32(i32)

define i32 @bswap_i32(i32 %a) nounwind {
; RV32I-LABEL: bswap_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a1, a0, 24
; RV32I-NEXT:    srli a2, a0, 24
; RV32I-NEXT:    lui a3, 16
; RV32I-NEXT:    or a1, a2, a1
; RV32I-NEXT:    srli a2, a0, 8
; RV32I-NEXT:    addi a3, a3, -256
; RV32I-NEXT:    and a0, a0, a3
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    slli a0, a0, 8
; RV32I-NEXT:    or a1, a1, a2
; RV32I-NEXT:    or a0, a1, a0
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: bswap_i32:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    rev8 a0, a0
; RV32ZBB-NEXT:    ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %1
}
942
; bswap of i64 on RV32: byte-swap each word and swap the two result registers;
; Zbb uses rev8 per word.
declare i64 @llvm.bswap.i64(i64)

define i64 @bswap_i64(i64 %a) {
; RV32I-LABEL: bswap_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a2, a1, 24
; RV32I-NEXT:    srli a3, a1, 24
; RV32I-NEXT:    lui a4, 16
; RV32I-NEXT:    srli a5, a1, 8
; RV32I-NEXT:    slli a6, a0, 24
; RV32I-NEXT:    or a2, a3, a2
; RV32I-NEXT:    srli a3, a0, 24
; RV32I-NEXT:    or a3, a3, a6
; RV32I-NEXT:    srli a6, a0, 8
; RV32I-NEXT:    addi a4, a4, -256
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a5, a5, a4
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    and a4, a6, a4
; RV32I-NEXT:    or a2, a2, a5
; RV32I-NEXT:    slli a1, a1, 8
; RV32I-NEXT:    slli a5, a0, 8
; RV32I-NEXT:    or a3, a3, a4
; RV32I-NEXT:    or a0, a2, a1
; RV32I-NEXT:    or a1, a3, a5
; RV32I-NEXT:    ret
;
; RV32ZBB-LABEL: bswap_i64:
; RV32ZBB:       # %bb.0:
; RV32ZBB-NEXT:    rev8 a2, a1
; RV32ZBB-NEXT:    rev8 a1, a0
; RV32ZBB-NEXT:    mv a0, a2
; RV32ZBB-NEXT:    ret
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %1
}
979