xref: /llvm-project/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
3; RUN:   | FileCheck %s -check-prefix=RV64I
4; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
5; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB
6
7declare i32 @llvm.ctlz.i32(i32, i1)
8
; ctlz_i32: returns llvm.ctlz.i32(%a) with the i1 flag set to false.
; RV64I checks: a beqz guard that returns 32 for a zero input, otherwise a
; long shift/or/mask/add expansion ending in srliw 24.
; RV64XTHEADBB checks: branchless not + slli 32 + th.ff0.
9define signext i32 @ctlz_i32(i32 signext %a) nounwind {
10; RV64I-LABEL: ctlz_i32:
11; RV64I:       # %bb.0:
12; RV64I-NEXT:    beqz a0, .LBB0_2
13; RV64I-NEXT:  # %bb.1: # %cond.false
14; RV64I-NEXT:    srliw a1, a0, 1
15; RV64I-NEXT:    lui a2, 349525
16; RV64I-NEXT:    or a0, a0, a1
17; RV64I-NEXT:    addiw a1, a2, 1365
18; RV64I-NEXT:    srliw a2, a0, 2
19; RV64I-NEXT:    or a0, a0, a2
20; RV64I-NEXT:    srliw a2, a0, 4
21; RV64I-NEXT:    or a0, a0, a2
22; RV64I-NEXT:    srliw a2, a0, 8
23; RV64I-NEXT:    or a0, a0, a2
24; RV64I-NEXT:    srliw a2, a0, 16
25; RV64I-NEXT:    or a0, a0, a2
26; RV64I-NEXT:    not a0, a0
27; RV64I-NEXT:    srli a2, a0, 1
28; RV64I-NEXT:    and a1, a2, a1
29; RV64I-NEXT:    lui a2, 209715
30; RV64I-NEXT:    addiw a2, a2, 819
31; RV64I-NEXT:    sub a0, a0, a1
32; RV64I-NEXT:    and a1, a0, a2
33; RV64I-NEXT:    srli a0, a0, 2
34; RV64I-NEXT:    and a0, a0, a2
35; RV64I-NEXT:    lui a2, 61681
36; RV64I-NEXT:    add a0, a1, a0
37; RV64I-NEXT:    srli a1, a0, 4
38; RV64I-NEXT:    add a0, a0, a1
39; RV64I-NEXT:    addi a1, a2, -241
40; RV64I-NEXT:    and a0, a0, a1
41; RV64I-NEXT:    slli a1, a0, 8
42; RV64I-NEXT:    add a0, a0, a1
43; RV64I-NEXT:    slli a1, a0, 16
44; RV64I-NEXT:    add a0, a0, a1
45; RV64I-NEXT:    srliw a0, a0, 24
46; RV64I-NEXT:    ret
47; RV64I-NEXT:  .LBB0_2:
48; RV64I-NEXT:    li a0, 32
49; RV64I-NEXT:    ret
50;
51; RV64XTHEADBB-LABEL: ctlz_i32:
52; RV64XTHEADBB:       # %bb.0:
53; RV64XTHEADBB-NEXT:    not a0, a0
54; RV64XTHEADBB-NEXT:    slli a0, a0, 32
55; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
56; RV64XTHEADBB-NEXT:    ret
57  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
58  ret i32 %1
59}
60
; log2_i32: returns 31 - llvm.ctlz.i32(%a), with the ctlz i1 flag false.
; RV64I checks: the same guarded ctlz expansion as ctlz_i32, with both paths
; joining at %cond.end where `li a1, 31` / `sub` is applied.
; RV64XTHEADBB checks: not + slli 32 + th.ff0, then the same li/sub.
61define signext i32 @log2_i32(i32 signext %a) nounwind {
62; RV64I-LABEL: log2_i32:
63; RV64I:       # %bb.0:
64; RV64I-NEXT:    beqz a0, .LBB1_2
65; RV64I-NEXT:  # %bb.1: # %cond.false
66; RV64I-NEXT:    srliw a1, a0, 1
67; RV64I-NEXT:    lui a2, 349525
68; RV64I-NEXT:    or a0, a0, a1
69; RV64I-NEXT:    addiw a1, a2, 1365
70; RV64I-NEXT:    srliw a2, a0, 2
71; RV64I-NEXT:    or a0, a0, a2
72; RV64I-NEXT:    srliw a2, a0, 4
73; RV64I-NEXT:    or a0, a0, a2
74; RV64I-NEXT:    srliw a2, a0, 8
75; RV64I-NEXT:    or a0, a0, a2
76; RV64I-NEXT:    srliw a2, a0, 16
77; RV64I-NEXT:    or a0, a0, a2
78; RV64I-NEXT:    not a0, a0
79; RV64I-NEXT:    srli a2, a0, 1
80; RV64I-NEXT:    and a1, a2, a1
81; RV64I-NEXT:    lui a2, 209715
82; RV64I-NEXT:    addiw a2, a2, 819
83; RV64I-NEXT:    sub a0, a0, a1
84; RV64I-NEXT:    and a1, a0, a2
85; RV64I-NEXT:    srli a0, a0, 2
86; RV64I-NEXT:    and a0, a0, a2
87; RV64I-NEXT:    lui a2, 61681
88; RV64I-NEXT:    add a0, a1, a0
89; RV64I-NEXT:    srli a1, a0, 4
90; RV64I-NEXT:    add a0, a0, a1
91; RV64I-NEXT:    addi a1, a2, -241
92; RV64I-NEXT:    and a0, a0, a1
93; RV64I-NEXT:    slli a1, a0, 8
94; RV64I-NEXT:    add a0, a0, a1
95; RV64I-NEXT:    slli a1, a0, 16
96; RV64I-NEXT:    add a0, a0, a1
97; RV64I-NEXT:    srliw a0, a0, 24
98; RV64I-NEXT:    j .LBB1_3
99; RV64I-NEXT:  .LBB1_2:
100; RV64I-NEXT:    li a0, 32
101; RV64I-NEXT:  .LBB1_3: # %cond.end
102; RV64I-NEXT:    li a1, 31
103; RV64I-NEXT:    sub a0, a1, a0
104; RV64I-NEXT:    ret
105;
106; RV64XTHEADBB-LABEL: log2_i32:
107; RV64XTHEADBB:       # %bb.0:
108; RV64XTHEADBB-NEXT:    not a0, a0
109; RV64XTHEADBB-NEXT:    slli a0, a0, 32
110; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
111; RV64XTHEADBB-NEXT:    li a1, 31
112; RV64XTHEADBB-NEXT:    sub a0, a1, a0
113; RV64XTHEADBB-NEXT:    ret
114  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
115  %2 = sub i32 31, %1
116  ret i32 %2
117}
118
; log2_ceil_i32: returns 32 - llvm.ctlz.i32(%a - 1), with the ctlz i1 flag
; false.
; RV64I checks: addiw computes %a - 1, a2 is preloaded with 32 for the zero
; case, and both paths meet at %cond.end for the final sub.
; RV64XTHEADBB checks: addi -1, then not + slli 32 + th.ff0, then li 32 / sub.
119define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
120; RV64I-LABEL: log2_ceil_i32:
121; RV64I:       # %bb.0:
122; RV64I-NEXT:    addiw a1, a0, -1
123; RV64I-NEXT:    li a0, 32
124; RV64I-NEXT:    li a2, 32
125; RV64I-NEXT:    beqz a1, .LBB2_2
126; RV64I-NEXT:  # %bb.1: # %cond.false
127; RV64I-NEXT:    srliw a2, a1, 1
128; RV64I-NEXT:    lui a3, 349525
129; RV64I-NEXT:    or a1, a1, a2
130; RV64I-NEXT:    addiw a2, a3, 1365
131; RV64I-NEXT:    srliw a3, a1, 2
132; RV64I-NEXT:    or a1, a1, a3
133; RV64I-NEXT:    srliw a3, a1, 4
134; RV64I-NEXT:    or a1, a1, a3
135; RV64I-NEXT:    srliw a3, a1, 8
136; RV64I-NEXT:    or a1, a1, a3
137; RV64I-NEXT:    srliw a3, a1, 16
138; RV64I-NEXT:    or a1, a1, a3
139; RV64I-NEXT:    not a1, a1
140; RV64I-NEXT:    srli a3, a1, 1
141; RV64I-NEXT:    and a2, a3, a2
142; RV64I-NEXT:    lui a3, 209715
143; RV64I-NEXT:    addiw a3, a3, 819
144; RV64I-NEXT:    sub a1, a1, a2
145; RV64I-NEXT:    and a2, a1, a3
146; RV64I-NEXT:    srli a1, a1, 2
147; RV64I-NEXT:    and a1, a1, a3
148; RV64I-NEXT:    lui a3, 61681
149; RV64I-NEXT:    add a1, a2, a1
150; RV64I-NEXT:    srli a2, a1, 4
151; RV64I-NEXT:    add a1, a1, a2
152; RV64I-NEXT:    addi a2, a3, -241
153; RV64I-NEXT:    and a1, a1, a2
154; RV64I-NEXT:    slli a2, a1, 8
155; RV64I-NEXT:    add a1, a1, a2
156; RV64I-NEXT:    slli a2, a1, 16
157; RV64I-NEXT:    add a1, a1, a2
158; RV64I-NEXT:    srliw a2, a1, 24
159; RV64I-NEXT:  .LBB2_2: # %cond.end
160; RV64I-NEXT:    sub a0, a0, a2
161; RV64I-NEXT:    ret
162;
163; RV64XTHEADBB-LABEL: log2_ceil_i32:
164; RV64XTHEADBB:       # %bb.0:
165; RV64XTHEADBB-NEXT:    addi a0, a0, -1
166; RV64XTHEADBB-NEXT:    not a0, a0
167; RV64XTHEADBB-NEXT:    slli a0, a0, 32
168; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
169; RV64XTHEADBB-NEXT:    li a1, 32
170; RV64XTHEADBB-NEXT:    sub a0, a1, a0
171; RV64XTHEADBB-NEXT:    ret
172  %1 = sub i32 %a, 1
173  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
174  %3 = sub i32 32, %2
175  ret i32 %3
176}
177
; findLastSet_i32: returns -1 when %a == 0, otherwise 31 xor
; llvm.ctlz.i32(%a) with the ctlz i1 flag true.
; Both check sets are branchless: snez + addi -1 produces an all-ones value
; for a zero input (0 otherwise), which is OR'd with the xori 31 result.
178define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
179; RV64I-LABEL: findLastSet_i32:
180; RV64I:       # %bb.0:
181; RV64I-NEXT:    srliw a1, a0, 1
182; RV64I-NEXT:    lui a2, 349525
183; RV64I-NEXT:    or a1, a0, a1
184; RV64I-NEXT:    addiw a2, a2, 1365
185; RV64I-NEXT:    srliw a3, a1, 2
186; RV64I-NEXT:    or a1, a1, a3
187; RV64I-NEXT:    srliw a3, a1, 4
188; RV64I-NEXT:    or a1, a1, a3
189; RV64I-NEXT:    srliw a3, a1, 8
190; RV64I-NEXT:    or a1, a1, a3
191; RV64I-NEXT:    srliw a3, a1, 16
192; RV64I-NEXT:    or a1, a1, a3
193; RV64I-NEXT:    not a1, a1
194; RV64I-NEXT:    srli a3, a1, 1
195; RV64I-NEXT:    and a2, a3, a2
196; RV64I-NEXT:    lui a3, 209715
197; RV64I-NEXT:    addiw a3, a3, 819
198; RV64I-NEXT:    sub a1, a1, a2
199; RV64I-NEXT:    and a2, a1, a3
200; RV64I-NEXT:    srli a1, a1, 2
201; RV64I-NEXT:    and a1, a1, a3
202; RV64I-NEXT:    lui a3, 61681
203; RV64I-NEXT:    snez a0, a0
204; RV64I-NEXT:    addi a3, a3, -241
205; RV64I-NEXT:    add a1, a2, a1
206; RV64I-NEXT:    srli a2, a1, 4
207; RV64I-NEXT:    add a1, a1, a2
208; RV64I-NEXT:    and a1, a1, a3
209; RV64I-NEXT:    slli a2, a1, 8
210; RV64I-NEXT:    add a1, a1, a2
211; RV64I-NEXT:    slli a2, a1, 16
212; RV64I-NEXT:    add a1, a1, a2
213; RV64I-NEXT:    srliw a1, a1, 24
214; RV64I-NEXT:    xori a1, a1, 31
215; RV64I-NEXT:    addi a0, a0, -1
216; RV64I-NEXT:    or a0, a0, a1
217; RV64I-NEXT:    ret
218;
219; RV64XTHEADBB-LABEL: findLastSet_i32:
220; RV64XTHEADBB:       # %bb.0:
221; RV64XTHEADBB-NEXT:    not a1, a0
222; RV64XTHEADBB-NEXT:    snez a0, a0
223; RV64XTHEADBB-NEXT:    slli a1, a1, 32
224; RV64XTHEADBB-NEXT:    th.ff0 a1, a1
225; RV64XTHEADBB-NEXT:    xori a1, a1, 31
226; RV64XTHEADBB-NEXT:    addi a0, a0, -1
227; RV64XTHEADBB-NEXT:    or a0, a0, a1
228; RV64XTHEADBB-NEXT:    ret
229  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
230  %2 = xor i32 31, %1
231  %3 = icmp eq i32 %a, 0
232  %4 = select i1 %3, i32 -1, i32 %2
233  ret i32 %4
234}
235
; ctlz_lshr_i32: returns llvm.ctlz.i32(%a lshr 1) with the i1 flag false.
; RV64I checks: srliw by 1, then the beqz zero guard and ctlz expansion.
; RV64XTHEADBB checks: srliw by 1, then not + slli 32 + th.ff0.
; NOTE(review): this function has no nounwind attribute and its return value
; has no signext, unlike the other i32 ctlz tests in this file - confirm that
; is intentional.
236define i32 @ctlz_lshr_i32(i32 signext %a) {
237; RV64I-LABEL: ctlz_lshr_i32:
238; RV64I:       # %bb.0:
239; RV64I-NEXT:    srliw a0, a0, 1
240; RV64I-NEXT:    beqz a0, .LBB4_2
241; RV64I-NEXT:  # %bb.1: # %cond.false
242; RV64I-NEXT:    srliw a1, a0, 1
243; RV64I-NEXT:    lui a2, 349525
244; RV64I-NEXT:    or a0, a0, a1
245; RV64I-NEXT:    addiw a1, a2, 1365
246; RV64I-NEXT:    srliw a2, a0, 2
247; RV64I-NEXT:    or a0, a0, a2
248; RV64I-NEXT:    srliw a2, a0, 4
249; RV64I-NEXT:    or a0, a0, a2
250; RV64I-NEXT:    srliw a2, a0, 8
251; RV64I-NEXT:    or a0, a0, a2
252; RV64I-NEXT:    srliw a2, a0, 16
253; RV64I-NEXT:    or a0, a0, a2
254; RV64I-NEXT:    not a0, a0
255; RV64I-NEXT:    srli a2, a0, 1
256; RV64I-NEXT:    and a1, a2, a1
257; RV64I-NEXT:    lui a2, 209715
258; RV64I-NEXT:    addiw a2, a2, 819
259; RV64I-NEXT:    sub a0, a0, a1
260; RV64I-NEXT:    and a1, a0, a2
261; RV64I-NEXT:    srli a0, a0, 2
262; RV64I-NEXT:    and a0, a0, a2
263; RV64I-NEXT:    lui a2, 61681
264; RV64I-NEXT:    add a0, a1, a0
265; RV64I-NEXT:    srli a1, a0, 4
266; RV64I-NEXT:    add a0, a0, a1
267; RV64I-NEXT:    addi a1, a2, -241
268; RV64I-NEXT:    and a0, a0, a1
269; RV64I-NEXT:    slli a1, a0, 8
270; RV64I-NEXT:    add a0, a0, a1
271; RV64I-NEXT:    slli a1, a0, 16
272; RV64I-NEXT:    add a0, a0, a1
273; RV64I-NEXT:    srliw a0, a0, 24
274; RV64I-NEXT:    ret
275; RV64I-NEXT:  .LBB4_2:
276; RV64I-NEXT:    li a0, 32
277; RV64I-NEXT:    ret
278;
279; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
280; RV64XTHEADBB:       # %bb.0:
281; RV64XTHEADBB-NEXT:    srliw a0, a0, 1
282; RV64XTHEADBB-NEXT:    not a0, a0
283; RV64XTHEADBB-NEXT:    slli a0, a0, 32
284; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
285; RV64XTHEADBB-NEXT:    ret
286  %1 = lshr i32 %a, 1
287  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
288  ret i32 %2
289}
290
291declare i64 @llvm.ctlz.i64(i64, i1)
292
; ctlz_i64: returns llvm.ctlz.i64(%a) with the i1 flag false.
; RV64I checks: a beqz guard returning 64 for a zero input, otherwise the
; 64-bit shift/or/mask/add expansion ending in srli 56.
; RV64XTHEADBB checks: a single th.ff1.
293define i64 @ctlz_i64(i64 %a) nounwind {
294; RV64I-LABEL: ctlz_i64:
295; RV64I:       # %bb.0:
296; RV64I-NEXT:    beqz a0, .LBB5_2
297; RV64I-NEXT:  # %bb.1: # %cond.false
298; RV64I-NEXT:    srli a1, a0, 1
299; RV64I-NEXT:    lui a2, 349525
300; RV64I-NEXT:    lui a3, 209715
301; RV64I-NEXT:    or a0, a0, a1
302; RV64I-NEXT:    addiw a1, a2, 1365
303; RV64I-NEXT:    addiw a2, a3, 819
304; RV64I-NEXT:    srli a3, a0, 2
305; RV64I-NEXT:    or a0, a0, a3
306; RV64I-NEXT:    slli a3, a1, 32
307; RV64I-NEXT:    add a1, a1, a3
308; RV64I-NEXT:    slli a3, a2, 32
309; RV64I-NEXT:    add a2, a2, a3
310; RV64I-NEXT:    srli a3, a0, 4
311; RV64I-NEXT:    or a0, a0, a3
312; RV64I-NEXT:    srli a3, a0, 8
313; RV64I-NEXT:    or a0, a0, a3
314; RV64I-NEXT:    srli a3, a0, 16
315; RV64I-NEXT:    or a0, a0, a3
316; RV64I-NEXT:    srli a3, a0, 32
317; RV64I-NEXT:    or a0, a0, a3
318; RV64I-NEXT:    not a0, a0
319; RV64I-NEXT:    srli a3, a0, 1
320; RV64I-NEXT:    and a1, a3, a1
321; RV64I-NEXT:    lui a3, 61681
322; RV64I-NEXT:    addiw a3, a3, -241
323; RV64I-NEXT:    sub a0, a0, a1
324; RV64I-NEXT:    and a1, a0, a2
325; RV64I-NEXT:    srli a0, a0, 2
326; RV64I-NEXT:    and a0, a0, a2
327; RV64I-NEXT:    slli a2, a3, 32
328; RV64I-NEXT:    add a0, a1, a0
329; RV64I-NEXT:    srli a1, a0, 4
330; RV64I-NEXT:    add a0, a0, a1
331; RV64I-NEXT:    add a2, a3, a2
332; RV64I-NEXT:    and a0, a0, a2
333; RV64I-NEXT:    slli a1, a0, 8
334; RV64I-NEXT:    add a0, a0, a1
335; RV64I-NEXT:    slli a1, a0, 16
336; RV64I-NEXT:    add a0, a0, a1
337; RV64I-NEXT:    slli a1, a0, 32
338; RV64I-NEXT:    add a0, a0, a1
339; RV64I-NEXT:    srli a0, a0, 56
340; RV64I-NEXT:    ret
341; RV64I-NEXT:  .LBB5_2:
342; RV64I-NEXT:    li a0, 64
343; RV64I-NEXT:    ret
344;
345; RV64XTHEADBB-LABEL: ctlz_i64:
346; RV64XTHEADBB:       # %bb.0:
347; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
348; RV64XTHEADBB-NEXT:    ret
349  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
350  ret i64 %1
351}
352
353declare i32 @llvm.cttz.i32(i32, i1)
354
; cttz_i32: returns llvm.cttz.i32(%a) with the i1 flag false.
; RV64I checks: a beqz guard returning 32; otherwise neg/and on %a, a
; __muldi3 call with a lui/addiw constant, srliw 27, and a byte load from
; the .LCPI6_0 table.
; RV64XTHEADBB checks: the same guard, then (not %a) and (%a - 1) fed to
; th.ff1, with the result subtracted from 64.
355define signext i32 @cttz_i32(i32 signext %a) nounwind {
356; RV64I-LABEL: cttz_i32:
357; RV64I:       # %bb.0:
358; RV64I-NEXT:    beqz a0, .LBB6_2
359; RV64I-NEXT:  # %bb.1: # %cond.false
360; RV64I-NEXT:    addi sp, sp, -16
361; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
362; RV64I-NEXT:    neg a1, a0
363; RV64I-NEXT:    and a0, a0, a1
364; RV64I-NEXT:    lui a1, 30667
365; RV64I-NEXT:    addiw a1, a1, 1329
366; RV64I-NEXT:    call __muldi3
367; RV64I-NEXT:    srliw a0, a0, 27
368; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
369; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
370; RV64I-NEXT:    add a0, a1, a0
371; RV64I-NEXT:    lbu a0, 0(a0)
372; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
373; RV64I-NEXT:    addi sp, sp, 16
374; RV64I-NEXT:    ret
375; RV64I-NEXT:  .LBB6_2:
376; RV64I-NEXT:    li a0, 32
377; RV64I-NEXT:    ret
378;
379; RV64XTHEADBB-LABEL: cttz_i32:
380; RV64XTHEADBB:       # %bb.0:
381; RV64XTHEADBB-NEXT:    beqz a0, .LBB6_2
382; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
383; RV64XTHEADBB-NEXT:    addi a1, a0, -1
384; RV64XTHEADBB-NEXT:    not a0, a0
385; RV64XTHEADBB-NEXT:    and a0, a0, a1
386; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
387; RV64XTHEADBB-NEXT:    li a1, 64
388; RV64XTHEADBB-NEXT:    sub a0, a1, a0
389; RV64XTHEADBB-NEXT:    ret
390; RV64XTHEADBB-NEXT:  .LBB6_2:
391; RV64XTHEADBB-NEXT:    li a0, 32
392; RV64XTHEADBB-NEXT:    ret
393  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
394  ret i32 %1
395}
396
; cttz_zero_undef_i32: returns llvm.cttz.i32(%a) with the i1 flag true.
; Unlike cttz_i32, neither check set contains a beqz zero guard.
; RV64I checks: neg/and, a __muldi3 call, srliw 27, and a byte load from the
; .LCPI7_0 table.
; RV64XTHEADBB checks: (not %a) and (%a - 1) fed to th.ff1, then 64 - result.
397define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
398; RV64I-LABEL: cttz_zero_undef_i32:
399; RV64I:       # %bb.0:
400; RV64I-NEXT:    addi sp, sp, -16
401; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
402; RV64I-NEXT:    neg a1, a0
403; RV64I-NEXT:    and a0, a0, a1
404; RV64I-NEXT:    lui a1, 30667
405; RV64I-NEXT:    addiw a1, a1, 1329
406; RV64I-NEXT:    call __muldi3
407; RV64I-NEXT:    srliw a0, a0, 27
408; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
409; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
410; RV64I-NEXT:    add a0, a1, a0
411; RV64I-NEXT:    lbu a0, 0(a0)
412; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
413; RV64I-NEXT:    addi sp, sp, 16
414; RV64I-NEXT:    ret
415;
416; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
417; RV64XTHEADBB:       # %bb.0:
418; RV64XTHEADBB-NEXT:    addi a1, a0, -1
419; RV64XTHEADBB-NEXT:    not a0, a0
420; RV64XTHEADBB-NEXT:    and a0, a0, a1
421; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
422; RV64XTHEADBB-NEXT:    li a1, 64
423; RV64XTHEADBB-NEXT:    sub a0, a1, a0
424; RV64XTHEADBB-NEXT:    ret
425  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
426  ret i32 %1
427}
428
; findFirstSet_i32: returns -1 when %a == 0, otherwise llvm.cttz.i32(%a)
; with the cttz i1 flag true.
; Both check sets are branchless: snez + addi -1 produces an all-ones value
; for a zero input (0 otherwise), which is OR'd with the cttz result.
; RV64I keeps %a in s0 across the __muldi3 call for the later snez.
429define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
430; RV64I-LABEL: findFirstSet_i32:
431; RV64I:       # %bb.0:
432; RV64I-NEXT:    addi sp, sp, -16
433; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
434; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
435; RV64I-NEXT:    mv s0, a0
436; RV64I-NEXT:    neg a0, a0
437; RV64I-NEXT:    and a0, s0, a0
438; RV64I-NEXT:    lui a1, 30667
439; RV64I-NEXT:    addiw a1, a1, 1329
440; RV64I-NEXT:    call __muldi3
441; RV64I-NEXT:    srliw a0, a0, 27
442; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
443; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
444; RV64I-NEXT:    add a0, a1, a0
445; RV64I-NEXT:    lbu a0, 0(a0)
446; RV64I-NEXT:    snez a1, s0
447; RV64I-NEXT:    addi a1, a1, -1
448; RV64I-NEXT:    or a0, a1, a0
449; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
450; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
451; RV64I-NEXT:    addi sp, sp, 16
452; RV64I-NEXT:    ret
453;
454; RV64XTHEADBB-LABEL: findFirstSet_i32:
455; RV64XTHEADBB:       # %bb.0:
456; RV64XTHEADBB-NEXT:    addi a1, a0, -1
457; RV64XTHEADBB-NEXT:    not a2, a0
458; RV64XTHEADBB-NEXT:    and a1, a2, a1
459; RV64XTHEADBB-NEXT:    li a2, 64
460; RV64XTHEADBB-NEXT:    snez a0, a0
461; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
462; RV64XTHEADBB-NEXT:    sub a2, a2, a1
463; RV64XTHEADBB-NEXT:    addi a0, a0, -1
464; RV64XTHEADBB-NEXT:    or a0, a0, a2
465; RV64XTHEADBB-NEXT:    ret
466  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
467  %2 = icmp eq i32 %a, 0
468  %3 = select i1 %2, i32 -1, i32 %1
469  ret i32 %3
470}
471
; ffs_i32: returns 0 when %a == 0, otherwise llvm.cttz.i32(%a) + 1 with the
; cttz i1 flag true.
; Both check sets are branchless: seqz + addi -1 produces 0 for a zero input
; (all-ones otherwise), which is AND'ed with the cttz + 1 value.
; RV64XTHEADBB folds the +1 into the constant: it subtracts from 65, not 64.
472define signext i32 @ffs_i32(i32 signext %a) nounwind {
473; RV64I-LABEL: ffs_i32:
474; RV64I:       # %bb.0:
475; RV64I-NEXT:    addi sp, sp, -16
476; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
477; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
478; RV64I-NEXT:    mv s0, a0
479; RV64I-NEXT:    neg a0, a0
480; RV64I-NEXT:    and a0, s0, a0
481; RV64I-NEXT:    lui a1, 30667
482; RV64I-NEXT:    addiw a1, a1, 1329
483; RV64I-NEXT:    call __muldi3
484; RV64I-NEXT:    srliw a0, a0, 27
485; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
486; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
487; RV64I-NEXT:    add a0, a1, a0
488; RV64I-NEXT:    lbu a0, 0(a0)
489; RV64I-NEXT:    seqz a1, s0
490; RV64I-NEXT:    addi a0, a0, 1
491; RV64I-NEXT:    addi a1, a1, -1
492; RV64I-NEXT:    and a0, a1, a0
493; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
494; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
495; RV64I-NEXT:    addi sp, sp, 16
496; RV64I-NEXT:    ret
497;
498; RV64XTHEADBB-LABEL: ffs_i32:
499; RV64XTHEADBB:       # %bb.0:
500; RV64XTHEADBB-NEXT:    addi a1, a0, -1
501; RV64XTHEADBB-NEXT:    not a2, a0
502; RV64XTHEADBB-NEXT:    and a1, a2, a1
503; RV64XTHEADBB-NEXT:    li a2, 65
504; RV64XTHEADBB-NEXT:    seqz a0, a0
505; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
506; RV64XTHEADBB-NEXT:    sub a2, a2, a1
507; RV64XTHEADBB-NEXT:    addi a0, a0, -1
508; RV64XTHEADBB-NEXT:    and a0, a0, a2
509; RV64XTHEADBB-NEXT:    ret
510  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
511  %2 = add i32 %1, 1
512  %3 = icmp eq i32 %a, 0
513  %4 = select i1 %3, i32 0, i32 %2
514  ret i32 %4
515}
516
517declare i64 @llvm.cttz.i64(i64, i1)
518
; cttz_i64: returns llvm.cttz.i64(%a) with the i1 flag false.
; RV64I checks: a beqz guard returning 64; otherwise neg/and on %a, a
; __muldi3 call with a multiplier loaded from .LCPI10_0, srli 58, and a byte
; load from the .LCPI10_1 table.
; RV64XTHEADBB checks: the same guard, then (not %a) and (%a - 1) fed to
; th.ff1, with the result subtracted from 64.
519define i64 @cttz_i64(i64 %a) nounwind {
520; RV64I-LABEL: cttz_i64:
521; RV64I:       # %bb.0:
522; RV64I-NEXT:    beqz a0, .LBB10_2
523; RV64I-NEXT:  # %bb.1: # %cond.false
524; RV64I-NEXT:    addi sp, sp, -16
525; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
526; RV64I-NEXT:    neg a1, a0
527; RV64I-NEXT:    and a0, a0, a1
528; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
529; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
530; RV64I-NEXT:    call __muldi3
531; RV64I-NEXT:    srli a0, a0, 58
532; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
533; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
534; RV64I-NEXT:    add a0, a1, a0
535; RV64I-NEXT:    lbu a0, 0(a0)
536; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
537; RV64I-NEXT:    addi sp, sp, 16
538; RV64I-NEXT:    ret
539; RV64I-NEXT:  .LBB10_2:
540; RV64I-NEXT:    li a0, 64
541; RV64I-NEXT:    ret
542;
543; RV64XTHEADBB-LABEL: cttz_i64:
544; RV64XTHEADBB:       # %bb.0:
545; RV64XTHEADBB-NEXT:    beqz a0, .LBB10_2
546; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
547; RV64XTHEADBB-NEXT:    addi a1, a0, -1
548; RV64XTHEADBB-NEXT:    not a0, a0
549; RV64XTHEADBB-NEXT:    and a0, a0, a1
550; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
551; RV64XTHEADBB-NEXT:    li a1, 64
552; RV64XTHEADBB-NEXT:    sub a0, a1, a0
553; RV64XTHEADBB-NEXT:    ret
554; RV64XTHEADBB-NEXT:  .LBB10_2:
555; RV64XTHEADBB-NEXT:    li a0, 64
556; RV64XTHEADBB-NEXT:    ret
557  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
558  ret i64 %1
559}
560
; sexti1_i32: shl by 31 followed by ashr by 31 on an i32, i.e. bit 0 of %a
; replicated across the result.
; RV64I checks: slli 63 / srai 63. RV64XTHEADBB checks: th.ext a0, a0, 0, 0.
561define signext i32 @sexti1_i32(i32 signext %a) nounwind {
562; RV64I-LABEL: sexti1_i32:
563; RV64I:       # %bb.0:
564; RV64I-NEXT:    slli a0, a0, 63
565; RV64I-NEXT:    srai a0, a0, 63
566; RV64I-NEXT:    ret
567;
568; RV64XTHEADBB-LABEL: sexti1_i32:
569; RV64XTHEADBB:       # %bb.0:
570; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
571; RV64XTHEADBB-NEXT:    ret
572  %shl = shl i32 %a, 31
573  %shr = ashr exact i32 %shl, 31
574  ret i32 %shr
575}
576
; sexti1_i32_2: sign-extends an i1 argument to i32 with a sext instruction.
; The expected code matches sexti1_i32: slli 63 / srai 63 on RV64I and
; th.ext a0, a0, 0, 0 on RV64XTHEADBB.
577define signext i32 @sexti1_i32_2(i1 %a) nounwind {
578; RV64I-LABEL: sexti1_i32_2:
579; RV64I:       # %bb.0:
580; RV64I-NEXT:    slli a0, a0, 63
581; RV64I-NEXT:    srai a0, a0, 63
582; RV64I-NEXT:    ret
583;
584; RV64XTHEADBB-LABEL: sexti1_i32_2:
585; RV64XTHEADBB:       # %bb.0:
586; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
587; RV64XTHEADBB-NEXT:    ret
588  %sext = sext i1 %a to i32
589  ret i32 %sext
590}
591
; sexti1_i64: shl by 63 followed by ashr by 63 on an i64, i.e. bit 0 of %a
; replicated across the result.
; RV64I checks: slli 63 / srai 63. RV64XTHEADBB checks: th.ext a0, a0, 0, 0.
592define i64 @sexti1_i64(i64 %a) nounwind {
593; RV64I-LABEL: sexti1_i64:
594; RV64I:       # %bb.0:
595; RV64I-NEXT:    slli a0, a0, 63
596; RV64I-NEXT:    srai a0, a0, 63
597; RV64I-NEXT:    ret
598;
599; RV64XTHEADBB-LABEL: sexti1_i64:
600; RV64XTHEADBB:       # %bb.0:
601; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
602; RV64XTHEADBB-NEXT:    ret
603  %shl = shl i64 %a, 63
604  %shr = ashr exact i64 %shl, 63
605  ret i64 %shr
606}
607
; sexti1_i64_2: sign-extends an i1 argument to i64 with a sext instruction.
; The expected code matches sexti1_i64: slli 63 / srai 63 on RV64I and
; th.ext a0, a0, 0, 0 on RV64XTHEADBB.
608define i64 @sexti1_i64_2(i1 %a) nounwind {
609; RV64I-LABEL: sexti1_i64_2:
610; RV64I:       # %bb.0:
611; RV64I-NEXT:    slli a0, a0, 63
612; RV64I-NEXT:    srai a0, a0, 63
613; RV64I-NEXT:    ret
614;
615; RV64XTHEADBB-LABEL: sexti1_i64_2:
616; RV64XTHEADBB:       # %bb.0:
617; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
618; RV64XTHEADBB-NEXT:    ret
619  %sext = sext i1 %a to i64
620  ret i64 %sext
621}
622
; sextb_i32: shl by 24 followed by ashr by 24 on an i32, i.e. sign extension
; of the low byte of %a.
; RV64I checks: slli 56 / srai 56. RV64XTHEADBB checks: th.ext a0, a0, 7, 0.
623define signext i32 @sextb_i32(i32 signext %a) nounwind {
624; RV64I-LABEL: sextb_i32:
625; RV64I:       # %bb.0:
626; RV64I-NEXT:    slli a0, a0, 56
627; RV64I-NEXT:    srai a0, a0, 56
628; RV64I-NEXT:    ret
629;
630; RV64XTHEADBB-LABEL: sextb_i32:
631; RV64XTHEADBB:       # %bb.0:
632; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
633; RV64XTHEADBB-NEXT:    ret
634  %shl = shl i32 %a, 24
635  %shr = ashr exact i32 %shl, 24
636  ret i32 %shr
637}
638
; sextb_i64: shl by 56 followed by ashr by 56 on an i64, i.e. sign extension
; of the low byte of %a.
; RV64I checks: slli 56 / srai 56. RV64XTHEADBB checks: th.ext a0, a0, 7, 0.
639define i64 @sextb_i64(i64 %a) nounwind {
640; RV64I-LABEL: sextb_i64:
641; RV64I:       # %bb.0:
642; RV64I-NEXT:    slli a0, a0, 56
643; RV64I-NEXT:    srai a0, a0, 56
644; RV64I-NEXT:    ret
645;
646; RV64XTHEADBB-LABEL: sextb_i64:
647; RV64XTHEADBB:       # %bb.0:
648; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
649; RV64XTHEADBB-NEXT:    ret
650  %shl = shl i64 %a, 56
651  %shr = ashr exact i64 %shl, 56
652  ret i64 %shr
653}
654
; sexth_i32: shl by 16 followed by ashr by 16 on an i32, i.e. sign extension
; of the low 16 bits of %a.
; RV64I checks: slli 48 / srai 48. RV64XTHEADBB checks: th.ext a0, a0, 15, 0.
655define signext i32 @sexth_i32(i32 signext %a) nounwind {
656; RV64I-LABEL: sexth_i32:
657; RV64I:       # %bb.0:
658; RV64I-NEXT:    slli a0, a0, 48
659; RV64I-NEXT:    srai a0, a0, 48
660; RV64I-NEXT:    ret
661;
662; RV64XTHEADBB-LABEL: sexth_i32:
663; RV64XTHEADBB:       # %bb.0:
664; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
665; RV64XTHEADBB-NEXT:    ret
666  %shl = shl i32 %a, 16
667  %shr = ashr exact i32 %shl, 16
668  ret i32 %shr
669}
670
; no_sexth_i32: negative test. The shift amounts differ (shl by 17, ashr by
; 16), so this is not a plain 16-bit sign extension.
; Both check sets expect the same slli 49 / srai 48 pair; RV64XTHEADBB must
; not use th.ext here.
671define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
672; RV64I-LABEL: no_sexth_i32:
673; RV64I:       # %bb.0:
674; RV64I-NEXT:    slli a0, a0, 49
675; RV64I-NEXT:    srai a0, a0, 48
676; RV64I-NEXT:    ret
677;
678; RV64XTHEADBB-LABEL: no_sexth_i32:
679; RV64XTHEADBB:       # %bb.0:
680; RV64XTHEADBB-NEXT:    slli a0, a0, 49
681; RV64XTHEADBB-NEXT:    srai a0, a0, 48
682; RV64XTHEADBB-NEXT:    ret
683  %shl = shl i32 %a, 17
684  %shr = ashr exact i32 %shl, 16
685  ret i32 %shr
686}
687
; sexth_i64: shl by 48 followed by ashr by 48 on an i64, i.e. sign extension
; of the low 16 bits of %a.
; RV64I checks: slli 48 / srai 48. RV64XTHEADBB checks: th.ext a0, a0, 15, 0.
688define i64 @sexth_i64(i64 %a) nounwind {
689; RV64I-LABEL: sexth_i64:
690; RV64I:       # %bb.0:
691; RV64I-NEXT:    slli a0, a0, 48
692; RV64I-NEXT:    srai a0, a0, 48
693; RV64I-NEXT:    ret
694;
695; RV64XTHEADBB-LABEL: sexth_i64:
696; RV64XTHEADBB:       # %bb.0:
697; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
698; RV64XTHEADBB-NEXT:    ret
699  %shl = shl i64 %a, 48
700  %shr = ashr exact i64 %shl, 48
701  ret i64 %shr
702}
703
; no_sexth_i64: negative test. The shift amounts differ (shl by 49, ashr by
; 48), so this is not a plain 16-bit sign extension.
; Both check sets expect the same slli 49 / srai 48 pair; RV64XTHEADBB must
; not use th.ext here.
704define i64 @no_sexth_i64(i64 %a) nounwind {
705; RV64I-LABEL: no_sexth_i64:
706; RV64I:       # %bb.0:
707; RV64I-NEXT:    slli a0, a0, 49
708; RV64I-NEXT:    srai a0, a0, 48
709; RV64I-NEXT:    ret
710;
711; RV64XTHEADBB-LABEL: no_sexth_i64:
712; RV64XTHEADBB:       # %bb.0:
713; RV64XTHEADBB-NEXT:    slli a0, a0, 49
714; RV64XTHEADBB-NEXT:    srai a0, a0, 48
715; RV64XTHEADBB-NEXT:    ret
716  %shl = shl i64 %a, 49
717  %shr = ashr exact i64 %shl, 48
718  ret i64 %shr
719}
720
; zexth_i32: masks an i32 with 65535, keeping only the low 16 bits of %a.
; RV64I checks: slli 48 / srli 48. RV64XTHEADBB checks: th.extu a0, a0, 15, 0.
721define i32 @zexth_i32(i32 %a) nounwind {
722; RV64I-LABEL: zexth_i32:
723; RV64I:       # %bb.0:
724; RV64I-NEXT:    slli a0, a0, 48
725; RV64I-NEXT:    srli a0, a0, 48
726; RV64I-NEXT:    ret
727;
728; RV64XTHEADBB-LABEL: zexth_i32:
729; RV64XTHEADBB:       # %bb.0:
730; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
731; RV64XTHEADBB-NEXT:    ret
732  %and = and i32 %a, 65535
733  ret i32 %and
734}
735
; zexth_i64: masks an i64 with 65535, keeping only the low 16 bits of %a.
; RV64I checks: slli 48 / srli 48. RV64XTHEADBB checks: th.extu a0, a0, 15, 0.
736define i64 @zexth_i64(i64 %a) nounwind {
737; RV64I-LABEL: zexth_i64:
738; RV64I:       # %bb.0:
739; RV64I-NEXT:    slli a0, a0, 48
740; RV64I-NEXT:    srli a0, a0, 48
741; RV64I-NEXT:    ret
742;
743; RV64XTHEADBB-LABEL: zexth_i64:
744; RV64XTHEADBB:       # %bb.0:
745; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
746; RV64XTHEADBB-NEXT:    ret
747  %and = and i64 %a, 65535
748  ret i64 %and
749}
750
; zext_bf_i64: shift-then-mask bitfield extract, (%a lshr 1) and 65535,
; which selects bits 16..1 of %a.
; RV64I checks: slli 47 / srli 48. RV64XTHEADBB checks: th.extu a0, a0, 16, 1.
751define i64 @zext_bf_i64(i64 %a) nounwind {
752; RV64I-LABEL: zext_bf_i64:
753; RV64I:       # %bb.0:
754; RV64I-NEXT:    slli a0, a0, 47
755; RV64I-NEXT:    srli a0, a0, 48
756; RV64I-NEXT:    ret
757;
758; RV64XTHEADBB-LABEL: zext_bf_i64:
759; RV64XTHEADBB:       # %bb.0:
760; RV64XTHEADBB-NEXT:    th.extu a0, a0, 16, 1
761; RV64XTHEADBB-NEXT:    ret
762  %1 = lshr i64 %a, 1
763  %and = and i64 %1, 65535
764  ret i64 %and
765}
766
; zext_bf2_i64: mask-then-shift bitfield extract, (%a and 65535) lshr 1,
; which selects bits 15..1 of %a.
; RV64I checks: slli 48 / srli 49. RV64XTHEADBB checks: th.extu a0, a0, 15, 1.
767define i64 @zext_bf2_i64(i64 %a) nounwind {
768; RV64I-LABEL: zext_bf2_i64:
769; RV64I:       # %bb.0:
770; RV64I-NEXT:    slli a0, a0, 48
771; RV64I-NEXT:    srli a0, a0, 49
772; RV64I-NEXT:    ret
773;
774; RV64XTHEADBB-LABEL: zext_bf2_i64:
775; RV64XTHEADBB:       # %bb.0:
776; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 1
777; RV64XTHEADBB-NEXT:    ret
778  %t0 = and i64 %a, 65535
779  %result = lshr i64 %t0, 1
780  ret i64 %result
781}
782
; zext_i64_srliw: (%a lshr 16) and 65535, which selects bits 31..16 of %a.
; Both check sets expect a single srliw a0, a0, 16; RV64XTHEADBB must not
; use th.extu here.
783define i64 @zext_i64_srliw(i64 %a) nounwind {
784; RV64I-LABEL: zext_i64_srliw:
785; RV64I:       # %bb.0:
786; RV64I-NEXT:    srliw a0, a0, 16
787; RV64I-NEXT:    ret
788;
789; RV64XTHEADBB-LABEL: zext_i64_srliw:
790; RV64XTHEADBB:       # %bb.0:
791; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
792; RV64XTHEADBB-NEXT:    ret
793  %1 = lshr i64 %a, 16
794  %and = and i64 %1, 65535
795  ret i64 %and
796}
797
798declare i32 @llvm.bswap.i32(i32)
799
; bswap_i32: returns llvm.bswap.i32(%a) as a signext i32.
; RV64I checks: a shift/mask/or expansion whose top byte is produced with
; slliw. RV64XTHEADBB checks: a single th.revw.
800define signext i32 @bswap_i32(i32 signext %a) nounwind {
801; RV64I-LABEL: bswap_i32:
802; RV64I:       # %bb.0:
803; RV64I-NEXT:    srli a1, a0, 8
804; RV64I-NEXT:    lui a2, 16
805; RV64I-NEXT:    srliw a3, a0, 24
806; RV64I-NEXT:    addiw a2, a2, -256
807; RV64I-NEXT:    and a1, a1, a2
808; RV64I-NEXT:    and a2, a0, a2
809; RV64I-NEXT:    or a1, a1, a3
810; RV64I-NEXT:    slli a2, a2, 8
811; RV64I-NEXT:    slliw a0, a0, 24
812; RV64I-NEXT:    or a0, a0, a2
813; RV64I-NEXT:    or a0, a0, a1
814; RV64I-NEXT:    ret
815;
816; RV64XTHEADBB-LABEL: bswap_i32:
817; RV64XTHEADBB:       # %bb.0:
818; RV64XTHEADBB-NEXT:    th.revw a0, a0
819; RV64XTHEADBB-NEXT:    ret
820  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
821  ret i32 %1
822}
823
824; Similar to bswap_i32 but the result is not sign extended.
; The swapped value is stored through %x with sw instead of being returned.
; RV64I checks: the same expansion as bswap_i32 but with slli/addi in place
; of slliw/addiw. RV64XTHEADBB checks: th.revw followed by the sw.
825define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
826; RV64I-LABEL: bswap_i32_nosext:
827; RV64I:       # %bb.0:
828; RV64I-NEXT:    srli a2, a0, 8
829; RV64I-NEXT:    lui a3, 16
830; RV64I-NEXT:    srliw a4, a0, 24
831; RV64I-NEXT:    addi a3, a3, -256
832; RV64I-NEXT:    and a2, a2, a3
833; RV64I-NEXT:    and a3, a0, a3
834; RV64I-NEXT:    or a2, a2, a4
835; RV64I-NEXT:    slli a3, a3, 8
836; RV64I-NEXT:    slli a0, a0, 24
837; RV64I-NEXT:    or a0, a0, a3
838; RV64I-NEXT:    or a0, a0, a2
839; RV64I-NEXT:    sw a0, 0(a1)
840; RV64I-NEXT:    ret
841;
842; RV64XTHEADBB-LABEL: bswap_i32_nosext:
843; RV64XTHEADBB:       # %bb.0:
844; RV64XTHEADBB-NEXT:    th.revw a0, a0
845; RV64XTHEADBB-NEXT:    sw a0, 0(a1)
846; RV64XTHEADBB-NEXT:    ret
847  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
848  store i32 %1, ptr %x
849  ret void
850}
851
852declare i64 @llvm.bswap.i64(i64)
853
; bswap_i64: returns llvm.bswap.i64(%a).
; RV64I checks: a long shift/mask/or expansion. RV64XTHEADBB checks: a
; single th.rev.
; NOTE(review): this function has no nounwind attribute, unlike most other
; functions in this file - confirm that is intentional.
854define i64 @bswap_i64(i64 %a) {
855; RV64I-LABEL: bswap_i64:
856; RV64I:       # %bb.0:
857; RV64I-NEXT:    srli a1, a0, 40
858; RV64I-NEXT:    lui a2, 16
859; RV64I-NEXT:    srli a3, a0, 56
860; RV64I-NEXT:    srli a4, a0, 24
861; RV64I-NEXT:    lui a5, 4080
862; RV64I-NEXT:    addiw a2, a2, -256
863; RV64I-NEXT:    and a1, a1, a2
864; RV64I-NEXT:    or a1, a1, a3
865; RV64I-NEXT:    srli a3, a0, 8
866; RV64I-NEXT:    and a4, a4, a5
867; RV64I-NEXT:    srliw a3, a3, 24
868; RV64I-NEXT:    slli a3, a3, 24
869; RV64I-NEXT:    or a3, a3, a4
870; RV64I-NEXT:    srliw a4, a0, 24
871; RV64I-NEXT:    and a5, a0, a5
872; RV64I-NEXT:    and a2, a0, a2
873; RV64I-NEXT:    slli a0, a0, 56
874; RV64I-NEXT:    slli a4, a4, 32
875; RV64I-NEXT:    slli a5, a5, 24
876; RV64I-NEXT:    or a4, a5, a4
877; RV64I-NEXT:    slli a2, a2, 40
878; RV64I-NEXT:    or a1, a3, a1
879; RV64I-NEXT:    or a0, a0, a2
880; RV64I-NEXT:    or a0, a0, a4
881; RV64I-NEXT:    or a0, a0, a1
882; RV64I-NEXT:    ret
883;
884; RV64XTHEADBB-LABEL: bswap_i64:
885; RV64XTHEADBB:       # %bb.0:
886; RV64XTHEADBB-NEXT:    th.rev a0, a0
887; RV64XTHEADBB-NEXT:    ret
888  %1 = call i64 @llvm.bswap.i64(i64 %a)
889  ret i64 %1
890}
891