; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV64

; Tests copied from llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
; to exercise shouldFormOverflowOp on RISC-V.
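;
; CodeGenPrepare may rewrite the add/icmp idioms below into calls to the
; llvm.uadd.with.overflow / llvm.usub.with.overflow intrinsics when
; shouldFormOverflowOp says that is profitable for the target. A rough sketch
; of the uaddo rewrite (the value names here are illustrative only):
;   %add = add i64 %b, %a         -->  %t   = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %b, i64 %a)
;   %cmp = icmp ult i64 %add, %a       %add = extractvalue { i64, i1 } %t, 0
;                                      %cmp = extractvalue { i64, i1 } %t, 1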

define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo1_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a4, a2, a0
; RV32-NEXT:    sltu a6, a4, a2
; RV32-NEXT:    add a5, a5, a6
; RV32-NEXT:    beq a5, a1, .LBB0_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a5, a1
; RV32-NEXT:    beqz a0, .LBB0_3
; RV32-NEXT:    j .LBB0_4
; RV32-NEXT:  .LBB0_2:
; RV32-NEXT:    sltu a0, a4, a0
; RV32-NEXT:    bnez a0, .LBB0_4
; RV32-NEXT:  .LBB0_3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB0_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo1_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a2, a1, a0
; RV64-NEXT:    bltu a2, a0, .LBB0_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB0_2:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo1_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a1, a0, a2
; RV32-NEXT:    add a5, a5, a1
; RV32-NEXT:    beq a5, a3, .LBB1_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a1, a5, a3
; RV32-NEXT:  .LBB1_2:
; RV32-NEXT:    bnez a1, .LBB1_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB1_4:
; RV32-NEXT:    neg a1, a1
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    sw a0, 0(a4)
; RV32-NEXT:    sw a5, 4(a4)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo1_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB1_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB1_2:
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

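; Same as uaddo1, but the compare below is against the other add operand (%b).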
define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo2_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    beq a1, a3, .LBB2_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB2_2:
; RV32-NEXT:    bnez a0, .LBB2_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB2_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo2_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB2_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB2_2:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo2_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a1, a0, a2
; RV32-NEXT:    add a5, a5, a1
; RV32-NEXT:    beq a5, a3, .LBB3_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a1, a5, a3
; RV32-NEXT:  .LBB3_2:
; RV32-NEXT:    bnez a1, .LBB3_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB3_4:
; RV32-NEXT:    neg a1, a1
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    sw a0, 0(a4)
; RV32-NEXT:    sw a5, 4(a4)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo2_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB3_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB3_2:
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

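; Same pattern as uaddo2, but the compare is written as ugt with the operands
; swapped.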
define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo3_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    beq a3, a1, .LBB4_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB4_2:
; RV32-NEXT:    bnez a0, .LBB4_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB4_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo3_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB4_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB4_2:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo3_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a1, a0, a2
; RV32-NEXT:    add a5, a5, a1
; RV32-NEXT:    beq a5, a3, .LBB5_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a1, a5, a3
; RV32-NEXT:  .LBB5_2:
; RV32-NEXT:    bnez a1, .LBB5_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB5_4:
; RV32-NEXT:    neg a1, a1
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    sw a0, 0(a4)
; RV32-NEXT:    sw a5, 4(a4)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo3_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB5_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB5_2:
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.

define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    andi a4, a4, 1
; RV32-NEXT:    beqz a4, .LBB6_6
; RV32-NEXT:  # %bb.1: # %next
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    beq a3, a1, .LBB6_3
; RV32-NEXT:  # %bb.2: # %next
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB6_3: # %next
; RV32-NEXT:    bnez a0, .LBB6_5
; RV32-NEXT:  # %bb.4: # %next
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB6_5: # %next
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB6_6: # %exit
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    andi a2, a2, 1
; RV64-NEXT:    beqz a2, .LBB6_4
; RV64-NEXT:  # %bb.1: # %next
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB6_3
; RV64-NEXT:  # %bb.2: # %next
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB6_3: # %next
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB6_4: # %exit
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
entry:
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo5:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    andi a5, a5, 1
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a6, a2, a0
; RV32-NEXT:    sltu a0, a6, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    sw a6, 0(a4)
; RV32-NEXT:    sw a1, 4(a4)
; RV32-NEXT:    beqz a5, .LBB7_6
; RV32-NEXT:  # %bb.1: # %next
; RV32-NEXT:    beq a3, a1, .LBB7_3
; RV32-NEXT:  # %bb.2: # %next
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB7_3: # %next
; RV32-NEXT:    bnez a0, .LBB7_5
; RV32-NEXT:  # %bb.4: # %next
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB7_5: # %next
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB7_6: # %exit
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo5:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    andi a3, a3, 1
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    beqz a3, .LBB7_4
; RV64-NEXT:  # %bb.1: # %next
; RV64-NEXT:    bltu a0, a1, .LBB7_3
; RV64-NEXT:  # %bb.2: # %next
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB7_3: # %next
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB7_4: # %exit
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
entry:
  %add = add i64 %b, %a
  store i64 %add, ptr %ptr
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

; Instcombine folds (a + b <u a) to (a ^ -1 <u b). Make sure we match this
; pattern as well.
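; The two forms compute the same condition: with unsigned i64 arithmetic,
; a + b wraps exactly when b > ~a, which is what the xor/icmp pair below tests:
;   %x = xor i64 %a, -1          ; ~a
;   %cmp = icmp ult i64 %x, %b   ; same i1 result as (a + b <u a)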
define i64 @uaddo6_xor(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor:
; RV32:       # %bb.0:
; RV32-NEXT:    not a1, a1
; RV32-NEXT:    beq a1, a3, .LBB8_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:    beqz a0, .LBB8_3
; RV32-NEXT:    j .LBB8_4
; RV32-NEXT:  .LBB8_2:
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    bnez a0, .LBB8_4
; RV32-NEXT:  .LBB8_3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB8_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor:
; RV64:       # %bb.0:
; RV64-NEXT:    not a2, a0
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    bltu a2, a1, .LBB8_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 42
; RV64-NEXT:  .LBB8_2:
; RV64-NEXT:    ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_commuted:
; RV32:       # %bb.0:
; RV32-NEXT:    not a1, a1
; RV32-NEXT:    beq a1, a3, .LBB9_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:    beqz a0, .LBB9_3
; RV32-NEXT:    j .LBB9_4
; RV32-NEXT:  .LBB9_2:
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    bnez a0, .LBB9_4
; RV32-NEXT:  .LBB9_3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB9_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor_commuted:
; RV64:       # %bb.0:
; RV64-NEXT:    not a2, a0
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    bltu a2, a1, .LBB9_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 42
; RV64-NEXT:  .LBB9_2:
; RV64-NEXT:    ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

declare void @use(i64)

define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_multi_use:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    mv s0, a2
; RV32-NEXT:    not a1, a1
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    beq a1, a3, .LBB10_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a2, a1, a3
; RV32-NEXT:    beqz a2, .LBB10_3
; RV32-NEXT:    j .LBB10_4
; RV32-NEXT:  .LBB10_2:
; RV32-NEXT:    sltu a2, a0, s0
; RV32-NEXT:    bnez a2, .LBB10_4
; RV32-NEXT:  .LBB10_3:
; RV32-NEXT:    li s0, 42
; RV32-NEXT:  .LBB10_4:
; RV32-NEXT:    neg s1, a2
; RV32-NEXT:    and s1, s1, a3
; RV32-NEXT:    call use@plt
; RV32-NEXT:    mv a0, s0
; RV32-NEXT:    mv a1, s1
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor_multi_use:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    not a0, a0
; RV64-NEXT:    mv s0, a1
; RV64-NEXT:    bltu a0, a1, .LBB10_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li s0, 42
; RV64-NEXT:  .LBB10_2:
; RV64-NEXT:    call use@plt
; RV64-NEXT:    mv a0, s0
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %x = xor i64 -1, %a
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  call void @use(i64 %x)
  ret i64 %Q
}

; Make sure we do not use the XOR binary operator as insert point, as it may
; come before the second operand of the overflow intrinsic.
define i1 @uaddo6_xor_op_after_XOR(i32 %a, ptr %b.ptr) {
; RV32-LABEL: uaddo6_xor_op_after_XOR:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    sltu a0, a0, a1
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor_op_after_XOR:
; RV64:       # %bb.0:
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    not a0, a0
; RV64-NEXT:    sext.w a0, a0
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    ret
  %x = xor i32 %a, -1
  %b = load i32, ptr %b.ptr, align 8
  %cmp14 = icmp ugt i32 %b, %x
  %ov = xor i1 %cmp14, true
  ret i1 %ov
}

; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754
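; For x + 1 the sum wraps exactly when x was all-ones, i.e. when the sum itself
; is 0, which is why the tests below compare the result against 0.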

define i1 @uaddo_i64_increment(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment:
; RV32:       # %bb.0:
; RV32-NEXT:    mv a3, a0
; RV32-NEXT:    addi a4, a0, 1
; RV32-NEXT:    sltu a0, a4, a0
; RV32-NEXT:    add a5, a1, a0
; RV32-NEXT:    bgeu a4, a3, .LBB12_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a5, a1
; RV32-NEXT:  .LBB12_2:
; RV32-NEXT:    sw a4, 0(a2)
; RV32-NEXT:    sw a5, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_increment:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sd a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i64 %x, 1
  %ov = icmp eq i64 %a, 0
  store i64 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) {
; RV32-LABEL: uaddo_i8_increment_noncanonical_1:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a0, a0, 255
; RV32-NEXT:    addi a2, a0, 1
; RV32-NEXT:    andi a0, a2, 255
; RV32-NEXT:    xor a0, a0, a2
; RV32-NEXT:    snez a0, a0
; RV32-NEXT:    sb a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i8_increment_noncanonical_1:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a0, a0, 255
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    andi a0, a2, 255
; RV64-NEXT:    xor a0, a0, a2
; RV64-NEXT:    snez a0, a0
; RV64-NEXT:    sb a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i8 1, %x        ; commute
  %ov = icmp eq i8 %a, 0
  store i8 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, ptr %p) {
; RV32-LABEL: uaddo_i32_increment_noncanonical_2:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a2, a0, 1
; RV32-NEXT:    seqz a0, a2
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i32_increment_noncanonical_2:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sw a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i32 %x, 1
  %ov = icmp eq i32 0, %a   ; commute
  store i32 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV32-LABEL: uaddo_i16_increment_noncanonical_3:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a2, 16
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    addi a3, a0, 1
; RV32-NEXT:    and a2, a3, a2
; RV32-NEXT:    xor a2, a2, a3
; RV32-NEXT:    snez a0, a2
; RV32-NEXT:    sh a3, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i16_increment_noncanonical_3:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -1
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    addi a3, a0, 1
; RV64-NEXT:    and a2, a3, a2
; RV64-NEXT:    xor a2, a2, a3
; RV64-NEXT:    snez a0, a2
; RV64-NEXT:    sh a3, 0(a1)
; RV64-NEXT:    ret
  %a = add i16 1, %x        ; commute
  %ov = icmp eq i16 0, %a   ; commute
  store i16 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.
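; For the increment below, "%x == -1" is just another way of saying that the
; sum wrapped to 0, so it expresses the same overflow condition.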

define i1 @uaddo_i64_increment_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a0, 1
; RV32-NEXT:    sltu a4, a3, a0
; RV32-NEXT:    add a4, a1, a4
; RV32-NEXT:    sw a3, 0(a2)
; RV32-NEXT:    and a0, a0, a1
; RV32-NEXT:    addi a0, a0, 1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a4, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_increment_alt:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sd a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  %ov = icmp eq i64 %x, -1
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.

define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt_dom:
; RV32:       # %bb.0:
; RV32-NEXT:    and a3, a0, a1
; RV32-NEXT:    addi a3, a3, 1
; RV32-NEXT:    seqz a3, a3
; RV32-NEXT:    addi a4, a0, 1
; RV32-NEXT:    sltu a0, a4, a0
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    sw a4, 0(a2)
; RV32-NEXT:    sw a0, 4(a2)
; RV32-NEXT:    mv a0, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_increment_alt_dom:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sd a2, 0(a1)
; RV64-NEXT:    ret
  %ov = icmp eq i64 %x, -1
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.

define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a0, -1
; RV32-NEXT:    sltu a4, a3, a0
; RV32-NEXT:    add a4, a1, a4
; RV32-NEXT:    addi a4, a4, -1
; RV32-NEXT:    sw a3, 0(a2)
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    snez a0, a0
; RV32-NEXT:    sw a4, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_decrement_alt:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, -1
; RV64-NEXT:    snez a0, a0
; RV64-NEXT:    sd a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  %ov = icmp ne i64 %x, 0
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.

define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt_dom:
; RV32:       # %bb.0:
; RV32-NEXT:    or a3, a0, a1
; RV32-NEXT:    snez a3, a3
; RV32-NEXT:    addi a4, a0, -1
; RV32-NEXT:    sltu a0, a4, a0
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    sw a4, 0(a2)
; RV32-NEXT:    sw a0, 4(a2)
; RV32-NEXT:    mv a0, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_decrement_alt_dom:
; RV64:       # %bb.0:
; RV64-NEXT:    snez a2, a0
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    sd a0, 0(a1)
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    ret
  %ov = icmp ne i64 %x, 0
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  ret i1 %ov
}

; No transform for illegal types.

define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
; RV32-LABEL: uaddo_i42_increment_illegal_type:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a0, 1
; RV32-NEXT:    sltu a0, a3, a0
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    andi a1, a0, 1023
; RV32-NEXT:    or a0, a3, a1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a3, 0(a2)
; RV32-NEXT:    sh a1, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i42_increment_illegal_type:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    slli a0, a2, 22
; RV64-NEXT:    srli a3, a0, 22
; RV64-NEXT:    seqz a0, a3
; RV64-NEXT:    sw a2, 0(a1)
; RV64-NEXT:    srli a3, a3, 32
; RV64-NEXT:    sh a3, 4(a1)
; RV64-NEXT:    ret
  %a = add i42 %x, 1
  %ov = icmp eq i42 %a, 0
  store i42 %a, ptr %p
  ret i1 %ov
}

define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    beq a1, a3, .LBB21_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB21_2:
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_i64_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    ret
  %s = sub i64 %x, %y
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    mv a5, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    sub a6, a1, a3
; RV32-NEXT:    sub a6, a6, a0
; RV32-NEXT:    sub a5, a5, a2
; RV32-NEXT:    sw a5, 0(a4)
; RV32-NEXT:    sw a6, 4(a4)
; RV32-NEXT:    beq a1, a3, .LBB22_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB22_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_i64_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    sub a3, a0, a1
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    sd a3, 0(a2)
; RV64-NEXT:    ret
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

; Verify insertion point for single-BB. Toggle predicate.

define i1 @usubo_ugt_i32(i32 %x, i32 %y, ptr %p) {
; RV32-LABEL: usubo_ugt_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    sltu a3, a0, a1
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    sw a0, 0(a2)
; RV32-NEXT:    mv a0, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ugt_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a3, a1
; RV64-NEXT:    sext.w a4, a0
; RV64-NEXT:    sltu a3, a4, a3
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    sw a0, 0(a2)
; RV64-NEXT:    mv a0, a3
; RV64-NEXT:    ret
  %ov = icmp ugt i32 %y, %x
  %s = sub i32 %x, %y
  store i32 %s, ptr %p
  ret i1 %ov
}

; Constant operand should match.

define i1 @usubo_ugt_constant_op0_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op0_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a2, a0, 255
; RV32-NEXT:    li a3, 42
; RV32-NEXT:    sub a3, a3, a0
; RV32-NEXT:    sltiu a0, a2, 43
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    sb a3, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ugt_constant_op0_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a2, a0, 255
; RV64-NEXT:    li a3, 42
; RV64-NEXT:    subw a3, a3, a0
; RV64-NEXT:    sltiu a0, a2, 43
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    sb a3, 0(a1)
; RV64-NEXT:    ret
  %s = sub i8 42, %x
  %ov = icmp ugt i8 %x, 42
  store i8 %s, ptr %p
  ret i1 %ov
}

; Compare with constant operand 0 is canonicalized by commuting, but verify match for non-canonical form.

define i1 @usubo_ult_constant_op0_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op0_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a2, a0, 16
; RV32-NEXT:    srli a2, a2, 16
; RV32-NEXT:    li a3, 43
; RV32-NEXT:    sub a3, a3, a0
; RV32-NEXT:    sltiu a0, a2, 44
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    sh a3, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_constant_op0_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a2, a0, 48
; RV64-NEXT:    srli a2, a2, 48
; RV64-NEXT:    li a3, 43
; RV64-NEXT:    subw a3, a3, a0
; RV64-NEXT:    sltiu a0, a2, 44
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    sh a3, 0(a1)
; RV64-NEXT:    ret
  %s = sub i16 43, %x
  %ov = icmp ult i16 43, %x
  store i16 %s, ptr %p
  ret i1 %ov
}

; Subtract with constant operand 1 is canonicalized to add.

define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op1_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a2, a0, 16
; RV32-NEXT:    srli a2, a2, 16
; RV32-NEXT:    addi a3, a0, -44
; RV32-NEXT:    sltiu a0, a2, 44
; RV32-NEXT:    sh a3, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_constant_op1_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a2, a0, 48
; RV64-NEXT:    srli a2, a2, 48
; RV64-NEXT:    addiw a3, a0, -44
; RV64-NEXT:    sltiu a0, a2, 44
; RV64-NEXT:    sh a3, 0(a1)
; RV64-NEXT:    ret
  %s = add i16 %x, -44
  %ov = icmp ult i16 %x, 44
  store i16 %s, ptr %p
  ret i1 %ov
}

define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op1_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a2, a0, 255
; RV32-NEXT:    sltiu a2, a2, 45
; RV32-NEXT:    addi a0, a0, -45
; RV32-NEXT:    sb a0, 0(a1)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ugt_constant_op1_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a2, a0, 255
; RV64-NEXT:    sltiu a2, a2, 45
; RV64-NEXT:    addiw a0, a0, -45
; RV64-NEXT:    sb a0, 0(a1)
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    ret
  %ov = icmp ugt i8 45, %x
  %s = add i8 %x, -45
  store i8 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract 1 changes the compare predicate and constant.
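; x - 1 underflows exactly when x == 0, so the "ult 1" check becomes an
; equality test against 0.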

define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_eq_constant1_op1_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a2, a0, -1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_eq_constant1_op1_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a2, a0
; RV64-NEXT:    addiw a3, a0, -1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sw a3, 0(a1)
; RV64-NEXT:    ret
  %s = add i32 %x, -1
  %ov = icmp eq i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract from 0 (negate) changes the compare predicate.
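; 0 - x underflows for any nonzero x, so the check is simply "x != 0".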

define i1 @usubo_ne_constant0_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_ne_constant0_op1_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    neg a2, a0
; RV32-NEXT:    snez a0, a0
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ne_constant0_op1_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a2, a0
; RV64-NEXT:    negw a3, a0
; RV64-NEXT:    snez a0, a2
; RV64-NEXT:    sw a3, 0(a1)
; RV64-NEXT:    ret
  %s = sub i32 0, %x
  %ov = icmp ne i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; This used to verify insertion point for multi-BB, but now we just bail out.

declare void @call(i1)

define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_sub_dominates_i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    andi a7, a5, 1
; RV32-NEXT:    beqz a7, .LBB30_5
; RV32-NEXT:  # %bb.1: # %t
; RV32-NEXT:    mv a6, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    sub t0, a1, a3
; RV32-NEXT:    sub t0, t0, a0
; RV32-NEXT:    sub a2, a6, a2
; RV32-NEXT:    sw a2, 0(a4)
; RV32-NEXT:    sw t0, 4(a4)
; RV32-NEXT:    beqz a7, .LBB30_5
; RV32-NEXT:  # %bb.2: # %end
; RV32-NEXT:    beq a1, a3, .LBB30_4
; RV32-NEXT:  # %bb.3: # %end
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB30_4: # %end
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB30_5: # %f
; RV32-NEXT:    mv a0, a5
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_sub_dominates_i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    andi a4, a3, 1
; RV64-NEXT:    beqz a4, .LBB30_3
; RV64-NEXT:  # %bb.1: # %t
; RV64-NEXT:    sub a5, a0, a1
; RV64-NEXT:    sd a5, 0(a2)
; RV64-NEXT:    beqz a4, .LBB30_3
; RV64-NEXT:  # %bb.2: # %end
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB30_3: # %f
; RV64-NEXT:    mv a0, a3
; RV64-NEXT:    ret
entry:
  br i1 %cond, label %t, label %f

t:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  br i1 %cond, label %end, label %f

f:
  ret i1 %cond

end:
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_cmp_dominates_i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    mv s4, a5
; RV32-NEXT:    andi a5, a5, 1
; RV32-NEXT:    beqz a5, .LBB31_8
; RV32-NEXT:  # %bb.1: # %t
; RV32-NEXT:    mv s0, a4
; RV32-NEXT:    mv s3, a3
; RV32-NEXT:    mv s1, a2
; RV32-NEXT:    mv s5, a1
; RV32-NEXT:    mv s2, a0
; RV32-NEXT:    beq a1, a3, .LBB31_3
; RV32-NEXT:  # %bb.2: # %t
; RV32-NEXT:    sltu s6, s5, s3
; RV32-NEXT:    j .LBB31_4
; RV32-NEXT:  .LBB31_3:
; RV32-NEXT:    sltu s6, s2, s1
; RV32-NEXT:  .LBB31_4: # %t
; RV32-NEXT:    mv a0, s6
; RV32-NEXT:    call call@plt
; RV32-NEXT:    beqz s6, .LBB31_8
; RV32-NEXT:  # %bb.5: # %end
; RV32-NEXT:    sltu a1, s2, s1
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:    beq s5, s3, .LBB31_7
; RV32-NEXT:  # %bb.6: # %end
; RV32-NEXT:    sltu a0, s5, s3
; RV32-NEXT:  .LBB31_7: # %end
; RV32-NEXT:    sub a2, s5, s3
; RV32-NEXT:    sub a2, a2, a1
; RV32-NEXT:    sub a1, s2, s1
; RV32-NEXT:    sw a1, 0(s0)
; RV32-NEXT:    sw a2, 4(s0)
; RV32-NEXT:    j .LBB31_9
; RV32-NEXT:  .LBB31_8: # %f
; RV32-NEXT:    mv a0, s4
; RV32-NEXT:  .LBB31_9: # %f
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_cmp_dominates_i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi sp, sp, -48
; RV64-NEXT:    .cfi_def_cfa_offset 48
; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s1, -24
; RV64-NEXT:    .cfi_offset s2, -32
; RV64-NEXT:    .cfi_offset s3, -40
; RV64-NEXT:    .cfi_offset s4, -48
; RV64-NEXT:    mv s0, a3
; RV64-NEXT:    andi a3, a3, 1
; RV64-NEXT:    beqz a3, .LBB31_3
; RV64-NEXT:  # %bb.1: # %t
; RV64-NEXT:    mv s1, a2
; RV64-NEXT:    mv s2, a1
; RV64-NEXT:    mv s3, a0
; RV64-NEXT:    sltu s4, a0, a1
; RV64-NEXT:    mv a0, s4
; RV64-NEXT:    call call@plt
; RV64-NEXT:    bgeu s3, s2, .LBB31_3
; RV64-NEXT:  # %bb.2: # %end
; RV64-NEXT:    sub a0, s3, s2
; RV64-NEXT:    sd a0, 0(s1)
; RV64-NEXT:    mv a0, s4
; RV64-NEXT:    j .LBB31_4
; RV64-NEXT:  .LBB31_3: # %f
; RV64-NEXT:    mv a0, s0
; RV64-NEXT:  .LBB31_4: # %f
; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 48
; RV64-NEXT:    ret
entry:
  br i1 %cond, label %t, label %f

t:
  %ov = icmp ult i64 %x, %y
  call void @call(i1 %ov)
  br i1 %ov, label %end, label %f

f:
  ret i1 %cond

end:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  ret i1 %ov
}

; Verify that crazy/non-canonical code does not crash.

define void @bar() {
; RV32-LABEL: bar:
; RV32:       # %bb.0:
;
; RV64-LABEL: bar:
; RV64:       # %bb.0:
  %cmp = icmp eq i64 1, -1
  %frombool = zext i1 %cmp to i8
  unreachable
}

define void @foo() {
; RV32-LABEL: foo:
; RV32:       # %bb.0:
;
; RV64-LABEL: foo:
; RV64:       # %bb.0:
  %sub = add nsw i64 1, 1
  %conv = trunc i64 %sub to i32
  unreachable
}

; Similarly for usubo.

define i1 @bar2() {
; RV32-LABEL: bar2:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bar2:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
  %cmp = icmp eq i64 1, 0
  ret i1 %cmp
}

define i64 @foo2(ptr %p) {
; RV32-LABEL: foo2:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: foo2:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
  %sub = add nsw i64 1, -1
  ret i64 %sub
}

; Avoid hoisting a math op into a dominating block which would
; increase the critical path.

define void @PR41129(ptr %p64) {
; RV32-LABEL: PR41129:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lw a1, 4(a0)
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    or a3, a2, a1
; RV32-NEXT:    beqz a3, .LBB36_2
; RV32-NEXT:  # %bb.1: # %false
; RV32-NEXT:    andi a2, a2, 7
; RV32-NEXT:    sw zero, 4(a0)
; RV32-NEXT:    sw a2, 0(a0)
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB36_2: # %true
; RV32-NEXT:    addi a3, a2, -1
; RV32-NEXT:    sltu a2, a3, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    sw a3, 0(a0)
; RV32-NEXT:    sw a1, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: PR41129:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    beqz a1, .LBB36_2
; RV64-NEXT:  # %bb.1: # %false
; RV64-NEXT:    andi a1, a1, 7
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB36_2: # %true
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
entry:
  %key = load i64, ptr %p64, align 8
  %cond17 = icmp eq i64 %key, 0
  br i1 %cond17, label %true, label %false

false:
  %andval = and i64 %key, 7
  store i64 %andval, ptr %p64
  br label %exit

true:
  %svalue = add i64 %key, -1
  store i64 %svalue, ptr %p64
  br label %exit

exit:
  ret void
}

1287