xref: /llvm-project/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3; RUN:   | FileCheck -check-prefix=RV32I %s
4; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
5; RUN:   | FileCheck -check-prefix=RV32IA %s
6; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
7; RUN:   | FileCheck -check-prefix=RV32IA %s
8; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
9; RUN:   | FileCheck -check-prefix=RV64I %s
10; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
11; RUN:   | FileCheck -check-prefix=RV64IA %s
12; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
13; RUN:   | FileCheck -check-prefix=RV64IA %s
14
15
16define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
17; RV32I-LABEL: atomicrmw_usub_cond_i8:
18; RV32I:       # %bb.0:
19; RV32I-NEXT:    addi sp, sp, -32
20; RV32I-NEXT:    .cfi_def_cfa_offset 32
21; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
22; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
23; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
24; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
25; RV32I-NEXT:    .cfi_offset ra, -4
26; RV32I-NEXT:    .cfi_offset s0, -8
27; RV32I-NEXT:    .cfi_offset s1, -12
28; RV32I-NEXT:    .cfi_offset s2, -16
29; RV32I-NEXT:    mv s0, a0
30; RV32I-NEXT:    lbu a3, 0(a0)
31; RV32I-NEXT:    mv s1, a1
32; RV32I-NEXT:    andi s2, a1, 255
33; RV32I-NEXT:  .LBB0_1: # %atomicrmw.start
34; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
35; RV32I-NEXT:    andi a0, a3, 255
36; RV32I-NEXT:    sltu a0, a0, s2
37; RV32I-NEXT:    addi a0, a0, -1
38; RV32I-NEXT:    and a0, a0, s1
39; RV32I-NEXT:    sub a2, a3, a0
40; RV32I-NEXT:    sb a3, 15(sp)
41; RV32I-NEXT:    addi a1, sp, 15
42; RV32I-NEXT:    li a3, 5
43; RV32I-NEXT:    li a4, 5
44; RV32I-NEXT:    mv a0, s0
45; RV32I-NEXT:    call __atomic_compare_exchange_1
46; RV32I-NEXT:    lbu a3, 15(sp)
47; RV32I-NEXT:    beqz a0, .LBB0_1
48; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
49; RV32I-NEXT:    mv a0, a3
50; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
51; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
52; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
53; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
54; RV32I-NEXT:    .cfi_restore ra
55; RV32I-NEXT:    .cfi_restore s0
56; RV32I-NEXT:    .cfi_restore s1
57; RV32I-NEXT:    .cfi_restore s2
58; RV32I-NEXT:    addi sp, sp, 32
59; RV32I-NEXT:    .cfi_def_cfa_offset 0
60; RV32I-NEXT:    ret
61;
62; RV32IA-LABEL: atomicrmw_usub_cond_i8:
63; RV32IA:       # %bb.0:
64; RV32IA-NEXT:    andi a2, a0, -4
65; RV32IA-NEXT:    slli a3, a0, 3
66; RV32IA-NEXT:    li a4, 255
67; RV32IA-NEXT:    andi a0, a3, 24
68; RV32IA-NEXT:    lw a5, 0(a2)
69; RV32IA-NEXT:    sll a3, a4, a3
70; RV32IA-NEXT:    not a3, a3
71; RV32IA-NEXT:    andi a4, a1, 255
72; RV32IA-NEXT:  .LBB0_1: # %atomicrmw.start
73; RV32IA-NEXT:    # =>This Loop Header: Depth=1
74; RV32IA-NEXT:    # Child Loop BB0_3 Depth 2
75; RV32IA-NEXT:    mv a6, a5
76; RV32IA-NEXT:    srl a5, a5, a0
77; RV32IA-NEXT:    andi a7, a5, 255
78; RV32IA-NEXT:    sltu a7, a7, a4
79; RV32IA-NEXT:    addi a7, a7, -1
80; RV32IA-NEXT:    and a7, a7, a1
81; RV32IA-NEXT:    sub a5, a5, a7
82; RV32IA-NEXT:    andi a5, a5, 255
83; RV32IA-NEXT:    sll a5, a5, a0
84; RV32IA-NEXT:    and a7, a6, a3
85; RV32IA-NEXT:    or a7, a7, a5
86; RV32IA-NEXT:  .LBB0_3: # %atomicrmw.start
87; RV32IA-NEXT:    # Parent Loop BB0_1 Depth=1
88; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
89; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
90; RV32IA-NEXT:    bne a5, a6, .LBB0_1
91; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
92; RV32IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
93; RV32IA-NEXT:    sc.w.rl t0, a7, (a2)
94; RV32IA-NEXT:    bnez t0, .LBB0_3
95; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
96; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
97; RV32IA-NEXT:    srl a0, a5, a0
98; RV32IA-NEXT:    ret
99;
100; RV64I-LABEL: atomicrmw_usub_cond_i8:
101; RV64I:       # %bb.0:
102; RV64I-NEXT:    addi sp, sp, -48
103; RV64I-NEXT:    .cfi_def_cfa_offset 48
104; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
105; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
106; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
107; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
108; RV64I-NEXT:    .cfi_offset ra, -8
109; RV64I-NEXT:    .cfi_offset s0, -16
110; RV64I-NEXT:    .cfi_offset s1, -24
111; RV64I-NEXT:    .cfi_offset s2, -32
112; RV64I-NEXT:    mv s0, a0
113; RV64I-NEXT:    lbu a3, 0(a0)
114; RV64I-NEXT:    mv s1, a1
115; RV64I-NEXT:    andi s2, a1, 255
116; RV64I-NEXT:  .LBB0_1: # %atomicrmw.start
117; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
118; RV64I-NEXT:    andi a0, a3, 255
119; RV64I-NEXT:    sltu a0, a0, s2
120; RV64I-NEXT:    addi a0, a0, -1
121; RV64I-NEXT:    and a0, a0, s1
122; RV64I-NEXT:    sub a2, a3, a0
123; RV64I-NEXT:    sb a3, 15(sp)
124; RV64I-NEXT:    addi a1, sp, 15
125; RV64I-NEXT:    li a3, 5
126; RV64I-NEXT:    li a4, 5
127; RV64I-NEXT:    mv a0, s0
128; RV64I-NEXT:    call __atomic_compare_exchange_1
129; RV64I-NEXT:    lbu a3, 15(sp)
130; RV64I-NEXT:    beqz a0, .LBB0_1
131; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
132; RV64I-NEXT:    mv a0, a3
133; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
134; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
135; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
136; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
137; RV64I-NEXT:    .cfi_restore ra
138; RV64I-NEXT:    .cfi_restore s0
139; RV64I-NEXT:    .cfi_restore s1
140; RV64I-NEXT:    .cfi_restore s2
141; RV64I-NEXT:    addi sp, sp, 48
142; RV64I-NEXT:    .cfi_def_cfa_offset 0
143; RV64I-NEXT:    ret
144;
145; RV64IA-LABEL: atomicrmw_usub_cond_i8:
146; RV64IA:       # %bb.0:
147; RV64IA-NEXT:    andi a2, a0, -4
148; RV64IA-NEXT:    slli a4, a0, 3
149; RV64IA-NEXT:    li a5, 255
150; RV64IA-NEXT:    andi a0, a4, 24
151; RV64IA-NEXT:    lw a3, 0(a2)
152; RV64IA-NEXT:    sllw a4, a5, a4
153; RV64IA-NEXT:    not a4, a4
154; RV64IA-NEXT:    andi a5, a1, 255
155; RV64IA-NEXT:  .LBB0_1: # %atomicrmw.start
156; RV64IA-NEXT:    # =>This Loop Header: Depth=1
157; RV64IA-NEXT:    # Child Loop BB0_3 Depth 2
158; RV64IA-NEXT:    srlw a6, a3, a0
159; RV64IA-NEXT:    sext.w a7, a3
160; RV64IA-NEXT:    andi t0, a6, 255
161; RV64IA-NEXT:    sltu t0, t0, a5
162; RV64IA-NEXT:    addi t0, t0, -1
163; RV64IA-NEXT:    and t0, t0, a1
164; RV64IA-NEXT:    subw a6, a6, t0
165; RV64IA-NEXT:    andi a6, a6, 255
166; RV64IA-NEXT:    sllw a6, a6, a0
167; RV64IA-NEXT:    and a3, a3, a4
168; RV64IA-NEXT:    or a6, a3, a6
169; RV64IA-NEXT:  .LBB0_3: # %atomicrmw.start
170; RV64IA-NEXT:    # Parent Loop BB0_1 Depth=1
171; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
172; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
173; RV64IA-NEXT:    bne a3, a7, .LBB0_1
174; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
175; RV64IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
176; RV64IA-NEXT:    sc.w.rl t0, a6, (a2)
177; RV64IA-NEXT:    bnez t0, .LBB0_3
178; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
179; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
180; RV64IA-NEXT:    srlw a0, a3, a0
181; RV64IA-NEXT:    ret
182  %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
183  ret i8 %result
184}
185
; i16 usub_cond, seq_cst: same clamp-the-subtrahend pattern as the i8 case,
; but masked with 0xffff (built via lui 16 / addi(-w) -1).  RV32I/RV64I call
; __atomic_compare_exchange_2 in a CAS loop; RV32IA/RV64IA run a masked
; lr.w.aqrl/sc.w.rl loop on the containing aligned word.  CHECK lines are
; autogenerated by utils/update_llc_test_checks.py — do not edit by hand.
186define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
187; RV32I-LABEL: atomicrmw_usub_cond_i16:
188; RV32I:       # %bb.0:
189; RV32I-NEXT:    addi sp, sp, -32
190; RV32I-NEXT:    .cfi_def_cfa_offset 32
191; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
192; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
193; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
194; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
195; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
196; RV32I-NEXT:    .cfi_offset ra, -4
197; RV32I-NEXT:    .cfi_offset s0, -8
198; RV32I-NEXT:    .cfi_offset s1, -12
199; RV32I-NEXT:    .cfi_offset s2, -16
200; RV32I-NEXT:    .cfi_offset s3, -20
201; RV32I-NEXT:    mv s0, a1
202; RV32I-NEXT:    mv s1, a0
203; RV32I-NEXT:    lhu a1, 0(a0)
204; RV32I-NEXT:    lui s2, 16
205; RV32I-NEXT:    addi s2, s2, -1
206; RV32I-NEXT:    and s3, s0, s2
207; RV32I-NEXT:  .LBB1_1: # %atomicrmw.start
208; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
209; RV32I-NEXT:    and a0, a1, s2
210; RV32I-NEXT:    sltu a0, a0, s3
211; RV32I-NEXT:    addi a0, a0, -1
212; RV32I-NEXT:    and a0, a0, s0
213; RV32I-NEXT:    sub a2, a1, a0
214; RV32I-NEXT:    sh a1, 10(sp)
215; RV32I-NEXT:    addi a1, sp, 10
216; RV32I-NEXT:    li a3, 5
217; RV32I-NEXT:    li a4, 5
218; RV32I-NEXT:    mv a0, s1
219; RV32I-NEXT:    call __atomic_compare_exchange_2
220; RV32I-NEXT:    lh a1, 10(sp)
221; RV32I-NEXT:    beqz a0, .LBB1_1
222; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
223; RV32I-NEXT:    mv a0, a1
224; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
225; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
226; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
227; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
228; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
229; RV32I-NEXT:    .cfi_restore ra
230; RV32I-NEXT:    .cfi_restore s0
231; RV32I-NEXT:    .cfi_restore s1
232; RV32I-NEXT:    .cfi_restore s2
233; RV32I-NEXT:    .cfi_restore s3
234; RV32I-NEXT:    addi sp, sp, 32
235; RV32I-NEXT:    .cfi_def_cfa_offset 0
236; RV32I-NEXT:    ret
237;
238; RV32IA-LABEL: atomicrmw_usub_cond_i16:
239; RV32IA:       # %bb.0:
240; RV32IA-NEXT:    andi a2, a0, -4
241; RV32IA-NEXT:    slli a4, a0, 3
242; RV32IA-NEXT:    lui a3, 16
243; RV32IA-NEXT:    andi a0, a4, 24
244; RV32IA-NEXT:    addi a3, a3, -1
245; RV32IA-NEXT:    lw a6, 0(a2)
246; RV32IA-NEXT:    sll a4, a3, a4
247; RV32IA-NEXT:    not a4, a4
248; RV32IA-NEXT:    and a5, a1, a3
249; RV32IA-NEXT:  .LBB1_1: # %atomicrmw.start
250; RV32IA-NEXT:    # =>This Loop Header: Depth=1
251; RV32IA-NEXT:    # Child Loop BB1_3 Depth 2
252; RV32IA-NEXT:    mv a7, a6
253; RV32IA-NEXT:    srl a6, a6, a0
254; RV32IA-NEXT:    and t0, a6, a3
255; RV32IA-NEXT:    sltu t0, t0, a5
256; RV32IA-NEXT:    addi t0, t0, -1
257; RV32IA-NEXT:    and t0, t0, a1
258; RV32IA-NEXT:    sub a6, a6, t0
259; RV32IA-NEXT:    and a6, a6, a3
260; RV32IA-NEXT:    sll a6, a6, a0
261; RV32IA-NEXT:    and t0, a7, a4
262; RV32IA-NEXT:    or t0, t0, a6
263; RV32IA-NEXT:  .LBB1_3: # %atomicrmw.start
264; RV32IA-NEXT:    # Parent Loop BB1_1 Depth=1
265; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
266; RV32IA-NEXT:    lr.w.aqrl a6, (a2)
267; RV32IA-NEXT:    bne a6, a7, .LBB1_1
268; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
269; RV32IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
270; RV32IA-NEXT:    sc.w.rl t1, t0, (a2)
271; RV32IA-NEXT:    bnez t1, .LBB1_3
272; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
273; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
274; RV32IA-NEXT:    srl a0, a6, a0
275; RV32IA-NEXT:    ret
276;
277; RV64I-LABEL: atomicrmw_usub_cond_i16:
278; RV64I:       # %bb.0:
279; RV64I-NEXT:    addi sp, sp, -48
280; RV64I-NEXT:    .cfi_def_cfa_offset 48
281; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
282; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
283; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
284; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
285; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
286; RV64I-NEXT:    .cfi_offset ra, -8
287; RV64I-NEXT:    .cfi_offset s0, -16
288; RV64I-NEXT:    .cfi_offset s1, -24
289; RV64I-NEXT:    .cfi_offset s2, -32
290; RV64I-NEXT:    .cfi_offset s3, -40
291; RV64I-NEXT:    mv s0, a1
292; RV64I-NEXT:    mv s1, a0
293; RV64I-NEXT:    lhu a1, 0(a0)
294; RV64I-NEXT:    lui s2, 16
295; RV64I-NEXT:    addiw s2, s2, -1
296; RV64I-NEXT:    and s3, s0, s2
297; RV64I-NEXT:  .LBB1_1: # %atomicrmw.start
298; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
299; RV64I-NEXT:    and a0, a1, s2
300; RV64I-NEXT:    sltu a0, a0, s3
301; RV64I-NEXT:    addi a0, a0, -1
302; RV64I-NEXT:    and a0, a0, s0
303; RV64I-NEXT:    sub a2, a1, a0
304; RV64I-NEXT:    sh a1, 6(sp)
305; RV64I-NEXT:    addi a1, sp, 6
306; RV64I-NEXT:    li a3, 5
307; RV64I-NEXT:    li a4, 5
308; RV64I-NEXT:    mv a0, s1
309; RV64I-NEXT:    call __atomic_compare_exchange_2
310; RV64I-NEXT:    lh a1, 6(sp)
311; RV64I-NEXT:    beqz a0, .LBB1_1
312; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
313; RV64I-NEXT:    mv a0, a1
314; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
315; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
316; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
317; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
318; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
319; RV64I-NEXT:    .cfi_restore ra
320; RV64I-NEXT:    .cfi_restore s0
321; RV64I-NEXT:    .cfi_restore s1
322; RV64I-NEXT:    .cfi_restore s2
323; RV64I-NEXT:    .cfi_restore s3
324; RV64I-NEXT:    addi sp, sp, 48
325; RV64I-NEXT:    .cfi_def_cfa_offset 0
326; RV64I-NEXT:    ret
327;
328; RV64IA-LABEL: atomicrmw_usub_cond_i16:
329; RV64IA:       # %bb.0:
330; RV64IA-NEXT:    andi a2, a0, -4
331; RV64IA-NEXT:    slli a5, a0, 3
332; RV64IA-NEXT:    lui a3, 16
333; RV64IA-NEXT:    andi a0, a5, 24
334; RV64IA-NEXT:    addiw a3, a3, -1
335; RV64IA-NEXT:    lw a4, 0(a2)
336; RV64IA-NEXT:    sllw a5, a3, a5
337; RV64IA-NEXT:    not a5, a5
338; RV64IA-NEXT:    and a6, a1, a3
339; RV64IA-NEXT:  .LBB1_1: # %atomicrmw.start
340; RV64IA-NEXT:    # =>This Loop Header: Depth=1
341; RV64IA-NEXT:    # Child Loop BB1_3 Depth 2
342; RV64IA-NEXT:    srlw a7, a4, a0
343; RV64IA-NEXT:    sext.w t0, a4
344; RV64IA-NEXT:    and t1, a7, a3
345; RV64IA-NEXT:    sltu t1, t1, a6
346; RV64IA-NEXT:    addi t1, t1, -1
347; RV64IA-NEXT:    and t1, t1, a1
348; RV64IA-NEXT:    subw a7, a7, t1
349; RV64IA-NEXT:    and a7, a7, a3
350; RV64IA-NEXT:    sllw a7, a7, a0
351; RV64IA-NEXT:    and a4, a4, a5
352; RV64IA-NEXT:    or a7, a4, a7
353; RV64IA-NEXT:  .LBB1_3: # %atomicrmw.start
354; RV64IA-NEXT:    # Parent Loop BB1_1 Depth=1
355; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
356; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
357; RV64IA-NEXT:    bne a4, t0, .LBB1_1
358; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
359; RV64IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
360; RV64IA-NEXT:    sc.w.rl t1, a7, (a2)
361; RV64IA-NEXT:    bnez t1, .LBB1_3
362; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
363; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
364; RV64IA-NEXT:    srl a0, a6, a0
365; RV64IA-NEXT:    ret
366  %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
367  ret i16 %result
368}
369
; i32 usub_cond, seq_cst: word-sized, so no sub-word masking is needed.
; RV32I/RV64I use a CAS loop via __atomic_compare_exchange_4; RV32IA/RV64IA
; use lr.w.aqrl/sc.w.rl directly on the pointer.  On RV64 the comparison
; operands are sign-extended with sext.w before sltu.  CHECK lines are
; autogenerated by utils/update_llc_test_checks.py — do not edit by hand.
370define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
371; RV32I-LABEL: atomicrmw_usub_cond_i32:
372; RV32I:       # %bb.0:
373; RV32I-NEXT:    addi sp, sp, -16
374; RV32I-NEXT:    .cfi_def_cfa_offset 16
375; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
376; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
377; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
378; RV32I-NEXT:    .cfi_offset ra, -4
379; RV32I-NEXT:    .cfi_offset s0, -8
380; RV32I-NEXT:    .cfi_offset s1, -12
381; RV32I-NEXT:    mv s0, a0
382; RV32I-NEXT:    lw a3, 0(a0)
383; RV32I-NEXT:    mv s1, a1
384; RV32I-NEXT:  .LBB2_1: # %atomicrmw.start
385; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
386; RV32I-NEXT:    sltu a0, a3, s1
387; RV32I-NEXT:    addi a0, a0, -1
388; RV32I-NEXT:    and a0, a0, s1
389; RV32I-NEXT:    sub a2, a3, a0
390; RV32I-NEXT:    sw a3, 0(sp)
391; RV32I-NEXT:    mv a1, sp
392; RV32I-NEXT:    li a3, 5
393; RV32I-NEXT:    li a4, 5
394; RV32I-NEXT:    mv a0, s0
395; RV32I-NEXT:    call __atomic_compare_exchange_4
396; RV32I-NEXT:    lw a3, 0(sp)
397; RV32I-NEXT:    beqz a0, .LBB2_1
398; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
399; RV32I-NEXT:    mv a0, a3
400; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
401; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
402; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
403; RV32I-NEXT:    .cfi_restore ra
404; RV32I-NEXT:    .cfi_restore s0
405; RV32I-NEXT:    .cfi_restore s1
406; RV32I-NEXT:    addi sp, sp, 16
407; RV32I-NEXT:    .cfi_def_cfa_offset 0
408; RV32I-NEXT:    ret
409;
410; RV32IA-LABEL: atomicrmw_usub_cond_i32:
411; RV32IA:       # %bb.0:
412; RV32IA-NEXT:    lw a2, 0(a0)
413; RV32IA-NEXT:  .LBB2_1: # %atomicrmw.start
414; RV32IA-NEXT:    # =>This Loop Header: Depth=1
415; RV32IA-NEXT:    # Child Loop BB2_3 Depth 2
416; RV32IA-NEXT:    mv a3, a2
417; RV32IA-NEXT:    sltu a2, a2, a1
418; RV32IA-NEXT:    addi a2, a2, -1
419; RV32IA-NEXT:    and a2, a2, a1
420; RV32IA-NEXT:    sub a4, a3, a2
421; RV32IA-NEXT:  .LBB2_3: # %atomicrmw.start
422; RV32IA-NEXT:    # Parent Loop BB2_1 Depth=1
423; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
424; RV32IA-NEXT:    lr.w.aqrl a2, (a0)
425; RV32IA-NEXT:    bne a2, a3, .LBB2_1
426; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
427; RV32IA-NEXT:    # in Loop: Header=BB2_3 Depth=2
428; RV32IA-NEXT:    sc.w.rl a5, a4, (a0)
429; RV32IA-NEXT:    bnez a5, .LBB2_3
430; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
431; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
432; RV32IA-NEXT:    mv a0, a2
433; RV32IA-NEXT:    ret
434;
435; RV64I-LABEL: atomicrmw_usub_cond_i32:
436; RV64I:       # %bb.0:
437; RV64I-NEXT:    addi sp, sp, -48
438; RV64I-NEXT:    .cfi_def_cfa_offset 48
439; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
440; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
441; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
442; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
443; RV64I-NEXT:    .cfi_offset ra, -8
444; RV64I-NEXT:    .cfi_offset s0, -16
445; RV64I-NEXT:    .cfi_offset s1, -24
446; RV64I-NEXT:    .cfi_offset s2, -32
447; RV64I-NEXT:    mv s0, a0
448; RV64I-NEXT:    lw a3, 0(a0)
449; RV64I-NEXT:    mv s1, a1
450; RV64I-NEXT:    sext.w s2, a1
451; RV64I-NEXT:  .LBB2_1: # %atomicrmw.start
452; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
453; RV64I-NEXT:    sltu a0, a3, s2
454; RV64I-NEXT:    addi a0, a0, -1
455; RV64I-NEXT:    and a0, a0, s1
456; RV64I-NEXT:    subw a2, a3, a0
457; RV64I-NEXT:    sw a3, 12(sp)
458; RV64I-NEXT:    addi a1, sp, 12
459; RV64I-NEXT:    li a3, 5
460; RV64I-NEXT:    li a4, 5
461; RV64I-NEXT:    mv a0, s0
462; RV64I-NEXT:    call __atomic_compare_exchange_4
463; RV64I-NEXT:    lw a3, 12(sp)
464; RV64I-NEXT:    beqz a0, .LBB2_1
465; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
466; RV64I-NEXT:    mv a0, a3
467; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
468; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
469; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
470; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
471; RV64I-NEXT:    .cfi_restore ra
472; RV64I-NEXT:    .cfi_restore s0
473; RV64I-NEXT:    .cfi_restore s1
474; RV64I-NEXT:    .cfi_restore s2
475; RV64I-NEXT:    addi sp, sp, 48
476; RV64I-NEXT:    .cfi_def_cfa_offset 0
477; RV64I-NEXT:    ret
478;
479; RV64IA-LABEL: atomicrmw_usub_cond_i32:
480; RV64IA:       # %bb.0:
481; RV64IA-NEXT:    lw a2, 0(a0)
482; RV64IA-NEXT:    sext.w a3, a1
483; RV64IA-NEXT:  .LBB2_1: # %atomicrmw.start
484; RV64IA-NEXT:    # =>This Loop Header: Depth=1
485; RV64IA-NEXT:    # Child Loop BB2_3 Depth 2
486; RV64IA-NEXT:    sext.w a4, a2
487; RV64IA-NEXT:    sltu a5, a4, a3
488; RV64IA-NEXT:    addi a5, a5, -1
489; RV64IA-NEXT:    and a5, a5, a1
490; RV64IA-NEXT:    subw a5, a2, a5
491; RV64IA-NEXT:  .LBB2_3: # %atomicrmw.start
492; RV64IA-NEXT:    # Parent Loop BB2_1 Depth=1
493; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
494; RV64IA-NEXT:    lr.w.aqrl a2, (a0)
495; RV64IA-NEXT:    bne a2, a4, .LBB2_1
496; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
497; RV64IA-NEXT:    # in Loop: Header=BB2_3 Depth=2
498; RV64IA-NEXT:    sc.w.rl a6, a5, (a0)
499; RV64IA-NEXT:    bnez a6, .LBB2_3
500; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
501; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
502; RV64IA-NEXT:    mv a0, a2
503; RV64IA-NEXT:    ret
504  %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
505  ret i32 %result
506}
507
; i64 usub_cond, seq_cst.  On RV32 (both without and with +a) there is no
; 64-bit LR/SC, so both RV32I and RV32IA emit a CAS loop calling
; __atomic_compare_exchange_8, with the 64-bit unsigned compare split into a
; high-word branch plus low-word sltu and a borrow-propagating two-word
; subtract.  RV64I calls __atomic_compare_exchange_8; RV64IA uses
; lr.d.aqrl/sc.d.rl.  CHECK lines are autogenerated by
; utils/update_llc_test_checks.py — do not edit by hand.
508define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
509; RV32I-LABEL: atomicrmw_usub_cond_i64:
510; RV32I:       # %bb.0:
511; RV32I-NEXT:    addi sp, sp, -32
512; RV32I-NEXT:    .cfi_def_cfa_offset 32
513; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
514; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
515; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
516; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
517; RV32I-NEXT:    .cfi_offset ra, -4
518; RV32I-NEXT:    .cfi_offset s0, -8
519; RV32I-NEXT:    .cfi_offset s1, -12
520; RV32I-NEXT:    .cfi_offset s2, -16
521; RV32I-NEXT:    mv s0, a2
522; RV32I-NEXT:    mv s1, a0
523; RV32I-NEXT:    lw a4, 0(a0)
524; RV32I-NEXT:    lw a5, 4(a0)
525; RV32I-NEXT:    mv s2, a1
526; RV32I-NEXT:    j .LBB3_3
527; RV32I-NEXT:  .LBB3_1: # %atomicrmw.start
528; RV32I-NEXT:    # in Loop: Header=BB3_3 Depth=1
529; RV32I-NEXT:    sltu a0, a5, s0
530; RV32I-NEXT:  .LBB3_2: # %atomicrmw.start
531; RV32I-NEXT:    # in Loop: Header=BB3_3 Depth=1
532; RV32I-NEXT:    xori a0, a0, 1
533; RV32I-NEXT:    neg a0, a0
534; RV32I-NEXT:    and a1, a0, s2
535; RV32I-NEXT:    and a0, a0, s0
536; RV32I-NEXT:    sltu a3, a4, a1
537; RV32I-NEXT:    sub a0, a5, a0
538; RV32I-NEXT:    sub a2, a4, a1
539; RV32I-NEXT:    sub a3, a0, a3
540; RV32I-NEXT:    sw a4, 8(sp)
541; RV32I-NEXT:    sw a5, 12(sp)
542; RV32I-NEXT:    addi a1, sp, 8
543; RV32I-NEXT:    li a4, 5
544; RV32I-NEXT:    li a5, 5
545; RV32I-NEXT:    mv a0, s1
546; RV32I-NEXT:    call __atomic_compare_exchange_8
547; RV32I-NEXT:    lw a4, 8(sp)
548; RV32I-NEXT:    lw a5, 12(sp)
549; RV32I-NEXT:    bnez a0, .LBB3_5
550; RV32I-NEXT:  .LBB3_3: # %atomicrmw.start
551; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
552; RV32I-NEXT:    bne a5, s0, .LBB3_1
553; RV32I-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
554; RV32I-NEXT:    sltu a0, a4, s2
555; RV32I-NEXT:    j .LBB3_2
556; RV32I-NEXT:  .LBB3_5: # %atomicrmw.end
557; RV32I-NEXT:    mv a0, a4
558; RV32I-NEXT:    mv a1, a5
559; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
560; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
561; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
562; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
563; RV32I-NEXT:    .cfi_restore ra
564; RV32I-NEXT:    .cfi_restore s0
565; RV32I-NEXT:    .cfi_restore s1
566; RV32I-NEXT:    .cfi_restore s2
567; RV32I-NEXT:    addi sp, sp, 32
568; RV32I-NEXT:    .cfi_def_cfa_offset 0
569; RV32I-NEXT:    ret
570;
571; RV32IA-LABEL: atomicrmw_usub_cond_i64:
572; RV32IA:       # %bb.0:
573; RV32IA-NEXT:    addi sp, sp, -32
574; RV32IA-NEXT:    .cfi_def_cfa_offset 32
575; RV32IA-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
576; RV32IA-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
577; RV32IA-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
578; RV32IA-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
579; RV32IA-NEXT:    .cfi_offset ra, -4
580; RV32IA-NEXT:    .cfi_offset s0, -8
581; RV32IA-NEXT:    .cfi_offset s1, -12
582; RV32IA-NEXT:    .cfi_offset s2, -16
583; RV32IA-NEXT:    mv s0, a2
584; RV32IA-NEXT:    mv s1, a0
585; RV32IA-NEXT:    lw a4, 0(a0)
586; RV32IA-NEXT:    lw a5, 4(a0)
587; RV32IA-NEXT:    mv s2, a1
588; RV32IA-NEXT:    j .LBB3_3
589; RV32IA-NEXT:  .LBB3_1: # %atomicrmw.start
590; RV32IA-NEXT:    # in Loop: Header=BB3_3 Depth=1
591; RV32IA-NEXT:    sltu a0, a5, s0
592; RV32IA-NEXT:  .LBB3_2: # %atomicrmw.start
593; RV32IA-NEXT:    # in Loop: Header=BB3_3 Depth=1
594; RV32IA-NEXT:    xori a0, a0, 1
595; RV32IA-NEXT:    neg a0, a0
596; RV32IA-NEXT:    and a1, a0, s2
597; RV32IA-NEXT:    and a0, a0, s0
598; RV32IA-NEXT:    sltu a3, a4, a1
599; RV32IA-NEXT:    sub a0, a5, a0
600; RV32IA-NEXT:    sub a2, a4, a1
601; RV32IA-NEXT:    sub a3, a0, a3
602; RV32IA-NEXT:    sw a4, 8(sp)
603; RV32IA-NEXT:    sw a5, 12(sp)
604; RV32IA-NEXT:    addi a1, sp, 8
605; RV32IA-NEXT:    li a4, 5
606; RV32IA-NEXT:    li a5, 5
607; RV32IA-NEXT:    mv a0, s1
608; RV32IA-NEXT:    call __atomic_compare_exchange_8
609; RV32IA-NEXT:    lw a4, 8(sp)
610; RV32IA-NEXT:    lw a5, 12(sp)
611; RV32IA-NEXT:    bnez a0, .LBB3_5
612; RV32IA-NEXT:  .LBB3_3: # %atomicrmw.start
613; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
614; RV32IA-NEXT:    bne a5, s0, .LBB3_1
615; RV32IA-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
616; RV32IA-NEXT:    sltu a0, a4, s2
617; RV32IA-NEXT:    j .LBB3_2
618; RV32IA-NEXT:  .LBB3_5: # %atomicrmw.end
619; RV32IA-NEXT:    mv a0, a4
620; RV32IA-NEXT:    mv a1, a5
621; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
622; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
623; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
624; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
625; RV32IA-NEXT:    .cfi_restore ra
626; RV32IA-NEXT:    .cfi_restore s0
627; RV32IA-NEXT:    .cfi_restore s1
628; RV32IA-NEXT:    .cfi_restore s2
629; RV32IA-NEXT:    addi sp, sp, 32
630; RV32IA-NEXT:    .cfi_def_cfa_offset 0
631; RV32IA-NEXT:    ret
632;
633; RV64I-LABEL: atomicrmw_usub_cond_i64:
634; RV64I:       # %bb.0:
635; RV64I-NEXT:    addi sp, sp, -32
636; RV64I-NEXT:    .cfi_def_cfa_offset 32
637; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
638; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
639; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
640; RV64I-NEXT:    .cfi_offset ra, -8
641; RV64I-NEXT:    .cfi_offset s0, -16
642; RV64I-NEXT:    .cfi_offset s1, -24
643; RV64I-NEXT:    mv s0, a0
644; RV64I-NEXT:    ld a3, 0(a0)
645; RV64I-NEXT:    mv s1, a1
646; RV64I-NEXT:  .LBB3_1: # %atomicrmw.start
647; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
648; RV64I-NEXT:    sltu a0, a3, s1
649; RV64I-NEXT:    addi a0, a0, -1
650; RV64I-NEXT:    and a0, a0, s1
651; RV64I-NEXT:    sub a2, a3, a0
652; RV64I-NEXT:    sd a3, 0(sp)
653; RV64I-NEXT:    mv a1, sp
654; RV64I-NEXT:    li a3, 5
655; RV64I-NEXT:    li a4, 5
656; RV64I-NEXT:    mv a0, s0
657; RV64I-NEXT:    call __atomic_compare_exchange_8
658; RV64I-NEXT:    ld a3, 0(sp)
659; RV64I-NEXT:    beqz a0, .LBB3_1
660; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
661; RV64I-NEXT:    mv a0, a3
662; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
663; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
664; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
665; RV64I-NEXT:    .cfi_restore ra
666; RV64I-NEXT:    .cfi_restore s0
667; RV64I-NEXT:    .cfi_restore s1
668; RV64I-NEXT:    addi sp, sp, 32
669; RV64I-NEXT:    .cfi_def_cfa_offset 0
670; RV64I-NEXT:    ret
671;
672; RV64IA-LABEL: atomicrmw_usub_cond_i64:
673; RV64IA:       # %bb.0:
674; RV64IA-NEXT:    ld a2, 0(a0)
675; RV64IA-NEXT:  .LBB3_1: # %atomicrmw.start
676; RV64IA-NEXT:    # =>This Loop Header: Depth=1
677; RV64IA-NEXT:    # Child Loop BB3_3 Depth 2
678; RV64IA-NEXT:    mv a3, a2
679; RV64IA-NEXT:    sltu a2, a2, a1
680; RV64IA-NEXT:    addi a2, a2, -1
681; RV64IA-NEXT:    and a2, a2, a1
682; RV64IA-NEXT:    sub a4, a3, a2
683; RV64IA-NEXT:  .LBB3_3: # %atomicrmw.start
684; RV64IA-NEXT:    # Parent Loop BB3_1 Depth=1
685; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
686; RV64IA-NEXT:    lr.d.aqrl a2, (a0)
687; RV64IA-NEXT:    bne a2, a3, .LBB3_1
688; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
689; RV64IA-NEXT:    # in Loop: Header=BB3_3 Depth=2
690; RV64IA-NEXT:    sc.d.rl a5, a4, (a0)
691; RV64IA-NEXT:    bnez a5, .LBB3_3
692; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
693; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
694; RV64IA-NEXT:    mv a0, a2
695; RV64IA-NEXT:    ret
696  %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
697  ret i64 %result
698}
699
; i8 usub_sat, seq_cst: saturating unsigned subtraction — here the sub is
; done first and the sltu old,diff / addi -1 / and sequence zeroes the
; result when the subtraction wrapped, clamping to 0.  RV32I/RV64I use a CAS
; loop via __atomic_compare_exchange_1; RV32IA/RV64IA use a masked
; lr.w.aqrl/sc.w.rl loop on the containing aligned word.  CHECK lines are
; autogenerated by utils/update_llc_test_checks.py — do not edit by hand.
700define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
701; RV32I-LABEL: atomicrmw_usub_sat_i8:
702; RV32I:       # %bb.0:
703; RV32I-NEXT:    addi sp, sp, -16
704; RV32I-NEXT:    .cfi_def_cfa_offset 16
705; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
706; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
707; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
708; RV32I-NEXT:    .cfi_offset ra, -4
709; RV32I-NEXT:    .cfi_offset s0, -8
710; RV32I-NEXT:    .cfi_offset s1, -12
711; RV32I-NEXT:    mv s0, a0
712; RV32I-NEXT:    lbu a3, 0(a0)
713; RV32I-NEXT:    andi s1, a1, 255
714; RV32I-NEXT:  .LBB4_1: # %atomicrmw.start
715; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
716; RV32I-NEXT:    andi a0, a3, 255
717; RV32I-NEXT:    sub a1, a0, s1
718; RV32I-NEXT:    sltu a0, a0, a1
719; RV32I-NEXT:    addi a0, a0, -1
720; RV32I-NEXT:    and a2, a0, a1
721; RV32I-NEXT:    sb a3, 3(sp)
722; RV32I-NEXT:    addi a1, sp, 3
723; RV32I-NEXT:    li a3, 5
724; RV32I-NEXT:    li a4, 5
725; RV32I-NEXT:    mv a0, s0
726; RV32I-NEXT:    call __atomic_compare_exchange_1
727; RV32I-NEXT:    lbu a3, 3(sp)
728; RV32I-NEXT:    beqz a0, .LBB4_1
729; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
730; RV32I-NEXT:    mv a0, a3
731; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
732; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
733; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
734; RV32I-NEXT:    .cfi_restore ra
735; RV32I-NEXT:    .cfi_restore s0
736; RV32I-NEXT:    .cfi_restore s1
737; RV32I-NEXT:    addi sp, sp, 16
738; RV32I-NEXT:    .cfi_def_cfa_offset 0
739; RV32I-NEXT:    ret
740;
741; RV32IA-LABEL: atomicrmw_usub_sat_i8:
742; RV32IA:       # %bb.0:
743; RV32IA-NEXT:    andi a2, a0, -4
744; RV32IA-NEXT:    slli a0, a0, 3
745; RV32IA-NEXT:    li a3, 255
746; RV32IA-NEXT:    sll a3, a3, a0
747; RV32IA-NEXT:    lw a4, 0(a2)
748; RV32IA-NEXT:    andi a0, a0, 24
749; RV32IA-NEXT:    not a3, a3
750; RV32IA-NEXT:    andi a1, a1, 255
751; RV32IA-NEXT:  .LBB4_1: # %atomicrmw.start
752; RV32IA-NEXT:    # =>This Loop Header: Depth=1
753; RV32IA-NEXT:    # Child Loop BB4_3 Depth 2
754; RV32IA-NEXT:    mv a5, a4
755; RV32IA-NEXT:    srl a4, a4, a0
756; RV32IA-NEXT:    andi a4, a4, 255
757; RV32IA-NEXT:    sub a6, a4, a1
758; RV32IA-NEXT:    sltu a4, a4, a6
759; RV32IA-NEXT:    addi a4, a4, -1
760; RV32IA-NEXT:    and a4, a4, a6
761; RV32IA-NEXT:    sll a4, a4, a0
762; RV32IA-NEXT:    and a6, a5, a3
763; RV32IA-NEXT:    or a6, a6, a4
764; RV32IA-NEXT:  .LBB4_3: # %atomicrmw.start
765; RV32IA-NEXT:    # Parent Loop BB4_1 Depth=1
766; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
767; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
768; RV32IA-NEXT:    bne a4, a5, .LBB4_1
769; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
770; RV32IA-NEXT:    # in Loop: Header=BB4_3 Depth=2
771; RV32IA-NEXT:    sc.w.rl a7, a6, (a2)
772; RV32IA-NEXT:    bnez a7, .LBB4_3
773; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
774; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
775; RV32IA-NEXT:    srl a0, a4, a0
776; RV32IA-NEXT:    ret
777;
778; RV64I-LABEL: atomicrmw_usub_sat_i8:
779; RV64I:       # %bb.0:
780; RV64I-NEXT:    addi sp, sp, -32
781; RV64I-NEXT:    .cfi_def_cfa_offset 32
782; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
783; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
784; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
785; RV64I-NEXT:    .cfi_offset ra, -8
786; RV64I-NEXT:    .cfi_offset s0, -16
787; RV64I-NEXT:    .cfi_offset s1, -24
788; RV64I-NEXT:    mv s0, a0
789; RV64I-NEXT:    lbu a3, 0(a0)
790; RV64I-NEXT:    andi s1, a1, 255
791; RV64I-NEXT:  .LBB4_1: # %atomicrmw.start
792; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
793; RV64I-NEXT:    andi a0, a3, 255
794; RV64I-NEXT:    sub a1, a0, s1
795; RV64I-NEXT:    sltu a0, a0, a1
796; RV64I-NEXT:    addi a0, a0, -1
797; RV64I-NEXT:    and a2, a0, a1
798; RV64I-NEXT:    sb a3, 7(sp)
799; RV64I-NEXT:    addi a1, sp, 7
800; RV64I-NEXT:    li a3, 5
801; RV64I-NEXT:    li a4, 5
802; RV64I-NEXT:    mv a0, s0
803; RV64I-NEXT:    call __atomic_compare_exchange_1
804; RV64I-NEXT:    lbu a3, 7(sp)
805; RV64I-NEXT:    beqz a0, .LBB4_1
806; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
807; RV64I-NEXT:    mv a0, a3
808; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
809; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
810; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
811; RV64I-NEXT:    .cfi_restore ra
812; RV64I-NEXT:    .cfi_restore s0
813; RV64I-NEXT:    .cfi_restore s1
814; RV64I-NEXT:    addi sp, sp, 32
815; RV64I-NEXT:    .cfi_def_cfa_offset 0
816; RV64I-NEXT:    ret
817;
818; RV64IA-LABEL: atomicrmw_usub_sat_i8:
819; RV64IA:       # %bb.0:
820; RV64IA-NEXT:    andi a2, a0, -4
821; RV64IA-NEXT:    slli a0, a0, 3
822; RV64IA-NEXT:    li a3, 255
823; RV64IA-NEXT:    sllw a4, a3, a0
824; RV64IA-NEXT:    lw a3, 0(a2)
825; RV64IA-NEXT:    andi a0, a0, 24
826; RV64IA-NEXT:    not a4, a4
827; RV64IA-NEXT:    andi a1, a1, 255
828; RV64IA-NEXT:  .LBB4_1: # %atomicrmw.start
829; RV64IA-NEXT:    # =>This Loop Header: Depth=1
830; RV64IA-NEXT:    # Child Loop BB4_3 Depth 2
831; RV64IA-NEXT:    srlw a5, a3, a0
832; RV64IA-NEXT:    sext.w a6, a3
833; RV64IA-NEXT:    andi a5, a5, 255
834; RV64IA-NEXT:    sub a7, a5, a1
835; RV64IA-NEXT:    sltu a5, a5, a7
836; RV64IA-NEXT:    addi a5, a5, -1
837; RV64IA-NEXT:    and a5, a5, a7
838; RV64IA-NEXT:    sllw a5, a5, a0
839; RV64IA-NEXT:    and a3, a3, a4
840; RV64IA-NEXT:    or a5, a3, a5
841; RV64IA-NEXT:  .LBB4_3: # %atomicrmw.start
842; RV64IA-NEXT:    # Parent Loop BB4_1 Depth=1
843; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
844; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
845; RV64IA-NEXT:    bne a3, a6, .LBB4_1
846; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
847; RV64IA-NEXT:    # in Loop: Header=BB4_3 Depth=2
848; RV64IA-NEXT:    sc.w.rl a7, a5, (a2)
849; RV64IA-NEXT:    bnez a7, .LBB4_3
850; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
851; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
852; RV64IA-NEXT:    srlw a0, a3, a0
853; RV64IA-NEXT:    ret
854  %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
855  ret i8 %result
856}
857
; i16 usub_sat (saturating unsigned subtract: result clamps to 0 instead of
; wrapping) at seq_cst.  Without the A extension this lowers to a CAS loop
; calling __atomic_compare_exchange_2; with +a it becomes a masked word-aligned
; LR.W/SC.W loop operating on the containing 32-bit word.
; NOTE(review): CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lhu a3, 0(a0)
; RV32I-NEXT:    lui s1, 16
; RV32I-NEXT:    addi s1, s1, -1
; RV32I-NEXT:    and s2, a1, s1
; RV32I-NEXT:  .LBB5_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    and a0, a3, s1
; RV32I-NEXT:    sub a1, a0, s2
; RV32I-NEXT:    sltu a0, a0, a1
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a2, a0, a1
; RV32I-NEXT:    sh a3, 14(sp)
; RV32I-NEXT:    addi a1, sp, 14
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_2
; RV32I-NEXT:    lh a3, 14(sp)
; RV32I-NEXT:    beqz a0, .LBB5_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i16:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    andi a2, a0, -4
; RV32IA-NEXT:    slli a4, a0, 3
; RV32IA-NEXT:    lui a3, 16
; RV32IA-NEXT:    andi a0, a4, 24
; RV32IA-NEXT:    addi a3, a3, -1
; RV32IA-NEXT:    lw a5, 0(a2)
; RV32IA-NEXT:    sll a4, a3, a4
; RV32IA-NEXT:    not a4, a4
; RV32IA-NEXT:    and a1, a1, a3
; RV32IA-NEXT:  .LBB5_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB5_3 Depth 2
; RV32IA-NEXT:    mv a6, a5
; RV32IA-NEXT:    srl a5, a5, a0
; RV32IA-NEXT:    and a5, a5, a3
; RV32IA-NEXT:    sub a7, a5, a1
; RV32IA-NEXT:    sltu a5, a5, a7
; RV32IA-NEXT:    addi a5, a5, -1
; RV32IA-NEXT:    and a5, a5, a7
; RV32IA-NEXT:    sll a5, a5, a0
; RV32IA-NEXT:    and a7, a6, a4
; RV32IA-NEXT:    or a7, a7, a5
; RV32IA-NEXT:  .LBB5_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB5_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
; RV32IA-NEXT:    bne a5, a6, .LBB5_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB5_3 Depth=2
; RV32IA-NEXT:    sc.w.rl t0, a7, (a2)
; RV32IA-NEXT:    bnez t0, .LBB5_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    srl a0, a5, a0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lhu a3, 0(a0)
; RV64I-NEXT:    lui s1, 16
; RV64I-NEXT:    addiw s1, s1, -1
; RV64I-NEXT:    and s2, a1, s1
; RV64I-NEXT:  .LBB5_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    and a0, a3, s1
; RV64I-NEXT:    sub a1, a0, s2
; RV64I-NEXT:    sltu a0, a0, a1
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    sh a3, 14(sp)
; RV64I-NEXT:    addi a1, sp, 14
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_2
; RV64I-NEXT:    lh a3, 14(sp)
; RV64I-NEXT:    beqz a0, .LBB5_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i16:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    andi a2, a0, -4
; RV64IA-NEXT:    slli a5, a0, 3
; RV64IA-NEXT:    lui a3, 16
; RV64IA-NEXT:    andi a0, a5, 24
; RV64IA-NEXT:    addiw a3, a3, -1
; RV64IA-NEXT:    lw a4, 0(a2)
; RV64IA-NEXT:    sllw a5, a3, a5
; RV64IA-NEXT:    not a5, a5
; RV64IA-NEXT:    and a1, a1, a3
; RV64IA-NEXT:  .LBB5_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB5_3 Depth 2
; RV64IA-NEXT:    srlw a6, a4, a0
; RV64IA-NEXT:    sext.w a7, a4
; RV64IA-NEXT:    and a6, a6, a3
; RV64IA-NEXT:    sub t0, a6, a1
; RV64IA-NEXT:    sltu a6, a6, t0
; RV64IA-NEXT:    addi a6, a6, -1
; RV64IA-NEXT:    and a6, a6, t0
; RV64IA-NEXT:    sllw a6, a6, a0
; RV64IA-NEXT:    and a4, a4, a5
; RV64IA-NEXT:    or a6, a4, a6
; RV64IA-NEXT:  .LBB5_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB5_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
; RV64IA-NEXT:    bne a4, a7, .LBB5_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB5_3 Depth=2
; RV64IA-NEXT:    sc.w.rl t0, a6, (a2)
; RV64IA-NEXT:    bnez t0, .LBB5_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    srlw a0, a4, a0
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
  ret i16 %result
}
1029
; i32 usub_sat at seq_cst.  Natively sized for the LR.W/SC.W path, so the +a
; configurations need no sub-word masking; RV64IA compares against a sign-
; extended copy (sext.w) because lr.w sign-extends its result.  Without +a
; this lowers to a __atomic_compare_exchange_4 CAS loop.
; NOTE(review): CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lw a3, 0(a0)
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:  .LBB6_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub a0, a3, s1
; RV32I-NEXT:    sltu a1, a3, a0
; RV32I-NEXT:    addi a1, a1, -1
; RV32I-NEXT:    and a2, a1, a0
; RV32I-NEXT:    sw a3, 0(sp)
; RV32I-NEXT:    mv a1, sp
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_4
; RV32I-NEXT:    lw a3, 0(sp)
; RV32I-NEXT:    beqz a0, .LBB6_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i32:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    lw a2, 0(a0)
; RV32IA-NEXT:  .LBB6_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB6_3 Depth 2
; RV32IA-NEXT:    mv a3, a2
; RV32IA-NEXT:    sub a2, a2, a1
; RV32IA-NEXT:    sltu a4, a3, a2
; RV32IA-NEXT:    addi a4, a4, -1
; RV32IA-NEXT:    and a4, a4, a2
; RV32IA-NEXT:  .LBB6_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB6_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a2, (a0)
; RV32IA-NEXT:    bne a2, a3, .LBB6_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB6_3 Depth=2
; RV32IA-NEXT:    sc.w.rl a5, a4, (a0)
; RV32IA-NEXT:    bnez a5, .LBB6_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a2
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lw a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:  .LBB6_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    subw a0, a3, s1
; RV64I-NEXT:    sltu a1, a3, a0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    and a2, a1, a0
; RV64I-NEXT:    sw a3, 4(sp)
; RV64I-NEXT:    addi a1, sp, 4
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_4
; RV64I-NEXT:    lw a3, 4(sp)
; RV64I-NEXT:    beqz a0, .LBB6_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i32:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    lw a2, 0(a0)
; RV64IA-NEXT:  .LBB6_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB6_3 Depth 2
; RV64IA-NEXT:    subw a3, a2, a1
; RV64IA-NEXT:    sext.w a4, a2
; RV64IA-NEXT:    sltu a2, a4, a3
; RV64IA-NEXT:    addi a2, a2, -1
; RV64IA-NEXT:    and a3, a2, a3
; RV64IA-NEXT:  .LBB6_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB6_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a4, .LBB6_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB6_3 Depth=2
; RV64IA-NEXT:    sc.w.rl a5, a3, (a0)
; RV64IA-NEXT:    bnez a5, .LBB6_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
  ret i32 %result
}
1161
; i64 usub_sat at seq_cst.  On RV32 (with or without +a) i64 is wider than
; XLEN, so both configurations fall back to a __atomic_compare_exchange_8
; libcall loop with the two-word borrow computed in registers.  RV64I uses a
; __atomic_compare_exchange_8 CAS loop; RV64IA uses a native LR.D/SC.D loop.
; NOTE(review): CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    lw a4, 0(a0)
; RV32I-NEXT:    lw a5, 4(a0)
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    j .LBB7_3
; RV32I-NEXT:  .LBB7_1: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT:    sltu a2, a5, a0
; RV32I-NEXT:  .LBB7_2: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT:    addi a3, a2, -1
; RV32I-NEXT:    and a2, a3, a1
; RV32I-NEXT:    and a3, a3, a0
; RV32I-NEXT:    sw a4, 8(sp)
; RV32I-NEXT:    sw a5, 12(sp)
; RV32I-NEXT:    addi a1, sp, 8
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    li a5, 5
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __atomic_compare_exchange_8
; RV32I-NEXT:    lw a4, 8(sp)
; RV32I-NEXT:    lw a5, 12(sp)
; RV32I-NEXT:    bnez a0, .LBB7_5
; RV32I-NEXT:  .LBB7_3: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sltu a0, a4, s2
; RV32I-NEXT:    sub a1, a5, s0
; RV32I-NEXT:    sub a0, a1, a0
; RV32I-NEXT:    sub a1, a4, s2
; RV32I-NEXT:    bne a0, a5, .LBB7_1
; RV32I-NEXT:  # %bb.4: # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT:    sltu a2, a4, a1
; RV32I-NEXT:    j .LBB7_2
; RV32I-NEXT:  .LBB7_5: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a4
; RV32I-NEXT:    mv a1, a5
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i64:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    addi sp, sp, -32
; RV32IA-NEXT:    .cfi_def_cfa_offset 32
; RV32IA-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    .cfi_offset ra, -4
; RV32IA-NEXT:    .cfi_offset s0, -8
; RV32IA-NEXT:    .cfi_offset s1, -12
; RV32IA-NEXT:    .cfi_offset s2, -16
; RV32IA-NEXT:    mv s0, a2
; RV32IA-NEXT:    mv s1, a0
; RV32IA-NEXT:    lw a4, 0(a0)
; RV32IA-NEXT:    lw a5, 4(a0)
; RV32IA-NEXT:    mv s2, a1
; RV32IA-NEXT:    j .LBB7_3
; RV32IA-NEXT:  .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT:    sltu a2, a5, a0
; RV32IA-NEXT:  .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT:    addi a3, a2, -1
; RV32IA-NEXT:    and a2, a3, a1
; RV32IA-NEXT:    and a3, a3, a0
; RV32IA-NEXT:    sw a4, 8(sp)
; RV32IA-NEXT:    sw a5, 12(sp)
; RV32IA-NEXT:    addi a1, sp, 8
; RV32IA-NEXT:    li a4, 5
; RV32IA-NEXT:    li a5, 5
; RV32IA-NEXT:    mv a0, s1
; RV32IA-NEXT:    call __atomic_compare_exchange_8
; RV32IA-NEXT:    lw a4, 8(sp)
; RV32IA-NEXT:    lw a5, 12(sp)
; RV32IA-NEXT:    bnez a0, .LBB7_5
; RV32IA-NEXT:  .LBB7_3: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT:    sltu a0, a4, s2
; RV32IA-NEXT:    sub a1, a5, s0
; RV32IA-NEXT:    sub a0, a1, a0
; RV32IA-NEXT:    sub a1, a4, s2
; RV32IA-NEXT:    bne a0, a5, .LBB7_1
; RV32IA-NEXT:  # %bb.4: # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT:    sltu a2, a4, a1
; RV32IA-NEXT:    j .LBB7_2
; RV32IA-NEXT:  .LBB7_5: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a4
; RV32IA-NEXT:    mv a1, a5
; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    .cfi_restore ra
; RV32IA-NEXT:    .cfi_restore s0
; RV32IA-NEXT:    .cfi_restore s1
; RV32IA-NEXT:    .cfi_restore s2
; RV32IA-NEXT:    addi sp, sp, 32
; RV32IA-NEXT:    .cfi_def_cfa_offset 0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    ld a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:  .LBB7_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub a0, a3, s1
; RV64I-NEXT:    sltu a1, a3, a0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    and a2, a1, a0
; RV64I-NEXT:    sd a3, 0(sp)
; RV64I-NEXT:    mv a1, sp
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_8
; RV64I-NEXT:    ld a3, 0(sp)
; RV64I-NEXT:    beqz a0, .LBB7_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i64:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    ld a2, 0(a0)
; RV64IA-NEXT:  .LBB7_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB7_3 Depth 2
; RV64IA-NEXT:    mv a3, a2
; RV64IA-NEXT:    sub a2, a2, a1
; RV64IA-NEXT:    sltu a4, a3, a2
; RV64IA-NEXT:    addi a4, a4, -1
; RV64IA-NEXT:    and a4, a4, a2
; RV64IA-NEXT:  .LBB7_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB7_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.d.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a3, .LBB7_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB7_3 Depth=2
; RV64IA-NEXT:    sc.d.rl a5, a4, (a0)
; RV64IA-NEXT:    bnez a5, .LBB7_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
  ret i64 %result
}
1351