; xref: /llvm-project/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3; RUN:   | FileCheck -check-prefix=RV32I %s
4; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
5; RUN:   | FileCheck -check-prefix=RV32IA %s
6; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
7; RUN:   | FileCheck -check-prefix=RV32IA %s
8; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
9; RUN:   | FileCheck -check-prefix=RV64I %s
10; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
11; RUN:   | FileCheck -check-prefix=RV64IA %s
12; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
13; RUN:   | FileCheck -check-prefix=RV64IA %s
14
15
; atomicrmw uinc_wrap, i8, seq_cst (per LangRef: stores 0 when old u>= %val,
; else old+1 — the sltu/neg/and mask sequence below implements that select).
; Without +a (RV32I/RV64I) it expands to a __atomic_compare_exchange_1 libcall
; loop; with +a (RV32IA/RV64IA) it becomes a byte-masked word-sized LR/SC
; (lr.w.aqrl / sc.w.rl) CAS loop on the aligned containing word (andi ptr, -4).
16define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
17; RV32I-LABEL: atomicrmw_uinc_wrap_i8:
18; RV32I:       # %bb.0:
19; RV32I-NEXT:    addi sp, sp, -16
20; RV32I-NEXT:    .cfi_def_cfa_offset 16
21; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
22; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
23; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
24; RV32I-NEXT:    .cfi_offset ra, -4
25; RV32I-NEXT:    .cfi_offset s0, -8
26; RV32I-NEXT:    .cfi_offset s1, -12
27; RV32I-NEXT:    mv s0, a0
28; RV32I-NEXT:    lbu a3, 0(a0)
29; RV32I-NEXT:    andi s1, a1, 255
30; RV32I-NEXT:  .LBB0_1: # %atomicrmw.start
31; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
32; RV32I-NEXT:    addi a0, a3, 1
33; RV32I-NEXT:    andi a1, a3, 255
34; RV32I-NEXT:    sltu a1, a1, s1
35; RV32I-NEXT:    neg a2, a1
36; RV32I-NEXT:    and a2, a2, a0
37; RV32I-NEXT:    sb a3, 3(sp)
38; RV32I-NEXT:    addi a1, sp, 3
39; RV32I-NEXT:    li a3, 5
40; RV32I-NEXT:    li a4, 5
41; RV32I-NEXT:    mv a0, s0
42; RV32I-NEXT:    call __atomic_compare_exchange_1
43; RV32I-NEXT:    lbu a3, 3(sp)
44; RV32I-NEXT:    beqz a0, .LBB0_1
45; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
46; RV32I-NEXT:    mv a0, a3
47; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
48; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
49; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
50; RV32I-NEXT:    .cfi_restore ra
51; RV32I-NEXT:    .cfi_restore s0
52; RV32I-NEXT:    .cfi_restore s1
53; RV32I-NEXT:    addi sp, sp, 16
54; RV32I-NEXT:    .cfi_def_cfa_offset 0
55; RV32I-NEXT:    ret
56;
57; RV32IA-LABEL: atomicrmw_uinc_wrap_i8:
58; RV32IA:       # %bb.0:
59; RV32IA-NEXT:    andi a2, a0, -4
60; RV32IA-NEXT:    slli a0, a0, 3
61; RV32IA-NEXT:    li a3, 255
62; RV32IA-NEXT:    sll a3, a3, a0
63; RV32IA-NEXT:    lw a4, 0(a2)
64; RV32IA-NEXT:    andi a0, a0, 24
65; RV32IA-NEXT:    not a3, a3
66; RV32IA-NEXT:    andi a1, a1, 255
67; RV32IA-NEXT:  .LBB0_1: # %atomicrmw.start
68; RV32IA-NEXT:    # =>This Loop Header: Depth=1
69; RV32IA-NEXT:    # Child Loop BB0_3 Depth 2
70; RV32IA-NEXT:    mv a5, a4
71; RV32IA-NEXT:    srl a4, a4, a0
72; RV32IA-NEXT:    andi a6, a4, 255
73; RV32IA-NEXT:    addi a4, a4, 1
74; RV32IA-NEXT:    sltu a6, a6, a1
75; RV32IA-NEXT:    neg a6, a6
76; RV32IA-NEXT:    and a4, a6, a4
77; RV32IA-NEXT:    andi a4, a4, 255
78; RV32IA-NEXT:    sll a4, a4, a0
79; RV32IA-NEXT:    and a6, a5, a3
80; RV32IA-NEXT:    or a6, a6, a4
81; RV32IA-NEXT:  .LBB0_3: # %atomicrmw.start
82; RV32IA-NEXT:    # Parent Loop BB0_1 Depth=1
83; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
84; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
85; RV32IA-NEXT:    bne a4, a5, .LBB0_1
86; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
87; RV32IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
88; RV32IA-NEXT:    sc.w.rl a7, a6, (a2)
89; RV32IA-NEXT:    bnez a7, .LBB0_3
90; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
91; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
92; RV32IA-NEXT:    srl a0, a4, a0
93; RV32IA-NEXT:    ret
94;
95; RV64I-LABEL: atomicrmw_uinc_wrap_i8:
96; RV64I:       # %bb.0:
97; RV64I-NEXT:    addi sp, sp, -32
98; RV64I-NEXT:    .cfi_def_cfa_offset 32
99; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
100; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
101; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
102; RV64I-NEXT:    .cfi_offset ra, -8
103; RV64I-NEXT:    .cfi_offset s0, -16
104; RV64I-NEXT:    .cfi_offset s1, -24
105; RV64I-NEXT:    mv s0, a0
106; RV64I-NEXT:    lbu a3, 0(a0)
107; RV64I-NEXT:    andi s1, a1, 255
108; RV64I-NEXT:  .LBB0_1: # %atomicrmw.start
109; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
110; RV64I-NEXT:    addi a0, a3, 1
111; RV64I-NEXT:    andi a1, a3, 255
112; RV64I-NEXT:    sltu a1, a1, s1
113; RV64I-NEXT:    neg a2, a1
114; RV64I-NEXT:    and a2, a2, a0
115; RV64I-NEXT:    sb a3, 7(sp)
116; RV64I-NEXT:    addi a1, sp, 7
117; RV64I-NEXT:    li a3, 5
118; RV64I-NEXT:    li a4, 5
119; RV64I-NEXT:    mv a0, s0
120; RV64I-NEXT:    call __atomic_compare_exchange_1
121; RV64I-NEXT:    lbu a3, 7(sp)
122; RV64I-NEXT:    beqz a0, .LBB0_1
123; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
124; RV64I-NEXT:    mv a0, a3
125; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
126; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
127; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
128; RV64I-NEXT:    .cfi_restore ra
129; RV64I-NEXT:    .cfi_restore s0
130; RV64I-NEXT:    .cfi_restore s1
131; RV64I-NEXT:    addi sp, sp, 32
132; RV64I-NEXT:    .cfi_def_cfa_offset 0
133; RV64I-NEXT:    ret
134;
135; RV64IA-LABEL: atomicrmw_uinc_wrap_i8:
136; RV64IA:       # %bb.0:
137; RV64IA-NEXT:    andi a2, a0, -4
138; RV64IA-NEXT:    slli a0, a0, 3
139; RV64IA-NEXT:    li a3, 255
140; RV64IA-NEXT:    sllw a4, a3, a0
141; RV64IA-NEXT:    lw a3, 0(a2)
142; RV64IA-NEXT:    andi a0, a0, 24
143; RV64IA-NEXT:    not a4, a4
144; RV64IA-NEXT:    andi a1, a1, 255
145; RV64IA-NEXT:  .LBB0_1: # %atomicrmw.start
146; RV64IA-NEXT:    # =>This Loop Header: Depth=1
147; RV64IA-NEXT:    # Child Loop BB0_3 Depth 2
148; RV64IA-NEXT:    srlw a5, a3, a0
149; RV64IA-NEXT:    sext.w a6, a3
150; RV64IA-NEXT:    andi a7, a5, 255
151; RV64IA-NEXT:    addi a5, a5, 1
152; RV64IA-NEXT:    sltu a7, a7, a1
153; RV64IA-NEXT:    negw a7, a7
154; RV64IA-NEXT:    and a5, a7, a5
155; RV64IA-NEXT:    andi a5, a5, 255
156; RV64IA-NEXT:    sllw a5, a5, a0
157; RV64IA-NEXT:    and a3, a3, a4
158; RV64IA-NEXT:    or a5, a3, a5
159; RV64IA-NEXT:  .LBB0_3: # %atomicrmw.start
160; RV64IA-NEXT:    # Parent Loop BB0_1 Depth=1
161; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
162; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
163; RV64IA-NEXT:    bne a3, a6, .LBB0_1
164; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
165; RV64IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
166; RV64IA-NEXT:    sc.w.rl a7, a5, (a2)
167; RV64IA-NEXT:    bnez a7, .LBB0_3
168; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
169; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
170; RV64IA-NEXT:    srlw a0, a3, a0
171; RV64IA-NEXT:    ret
172  %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
173  ret i8 %result
174}
175
; atomicrmw uinc_wrap, i16, seq_cst. Same structure as the i8 case but with a
; 16-bit mask (lui 16 / addi(w) -1 materialises 0xFFFF) and the
; __atomic_compare_exchange_2 libcall when the A extension is unavailable.
; +a targets again use a masked word LR/SC loop on the aligned word.
176define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
177; RV32I-LABEL: atomicrmw_uinc_wrap_i16:
178; RV32I:       # %bb.0:
179; RV32I-NEXT:    addi sp, sp, -32
180; RV32I-NEXT:    .cfi_def_cfa_offset 32
181; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
182; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
183; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
184; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
185; RV32I-NEXT:    .cfi_offset ra, -4
186; RV32I-NEXT:    .cfi_offset s0, -8
187; RV32I-NEXT:    .cfi_offset s1, -12
188; RV32I-NEXT:    .cfi_offset s2, -16
189; RV32I-NEXT:    mv s0, a0
190; RV32I-NEXT:    lhu a3, 0(a0)
191; RV32I-NEXT:    lui s1, 16
192; RV32I-NEXT:    addi s1, s1, -1
193; RV32I-NEXT:    and s2, a1, s1
194; RV32I-NEXT:  .LBB1_1: # %atomicrmw.start
195; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
196; RV32I-NEXT:    and a0, a3, s1
197; RV32I-NEXT:    addi a1, a3, 1
198; RV32I-NEXT:    sltu a0, a0, s2
199; RV32I-NEXT:    neg a2, a0
200; RV32I-NEXT:    and a2, a2, a1
201; RV32I-NEXT:    sh a3, 14(sp)
202; RV32I-NEXT:    addi a1, sp, 14
203; RV32I-NEXT:    li a3, 5
204; RV32I-NEXT:    li a4, 5
205; RV32I-NEXT:    mv a0, s0
206; RV32I-NEXT:    call __atomic_compare_exchange_2
207; RV32I-NEXT:    lh a3, 14(sp)
208; RV32I-NEXT:    beqz a0, .LBB1_1
209; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
210; RV32I-NEXT:    mv a0, a3
211; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
212; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
213; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
214; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
215; RV32I-NEXT:    .cfi_restore ra
216; RV32I-NEXT:    .cfi_restore s0
217; RV32I-NEXT:    .cfi_restore s1
218; RV32I-NEXT:    .cfi_restore s2
219; RV32I-NEXT:    addi sp, sp, 32
220; RV32I-NEXT:    .cfi_def_cfa_offset 0
221; RV32I-NEXT:    ret
222;
223; RV32IA-LABEL: atomicrmw_uinc_wrap_i16:
224; RV32IA:       # %bb.0:
225; RV32IA-NEXT:    andi a2, a0, -4
226; RV32IA-NEXT:    slli a4, a0, 3
227; RV32IA-NEXT:    lui a3, 16
228; RV32IA-NEXT:    andi a0, a4, 24
229; RV32IA-NEXT:    addi a3, a3, -1
230; RV32IA-NEXT:    lw a5, 0(a2)
231; RV32IA-NEXT:    sll a4, a3, a4
232; RV32IA-NEXT:    not a4, a4
233; RV32IA-NEXT:    and a1, a1, a3
234; RV32IA-NEXT:  .LBB1_1: # %atomicrmw.start
235; RV32IA-NEXT:    # =>This Loop Header: Depth=1
236; RV32IA-NEXT:    # Child Loop BB1_3 Depth 2
237; RV32IA-NEXT:    mv a6, a5
238; RV32IA-NEXT:    srl a5, a5, a0
239; RV32IA-NEXT:    and a7, a5, a3
240; RV32IA-NEXT:    addi a5, a5, 1
241; RV32IA-NEXT:    sltu a7, a7, a1
242; RV32IA-NEXT:    and a5, a5, a3
243; RV32IA-NEXT:    neg a7, a7
244; RV32IA-NEXT:    and a5, a7, a5
245; RV32IA-NEXT:    sll a5, a5, a0
246; RV32IA-NEXT:    and a7, a6, a4
247; RV32IA-NEXT:    or a7, a7, a5
248; RV32IA-NEXT:  .LBB1_3: # %atomicrmw.start
249; RV32IA-NEXT:    # Parent Loop BB1_1 Depth=1
250; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
251; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
252; RV32IA-NEXT:    bne a5, a6, .LBB1_1
253; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
254; RV32IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
255; RV32IA-NEXT:    sc.w.rl t0, a7, (a2)
256; RV32IA-NEXT:    bnez t0, .LBB1_3
257; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
258; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
259; RV32IA-NEXT:    srl a0, a5, a0
260; RV32IA-NEXT:    ret
261;
262; RV64I-LABEL: atomicrmw_uinc_wrap_i16:
263; RV64I:       # %bb.0:
264; RV64I-NEXT:    addi sp, sp, -48
265; RV64I-NEXT:    .cfi_def_cfa_offset 48
266; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
267; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
268; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
269; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
270; RV64I-NEXT:    .cfi_offset ra, -8
271; RV64I-NEXT:    .cfi_offset s0, -16
272; RV64I-NEXT:    .cfi_offset s1, -24
273; RV64I-NEXT:    .cfi_offset s2, -32
274; RV64I-NEXT:    mv s0, a0
275; RV64I-NEXT:    lhu a3, 0(a0)
276; RV64I-NEXT:    lui s1, 16
277; RV64I-NEXT:    addiw s1, s1, -1
278; RV64I-NEXT:    and s2, a1, s1
279; RV64I-NEXT:  .LBB1_1: # %atomicrmw.start
280; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
281; RV64I-NEXT:    and a0, a3, s1
282; RV64I-NEXT:    addi a1, a3, 1
283; RV64I-NEXT:    sltu a0, a0, s2
284; RV64I-NEXT:    neg a2, a0
285; RV64I-NEXT:    and a2, a2, a1
286; RV64I-NEXT:    sh a3, 14(sp)
287; RV64I-NEXT:    addi a1, sp, 14
288; RV64I-NEXT:    li a3, 5
289; RV64I-NEXT:    li a4, 5
290; RV64I-NEXT:    mv a0, s0
291; RV64I-NEXT:    call __atomic_compare_exchange_2
292; RV64I-NEXT:    lh a3, 14(sp)
293; RV64I-NEXT:    beqz a0, .LBB1_1
294; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
295; RV64I-NEXT:    mv a0, a3
296; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
297; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
298; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
299; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
300; RV64I-NEXT:    .cfi_restore ra
301; RV64I-NEXT:    .cfi_restore s0
302; RV64I-NEXT:    .cfi_restore s1
303; RV64I-NEXT:    .cfi_restore s2
304; RV64I-NEXT:    addi sp, sp, 48
305; RV64I-NEXT:    .cfi_def_cfa_offset 0
306; RV64I-NEXT:    ret
307;
308; RV64IA-LABEL: atomicrmw_uinc_wrap_i16:
309; RV64IA:       # %bb.0:
310; RV64IA-NEXT:    andi a2, a0, -4
311; RV64IA-NEXT:    slli a5, a0, 3
312; RV64IA-NEXT:    lui a3, 16
313; RV64IA-NEXT:    andi a0, a5, 24
314; RV64IA-NEXT:    addiw a3, a3, -1
315; RV64IA-NEXT:    lw a4, 0(a2)
316; RV64IA-NEXT:    sllw a5, a3, a5
317; RV64IA-NEXT:    not a5, a5
318; RV64IA-NEXT:    and a1, a1, a3
319; RV64IA-NEXT:  .LBB1_1: # %atomicrmw.start
320; RV64IA-NEXT:    # =>This Loop Header: Depth=1
321; RV64IA-NEXT:    # Child Loop BB1_3 Depth 2
322; RV64IA-NEXT:    srlw a6, a4, a0
323; RV64IA-NEXT:    sext.w a7, a4
324; RV64IA-NEXT:    and t0, a6, a3
325; RV64IA-NEXT:    addi a6, a6, 1
326; RV64IA-NEXT:    sltu t0, t0, a1
327; RV64IA-NEXT:    and a6, a6, a3
328; RV64IA-NEXT:    negw t0, t0
329; RV64IA-NEXT:    and a6, t0, a6
330; RV64IA-NEXT:    sllw a6, a6, a0
331; RV64IA-NEXT:    and a4, a4, a5
332; RV64IA-NEXT:    or a6, a4, a6
333; RV64IA-NEXT:  .LBB1_3: # %atomicrmw.start
334; RV64IA-NEXT:    # Parent Loop BB1_1 Depth=1
335; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
336; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
337; RV64IA-NEXT:    bne a4, a7, .LBB1_1
338; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
339; RV64IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
340; RV64IA-NEXT:    sc.w.rl t0, a6, (a2)
341; RV64IA-NEXT:    bnez t0, .LBB1_3
342; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
343; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
344; RV64IA-NEXT:    srlw a0, a4, a0
345; RV64IA-NEXT:    ret
346  %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
347  ret i16 %result
348}
349
; atomicrmw uinc_wrap, i32, seq_cst. Word-sized, so no sub-word masking:
; RV32I/RV64I expand to a __atomic_compare_exchange_4 libcall loop, while
; RV32IA/RV64IA use a direct lr.w.aqrl / sc.w.rl loop on the pointer itself.
; On RV64 the operands are first sign-extended (sext.w) for the 32-bit compare.
350define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
351; RV32I-LABEL: atomicrmw_uinc_wrap_i32:
352; RV32I:       # %bb.0:
353; RV32I-NEXT:    addi sp, sp, -16
354; RV32I-NEXT:    .cfi_def_cfa_offset 16
355; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
356; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
357; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
358; RV32I-NEXT:    .cfi_offset ra, -4
359; RV32I-NEXT:    .cfi_offset s0, -8
360; RV32I-NEXT:    .cfi_offset s1, -12
361; RV32I-NEXT:    mv s0, a0
362; RV32I-NEXT:    lw a3, 0(a0)
363; RV32I-NEXT:    mv s1, a1
364; RV32I-NEXT:  .LBB2_1: # %atomicrmw.start
365; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
366; RV32I-NEXT:    addi a0, a3, 1
367; RV32I-NEXT:    sltu a1, a3, s1
368; RV32I-NEXT:    neg a2, a1
369; RV32I-NEXT:    and a2, a2, a0
370; RV32I-NEXT:    sw a3, 0(sp)
371; RV32I-NEXT:    mv a1, sp
372; RV32I-NEXT:    li a3, 5
373; RV32I-NEXT:    li a4, 5
374; RV32I-NEXT:    mv a0, s0
375; RV32I-NEXT:    call __atomic_compare_exchange_4
376; RV32I-NEXT:    lw a3, 0(sp)
377; RV32I-NEXT:    beqz a0, .LBB2_1
378; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
379; RV32I-NEXT:    mv a0, a3
380; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
381; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
382; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
383; RV32I-NEXT:    .cfi_restore ra
384; RV32I-NEXT:    .cfi_restore s0
385; RV32I-NEXT:    .cfi_restore s1
386; RV32I-NEXT:    addi sp, sp, 16
387; RV32I-NEXT:    .cfi_def_cfa_offset 0
388; RV32I-NEXT:    ret
389;
390; RV32IA-LABEL: atomicrmw_uinc_wrap_i32:
391; RV32IA:       # %bb.0:
392; RV32IA-NEXT:    lw a2, 0(a0)
393; RV32IA-NEXT:  .LBB2_1: # %atomicrmw.start
394; RV32IA-NEXT:    # =>This Loop Header: Depth=1
395; RV32IA-NEXT:    # Child Loop BB2_3 Depth 2
396; RV32IA-NEXT:    mv a3, a2
397; RV32IA-NEXT:    addi a2, a2, 1
398; RV32IA-NEXT:    sltu a4, a3, a1
399; RV32IA-NEXT:    neg a4, a4
400; RV32IA-NEXT:    and a4, a4, a2
401; RV32IA-NEXT:  .LBB2_3: # %atomicrmw.start
402; RV32IA-NEXT:    # Parent Loop BB2_1 Depth=1
403; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
404; RV32IA-NEXT:    lr.w.aqrl a2, (a0)
405; RV32IA-NEXT:    bne a2, a3, .LBB2_1
406; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
407; RV32IA-NEXT:    # in Loop: Header=BB2_3 Depth=2
408; RV32IA-NEXT:    sc.w.rl a5, a4, (a0)
409; RV32IA-NEXT:    bnez a5, .LBB2_3
410; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
411; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
412; RV32IA-NEXT:    mv a0, a2
413; RV32IA-NEXT:    ret
414;
415; RV64I-LABEL: atomicrmw_uinc_wrap_i32:
416; RV64I:       # %bb.0:
417; RV64I-NEXT:    addi sp, sp, -32
418; RV64I-NEXT:    .cfi_def_cfa_offset 32
419; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
420; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
421; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
422; RV64I-NEXT:    .cfi_offset ra, -8
423; RV64I-NEXT:    .cfi_offset s0, -16
424; RV64I-NEXT:    .cfi_offset s1, -24
425; RV64I-NEXT:    mv s0, a0
426; RV64I-NEXT:    lw a3, 0(a0)
427; RV64I-NEXT:    sext.w s1, a1
428; RV64I-NEXT:  .LBB2_1: # %atomicrmw.start
429; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
430; RV64I-NEXT:    addiw a0, a3, 1
431; RV64I-NEXT:    sltu a1, a3, s1
432; RV64I-NEXT:    neg a2, a1
433; RV64I-NEXT:    and a2, a2, a0
434; RV64I-NEXT:    sw a3, 4(sp)
435; RV64I-NEXT:    addi a1, sp, 4
436; RV64I-NEXT:    li a3, 5
437; RV64I-NEXT:    li a4, 5
438; RV64I-NEXT:    mv a0, s0
439; RV64I-NEXT:    call __atomic_compare_exchange_4
440; RV64I-NEXT:    lw a3, 4(sp)
441; RV64I-NEXT:    beqz a0, .LBB2_1
442; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
443; RV64I-NEXT:    mv a0, a3
444; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
445; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
446; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
447; RV64I-NEXT:    .cfi_restore ra
448; RV64I-NEXT:    .cfi_restore s0
449; RV64I-NEXT:    .cfi_restore s1
450; RV64I-NEXT:    addi sp, sp, 32
451; RV64I-NEXT:    .cfi_def_cfa_offset 0
452; RV64I-NEXT:    ret
453;
454; RV64IA-LABEL: atomicrmw_uinc_wrap_i32:
455; RV64IA:       # %bb.0:
456; RV64IA-NEXT:    lw a2, 0(a0)
457; RV64IA-NEXT:    sext.w a1, a1
458; RV64IA-NEXT:  .LBB2_1: # %atomicrmw.start
459; RV64IA-NEXT:    # =>This Loop Header: Depth=1
460; RV64IA-NEXT:    # Child Loop BB2_3 Depth 2
461; RV64IA-NEXT:    addiw a3, a2, 1
462; RV64IA-NEXT:    sext.w a4, a2
463; RV64IA-NEXT:    sltu a2, a4, a1
464; RV64IA-NEXT:    neg a2, a2
465; RV64IA-NEXT:    and a3, a2, a3
466; RV64IA-NEXT:  .LBB2_3: # %atomicrmw.start
467; RV64IA-NEXT:    # Parent Loop BB2_1 Depth=1
468; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
469; RV64IA-NEXT:    lr.w.aqrl a2, (a0)
470; RV64IA-NEXT:    bne a2, a4, .LBB2_1
471; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
472; RV64IA-NEXT:    # in Loop: Header=BB2_3 Depth=2
473; RV64IA-NEXT:    sc.w.rl a5, a3, (a0)
474; RV64IA-NEXT:    bnez a5, .LBB2_3
475; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
476; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
477; RV64IA-NEXT:    mv a0, a2
478; RV64IA-NEXT:    ret
479  %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
480  ret i32 %result
481}
482
; atomicrmw uinc_wrap, i64, seq_cst. On RV32 the operation is doubleword-sized,
; so BOTH RV32I and RV32IA (no 64-bit LR/SC on RV32) fall back to a
; __atomic_compare_exchange_8 libcall loop, doing the two-word unsigned compare
; (high word via .LBB3_1, low word via %bb.4) and the add-with-carry
; (addi / seqz / add) in registers. RV64I uses the same libcall; RV64IA lowers
; to a native lr.d.aqrl / sc.d.rl loop.
483define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
484; RV32I-LABEL: atomicrmw_uinc_wrap_i64:
485; RV32I:       # %bb.0:
486; RV32I-NEXT:    addi sp, sp, -32
487; RV32I-NEXT:    .cfi_def_cfa_offset 32
488; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
489; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
490; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
491; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
492; RV32I-NEXT:    .cfi_offset ra, -4
493; RV32I-NEXT:    .cfi_offset s0, -8
494; RV32I-NEXT:    .cfi_offset s1, -12
495; RV32I-NEXT:    .cfi_offset s2, -16
496; RV32I-NEXT:    mv s0, a2
497; RV32I-NEXT:    mv s1, a0
498; RV32I-NEXT:    lw a4, 0(a0)
499; RV32I-NEXT:    lw a5, 4(a0)
500; RV32I-NEXT:    mv s2, a1
501; RV32I-NEXT:    j .LBB3_3
502; RV32I-NEXT:  .LBB3_1: # %atomicrmw.start
503; RV32I-NEXT:    # in Loop: Header=BB3_3 Depth=1
504; RV32I-NEXT:    sltu a0, a5, s0
505; RV32I-NEXT:  .LBB3_2: # %atomicrmw.start
506; RV32I-NEXT:    # in Loop: Header=BB3_3 Depth=1
507; RV32I-NEXT:    addi a1, a4, 1
508; RV32I-NEXT:    neg a0, a0
509; RV32I-NEXT:    seqz a3, a1
510; RV32I-NEXT:    and a2, a0, a1
511; RV32I-NEXT:    add a3, a5, a3
512; RV32I-NEXT:    and a3, a0, a3
513; RV32I-NEXT:    sw a4, 8(sp)
514; RV32I-NEXT:    sw a5, 12(sp)
515; RV32I-NEXT:    addi a1, sp, 8
516; RV32I-NEXT:    li a4, 5
517; RV32I-NEXT:    li a5, 5
518; RV32I-NEXT:    mv a0, s1
519; RV32I-NEXT:    call __atomic_compare_exchange_8
520; RV32I-NEXT:    lw a4, 8(sp)
521; RV32I-NEXT:    lw a5, 12(sp)
522; RV32I-NEXT:    bnez a0, .LBB3_5
523; RV32I-NEXT:  .LBB3_3: # %atomicrmw.start
524; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
525; RV32I-NEXT:    bne a5, s0, .LBB3_1
526; RV32I-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
527; RV32I-NEXT:    sltu a0, a4, s2
528; RV32I-NEXT:    j .LBB3_2
529; RV32I-NEXT:  .LBB3_5: # %atomicrmw.end
530; RV32I-NEXT:    mv a0, a4
531; RV32I-NEXT:    mv a1, a5
532; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
533; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
534; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
535; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
536; RV32I-NEXT:    .cfi_restore ra
537; RV32I-NEXT:    .cfi_restore s0
538; RV32I-NEXT:    .cfi_restore s1
539; RV32I-NEXT:    .cfi_restore s2
540; RV32I-NEXT:    addi sp, sp, 32
541; RV32I-NEXT:    .cfi_def_cfa_offset 0
542; RV32I-NEXT:    ret
543;
544; RV32IA-LABEL: atomicrmw_uinc_wrap_i64:
545; RV32IA:       # %bb.0:
546; RV32IA-NEXT:    addi sp, sp, -32
547; RV32IA-NEXT:    .cfi_def_cfa_offset 32
548; RV32IA-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
549; RV32IA-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
550; RV32IA-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
551; RV32IA-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
552; RV32IA-NEXT:    .cfi_offset ra, -4
553; RV32IA-NEXT:    .cfi_offset s0, -8
554; RV32IA-NEXT:    .cfi_offset s1, -12
555; RV32IA-NEXT:    .cfi_offset s2, -16
556; RV32IA-NEXT:    mv s0, a2
557; RV32IA-NEXT:    mv s1, a0
558; RV32IA-NEXT:    lw a4, 0(a0)
559; RV32IA-NEXT:    lw a5, 4(a0)
560; RV32IA-NEXT:    mv s2, a1
561; RV32IA-NEXT:    j .LBB3_3
562; RV32IA-NEXT:  .LBB3_1: # %atomicrmw.start
563; RV32IA-NEXT:    # in Loop: Header=BB3_3 Depth=1
564; RV32IA-NEXT:    sltu a0, a5, s0
565; RV32IA-NEXT:  .LBB3_2: # %atomicrmw.start
566; RV32IA-NEXT:    # in Loop: Header=BB3_3 Depth=1
567; RV32IA-NEXT:    addi a1, a4, 1
568; RV32IA-NEXT:    neg a0, a0
569; RV32IA-NEXT:    seqz a3, a1
570; RV32IA-NEXT:    and a2, a0, a1
571; RV32IA-NEXT:    add a3, a5, a3
572; RV32IA-NEXT:    and a3, a0, a3
573; RV32IA-NEXT:    sw a4, 8(sp)
574; RV32IA-NEXT:    sw a5, 12(sp)
575; RV32IA-NEXT:    addi a1, sp, 8
576; RV32IA-NEXT:    li a4, 5
577; RV32IA-NEXT:    li a5, 5
578; RV32IA-NEXT:    mv a0, s1
579; RV32IA-NEXT:    call __atomic_compare_exchange_8
580; RV32IA-NEXT:    lw a4, 8(sp)
581; RV32IA-NEXT:    lw a5, 12(sp)
582; RV32IA-NEXT:    bnez a0, .LBB3_5
583; RV32IA-NEXT:  .LBB3_3: # %atomicrmw.start
584; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
585; RV32IA-NEXT:    bne a5, s0, .LBB3_1
586; RV32IA-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
587; RV32IA-NEXT:    sltu a0, a4, s2
588; RV32IA-NEXT:    j .LBB3_2
589; RV32IA-NEXT:  .LBB3_5: # %atomicrmw.end
590; RV32IA-NEXT:    mv a0, a4
591; RV32IA-NEXT:    mv a1, a5
592; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
593; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
594; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
595; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
596; RV32IA-NEXT:    .cfi_restore ra
597; RV32IA-NEXT:    .cfi_restore s0
598; RV32IA-NEXT:    .cfi_restore s1
599; RV32IA-NEXT:    .cfi_restore s2
600; RV32IA-NEXT:    addi sp, sp, 32
601; RV32IA-NEXT:    .cfi_def_cfa_offset 0
602; RV32IA-NEXT:    ret
603;
604; RV64I-LABEL: atomicrmw_uinc_wrap_i64:
605; RV64I:       # %bb.0:
606; RV64I-NEXT:    addi sp, sp, -32
607; RV64I-NEXT:    .cfi_def_cfa_offset 32
608; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
609; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
610; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
611; RV64I-NEXT:    .cfi_offset ra, -8
612; RV64I-NEXT:    .cfi_offset s0, -16
613; RV64I-NEXT:    .cfi_offset s1, -24
614; RV64I-NEXT:    mv s0, a0
615; RV64I-NEXT:    ld a3, 0(a0)
616; RV64I-NEXT:    mv s1, a1
617; RV64I-NEXT:  .LBB3_1: # %atomicrmw.start
618; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
619; RV64I-NEXT:    addi a0, a3, 1
620; RV64I-NEXT:    sltu a1, a3, s1
621; RV64I-NEXT:    neg a2, a1
622; RV64I-NEXT:    and a2, a2, a0
623; RV64I-NEXT:    sd a3, 0(sp)
624; RV64I-NEXT:    mv a1, sp
625; RV64I-NEXT:    li a3, 5
626; RV64I-NEXT:    li a4, 5
627; RV64I-NEXT:    mv a0, s0
628; RV64I-NEXT:    call __atomic_compare_exchange_8
629; RV64I-NEXT:    ld a3, 0(sp)
630; RV64I-NEXT:    beqz a0, .LBB3_1
631; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
632; RV64I-NEXT:    mv a0, a3
633; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
634; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
635; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
636; RV64I-NEXT:    .cfi_restore ra
637; RV64I-NEXT:    .cfi_restore s0
638; RV64I-NEXT:    .cfi_restore s1
639; RV64I-NEXT:    addi sp, sp, 32
640; RV64I-NEXT:    .cfi_def_cfa_offset 0
641; RV64I-NEXT:    ret
642;
643; RV64IA-LABEL: atomicrmw_uinc_wrap_i64:
644; RV64IA:       # %bb.0:
645; RV64IA-NEXT:    ld a2, 0(a0)
646; RV64IA-NEXT:  .LBB3_1: # %atomicrmw.start
647; RV64IA-NEXT:    # =>This Loop Header: Depth=1
648; RV64IA-NEXT:    # Child Loop BB3_3 Depth 2
649; RV64IA-NEXT:    mv a3, a2
650; RV64IA-NEXT:    addi a2, a2, 1
651; RV64IA-NEXT:    sltu a4, a3, a1
652; RV64IA-NEXT:    neg a4, a4
653; RV64IA-NEXT:    and a4, a4, a2
654; RV64IA-NEXT:  .LBB3_3: # %atomicrmw.start
655; RV64IA-NEXT:    # Parent Loop BB3_1 Depth=1
656; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
657; RV64IA-NEXT:    lr.d.aqrl a2, (a0)
658; RV64IA-NEXT:    bne a2, a3, .LBB3_1
659; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
660; RV64IA-NEXT:    # in Loop: Header=BB3_3 Depth=2
661; RV64IA-NEXT:    sc.d.rl a5, a4, (a0)
662; RV64IA-NEXT:    bnez a5, .LBB3_3
663; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
664; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
665; RV64IA-NEXT:    mv a0, a2
666; RV64IA-NEXT:    ret
667  %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst
668  ret i64 %result
669}
670
671define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
672; RV32I-LABEL: atomicrmw_udec_wrap_i8:
673; RV32I:       # %bb.0:
674; RV32I-NEXT:    addi sp, sp, -32
675; RV32I-NEXT:    .cfi_def_cfa_offset 32
676; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
677; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
678; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
679; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
680; RV32I-NEXT:    .cfi_offset ra, -4
681; RV32I-NEXT:    .cfi_offset s0, -8
682; RV32I-NEXT:    .cfi_offset s1, -12
683; RV32I-NEXT:    .cfi_offset s2, -16
684; RV32I-NEXT:    mv s0, a0
685; RV32I-NEXT:    lbu a3, 0(a0)
686; RV32I-NEXT:    mv s1, a1
687; RV32I-NEXT:    andi s2, a1, 255
688; RV32I-NEXT:    j .LBB4_2
689; RV32I-NEXT:  .LBB4_1: # %atomicrmw.start
690; RV32I-NEXT:    # in Loop: Header=BB4_2 Depth=1
691; RV32I-NEXT:    sb a3, 15(sp)
692; RV32I-NEXT:    addi a1, sp, 15
693; RV32I-NEXT:    li a3, 5
694; RV32I-NEXT:    li a4, 5
695; RV32I-NEXT:    mv a0, s0
696; RV32I-NEXT:    call __atomic_compare_exchange_1
697; RV32I-NEXT:    lbu a3, 15(sp)
698; RV32I-NEXT:    bnez a0, .LBB4_4
699; RV32I-NEXT:  .LBB4_2: # %atomicrmw.start
700; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
701; RV32I-NEXT:    andi a0, a3, 255
702; RV32I-NEXT:    seqz a1, a0
703; RV32I-NEXT:    sltu a0, s2, a0
704; RV32I-NEXT:    or a0, a1, a0
705; RV32I-NEXT:    mv a2, s1
706; RV32I-NEXT:    bnez a0, .LBB4_1
707; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
708; RV32I-NEXT:    # in Loop: Header=BB4_2 Depth=1
709; RV32I-NEXT:    addi a2, a3, -1
710; RV32I-NEXT:    j .LBB4_1
711; RV32I-NEXT:  .LBB4_4: # %atomicrmw.end
712; RV32I-NEXT:    mv a0, a3
713; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
714; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
715; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
716; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
717; RV32I-NEXT:    .cfi_restore ra
718; RV32I-NEXT:    .cfi_restore s0
719; RV32I-NEXT:    .cfi_restore s1
720; RV32I-NEXT:    .cfi_restore s2
721; RV32I-NEXT:    addi sp, sp, 32
722; RV32I-NEXT:    .cfi_def_cfa_offset 0
723; RV32I-NEXT:    ret
724;
725; RV32IA-LABEL: atomicrmw_udec_wrap_i8:
726; RV32IA:       # %bb.0:
727; RV32IA-NEXT:    andi a2, a0, -4
728; RV32IA-NEXT:    slli a3, a0, 3
729; RV32IA-NEXT:    li a4, 255
730; RV32IA-NEXT:    andi a0, a3, 24
731; RV32IA-NEXT:    lw a6, 0(a2)
732; RV32IA-NEXT:    sll a3, a4, a3
733; RV32IA-NEXT:    not a3, a3
734; RV32IA-NEXT:    andi a4, a1, 255
735; RV32IA-NEXT:    j .LBB4_2
736; RV32IA-NEXT:  .LBB4_1: # %atomicrmw.start
737; RV32IA-NEXT:    # in Loop: Header=BB4_2 Depth=1
738; RV32IA-NEXT:    andi a6, a7, 255
739; RV32IA-NEXT:    sll a6, a6, a0
740; RV32IA-NEXT:    and a7, a5, a3
741; RV32IA-NEXT:    or a7, a7, a6
742; RV32IA-NEXT:  .LBB4_5: # %atomicrmw.start
743; RV32IA-NEXT:    # Parent Loop BB4_2 Depth=1
744; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
745; RV32IA-NEXT:    lr.w.aqrl a6, (a2)
746; RV32IA-NEXT:    bne a6, a5, .LBB4_7
747; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
748; RV32IA-NEXT:    # in Loop: Header=BB4_5 Depth=2
749; RV32IA-NEXT:    sc.w.rl t0, a7, (a2)
750; RV32IA-NEXT:    bnez t0, .LBB4_5
751; RV32IA-NEXT:  .LBB4_7: # %atomicrmw.start
752; RV32IA-NEXT:    # in Loop: Header=BB4_2 Depth=1
753; RV32IA-NEXT:    beq a6, a5, .LBB4_4
754; RV32IA-NEXT:  .LBB4_2: # %atomicrmw.start
755; RV32IA-NEXT:    # =>This Loop Header: Depth=1
756; RV32IA-NEXT:    # Child Loop BB4_5 Depth 2
757; RV32IA-NEXT:    mv a5, a6
758; RV32IA-NEXT:    srl a6, a6, a0
759; RV32IA-NEXT:    andi a7, a6, 255
760; RV32IA-NEXT:    seqz t0, a7
761; RV32IA-NEXT:    sltu a7, a4, a7
762; RV32IA-NEXT:    or t0, t0, a7
763; RV32IA-NEXT:    mv a7, a1
764; RV32IA-NEXT:    bnez t0, .LBB4_1
765; RV32IA-NEXT:  # %bb.3: # %atomicrmw.start
766; RV32IA-NEXT:    # in Loop: Header=BB4_2 Depth=1
767; RV32IA-NEXT:    addi a7, a6, -1
768; RV32IA-NEXT:    j .LBB4_1
769; RV32IA-NEXT:  .LBB4_4: # %atomicrmw.end
770; RV32IA-NEXT:    srl a0, a6, a0
771; RV32IA-NEXT:    ret
772;
773; RV64I-LABEL: atomicrmw_udec_wrap_i8:
774; RV64I:       # %bb.0:
775; RV64I-NEXT:    addi sp, sp, -48
776; RV64I-NEXT:    .cfi_def_cfa_offset 48
777; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
778; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
779; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
780; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
781; RV64I-NEXT:    .cfi_offset ra, -8
782; RV64I-NEXT:    .cfi_offset s0, -16
783; RV64I-NEXT:    .cfi_offset s1, -24
784; RV64I-NEXT:    .cfi_offset s2, -32
785; RV64I-NEXT:    mv s0, a0
786; RV64I-NEXT:    lbu a3, 0(a0)
787; RV64I-NEXT:    mv s1, a1
788; RV64I-NEXT:    andi s2, a1, 255
789; RV64I-NEXT:    j .LBB4_2
790; RV64I-NEXT:  .LBB4_1: # %atomicrmw.start
791; RV64I-NEXT:    # in Loop: Header=BB4_2 Depth=1
792; RV64I-NEXT:    sb a3, 15(sp)
793; RV64I-NEXT:    addi a1, sp, 15
794; RV64I-NEXT:    li a3, 5
795; RV64I-NEXT:    li a4, 5
796; RV64I-NEXT:    mv a0, s0
797; RV64I-NEXT:    call __atomic_compare_exchange_1
798; RV64I-NEXT:    lbu a3, 15(sp)
799; RV64I-NEXT:    bnez a0, .LBB4_4
800; RV64I-NEXT:  .LBB4_2: # %atomicrmw.start
801; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
802; RV64I-NEXT:    andi a0, a3, 255
803; RV64I-NEXT:    seqz a1, a0
804; RV64I-NEXT:    sltu a0, s2, a0
805; RV64I-NEXT:    or a0, a1, a0
806; RV64I-NEXT:    mv a2, s1
807; RV64I-NEXT:    bnez a0, .LBB4_1
808; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
809; RV64I-NEXT:    # in Loop: Header=BB4_2 Depth=1
810; RV64I-NEXT:    addi a2, a3, -1
811; RV64I-NEXT:    j .LBB4_1
812; RV64I-NEXT:  .LBB4_4: # %atomicrmw.end
813; RV64I-NEXT:    mv a0, a3
814; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
815; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
816; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
817; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
818; RV64I-NEXT:    .cfi_restore ra
819; RV64I-NEXT:    .cfi_restore s0
820; RV64I-NEXT:    .cfi_restore s1
821; RV64I-NEXT:    .cfi_restore s2
822; RV64I-NEXT:    addi sp, sp, 48
823; RV64I-NEXT:    .cfi_def_cfa_offset 0
824; RV64I-NEXT:    ret
825;
826; RV64IA-LABEL: atomicrmw_udec_wrap_i8:
827; RV64IA:       # %bb.0:
828; RV64IA-NEXT:    andi a2, a0, -4
829; RV64IA-NEXT:    slli a4, a0, 3
830; RV64IA-NEXT:    li a5, 255
831; RV64IA-NEXT:    andi a0, a4, 24
832; RV64IA-NEXT:    lw a3, 0(a2)
833; RV64IA-NEXT:    sllw a4, a5, a4
834; RV64IA-NEXT:    not a4, a4
835; RV64IA-NEXT:    andi a5, a1, 255
836; RV64IA-NEXT:    j .LBB4_2
837; RV64IA-NEXT:  .LBB4_1: # %atomicrmw.start
838; RV64IA-NEXT:    # in Loop: Header=BB4_2 Depth=1
839; RV64IA-NEXT:    sext.w a6, a3
840; RV64IA-NEXT:    andi a7, a7, 255
841; RV64IA-NEXT:    sllw a7, a7, a0
842; RV64IA-NEXT:    and a3, a3, a4
843; RV64IA-NEXT:    or a7, a3, a7
844; RV64IA-NEXT:  .LBB4_5: # %atomicrmw.start
845; RV64IA-NEXT:    # Parent Loop BB4_2 Depth=1
846; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
847; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
848; RV64IA-NEXT:    bne a3, a6, .LBB4_7
849; RV64IA-NEXT:  # %bb.6: # %atomicrmw.start
850; RV64IA-NEXT:    # in Loop: Header=BB4_5 Depth=2
851; RV64IA-NEXT:    sc.w.rl t0, a7, (a2)
852; RV64IA-NEXT:    bnez t0, .LBB4_5
853; RV64IA-NEXT:  .LBB4_7: # %atomicrmw.start
854; RV64IA-NEXT:    # in Loop: Header=BB4_2 Depth=1
855; RV64IA-NEXT:    beq a3, a6, .LBB4_4
856; RV64IA-NEXT:  .LBB4_2: # %atomicrmw.start
857; RV64IA-NEXT:    # =>This Loop Header: Depth=1
858; RV64IA-NEXT:    # Child Loop BB4_5 Depth 2
859; RV64IA-NEXT:    srlw a6, a3, a0
860; RV64IA-NEXT:    andi a7, a6, 255
861; RV64IA-NEXT:    seqz t0, a7
862; RV64IA-NEXT:    sltu a7, a5, a7
863; RV64IA-NEXT:    or t0, t0, a7
864; RV64IA-NEXT:    mv a7, a1
865; RV64IA-NEXT:    bnez t0, .LBB4_1
866; RV64IA-NEXT:  # %bb.3: # %atomicrmw.start
867; RV64IA-NEXT:    # in Loop: Header=BB4_2 Depth=1
868; RV64IA-NEXT:    addi a7, a6, -1
869; RV64IA-NEXT:    j .LBB4_1
870; RV64IA-NEXT:  .LBB4_4: # %atomicrmw.end
871; RV64IA-NEXT:    srlw a0, a3, a0
872; RV64IA-NEXT:    ret
873  %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
874  ret i8 %result
875}
876
define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; Checks seq_cst atomicrmw udec_wrap on i16: RV32I/RV64I lower to a
; __atomic_compare_exchange_2 libcall loop; RV32IA/RV64IA use a masked
; lr.w.aqrl / sc.w.rl loop on the containing aligned word.
; RV32I-LABEL: atomicrmw_udec_wrap_i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    mv s0, a1
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    lhu a1, 0(a0)
; RV32I-NEXT:    lui s2, 16
; RV32I-NEXT:    addi s2, s2, -1
; RV32I-NEXT:    and s3, s0, s2
; RV32I-NEXT:    j .LBB5_2
; RV32I-NEXT:  .LBB5_1: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV32I-NEXT:    sh a1, 10(sp)
; RV32I-NEXT:    addi a1, sp, 10
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __atomic_compare_exchange_2
; RV32I-NEXT:    lh a1, 10(sp)
; RV32I-NEXT:    bnez a0, .LBB5_4
; RV32I-NEXT:  .LBB5_2: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    and a0, a1, s2
; RV32I-NEXT:    seqz a2, a0
; RV32I-NEXT:    sltu a0, s3, a0
; RV32I-NEXT:    or a0, a2, a0
; RV32I-NEXT:    mv a2, s0
; RV32I-NEXT:    bnez a0, .LBB5_1
; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV32I-NEXT:    addi a2, a1, -1
; RV32I-NEXT:    j .LBB5_1
; RV32I-NEXT:  .LBB5_4: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i16:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    andi a2, a0, -4
; RV32IA-NEXT:    slli a4, a0, 3
; RV32IA-NEXT:    lui a3, 16
; RV32IA-NEXT:    andi a0, a4, 24
; RV32IA-NEXT:    addi a3, a3, -1
; RV32IA-NEXT:    lw a7, 0(a2)
; RV32IA-NEXT:    sll a4, a3, a4
; RV32IA-NEXT:    not a4, a4
; RV32IA-NEXT:    and a5, a1, a3
; RV32IA-NEXT:    j .LBB5_2
; RV32IA-NEXT:  .LBB5_1: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV32IA-NEXT:    and a7, t0, a3
; RV32IA-NEXT:    sll a7, a7, a0
; RV32IA-NEXT:    and t0, a6, a4
; RV32IA-NEXT:    or t0, t0, a7
; RV32IA-NEXT:  .LBB5_5: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB5_2 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a7, (a2)
; RV32IA-NEXT:    bne a7, a6, .LBB5_7
; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB5_5 Depth=2
; RV32IA-NEXT:    sc.w.rl t1, t0, (a2)
; RV32IA-NEXT:    bnez t1, .LBB5_5
; RV32IA-NEXT:  .LBB5_7: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV32IA-NEXT:    beq a7, a6, .LBB5_4
; RV32IA-NEXT:  .LBB5_2: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB5_5 Depth 2
; RV32IA-NEXT:    mv a6, a7
; RV32IA-NEXT:    srl a7, a7, a0
; RV32IA-NEXT:    and t0, a7, a3
; RV32IA-NEXT:    seqz t1, t0
; RV32IA-NEXT:    sltu t0, a5, t0
; RV32IA-NEXT:    or t1, t1, t0
; RV32IA-NEXT:    mv t0, a1
; RV32IA-NEXT:    bnez t1, .LBB5_1
; RV32IA-NEXT:  # %bb.3: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV32IA-NEXT:    addi t0, a7, -1
; RV32IA-NEXT:    j .LBB5_1
; RV32IA-NEXT:  .LBB5_4: # %atomicrmw.end
; RV32IA-NEXT:    srl a0, a7, a0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_udec_wrap_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    mv s0, a1
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    lhu a1, 0(a0)
; RV64I-NEXT:    lui s2, 16
; RV64I-NEXT:    addiw s2, s2, -1
; RV64I-NEXT:    and s3, s0, s2
; RV64I-NEXT:    j .LBB5_2
; RV64I-NEXT:  .LBB5_1: # %atomicrmw.start
; RV64I-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV64I-NEXT:    sh a1, 6(sp)
; RV64I-NEXT:    addi a1, sp, 6
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __atomic_compare_exchange_2
; RV64I-NEXT:    lh a1, 6(sp)
; RV64I-NEXT:    bnez a0, .LBB5_4
; RV64I-NEXT:  .LBB5_2: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    and a0, a1, s2
; RV64I-NEXT:    seqz a2, a0
; RV64I-NEXT:    sltu a0, s3, a0
; RV64I-NEXT:    or a0, a2, a0
; RV64I-NEXT:    mv a2, s0
; RV64I-NEXT:    bnez a0, .LBB5_1
; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
; RV64I-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV64I-NEXT:    addi a2, a1, -1
; RV64I-NEXT:    j .LBB5_1
; RV64I-NEXT:  .LBB5_4: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_udec_wrap_i16:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    andi a2, a0, -4
; RV64IA-NEXT:    slli a5, a0, 3
; RV64IA-NEXT:    lui a3, 16
; RV64IA-NEXT:    andi a0, a5, 24
; RV64IA-NEXT:    addiw a3, a3, -1
; RV64IA-NEXT:    lw a4, 0(a2)
; RV64IA-NEXT:    sllw a5, a3, a5
; RV64IA-NEXT:    not a5, a5
; RV64IA-NEXT:    and a6, a1, a3
; RV64IA-NEXT:    j .LBB5_2
; RV64IA-NEXT:  .LBB5_1: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV64IA-NEXT:    sext.w a7, a4
; RV64IA-NEXT:    and t0, t0, a3
; RV64IA-NEXT:    sllw t0, t0, a0
; RV64IA-NEXT:    and a4, a4, a5
; RV64IA-NEXT:    or t0, a4, t0
; RV64IA-NEXT:  .LBB5_5: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB5_2 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
; RV64IA-NEXT:    bne a4, a7, .LBB5_7
; RV64IA-NEXT:  # %bb.6: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB5_5 Depth=2
; RV64IA-NEXT:    sc.w.rl t1, t0, (a2)
; RV64IA-NEXT:    bnez t1, .LBB5_5
; RV64IA-NEXT:  .LBB5_7: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV64IA-NEXT:    beq a4, a7, .LBB5_4
; RV64IA-NEXT:  .LBB5_2: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB5_5 Depth 2
; RV64IA-NEXT:    srlw a7, a4, a0
; RV64IA-NEXT:    and t0, a7, a3
; RV64IA-NEXT:    seqz t1, t0
; RV64IA-NEXT:    sltu t0, a6, t0
; RV64IA-NEXT:    or t1, t1, t0
; RV64IA-NEXT:    mv t0, a1
; RV64IA-NEXT:    bnez t1, .LBB5_1
; RV64IA-NEXT:  # %bb.3: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB5_2 Depth=1
; RV64IA-NEXT:    addi t0, a7, -1
; RV64IA-NEXT:    j .LBB5_1
; RV64IA-NEXT:  .LBB5_4: # %atomicrmw.end
; RV64IA-NEXT:    srlw a0, a4, a0
; RV64IA-NEXT:    ret
  %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
  ret i16 %result
}
1096
define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; Checks seq_cst atomicrmw udec_wrap on i32: RV32I/RV64I lower to a
; __atomic_compare_exchange_4 libcall loop; RV32IA/RV64IA use a native
; lr.w.aqrl / sc.w.rl compare-exchange loop (no masking needed at word width).
; RV32I-LABEL: atomicrmw_udec_wrap_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lw a3, 0(a0)
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    j .LBB6_2
; RV32I-NEXT:  .LBB6_1: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV32I-NEXT:    sw a3, 0(sp)
; RV32I-NEXT:    mv a1, sp
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_4
; RV32I-NEXT:    lw a3, 0(sp)
; RV32I-NEXT:    bnez a0, .LBB6_4
; RV32I-NEXT:  .LBB6_2: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    seqz a0, a3
; RV32I-NEXT:    sltu a1, s1, a3
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    mv a2, s1
; RV32I-NEXT:    bnez a0, .LBB6_1
; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV32I-NEXT:    addi a2, a3, -1
; RV32I-NEXT:    j .LBB6_1
; RV32I-NEXT:  .LBB6_4: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i32:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    lw a2, 0(a0)
; RV32IA-NEXT:    j .LBB6_2
; RV32IA-NEXT:  .LBB6_1: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV32IA-NEXT:  .LBB6_5: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB6_2 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a2, (a0)
; RV32IA-NEXT:    bne a2, a3, .LBB6_7
; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB6_5 Depth=2
; RV32IA-NEXT:    sc.w.rl a5, a4, (a0)
; RV32IA-NEXT:    bnez a5, .LBB6_5
; RV32IA-NEXT:  .LBB6_7: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV32IA-NEXT:    beq a2, a3, .LBB6_4
; RV32IA-NEXT:  .LBB6_2: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB6_5 Depth 2
; RV32IA-NEXT:    mv a3, a2
; RV32IA-NEXT:    seqz a2, a2
; RV32IA-NEXT:    sltu a4, a1, a3
; RV32IA-NEXT:    or a2, a2, a4
; RV32IA-NEXT:    mv a4, a1
; RV32IA-NEXT:    bnez a2, .LBB6_1
; RV32IA-NEXT:  # %bb.3: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV32IA-NEXT:    addi a4, a3, -1
; RV32IA-NEXT:    j .LBB6_1
; RV32IA-NEXT:  .LBB6_4: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a2
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_udec_wrap_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lw a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    sext.w s2, a1
; RV64I-NEXT:    j .LBB6_2
; RV64I-NEXT:  .LBB6_1: # %atomicrmw.start
; RV64I-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV64I-NEXT:    sw a3, 12(sp)
; RV64I-NEXT:    addi a1, sp, 12
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_4
; RV64I-NEXT:    lw a3, 12(sp)
; RV64I-NEXT:    bnez a0, .LBB6_4
; RV64I-NEXT:  .LBB6_2: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    seqz a0, a3
; RV64I-NEXT:    sltu a1, s2, a3
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    mv a2, s1
; RV64I-NEXT:    bnez a0, .LBB6_1
; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
; RV64I-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV64I-NEXT:    addiw a2, a3, -1
; RV64I-NEXT:    j .LBB6_1
; RV64I-NEXT:  .LBB6_4: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_udec_wrap_i32:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    lw a2, 0(a0)
; RV64IA-NEXT:    sext.w a3, a1
; RV64IA-NEXT:    j .LBB6_2
; RV64IA-NEXT:  .LBB6_1: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV64IA-NEXT:  .LBB6_5: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB6_2 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a4, .LBB6_7
; RV64IA-NEXT:  # %bb.6: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB6_5 Depth=2
; RV64IA-NEXT:    sc.w.rl a6, a5, (a0)
; RV64IA-NEXT:    bnez a6, .LBB6_5
; RV64IA-NEXT:  .LBB6_7: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV64IA-NEXT:    beq a2, a4, .LBB6_4
; RV64IA-NEXT:  .LBB6_2: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB6_5 Depth 2
; RV64IA-NEXT:    sext.w a4, a2
; RV64IA-NEXT:    seqz a5, a4
; RV64IA-NEXT:    sltu a6, a3, a4
; RV64IA-NEXT:    or a6, a5, a6
; RV64IA-NEXT:    mv a5, a1
; RV64IA-NEXT:    bnez a6, .LBB6_1
; RV64IA-NEXT:  # %bb.3: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB6_2 Depth=1
; RV64IA-NEXT:    addiw a5, a2, -1
; RV64IA-NEXT:    j .LBB6_1
; RV64IA-NEXT:  .LBB6_4: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst
  ret i32 %result
}
1270
define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; Checks seq_cst atomicrmw udec_wrap on i64: RV32I, RV32IA, and RV64I all
; lower to a __atomic_compare_exchange_8 libcall loop (RV32 has no 64-bit
; LR/SC even with +a); only RV64IA uses a native lr.d.aqrl / sc.d.rl loop.
; RV32I-LABEL: atomicrmw_udec_wrap_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    lw a5, 0(a0)
; RV32I-NEXT:    lw a4, 4(a0)
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    j .LBB7_2
; RV32I-NEXT:  .LBB7_1: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT:    sw a5, 8(sp)
; RV32I-NEXT:    sw a4, 12(sp)
; RV32I-NEXT:    addi a1, sp, 8
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    li a5, 5
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __atomic_compare_exchange_8
; RV32I-NEXT:    lw a5, 8(sp)
; RV32I-NEXT:    lw a4, 12(sp)
; RV32I-NEXT:    bnez a0, .LBB7_7
; RV32I-NEXT:  .LBB7_2: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    beq a4, s0, .LBB7_4
; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT:    sltu a0, s0, a4
; RV32I-NEXT:    j .LBB7_5
; RV32I-NEXT:  .LBB7_4: # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT:    sltu a0, s2, a5
; RV32I-NEXT:  .LBB7_5: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT:    or a1, a5, a4
; RV32I-NEXT:    seqz a1, a1
; RV32I-NEXT:    or a0, a1, a0
; RV32I-NEXT:    mv a2, s2
; RV32I-NEXT:    mv a3, s0
; RV32I-NEXT:    bnez a0, .LBB7_1
; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT:    seqz a0, a5
; RV32I-NEXT:    sub a3, a4, a0
; RV32I-NEXT:    addi a2, a5, -1
; RV32I-NEXT:    j .LBB7_1
; RV32I-NEXT:  .LBB7_7: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a5
; RV32I-NEXT:    mv a1, a4
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i64:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    addi sp, sp, -32
; RV32IA-NEXT:    .cfi_def_cfa_offset 32
; RV32IA-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    .cfi_offset ra, -4
; RV32IA-NEXT:    .cfi_offset s0, -8
; RV32IA-NEXT:    .cfi_offset s1, -12
; RV32IA-NEXT:    .cfi_offset s2, -16
; RV32IA-NEXT:    mv s0, a2
; RV32IA-NEXT:    mv s1, a0
; RV32IA-NEXT:    lw a5, 0(a0)
; RV32IA-NEXT:    lw a4, 4(a0)
; RV32IA-NEXT:    mv s2, a1
; RV32IA-NEXT:    j .LBB7_2
; RV32IA-NEXT:  .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT:    sw a5, 8(sp)
; RV32IA-NEXT:    sw a4, 12(sp)
; RV32IA-NEXT:    addi a1, sp, 8
; RV32IA-NEXT:    li a4, 5
; RV32IA-NEXT:    li a5, 5
; RV32IA-NEXT:    mv a0, s1
; RV32IA-NEXT:    call __atomic_compare_exchange_8
; RV32IA-NEXT:    lw a5, 8(sp)
; RV32IA-NEXT:    lw a4, 12(sp)
; RV32IA-NEXT:    bnez a0, .LBB7_7
; RV32IA-NEXT:  .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT:    beq a4, s0, .LBB7_4
; RV32IA-NEXT:  # %bb.3: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT:    sltu a0, s0, a4
; RV32IA-NEXT:    j .LBB7_5
; RV32IA-NEXT:  .LBB7_4: # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT:    sltu a0, s2, a5
; RV32IA-NEXT:  .LBB7_5: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT:    or a1, a5, a4
; RV32IA-NEXT:    seqz a1, a1
; RV32IA-NEXT:    or a0, a1, a0
; RV32IA-NEXT:    mv a2, s2
; RV32IA-NEXT:    mv a3, s0
; RV32IA-NEXT:    bnez a0, .LBB7_1
; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT:    seqz a0, a5
; RV32IA-NEXT:    sub a3, a4, a0
; RV32IA-NEXT:    addi a2, a5, -1
; RV32IA-NEXT:    j .LBB7_1
; RV32IA-NEXT:  .LBB7_7: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a5
; RV32IA-NEXT:    mv a1, a4
; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    .cfi_restore ra
; RV32IA-NEXT:    .cfi_restore s0
; RV32IA-NEXT:    .cfi_restore s1
; RV32IA-NEXT:    .cfi_restore s2
; RV32IA-NEXT:    addi sp, sp, 32
; RV32IA-NEXT:    .cfi_def_cfa_offset 0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_udec_wrap_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    ld a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    j .LBB7_2
; RV64I-NEXT:  .LBB7_1: # %atomicrmw.start
; RV64I-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV64I-NEXT:    sd a3, 0(sp)
; RV64I-NEXT:    mv a1, sp
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_8
; RV64I-NEXT:    ld a3, 0(sp)
; RV64I-NEXT:    bnez a0, .LBB7_4
; RV64I-NEXT:  .LBB7_2: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    seqz a0, a3
; RV64I-NEXT:    sltu a1, s1, a3
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    mv a2, s1
; RV64I-NEXT:    bnez a0, .LBB7_1
; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
; RV64I-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV64I-NEXT:    addi a2, a3, -1
; RV64I-NEXT:    j .LBB7_1
; RV64I-NEXT:  .LBB7_4: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_udec_wrap_i64:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    ld a2, 0(a0)
; RV64IA-NEXT:    j .LBB7_2
; RV64IA-NEXT:  .LBB7_1: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV64IA-NEXT:  .LBB7_5: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB7_2 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.d.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a3, .LBB7_7
; RV64IA-NEXT:  # %bb.6: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB7_5 Depth=2
; RV64IA-NEXT:    sc.d.rl a5, a4, (a0)
; RV64IA-NEXT:    bnez a5, .LBB7_5
; RV64IA-NEXT:  .LBB7_7: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV64IA-NEXT:    beq a2, a3, .LBB7_4
; RV64IA-NEXT:  .LBB7_2: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB7_5 Depth 2
; RV64IA-NEXT:    mv a3, a2
; RV64IA-NEXT:    seqz a2, a2
; RV64IA-NEXT:    sltu a4, a1, a3
; RV64IA-NEXT:    or a2, a2, a4
; RV64IA-NEXT:    mv a4, a1
; RV64IA-NEXT:    bnez a2, .LBB7_1
; RV64IA-NEXT:  # %bb.3: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB7_2 Depth=1
; RV64IA-NEXT:    addi a4, a3, -1
; RV64IA-NEXT:    j .LBB7_1
; RV64IA-NEXT:  .LBB7_4: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst
  ret i64 %result
}
1492