xref: /llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll (revision 5ddce70ef0e5a641d7fea95e31fc5e2439cb98cb)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
3; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
4
5; Ensure there's no stack spill in between ldxr/stxr pairs.
6
; i8 `atomicrmw add` of the constant 1 with seq_cst ordering; returns the old value.
; Base AArch64 (no LSE atomics): the expected output is an ldaxrb/stlxrb loop
; (.LBB0_2) nested inside a compare-and-retry loop (.LBB0_1). Every stack
; spill/reload in the expected output sits before the ldaxrb or after the
; stlxrb retry branch, never between the two.
; With +lse the expected output is a single ldaddalb and no stack frame.
7define i8 @test_rmw_add_8(ptr %dst)   {
8; NOLSE-LABEL: test_rmw_add_8:
9; NOLSE:       // %bb.0: // %entry
10; NOLSE-NEXT:    sub sp, sp, #32
11; NOLSE-NEXT:    .cfi_def_cfa_offset 32
12; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
13; NOLSE-NEXT:    ldrb w8, [x0]
14; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
15; NOLSE-NEXT:    b .LBB0_1
16; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
17; NOLSE-NEXT:    // =>This Loop Header: Depth=1
18; NOLSE-NEXT:    // Child Loop BB0_2 Depth 2
19; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
20; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
21; NOLSE-NEXT:    add w12, w9, #1
22; NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
23; NOLSE-NEXT:    // Parent Loop BB0_1 Depth=1
24; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
25; NOLSE-NEXT:    ldaxrb w8, [x11]
26; NOLSE-NEXT:    cmp w8, w9, uxtb
27; NOLSE-NEXT:    b.ne .LBB0_4
28; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
29; NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=2
30; NOLSE-NEXT:    stlxrb w10, w12, [x11]
31; NOLSE-NEXT:    cbnz w10, .LBB0_2
32; NOLSE-NEXT:  .LBB0_4: // %atomicrmw.start
33; NOLSE-NEXT:    // in Loop: Header=BB0_1 Depth=1
34; NOLSE-NEXT:    subs w9, w8, w9, uxtb
35; NOLSE-NEXT:    cset w9, eq
36; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
37; NOLSE-NEXT:    subs w9, w9, #1
38; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
39; NOLSE-NEXT:    b.ne .LBB0_1
40; NOLSE-NEXT:    b .LBB0_5
41; NOLSE-NEXT:  .LBB0_5: // %atomicrmw.end
42; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
43; NOLSE-NEXT:    add sp, sp, #32
44; NOLSE-NEXT:    ret
45;
46; LSE-LABEL: test_rmw_add_8:
47; LSE:       // %bb.0: // %entry
48; LSE-NEXT:    mov w8, #1
49; LSE-NEXT:    ldaddalb w8, w0, [x0]
50; LSE-NEXT:    ret
51entry:
52  %res = atomicrmw add ptr %dst, i8 1 seq_cst
53  ret i8 %res
54}
55
; i16 `atomicrmw add` of the constant 1 with seq_cst ordering; returns the old value.
; Base AArch64 (no LSE atomics): the expected output is an ldaxrh/stlxrh loop
; (.LBB1_2) nested inside a compare-and-retry loop (.LBB1_1). Every stack
; spill/reload in the expected output sits before the ldaxrh or after the
; stlxrh retry branch, never between the two.
; With +lse the expected output is a single ldaddalh and no stack frame.
56define i16 @test_rmw_add_16(ptr %dst)   {
57; NOLSE-LABEL: test_rmw_add_16:
58; NOLSE:       // %bb.0: // %entry
59; NOLSE-NEXT:    sub sp, sp, #32
60; NOLSE-NEXT:    .cfi_def_cfa_offset 32
61; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
62; NOLSE-NEXT:    ldrh w8, [x0]
63; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
64; NOLSE-NEXT:    b .LBB1_1
65; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
66; NOLSE-NEXT:    // =>This Loop Header: Depth=1
67; NOLSE-NEXT:    // Child Loop BB1_2 Depth 2
68; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
69; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
70; NOLSE-NEXT:    add w12, w9, #1
71; NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
72; NOLSE-NEXT:    // Parent Loop BB1_1 Depth=1
73; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
74; NOLSE-NEXT:    ldaxrh w8, [x11]
75; NOLSE-NEXT:    cmp w8, w9, uxth
76; NOLSE-NEXT:    b.ne .LBB1_4
77; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
78; NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=2
79; NOLSE-NEXT:    stlxrh w10, w12, [x11]
80; NOLSE-NEXT:    cbnz w10, .LBB1_2
81; NOLSE-NEXT:  .LBB1_4: // %atomicrmw.start
82; NOLSE-NEXT:    // in Loop: Header=BB1_1 Depth=1
83; NOLSE-NEXT:    subs w9, w8, w9, uxth
84; NOLSE-NEXT:    cset w9, eq
85; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
86; NOLSE-NEXT:    subs w9, w9, #1
87; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
88; NOLSE-NEXT:    b.ne .LBB1_1
89; NOLSE-NEXT:    b .LBB1_5
90; NOLSE-NEXT:  .LBB1_5: // %atomicrmw.end
91; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
92; NOLSE-NEXT:    add sp, sp, #32
93; NOLSE-NEXT:    ret
94;
95; LSE-LABEL: test_rmw_add_16:
96; LSE:       // %bb.0: // %entry
97; LSE-NEXT:    mov w8, #1
98; LSE-NEXT:    ldaddalh w8, w0, [x0]
99; LSE-NEXT:    ret
100entry:
101  %res = atomicrmw add ptr %dst, i16 1 seq_cst
102  ret i16 %res
103}
104
; i32 `atomicrmw add` of the constant 1 with seq_cst ordering; returns the old value.
; Base AArch64 (no LSE atomics): the expected output is an ldaxr/stlxr loop
; (.LBB2_2) nested inside a compare-and-retry loop (.LBB2_1). Every stack
; spill/reload in the expected output sits before the ldaxr or after the
; stlxr retry branch, never between the two.
; With +lse the expected output is a single ldaddal and no stack frame.
105define i32 @test_rmw_add_32(ptr %dst)   {
106; NOLSE-LABEL: test_rmw_add_32:
107; NOLSE:       // %bb.0: // %entry
108; NOLSE-NEXT:    sub sp, sp, #32
109; NOLSE-NEXT:    .cfi_def_cfa_offset 32
110; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
111; NOLSE-NEXT:    ldr w8, [x0]
112; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
113; NOLSE-NEXT:    b .LBB2_1
114; NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
115; NOLSE-NEXT:    // =>This Loop Header: Depth=1
116; NOLSE-NEXT:    // Child Loop BB2_2 Depth 2
117; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
118; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
119; NOLSE-NEXT:    add w12, w9, #1
120; NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
121; NOLSE-NEXT:    // Parent Loop BB2_1 Depth=1
122; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
123; NOLSE-NEXT:    ldaxr w8, [x11]
124; NOLSE-NEXT:    cmp w8, w9
125; NOLSE-NEXT:    b.ne .LBB2_4
126; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
127; NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=2
128; NOLSE-NEXT:    stlxr w10, w12, [x11]
129; NOLSE-NEXT:    cbnz w10, .LBB2_2
130; NOLSE-NEXT:  .LBB2_4: // %atomicrmw.start
131; NOLSE-NEXT:    // in Loop: Header=BB2_1 Depth=1
132; NOLSE-NEXT:    subs w9, w8, w9
133; NOLSE-NEXT:    cset w9, eq
134; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
135; NOLSE-NEXT:    subs w9, w9, #1
136; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
137; NOLSE-NEXT:    b.ne .LBB2_1
138; NOLSE-NEXT:    b .LBB2_5
139; NOLSE-NEXT:  .LBB2_5: // %atomicrmw.end
140; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
141; NOLSE-NEXT:    add sp, sp, #32
142; NOLSE-NEXT:    ret
143;
144; LSE-LABEL: test_rmw_add_32:
145; LSE:       // %bb.0: // %entry
146; LSE-NEXT:    mov w8, #1
147; LSE-NEXT:    ldaddal w8, w0, [x0]
148; LSE-NEXT:    ret
149entry:
150  %res = atomicrmw add ptr %dst, i32 1 seq_cst
151  ret i32 %res
152}
153
; i64 `atomicrmw add` of the constant 1 with seq_cst ordering; returns the old value.
; Base AArch64 (no LSE atomics): the expected output is a 64-bit ldaxr/stlxr
; loop (.LBB3_2) nested inside a compare-and-retry loop (.LBB3_1). Every stack
; spill/reload in the expected output sits before the ldaxr or after the
; stlxr retry branch, never between the two.
; With +lse the expected output is a single 64-bit ldaddal and no stack frame.
154define i64 @test_rmw_add_64(ptr %dst)   {
155; NOLSE-LABEL: test_rmw_add_64:
156; NOLSE:       // %bb.0: // %entry
157; NOLSE-NEXT:    sub sp, sp, #32
158; NOLSE-NEXT:    .cfi_def_cfa_offset 32
159; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
160; NOLSE-NEXT:    ldr x8, [x0]
161; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
162; NOLSE-NEXT:    b .LBB3_1
163; NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
164; NOLSE-NEXT:    // =>This Loop Header: Depth=1
165; NOLSE-NEXT:    // Child Loop BB3_2 Depth 2
166; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
167; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
168; NOLSE-NEXT:    add x12, x9, #1
169; NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
170; NOLSE-NEXT:    // Parent Loop BB3_1 Depth=1
171; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
172; NOLSE-NEXT:    ldaxr x8, [x11]
173; NOLSE-NEXT:    cmp x8, x9
174; NOLSE-NEXT:    b.ne .LBB3_4
175; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
176; NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=2
177; NOLSE-NEXT:    stlxr w10, x12, [x11]
178; NOLSE-NEXT:    cbnz w10, .LBB3_2
179; NOLSE-NEXT:  .LBB3_4: // %atomicrmw.start
180; NOLSE-NEXT:    // in Loop: Header=BB3_1 Depth=1
181; NOLSE-NEXT:    subs x9, x8, x9
182; NOLSE-NEXT:    cset w9, eq
183; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
184; NOLSE-NEXT:    subs w9, w9, #1
185; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
186; NOLSE-NEXT:    b.ne .LBB3_1
187; NOLSE-NEXT:    b .LBB3_5
188; NOLSE-NEXT:  .LBB3_5: // %atomicrmw.end
189; NOLSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
190; NOLSE-NEXT:    add sp, sp, #32
191; NOLSE-NEXT:    ret
192;
193; LSE-LABEL: test_rmw_add_64:
194; LSE:       // %bb.0: // %entry
195; LSE-NEXT:    mov w8, #1
196; LSE-NEXT:    // kill: def $x8 killed $w8
197; LSE-NEXT:    ldaddal x8, x0, [x0]
198; LSE-NEXT:    ret
199entry:
200  %res = atomicrmw add ptr %dst, i64 1 seq_cst
201  ret i64 %res
202}
203
; i128 `atomicrmw add` of the constant 1 with seq_cst ordering; returns the old value.
; Base AArch64 (no LSE atomics): the expected output is an ldaxp/stlxp loop
; (.LBB4_2) nested inside a compare-and-retry loop (.LBB4_1). The new value is
; formed with adds/cinc before the inner loop; on a compare mismatch (.LBB4_4)
; the just-loaded pair is stored back with stlxp. Every stack spill/reload in
; the expected output sits outside the ldaxp..stlxp region.
; With +lse the expected output is not a single instruction but a caspal
; retry loop (.LBB4_1), with spills/reloads before and after the caspal.
204define i128 @test_rmw_add_128(ptr %dst)   {
205; NOLSE-LABEL: test_rmw_add_128:
206; NOLSE:       // %bb.0: // %entry
207; NOLSE-NEXT:    sub sp, sp, #48
208; NOLSE-NEXT:    .cfi_def_cfa_offset 48
209; NOLSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
210; NOLSE-NEXT:    ldr x8, [x0, #8]
211; NOLSE-NEXT:    ldr x9, [x0]
212; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
213; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
214; NOLSE-NEXT:    b .LBB4_1
215; NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
216; NOLSE-NEXT:    // =>This Loop Header: Depth=1
217; NOLSE-NEXT:    // Child Loop BB4_2 Depth 2
218; NOLSE-NEXT:    ldr x13, [sp, #40] // 8-byte Folded Reload
219; NOLSE-NEXT:    ldr x11, [sp, #32] // 8-byte Folded Reload
220; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
221; NOLSE-NEXT:    adds x14, x11, #1
222; NOLSE-NEXT:    cinc x15, x13, hs
223; NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
224; NOLSE-NEXT:    // Parent Loop BB4_1 Depth=1
225; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
226; NOLSE-NEXT:    ldaxp x10, x12, [x9]
227; NOLSE-NEXT:    cmp x10, x11
228; NOLSE-NEXT:    cset w8, ne
229; NOLSE-NEXT:    cmp x12, x13
230; NOLSE-NEXT:    cinc w8, w8, ne
231; NOLSE-NEXT:    cbnz w8, .LBB4_4
232; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
233; NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
234; NOLSE-NEXT:    stlxp w8, x14, x15, [x9]
235; NOLSE-NEXT:    cbnz w8, .LBB4_2
236; NOLSE-NEXT:    b .LBB4_5
237; NOLSE-NEXT:  .LBB4_4: // %atomicrmw.start
238; NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
239; NOLSE-NEXT:    stlxp w8, x10, x12, [x9]
240; NOLSE-NEXT:    cbnz w8, .LBB4_2
241; NOLSE-NEXT:  .LBB4_5: // %atomicrmw.start
242; NOLSE-NEXT:    // in Loop: Header=BB4_1 Depth=1
243; NOLSE-NEXT:    mov x8, x12
244; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
245; NOLSE-NEXT:    mov x9, x10
246; NOLSE-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
247; NOLSE-NEXT:    subs x12, x12, x13
248; NOLSE-NEXT:    ccmp x10, x11, #0, eq
249; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
250; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
251; NOLSE-NEXT:    b.ne .LBB4_1
252; NOLSE-NEXT:    b .LBB4_6
253; NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
254; NOLSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
255; NOLSE-NEXT:    ldr x0, [sp, #16] // 8-byte Folded Reload
256; NOLSE-NEXT:    add sp, sp, #48
257; NOLSE-NEXT:    ret
258;
259; LSE-LABEL: test_rmw_add_128:
260; LSE:       // %bb.0: // %entry
261; LSE-NEXT:    sub sp, sp, #48
262; LSE-NEXT:    .cfi_def_cfa_offset 48
263; LSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
264; LSE-NEXT:    ldr x8, [x0, #8]
265; LSE-NEXT:    ldr x9, [x0]
266; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
267; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
268; LSE-NEXT:    b .LBB4_1
269; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
270; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
271; LSE-NEXT:    ldr x11, [sp, #40] // 8-byte Folded Reload
272; LSE-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
273; LSE-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
274; LSE-NEXT:    mov x0, x10
275; LSE-NEXT:    mov x1, x11
276; LSE-NEXT:    adds x2, x10, #1
277; LSE-NEXT:    cinc x9, x11, hs
278; LSE-NEXT:    // kill: def $x2 killed $x2 def $x2_x3
279; LSE-NEXT:    mov x3, x9
280; LSE-NEXT:    caspal x0, x1, x2, x3, [x8]
281; LSE-NEXT:    mov x9, x0
282; LSE-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
283; LSE-NEXT:    mov x8, x1
284; LSE-NEXT:    str x8, [sp, #16] // 8-byte Folded Spill
285; LSE-NEXT:    subs x11, x8, x11
286; LSE-NEXT:    ccmp x9, x10, #0, eq
287; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
288; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
289; LSE-NEXT:    b.ne .LBB4_1
290; LSE-NEXT:    b .LBB4_2
291; LSE-NEXT:  .LBB4_2: // %atomicrmw.end
292; LSE-NEXT:    ldr x1, [sp, #16] // 8-byte Folded Reload
293; LSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
294; LSE-NEXT:    add sp, sp, #48
295; LSE-NEXT:    ret
296entry:
297  %res = atomicrmw add ptr %dst, i128 1 seq_cst
298  ret i128 %res
299}
; i8 `atomicrmw nand` with the constant 1, seq_cst ordering; returns the old value.
; In both expected outputs the new value is formed as mvn + orr #0xfffffffe,
; i.e. ~(old & 1), before the atomic sequence.
; Base AArch64 (no LSE atomics): an ldaxrb/stlxrb loop (.LBB5_2) nested inside
; a compare-and-retry loop (.LBB5_1); every stack spill/reload sits before the
; ldaxrb or after the stlxrb retry branch, never between the two.
; With +lse the expected output is a casalb retry loop (.LBB5_1) rather than a
; single instruction, with spills/reloads before and after the casalb.
300define i8 @test_rmw_nand_8(ptr %dst)   {
301; NOLSE-LABEL: test_rmw_nand_8:
302; NOLSE:       // %bb.0: // %entry
303; NOLSE-NEXT:    sub sp, sp, #32
304; NOLSE-NEXT:    .cfi_def_cfa_offset 32
305; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
306; NOLSE-NEXT:    ldrb w8, [x0]
307; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
308; NOLSE-NEXT:    b .LBB5_1
309; NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
310; NOLSE-NEXT:    // =>This Loop Header: Depth=1
311; NOLSE-NEXT:    // Child Loop BB5_2 Depth 2
312; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
313; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
314; NOLSE-NEXT:    mvn w8, w9
315; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
316; NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
317; NOLSE-NEXT:    // Parent Loop BB5_1 Depth=1
318; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
319; NOLSE-NEXT:    ldaxrb w8, [x11]
320; NOLSE-NEXT:    cmp w8, w9, uxtb
321; NOLSE-NEXT:    b.ne .LBB5_4
322; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
323; NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=2
324; NOLSE-NEXT:    stlxrb w10, w12, [x11]
325; NOLSE-NEXT:    cbnz w10, .LBB5_2
326; NOLSE-NEXT:  .LBB5_4: // %atomicrmw.start
327; NOLSE-NEXT:    // in Loop: Header=BB5_1 Depth=1
328; NOLSE-NEXT:    subs w9, w8, w9, uxtb
329; NOLSE-NEXT:    cset w9, eq
330; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
331; NOLSE-NEXT:    subs w9, w9, #1
332; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
333; NOLSE-NEXT:    b.ne .LBB5_1
334; NOLSE-NEXT:    b .LBB5_5
335; NOLSE-NEXT:  .LBB5_5: // %atomicrmw.end
336; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
337; NOLSE-NEXT:    add sp, sp, #32
338; NOLSE-NEXT:    ret
339;
340; LSE-LABEL: test_rmw_nand_8:
341; LSE:       // %bb.0: // %entry
342; LSE-NEXT:    sub sp, sp, #32
343; LSE-NEXT:    .cfi_def_cfa_offset 32
344; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
345; LSE-NEXT:    ldrb w8, [x0]
346; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
347; LSE-NEXT:    b .LBB5_1
348; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
349; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
350; LSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
351; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
352; LSE-NEXT:    mvn w8, w9
353; LSE-NEXT:    orr w10, w8, #0xfffffffe
354; LSE-NEXT:    mov w8, w9
355; LSE-NEXT:    casalb w8, w10, [x11]
356; LSE-NEXT:    subs w9, w8, w9, uxtb
357; LSE-NEXT:    cset w9, eq
358; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
359; LSE-NEXT:    subs w9, w9, #1
360; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
361; LSE-NEXT:    b.ne .LBB5_1
362; LSE-NEXT:    b .LBB5_2
363; LSE-NEXT:  .LBB5_2: // %atomicrmw.end
364; LSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
365; LSE-NEXT:    add sp, sp, #32
366; LSE-NEXT:    ret
367entry:
368  %res = atomicrmw nand ptr %dst, i8 1 seq_cst
369  ret i8 %res
370}
371
; i16 `atomicrmw nand` with the constant 1, seq_cst ordering; returns the old value.
; In both expected outputs the new value is formed as mvn + orr #0xfffffffe,
; i.e. ~(old & 1), before the atomic sequence.
; Base AArch64 (no LSE atomics): an ldaxrh/stlxrh loop (.LBB6_2) nested inside
; a compare-and-retry loop (.LBB6_1); every stack spill/reload sits before the
; ldaxrh or after the stlxrh retry branch, never between the two.
; With +lse the expected output is a casalh retry loop (.LBB6_1) rather than a
; single instruction, with spills/reloads before and after the casalh.
372define i16 @test_rmw_nand_16(ptr %dst)   {
373; NOLSE-LABEL: test_rmw_nand_16:
374; NOLSE:       // %bb.0: // %entry
375; NOLSE-NEXT:    sub sp, sp, #32
376; NOLSE-NEXT:    .cfi_def_cfa_offset 32
377; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
378; NOLSE-NEXT:    ldrh w8, [x0]
379; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
380; NOLSE-NEXT:    b .LBB6_1
381; NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
382; NOLSE-NEXT:    // =>This Loop Header: Depth=1
383; NOLSE-NEXT:    // Child Loop BB6_2 Depth 2
384; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
385; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
386; NOLSE-NEXT:    mvn w8, w9
387; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
388; NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
389; NOLSE-NEXT:    // Parent Loop BB6_1 Depth=1
390; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
391; NOLSE-NEXT:    ldaxrh w8, [x11]
392; NOLSE-NEXT:    cmp w8, w9, uxth
393; NOLSE-NEXT:    b.ne .LBB6_4
394; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
395; NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=2
396; NOLSE-NEXT:    stlxrh w10, w12, [x11]
397; NOLSE-NEXT:    cbnz w10, .LBB6_2
398; NOLSE-NEXT:  .LBB6_4: // %atomicrmw.start
399; NOLSE-NEXT:    // in Loop: Header=BB6_1 Depth=1
400; NOLSE-NEXT:    subs w9, w8, w9, uxth
401; NOLSE-NEXT:    cset w9, eq
402; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
403; NOLSE-NEXT:    subs w9, w9, #1
404; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
405; NOLSE-NEXT:    b.ne .LBB6_1
406; NOLSE-NEXT:    b .LBB6_5
407; NOLSE-NEXT:  .LBB6_5: // %atomicrmw.end
408; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
409; NOLSE-NEXT:    add sp, sp, #32
410; NOLSE-NEXT:    ret
411;
412; LSE-LABEL: test_rmw_nand_16:
413; LSE:       // %bb.0: // %entry
414; LSE-NEXT:    sub sp, sp, #32
415; LSE-NEXT:    .cfi_def_cfa_offset 32
416; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
417; LSE-NEXT:    ldrh w8, [x0]
418; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
419; LSE-NEXT:    b .LBB6_1
420; LSE-NEXT:  .LBB6_1: // %atomicrmw.start
421; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
422; LSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
423; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
424; LSE-NEXT:    mvn w8, w9
425; LSE-NEXT:    orr w10, w8, #0xfffffffe
426; LSE-NEXT:    mov w8, w9
427; LSE-NEXT:    casalh w8, w10, [x11]
428; LSE-NEXT:    subs w9, w8, w9, uxth
429; LSE-NEXT:    cset w9, eq
430; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
431; LSE-NEXT:    subs w9, w9, #1
432; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
433; LSE-NEXT:    b.ne .LBB6_1
434; LSE-NEXT:    b .LBB6_2
435; LSE-NEXT:  .LBB6_2: // %atomicrmw.end
436; LSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
437; LSE-NEXT:    add sp, sp, #32
438; LSE-NEXT:    ret
439entry:
440  %res = atomicrmw nand ptr %dst, i16 1 seq_cst
441  ret i16 %res
442}
443
; i32 `atomicrmw nand` with the constant 1, seq_cst ordering; returns the old value.
; In both expected outputs the new value is formed as mvn + orr #0xfffffffe,
; i.e. ~(old & 1), before the atomic sequence.
; Base AArch64 (no LSE atomics): an ldaxr/stlxr loop (.LBB7_2) nested inside
; a compare-and-retry loop (.LBB7_1); every stack spill/reload sits before the
; ldaxr or after the stlxr retry branch, never between the two.
; With +lse the expected output is a casal retry loop (.LBB7_1) rather than a
; single instruction, with spills/reloads before and after the casal.
444define i32 @test_rmw_nand_32(ptr %dst)   {
445; NOLSE-LABEL: test_rmw_nand_32:
446; NOLSE:       // %bb.0: // %entry
447; NOLSE-NEXT:    sub sp, sp, #32
448; NOLSE-NEXT:    .cfi_def_cfa_offset 32
449; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
450; NOLSE-NEXT:    ldr w8, [x0]
451; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
452; NOLSE-NEXT:    b .LBB7_1
453; NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
454; NOLSE-NEXT:    // =>This Loop Header: Depth=1
455; NOLSE-NEXT:    // Child Loop BB7_2 Depth 2
456; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
457; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
458; NOLSE-NEXT:    mvn w8, w9
459; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
460; NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
461; NOLSE-NEXT:    // Parent Loop BB7_1 Depth=1
462; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
463; NOLSE-NEXT:    ldaxr w8, [x11]
464; NOLSE-NEXT:    cmp w8, w9
465; NOLSE-NEXT:    b.ne .LBB7_4
466; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
467; NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=2
468; NOLSE-NEXT:    stlxr w10, w12, [x11]
469; NOLSE-NEXT:    cbnz w10, .LBB7_2
470; NOLSE-NEXT:  .LBB7_4: // %atomicrmw.start
471; NOLSE-NEXT:    // in Loop: Header=BB7_1 Depth=1
472; NOLSE-NEXT:    subs w9, w8, w9
473; NOLSE-NEXT:    cset w9, eq
474; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
475; NOLSE-NEXT:    subs w9, w9, #1
476; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
477; NOLSE-NEXT:    b.ne .LBB7_1
478; NOLSE-NEXT:    b .LBB7_5
479; NOLSE-NEXT:  .LBB7_5: // %atomicrmw.end
480; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
481; NOLSE-NEXT:    add sp, sp, #32
482; NOLSE-NEXT:    ret
483;
484; LSE-LABEL: test_rmw_nand_32:
485; LSE:       // %bb.0: // %entry
486; LSE-NEXT:    sub sp, sp, #32
487; LSE-NEXT:    .cfi_def_cfa_offset 32
488; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
489; LSE-NEXT:    ldr w8, [x0]
490; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
491; LSE-NEXT:    b .LBB7_1
492; LSE-NEXT:  .LBB7_1: // %atomicrmw.start
493; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
494; LSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
495; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
496; LSE-NEXT:    mvn w8, w9
497; LSE-NEXT:    orr w10, w8, #0xfffffffe
498; LSE-NEXT:    mov w8, w9
499; LSE-NEXT:    casal w8, w10, [x11]
500; LSE-NEXT:    subs w9, w8, w9
501; LSE-NEXT:    cset w9, eq
502; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
503; LSE-NEXT:    subs w9, w9, #1
504; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
505; LSE-NEXT:    b.ne .LBB7_1
506; LSE-NEXT:    b .LBB7_2
507; LSE-NEXT:  .LBB7_2: // %atomicrmw.end
508; LSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
509; LSE-NEXT:    add sp, sp, #32
510; LSE-NEXT:    ret
511entry:
512  %res = atomicrmw nand ptr %dst, i32 1 seq_cst
513  ret i32 %res
514}
515
; i64 `atomicrmw nand` with the constant 1, seq_cst ordering; returns the old value.
; In both expected outputs the new value is formed before the atomic sequence
; by a 32-bit mvn of the low word followed by a 64-bit
; orr #0xfffffffffffffffe.
; Base AArch64 (no LSE atomics): a 64-bit ldaxr/stlxr loop (.LBB8_2) nested
; inside a compare-and-retry loop (.LBB8_1); every stack spill/reload sits
; before the ldaxr or after the stlxr retry branch, never between the two.
; With +lse the expected output is a 64-bit casal retry loop (.LBB8_1) rather
; than a single instruction, with spills/reloads before and after the casal.
516define i64 @test_rmw_nand_64(ptr %dst)   {
517; NOLSE-LABEL: test_rmw_nand_64:
518; NOLSE:       // %bb.0: // %entry
519; NOLSE-NEXT:    sub sp, sp, #32
520; NOLSE-NEXT:    .cfi_def_cfa_offset 32
521; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
522; NOLSE-NEXT:    ldr x8, [x0]
523; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
524; NOLSE-NEXT:    b .LBB8_1
525; NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
526; NOLSE-NEXT:    // =>This Loop Header: Depth=1
527; NOLSE-NEXT:    // Child Loop BB8_2 Depth 2
528; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
529; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
530; NOLSE-NEXT:    mov w8, w9
531; NOLSE-NEXT:    mvn w10, w8
532; NOLSE-NEXT:    // implicit-def: $x8
533; NOLSE-NEXT:    mov w8, w10
534; NOLSE-NEXT:    orr x12, x8, #0xfffffffffffffffe
535; NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
536; NOLSE-NEXT:    // Parent Loop BB8_1 Depth=1
537; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
538; NOLSE-NEXT:    ldaxr x8, [x11]
539; NOLSE-NEXT:    cmp x8, x9
540; NOLSE-NEXT:    b.ne .LBB8_4
541; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
542; NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=2
543; NOLSE-NEXT:    stlxr w10, x12, [x11]
544; NOLSE-NEXT:    cbnz w10, .LBB8_2
545; NOLSE-NEXT:  .LBB8_4: // %atomicrmw.start
546; NOLSE-NEXT:    // in Loop: Header=BB8_1 Depth=1
547; NOLSE-NEXT:    subs x9, x8, x9
548; NOLSE-NEXT:    cset w9, eq
549; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
550; NOLSE-NEXT:    subs w9, w9, #1
551; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
552; NOLSE-NEXT:    b.ne .LBB8_1
553; NOLSE-NEXT:    b .LBB8_5
554; NOLSE-NEXT:  .LBB8_5: // %atomicrmw.end
555; NOLSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
556; NOLSE-NEXT:    add sp, sp, #32
557; NOLSE-NEXT:    ret
558;
559; LSE-LABEL: test_rmw_nand_64:
560; LSE:       // %bb.0: // %entry
561; LSE-NEXT:    sub sp, sp, #32
562; LSE-NEXT:    .cfi_def_cfa_offset 32
563; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
564; LSE-NEXT:    ldr x8, [x0]
565; LSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
566; LSE-NEXT:    b .LBB8_1
567; LSE-NEXT:  .LBB8_1: // %atomicrmw.start
568; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
569; LSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
570; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
571; LSE-NEXT:    mov w8, w9
572; LSE-NEXT:    mvn w10, w8
573; LSE-NEXT:    // implicit-def: $x8
574; LSE-NEXT:    mov w8, w10
575; LSE-NEXT:    orr x10, x8, #0xfffffffffffffffe
576; LSE-NEXT:    mov x8, x9
577; LSE-NEXT:    casal x8, x10, [x11]
578; LSE-NEXT:    subs x9, x8, x9
579; LSE-NEXT:    cset w9, eq
580; LSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
581; LSE-NEXT:    subs w9, w9, #1
582; LSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
583; LSE-NEXT:    b.ne .LBB8_1
584; LSE-NEXT:    b .LBB8_2
585; LSE-NEXT:  .LBB8_2: // %atomicrmw.end
586; LSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
587; LSE-NEXT:    add sp, sp, #32
588; LSE-NEXT:    ret
589entry:
590  %res = atomicrmw nand ptr %dst, i64 1 seq_cst
591  ret i64 %res
592}
593
; i128 `atomicrmw nand` with the constant 1, seq_cst ordering; returns the old value.
; In both expected outputs the new low half is formed by a 32-bit mvn followed
; by a 64-bit orr #0xfffffffffffffffe, and the new high half is the constant
; -1 (mov #-1); both are computed before the atomic sequence.
; Base AArch64 (no LSE atomics): an ldaxp/stlxp loop (.LBB9_2) nested inside a
; compare-and-retry loop (.LBB9_1). On a compare mismatch (.LBB9_4) the
; just-loaded pair is stored back with stlxp. Every stack spill/reload in the
; expected output sits outside the ldaxp..stlxp region.
; With +lse the expected output is a caspal retry loop (.LBB9_1), with
; spills/reloads before and after the caspal.
594define i128 @test_rmw_nand_128(ptr %dst)   {
595; NOLSE-LABEL: test_rmw_nand_128:
596; NOLSE:       // %bb.0: // %entry
597; NOLSE-NEXT:    sub sp, sp, #48
598; NOLSE-NEXT:    .cfi_def_cfa_offset 48
599; NOLSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
600; NOLSE-NEXT:    ldr x8, [x0, #8]
601; NOLSE-NEXT:    ldr x9, [x0]
602; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
603; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
604; NOLSE-NEXT:    b .LBB9_1
605; NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
606; NOLSE-NEXT:    // =>This Loop Header: Depth=1
607; NOLSE-NEXT:    // Child Loop BB9_2 Depth 2
608; NOLSE-NEXT:    ldr x13, [sp, #40] // 8-byte Folded Reload
609; NOLSE-NEXT:    ldr x11, [sp, #32] // 8-byte Folded Reload
610; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
611; NOLSE-NEXT:    mov w8, w11
612; NOLSE-NEXT:    mvn w10, w8
613; NOLSE-NEXT:    // implicit-def: $x8
614; NOLSE-NEXT:    mov w8, w10
615; NOLSE-NEXT:    orr x14, x8, #0xfffffffffffffffe
616; NOLSE-NEXT:    mov x15, #-1
617; NOLSE-NEXT:  .LBB9_2: // %atomicrmw.start
618; NOLSE-NEXT:    // Parent Loop BB9_1 Depth=1
619; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
620; NOLSE-NEXT:    ldaxp x10, x12, [x9]
621; NOLSE-NEXT:    cmp x10, x11
622; NOLSE-NEXT:    cset w8, ne
623; NOLSE-NEXT:    cmp x12, x13
624; NOLSE-NEXT:    cinc w8, w8, ne
625; NOLSE-NEXT:    cbnz w8, .LBB9_4
626; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
627; NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=2
628; NOLSE-NEXT:    stlxp w8, x14, x15, [x9]
629; NOLSE-NEXT:    cbnz w8, .LBB9_2
630; NOLSE-NEXT:    b .LBB9_5
631; NOLSE-NEXT:  .LBB9_4: // %atomicrmw.start
632; NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=2
633; NOLSE-NEXT:    stlxp w8, x10, x12, [x9]
634; NOLSE-NEXT:    cbnz w8, .LBB9_2
635; NOLSE-NEXT:  .LBB9_5: // %atomicrmw.start
636; NOLSE-NEXT:    // in Loop: Header=BB9_1 Depth=1
637; NOLSE-NEXT:    mov x8, x12
638; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
639; NOLSE-NEXT:    mov x9, x10
640; NOLSE-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
641; NOLSE-NEXT:    subs x12, x12, x13
642; NOLSE-NEXT:    ccmp x10, x11, #0, eq
643; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
644; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
645; NOLSE-NEXT:    b.ne .LBB9_1
646; NOLSE-NEXT:    b .LBB9_6
647; NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
648; NOLSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
649; NOLSE-NEXT:    ldr x0, [sp, #16] // 8-byte Folded Reload
650; NOLSE-NEXT:    add sp, sp, #48
651; NOLSE-NEXT:    ret
652;
653; LSE-LABEL: test_rmw_nand_128:
654; LSE:       // %bb.0: // %entry
655; LSE-NEXT:    sub sp, sp, #48
656; LSE-NEXT:    .cfi_def_cfa_offset 48
657; LSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
658; LSE-NEXT:    ldr x8, [x0, #8]
659; LSE-NEXT:    ldr x9, [x0]
660; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
661; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
662; LSE-NEXT:    b .LBB9_1
663; LSE-NEXT:  .LBB9_1: // %atomicrmw.start
664; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
665; LSE-NEXT:    ldr x11, [sp, #40] // 8-byte Folded Reload
666; LSE-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
667; LSE-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
668; LSE-NEXT:    mov x0, x10
669; LSE-NEXT:    mov x1, x11
670; LSE-NEXT:    mov w9, w10
671; LSE-NEXT:    mvn w12, w9
672; LSE-NEXT:    // implicit-def: $x9
673; LSE-NEXT:    mov w9, w12
674; LSE-NEXT:    orr x2, x9, #0xfffffffffffffffe
675; LSE-NEXT:    mov x9, #-1
676; LSE-NEXT:    // kill: def $x2 killed $x2 def $x2_x3
677; LSE-NEXT:    mov x3, x9
678; LSE-NEXT:    caspal x0, x1, x2, x3, [x8]
679; LSE-NEXT:    mov x9, x0
680; LSE-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
681; LSE-NEXT:    mov x8, x1
682; LSE-NEXT:    str x8, [sp, #16] // 8-byte Folded Spill
683; LSE-NEXT:    subs x11, x8, x11
684; LSE-NEXT:    ccmp x9, x10, #0, eq
685; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
686; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
687; LSE-NEXT:    b.ne .LBB9_1
688; LSE-NEXT:    b .LBB9_2
689; LSE-NEXT:  .LBB9_2: // %atomicrmw.end
690; LSE-NEXT:    ldr x1, [sp, #16] // 8-byte Folded Reload
691; LSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
692; LSE-NEXT:    add sp, sp, #48
693; LSE-NEXT:    ret
694entry:
695  %res = atomicrmw nand ptr %dst, i128 1 seq_cst
696  ret i128 %res
697}
698