xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll (revision f6947e479e14e7904aa0b2539a95f5dfdc8f9295)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s -check-prefixes=NOOUTLINE
3; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck %s -check-prefixes=OUTLINE
4; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+lse | FileCheck %s -check-prefixes=LSE
5
; Zero-initialised 128-bit global. The fetch_and_* tests in this file store
; the value returned by their atomicrmw into it.
6@var = global i128 0
7
; 128-bit cmpxchg with acquire success ordering and acquire failure ordering.
; The function returns the loaded value (field 0 of the cmpxchg result pair).
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE - ldaxp/stxp retry loop; on mismatch the loaded pair is stored back
;   OUTLINE   - call to __aarch64_cas16_acq
;   LSE       - a single caspa
8define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
9; NOOUTLINE-LABEL: val_compare_and_swap:
10; NOOUTLINE:       // %bb.0:
11; NOOUTLINE-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
12; NOOUTLINE-NEXT:    ldaxp x8, x1, [x0]
13; NOOUTLINE-NEXT:    cmp x8, x2
14; NOOUTLINE-NEXT:    cset w9, ne
15; NOOUTLINE-NEXT:    cmp x1, x3
16; NOOUTLINE-NEXT:    cinc w9, w9, ne
17; NOOUTLINE-NEXT:    cbz w9, .LBB0_3
18; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB0_1 Depth=1
19; NOOUTLINE-NEXT:    stxp w9, x8, x1, [x0]
20; NOOUTLINE-NEXT:    cbnz w9, .LBB0_1
21; NOOUTLINE-NEXT:    b .LBB0_4
22; NOOUTLINE-NEXT:  .LBB0_3: // in Loop: Header=BB0_1 Depth=1
23; NOOUTLINE-NEXT:    stxp w9, x4, x5, [x0]
24; NOOUTLINE-NEXT:    cbnz w9, .LBB0_1
25; NOOUTLINE-NEXT:  .LBB0_4:
26; NOOUTLINE-NEXT:    mov x0, x8
27; NOOUTLINE-NEXT:    ret
28;
29; OUTLINE-LABEL: val_compare_and_swap:
30; OUTLINE:       // %bb.0:
31; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
32; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
33; OUTLINE-NEXT:    .cfi_offset w30, -16
34; OUTLINE-NEXT:    mov x1, x3
35; OUTLINE-NEXT:    mov x8, x0
36; OUTLINE-NEXT:    mov x0, x2
37; OUTLINE-NEXT:    mov x2, x4
38; OUTLINE-NEXT:    mov x3, x5
39; OUTLINE-NEXT:    mov x4, x8
40; OUTLINE-NEXT:    bl __aarch64_cas16_acq
41; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
42; OUTLINE-NEXT:    ret
43;
44; LSE-LABEL: val_compare_and_swap:
45; LSE:       // %bb.0:
46; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
47; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
48; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
49; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
50; LSE-NEXT:    caspa x2, x3, x4, x5, [x0]
51; LSE-NEXT:    mov x0, x2
52; LSE-NEXT:    mov x1, x3
53; LSE-NEXT:    ret
54  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval acquire acquire
55  %val = extractvalue { i128, i1 } %pair, 0
56  ret i128 %val
57}
58
; 128-bit cmpxchg with seq_cst success ordering and seq_cst failure ordering.
; The function returns the loaded value (field 0 of the cmpxchg result pair).
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE - ldaxp/stlxp retry loop; on mismatch the loaded pair is stored back
;   OUTLINE   - call to __aarch64_cas16_acq_rel
;   LSE       - a single caspal
59define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) {
60; NOOUTLINE-LABEL: val_compare_and_swap_seqcst:
61; NOOUTLINE:       // %bb.0:
62; NOOUTLINE-NEXT:  .LBB1_1: // =>This Inner Loop Header: Depth=1
63; NOOUTLINE-NEXT:    ldaxp x8, x1, [x0]
64; NOOUTLINE-NEXT:    cmp x8, x2
65; NOOUTLINE-NEXT:    cset w9, ne
66; NOOUTLINE-NEXT:    cmp x1, x3
67; NOOUTLINE-NEXT:    cinc w9, w9, ne
68; NOOUTLINE-NEXT:    cbz w9, .LBB1_3
69; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB1_1 Depth=1
70; NOOUTLINE-NEXT:    stlxp w9, x8, x1, [x0]
71; NOOUTLINE-NEXT:    cbnz w9, .LBB1_1
72; NOOUTLINE-NEXT:    b .LBB1_4
73; NOOUTLINE-NEXT:  .LBB1_3: // in Loop: Header=BB1_1 Depth=1
74; NOOUTLINE-NEXT:    stlxp w9, x4, x5, [x0]
75; NOOUTLINE-NEXT:    cbnz w9, .LBB1_1
76; NOOUTLINE-NEXT:  .LBB1_4:
77; NOOUTLINE-NEXT:    mov x0, x8
78; NOOUTLINE-NEXT:    ret
79;
80; OUTLINE-LABEL: val_compare_and_swap_seqcst:
81; OUTLINE:       // %bb.0:
82; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
83; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
84; OUTLINE-NEXT:    .cfi_offset w30, -16
85; OUTLINE-NEXT:    mov x1, x3
86; OUTLINE-NEXT:    mov x8, x0
87; OUTLINE-NEXT:    mov x0, x2
88; OUTLINE-NEXT:    mov x2, x4
89; OUTLINE-NEXT:    mov x3, x5
90; OUTLINE-NEXT:    mov x4, x8
91; OUTLINE-NEXT:    bl __aarch64_cas16_acq_rel
92; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
93; OUTLINE-NEXT:    ret
94;
95; LSE-LABEL: val_compare_and_swap_seqcst:
96; LSE:       // %bb.0:
97; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
98; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
99; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
100; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
101; LSE-NEXT:    caspal x2, x3, x4, x5, [x0]
102; LSE-NEXT:    mov x0, x2
103; LSE-NEXT:    mov x1, x3
104; LSE-NEXT:    ret
105  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval seq_cst seq_cst
106  %val = extractvalue { i128, i1 } %pair, 0
107  ret i128 %val
108}
109
; 128-bit cmpxchg with release success ordering and monotonic failure ordering.
; The function returns the loaded value (field 0 of the cmpxchg result pair).
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE - ldxp/stlxp retry loop; on mismatch the loaded pair is stored back
;   OUTLINE   - call to __aarch64_cas16_rel
;   LSE       - a single caspl
110define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) {
111; NOOUTLINE-LABEL: val_compare_and_swap_release:
112; NOOUTLINE:       // %bb.0:
113; NOOUTLINE-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
114; NOOUTLINE-NEXT:    ldxp x8, x1, [x0]
115; NOOUTLINE-NEXT:    cmp x8, x2
116; NOOUTLINE-NEXT:    cset w9, ne
117; NOOUTLINE-NEXT:    cmp x1, x3
118; NOOUTLINE-NEXT:    cinc w9, w9, ne
119; NOOUTLINE-NEXT:    cbz w9, .LBB2_3
120; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1
121; NOOUTLINE-NEXT:    stlxp w9, x8, x1, [x0]
122; NOOUTLINE-NEXT:    cbnz w9, .LBB2_1
123; NOOUTLINE-NEXT:    b .LBB2_4
124; NOOUTLINE-NEXT:  .LBB2_3: // in Loop: Header=BB2_1 Depth=1
125; NOOUTLINE-NEXT:    stlxp w9, x4, x5, [x0]
126; NOOUTLINE-NEXT:    cbnz w9, .LBB2_1
127; NOOUTLINE-NEXT:  .LBB2_4:
128; NOOUTLINE-NEXT:    mov x0, x8
129; NOOUTLINE-NEXT:    ret
130;
131; OUTLINE-LABEL: val_compare_and_swap_release:
132; OUTLINE:       // %bb.0:
133; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
134; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
135; OUTLINE-NEXT:    .cfi_offset w30, -16
136; OUTLINE-NEXT:    mov x1, x3
137; OUTLINE-NEXT:    mov x8, x0
138; OUTLINE-NEXT:    mov x0, x2
139; OUTLINE-NEXT:    mov x2, x4
140; OUTLINE-NEXT:    mov x3, x5
141; OUTLINE-NEXT:    mov x4, x8
142; OUTLINE-NEXT:    bl __aarch64_cas16_rel
143; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
144; OUTLINE-NEXT:    ret
145;
146; LSE-LABEL: val_compare_and_swap_release:
147; LSE:       // %bb.0:
148; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
149; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
150; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
151; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
152; LSE-NEXT:    caspl x2, x3, x4, x5, [x0]
153; LSE-NEXT:    mov x0, x2
154; LSE-NEXT:    mov x1, x3
155; LSE-NEXT:    ret
156  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval release monotonic
157  %val = extractvalue { i128, i1 } %pair, 0
158  ret i128 %val
159}
160
; 128-bit cmpxchg with monotonic success ordering and monotonic failure ordering.
; The function returns the loaded value (field 0 of the cmpxchg result pair).
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE - ldxp/stxp retry loop; on mismatch the loaded pair is stored back
;   OUTLINE   - call to __aarch64_cas16_relax
;   LSE       - a single casp
161define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) {
162; NOOUTLINE-LABEL: val_compare_and_swap_monotonic:
163; NOOUTLINE:       // %bb.0:
164; NOOUTLINE-NEXT:  .LBB3_1: // =>This Inner Loop Header: Depth=1
165; NOOUTLINE-NEXT:    ldxp x8, x1, [x0]
166; NOOUTLINE-NEXT:    cmp x8, x2
167; NOOUTLINE-NEXT:    cset w9, ne
168; NOOUTLINE-NEXT:    cmp x1, x3
169; NOOUTLINE-NEXT:    cinc w9, w9, ne
170; NOOUTLINE-NEXT:    cbz w9, .LBB3_3
171; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB3_1 Depth=1
172; NOOUTLINE-NEXT:    stxp w9, x8, x1, [x0]
173; NOOUTLINE-NEXT:    cbnz w9, .LBB3_1
174; NOOUTLINE-NEXT:    b .LBB3_4
175; NOOUTLINE-NEXT:  .LBB3_3: // in Loop: Header=BB3_1 Depth=1
176; NOOUTLINE-NEXT:    stxp w9, x4, x5, [x0]
177; NOOUTLINE-NEXT:    cbnz w9, .LBB3_1
178; NOOUTLINE-NEXT:  .LBB3_4:
179; NOOUTLINE-NEXT:    mov x0, x8
180; NOOUTLINE-NEXT:    ret
181;
182; OUTLINE-LABEL: val_compare_and_swap_monotonic:
183; OUTLINE:       // %bb.0:
184; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
185; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
186; OUTLINE-NEXT:    .cfi_offset w30, -16
187; OUTLINE-NEXT:    mov x1, x3
188; OUTLINE-NEXT:    mov x8, x0
189; OUTLINE-NEXT:    mov x0, x2
190; OUTLINE-NEXT:    mov x2, x4
191; OUTLINE-NEXT:    mov x3, x5
192; OUTLINE-NEXT:    mov x4, x8
193; OUTLINE-NEXT:    bl __aarch64_cas16_relax
194; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
195; OUTLINE-NEXT:    ret
196;
197; LSE-LABEL: val_compare_and_swap_monotonic:
198; LSE:       // %bb.0:
199; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
200; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
201; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
202; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
203; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
204; LSE-NEXT:    mov x0, x2
205; LSE-NEXT:    mov x1, x3
206; LSE-NEXT:    ret
207  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval monotonic monotonic
208  %val = extractvalue { i128, i1 } %pair, 0
209  ret i128 %val
210}
211
; 128-bit atomicrmw nand with release ordering; the value returned by the
; atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldxp/stlxp loop computing and + mvn on each half
;   LSE                   - plain ldp of the current value, then a caspl
;                           compare-and-swap loop that retries until the value
;                           returned by caspl equals the one it expected
212define void @fetch_and_nand(ptr %p, i128 %bits) {
213; NOOUTLINE-LABEL: fetch_and_nand:
214; NOOUTLINE:       // %bb.0:
215; NOOUTLINE-NEXT:  .LBB4_1: // %atomicrmw.start
216; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
217; NOOUTLINE-NEXT:    ldxp x9, x8, [x0]
218; NOOUTLINE-NEXT:    and x10, x9, x2
219; NOOUTLINE-NEXT:    and x11, x8, x3
220; NOOUTLINE-NEXT:    mvn x11, x11
221; NOOUTLINE-NEXT:    mvn x10, x10
222; NOOUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
223; NOOUTLINE-NEXT:    cbnz w12, .LBB4_1
224; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
225; NOOUTLINE-NEXT:    adrp x10, :got:var
226; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
227; NOOUTLINE-NEXT:    stp x9, x8, [x10]
228; NOOUTLINE-NEXT:    ret
229;
230; OUTLINE-LABEL: fetch_and_nand:
231; OUTLINE:       // %bb.0:
232; OUTLINE-NEXT:  .LBB4_1: // %atomicrmw.start
233; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
234; OUTLINE-NEXT:    ldxp x9, x8, [x0]
235; OUTLINE-NEXT:    and x10, x9, x2
236; OUTLINE-NEXT:    and x11, x8, x3
237; OUTLINE-NEXT:    mvn x11, x11
238; OUTLINE-NEXT:    mvn x10, x10
239; OUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
240; OUTLINE-NEXT:    cbnz w12, .LBB4_1
241; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
242; OUTLINE-NEXT:    adrp x10, :got:var
243; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
244; OUTLINE-NEXT:    stp x9, x8, [x10]
245; OUTLINE-NEXT:    ret
246;
247; LSE-LABEL: fetch_and_nand:
248; LSE:       // %bb.0:
249; LSE-NEXT:    ldp x4, x5, [x0]
250; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
251; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
252; LSE-NEXT:    mov x7, x5
253; LSE-NEXT:    mov x6, x4
254; LSE-NEXT:    and x8, x7, x3
255; LSE-NEXT:    and x9, x4, x2
256; LSE-NEXT:    mvn x10, x9
257; LSE-NEXT:    mvn x11, x8
258; LSE-NEXT:    mov x4, x6
259; LSE-NEXT:    mov x5, x7
260; LSE-NEXT:    caspl x4, x5, x10, x11, [x0]
261; LSE-NEXT:    cmp x5, x7
262; LSE-NEXT:    ccmp x4, x6, #0, eq
263; LSE-NEXT:    b.ne .LBB4_1
264; LSE-NEXT:  // %bb.2: // %atomicrmw.end
265; LSE-NEXT:    adrp x8, :got:var
266; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
267; LSE-NEXT:    stp x4, x5, [x8]
268; LSE-NEXT:    ret
269
270  %val = atomicrmw nand ptr %p, i128 %bits release
271  store i128 %val, ptr @var, align 16
272  ret void
273}
274
; 128-bit atomicrmw or with seq_cst ordering; the value returned by the
; atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop with an orr on each half
;   LSE                   - plain ldp of the current value, then a caspal
;                           compare-and-swap loop that retries until the value
;                           returned by caspal equals the one it expected
275define void @fetch_and_or(ptr %p, i128 %bits) {
276; NOOUTLINE-LABEL: fetch_and_or:
277; NOOUTLINE:       // %bb.0:
278; NOOUTLINE-NEXT:  .LBB5_1: // %atomicrmw.start
279; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
280; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
281; NOOUTLINE-NEXT:    orr x10, x8, x3
282; NOOUTLINE-NEXT:    orr x11, x9, x2
283; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
284; NOOUTLINE-NEXT:    cbnz w12, .LBB5_1
285; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
286; NOOUTLINE-NEXT:    adrp x10, :got:var
287; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
288; NOOUTLINE-NEXT:    stp x9, x8, [x10]
289; NOOUTLINE-NEXT:    ret
290;
291; OUTLINE-LABEL: fetch_and_or:
292; OUTLINE:       // %bb.0:
293; OUTLINE-NEXT:  .LBB5_1: // %atomicrmw.start
294; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
295; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
296; OUTLINE-NEXT:    orr x10, x8, x3
297; OUTLINE-NEXT:    orr x11, x9, x2
298; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
299; OUTLINE-NEXT:    cbnz w12, .LBB5_1
300; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
301; OUTLINE-NEXT:    adrp x10, :got:var
302; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
303; OUTLINE-NEXT:    stp x9, x8, [x10]
304; OUTLINE-NEXT:    ret
305;
306; LSE-LABEL: fetch_and_or:
307; LSE:       // %bb.0:
308; LSE-NEXT:    ldp x4, x5, [x0]
309; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
310; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
311; LSE-NEXT:    mov x7, x5
312; LSE-NEXT:    mov x6, x4
313; LSE-NEXT:    orr x8, x4, x2
314; LSE-NEXT:    orr x9, x7, x3
315; LSE-NEXT:    mov x4, x6
316; LSE-NEXT:    mov x5, x7
317; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
318; LSE-NEXT:    cmp x5, x7
319; LSE-NEXT:    ccmp x4, x6, #0, eq
320; LSE-NEXT:    b.ne .LBB5_1
321; LSE-NEXT:  // %bb.2: // %atomicrmw.end
322; LSE-NEXT:    adrp x8, :got:var
323; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
324; LSE-NEXT:    stp x4, x5, [x8]
325; LSE-NEXT:    ret
326
327  %val = atomicrmw or ptr %p, i128 %bits seq_cst
328  store i128 %val, ptr @var, align 16
329  ret void
330}
331
; 128-bit atomicrmw add with seq_cst ordering; the value returned by the
; atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop using adds + adc
;   LSE                   - plain ldp of the current value, then a caspal
;                           compare-and-swap loop that retries until the value
;                           returned by caspal equals the one it expected
332define void @fetch_and_add(ptr %p, i128 %bits) {
333; NOOUTLINE-LABEL: fetch_and_add:
334; NOOUTLINE:       // %bb.0:
335; NOOUTLINE-NEXT:  .LBB6_1: // %atomicrmw.start
336; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
337; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
338; NOOUTLINE-NEXT:    adds x10, x9, x2
339; NOOUTLINE-NEXT:    adc x11, x8, x3
340; NOOUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
341; NOOUTLINE-NEXT:    cbnz w12, .LBB6_1
342; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
343; NOOUTLINE-NEXT:    adrp x10, :got:var
344; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
345; NOOUTLINE-NEXT:    stp x9, x8, [x10]
346; NOOUTLINE-NEXT:    ret
347;
348; OUTLINE-LABEL: fetch_and_add:
349; OUTLINE:       // %bb.0:
350; OUTLINE-NEXT:  .LBB6_1: // %atomicrmw.start
351; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
352; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
353; OUTLINE-NEXT:    adds x10, x9, x2
354; OUTLINE-NEXT:    adc x11, x8, x3
355; OUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
356; OUTLINE-NEXT:    cbnz w12, .LBB6_1
357; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
358; OUTLINE-NEXT:    adrp x10, :got:var
359; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
360; OUTLINE-NEXT:    stp x9, x8, [x10]
361; OUTLINE-NEXT:    ret
362;
363; LSE-LABEL: fetch_and_add:
364; LSE:       // %bb.0:
365; LSE-NEXT:    ldp x4, x5, [x0]
366; LSE-NEXT:  .LBB6_1: // %atomicrmw.start
367; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
368; LSE-NEXT:    mov x7, x5
369; LSE-NEXT:    mov x6, x4
370; LSE-NEXT:    adds x8, x4, x2
371; LSE-NEXT:    adc x9, x7, x3
372; LSE-NEXT:    mov x4, x6
373; LSE-NEXT:    mov x5, x7
374; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
375; LSE-NEXT:    cmp x5, x7
376; LSE-NEXT:    ccmp x4, x6, #0, eq
377; LSE-NEXT:    b.ne .LBB6_1
378; LSE-NEXT:  // %bb.2: // %atomicrmw.end
379; LSE-NEXT:    adrp x8, :got:var
380; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
381; LSE-NEXT:    stp x4, x5, [x8]
382; LSE-NEXT:    ret
383  %val = atomicrmw add ptr %p, i128 %bits seq_cst
384  store i128 %val, ptr @var, align 16
385  ret void
386}
387
; 128-bit atomicrmw sub with seq_cst ordering; the value returned by the
; atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop using subs + sbc
;   LSE                   - plain ldp of the current value, then a caspal
;                           compare-and-swap loop that retries until the value
;                           returned by caspal equals the one it expected
388define void @fetch_and_sub(ptr %p, i128 %bits) {
389; NOOUTLINE-LABEL: fetch_and_sub:
390; NOOUTLINE:       // %bb.0:
391; NOOUTLINE-NEXT:  .LBB7_1: // %atomicrmw.start
392; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
393; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
394; NOOUTLINE-NEXT:    subs x10, x9, x2
395; NOOUTLINE-NEXT:    sbc x11, x8, x3
396; NOOUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
397; NOOUTLINE-NEXT:    cbnz w12, .LBB7_1
398; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
399; NOOUTLINE-NEXT:    adrp x10, :got:var
400; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
401; NOOUTLINE-NEXT:    stp x9, x8, [x10]
402; NOOUTLINE-NEXT:    ret
403;
404; OUTLINE-LABEL: fetch_and_sub:
405; OUTLINE:       // %bb.0:
406; OUTLINE-NEXT:  .LBB7_1: // %atomicrmw.start
407; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
408; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
409; OUTLINE-NEXT:    subs x10, x9, x2
410; OUTLINE-NEXT:    sbc x11, x8, x3
411; OUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
412; OUTLINE-NEXT:    cbnz w12, .LBB7_1
413; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
414; OUTLINE-NEXT:    adrp x10, :got:var
415; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
416; OUTLINE-NEXT:    stp x9, x8, [x10]
417; OUTLINE-NEXT:    ret
418;
419; LSE-LABEL: fetch_and_sub:
420; LSE:       // %bb.0:
421; LSE-NEXT:    ldp x4, x5, [x0]
422; LSE-NEXT:  .LBB7_1: // %atomicrmw.start
423; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
424; LSE-NEXT:    mov x7, x5
425; LSE-NEXT:    mov x6, x4
426; LSE-NEXT:    subs x8, x4, x2
427; LSE-NEXT:    sbc x9, x7, x3
428; LSE-NEXT:    mov x4, x6
429; LSE-NEXT:    mov x5, x7
430; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
431; LSE-NEXT:    cmp x5, x7
432; LSE-NEXT:    ccmp x4, x6, #0, eq
433; LSE-NEXT:    b.ne .LBB7_1
434; LSE-NEXT:  // %bb.2: // %atomicrmw.end
435; LSE-NEXT:    adrp x8, :got:var
436; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
437; LSE-NEXT:    stp x4, x5, [x8]
438; LSE-NEXT:    ret
439  %val = atomicrmw sub ptr %p, i128 %bits seq_cst
440  store i128 %val, ptr @var, align 16
441  ret void
442}
443
; 128-bit atomicrmw min (signed) with seq_cst ordering; the value returned by
; the atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop; cmp + sbcs compares
;                           %bits against the loaded pair and csel on "ge"
;                           picks the value to store
;   LSE                   - plain ldp, then the same cmp/sbcs/csel selection
;                           inside a caspal compare-and-swap retry loop
444define void @fetch_and_min(ptr %p, i128 %bits) {
445; NOOUTLINE-LABEL: fetch_and_min:
446; NOOUTLINE:       // %bb.0:
447; NOOUTLINE-NEXT:  .LBB8_1: // %atomicrmw.start
448; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
449; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
450; NOOUTLINE-NEXT:    cmp x2, x9
451; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
452; NOOUTLINE-NEXT:    csel x10, x8, x3, ge
453; NOOUTLINE-NEXT:    csel x11, x9, x2, ge
454; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
455; NOOUTLINE-NEXT:    cbnz w12, .LBB8_1
456; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
457; NOOUTLINE-NEXT:    adrp x10, :got:var
458; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
459; NOOUTLINE-NEXT:    stp x9, x8, [x10]
460; NOOUTLINE-NEXT:    ret
461;
462; OUTLINE-LABEL: fetch_and_min:
463; OUTLINE:       // %bb.0:
464; OUTLINE-NEXT:  .LBB8_1: // %atomicrmw.start
465; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
466; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
467; OUTLINE-NEXT:    cmp x2, x9
468; OUTLINE-NEXT:    sbcs xzr, x3, x8
469; OUTLINE-NEXT:    csel x10, x8, x3, ge
470; OUTLINE-NEXT:    csel x11, x9, x2, ge
471; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
472; OUTLINE-NEXT:    cbnz w12, .LBB8_1
473; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
474; OUTLINE-NEXT:    adrp x10, :got:var
475; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
476; OUTLINE-NEXT:    stp x9, x8, [x10]
477; OUTLINE-NEXT:    ret
478;
479; LSE-LABEL: fetch_and_min:
480; LSE:       // %bb.0:
481; LSE-NEXT:    ldp x4, x5, [x0]
482; LSE-NEXT:  .LBB8_1: // %atomicrmw.start
483; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
484; LSE-NEXT:    mov x7, x5
485; LSE-NEXT:    mov x6, x4
486; LSE-NEXT:    cmp x2, x4
487; LSE-NEXT:    sbcs xzr, x3, x7
488; LSE-NEXT:    csel x9, x7, x3, ge
489; LSE-NEXT:    csel x8, x4, x2, ge
490; LSE-NEXT:    mov x4, x6
491; LSE-NEXT:    mov x5, x7
492; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
493; LSE-NEXT:    cmp x5, x7
494; LSE-NEXT:    ccmp x4, x6, #0, eq
495; LSE-NEXT:    b.ne .LBB8_1
496; LSE-NEXT:  // %bb.2: // %atomicrmw.end
497; LSE-NEXT:    adrp x8, :got:var
498; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
499; LSE-NEXT:    stp x4, x5, [x8]
500; LSE-NEXT:    ret
501  %val = atomicrmw min ptr %p, i128 %bits seq_cst
502  store i128 %val, ptr @var, align 16
503  ret void
504}
505
; 128-bit atomicrmw max (signed) with seq_cst ordering; the value returned by
; the atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop; cmp + sbcs compares
;                           %bits against the loaded pair and csel on "lt"
;                           picks the value to store
;   LSE                   - plain ldp, then the same cmp/sbcs/csel selection
;                           inside a caspal compare-and-swap retry loop
506define void @fetch_and_max(ptr %p, i128 %bits) {
507; NOOUTLINE-LABEL: fetch_and_max:
508; NOOUTLINE:       // %bb.0:
509; NOOUTLINE-NEXT:  .LBB9_1: // %atomicrmw.start
510; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
511; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
512; NOOUTLINE-NEXT:    cmp x2, x9
513; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
514; NOOUTLINE-NEXT:    csel x10, x8, x3, lt
515; NOOUTLINE-NEXT:    csel x11, x9, x2, lt
516; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
517; NOOUTLINE-NEXT:    cbnz w12, .LBB9_1
518; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
519; NOOUTLINE-NEXT:    adrp x10, :got:var
520; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
521; NOOUTLINE-NEXT:    stp x9, x8, [x10]
522; NOOUTLINE-NEXT:    ret
523;
524; OUTLINE-LABEL: fetch_and_max:
525; OUTLINE:       // %bb.0:
526; OUTLINE-NEXT:  .LBB9_1: // %atomicrmw.start
527; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
528; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
529; OUTLINE-NEXT:    cmp x2, x9
530; OUTLINE-NEXT:    sbcs xzr, x3, x8
531; OUTLINE-NEXT:    csel x10, x8, x3, lt
532; OUTLINE-NEXT:    csel x11, x9, x2, lt
533; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
534; OUTLINE-NEXT:    cbnz w12, .LBB9_1
535; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
536; OUTLINE-NEXT:    adrp x10, :got:var
537; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
538; OUTLINE-NEXT:    stp x9, x8, [x10]
539; OUTLINE-NEXT:    ret
540;
541; LSE-LABEL: fetch_and_max:
542; LSE:       // %bb.0:
543; LSE-NEXT:    ldp x4, x5, [x0]
544; LSE-NEXT:  .LBB9_1: // %atomicrmw.start
545; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
546; LSE-NEXT:    mov x7, x5
547; LSE-NEXT:    mov x6, x4
548; LSE-NEXT:    cmp x2, x4
549; LSE-NEXT:    sbcs xzr, x3, x7
550; LSE-NEXT:    csel x9, x7, x3, lt
551; LSE-NEXT:    csel x8, x4, x2, lt
552; LSE-NEXT:    mov x4, x6
553; LSE-NEXT:    mov x5, x7
554; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
555; LSE-NEXT:    cmp x5, x7
556; LSE-NEXT:    ccmp x4, x6, #0, eq
557; LSE-NEXT:    b.ne .LBB9_1
558; LSE-NEXT:  // %bb.2: // %atomicrmw.end
559; LSE-NEXT:    adrp x8, :got:var
560; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
561; LSE-NEXT:    stp x4, x5, [x8]
562; LSE-NEXT:    ret
563  %val = atomicrmw max ptr %p, i128 %bits seq_cst
564  store i128 %val, ptr @var, align 16
565  ret void
566}
567
; 128-bit atomicrmw umin (unsigned) with seq_cst ordering; the value returned
; by the atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop; cmp + sbcs compares
;                           %bits against the loaded pair and csel on "hs"
;                           picks the value to store
;   LSE                   - plain ldp, then the same cmp/sbcs/csel selection
;                           inside a caspal compare-and-swap retry loop
568define void @fetch_and_umin(ptr %p, i128 %bits) {
569; NOOUTLINE-LABEL: fetch_and_umin:
570; NOOUTLINE:       // %bb.0:
571; NOOUTLINE-NEXT:  .LBB10_1: // %atomicrmw.start
572; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
573; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
574; NOOUTLINE-NEXT:    cmp x2, x9
575; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
576; NOOUTLINE-NEXT:    csel x10, x8, x3, hs
577; NOOUTLINE-NEXT:    csel x11, x9, x2, hs
578; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
579; NOOUTLINE-NEXT:    cbnz w12, .LBB10_1
580; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
581; NOOUTLINE-NEXT:    adrp x10, :got:var
582; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
583; NOOUTLINE-NEXT:    stp x9, x8, [x10]
584; NOOUTLINE-NEXT:    ret
585;
586; OUTLINE-LABEL: fetch_and_umin:
587; OUTLINE:       // %bb.0:
588; OUTLINE-NEXT:  .LBB10_1: // %atomicrmw.start
589; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
590; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
591; OUTLINE-NEXT:    cmp x2, x9
592; OUTLINE-NEXT:    sbcs xzr, x3, x8
593; OUTLINE-NEXT:    csel x10, x8, x3, hs
594; OUTLINE-NEXT:    csel x11, x9, x2, hs
595; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
596; OUTLINE-NEXT:    cbnz w12, .LBB10_1
597; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
598; OUTLINE-NEXT:    adrp x10, :got:var
599; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
600; OUTLINE-NEXT:    stp x9, x8, [x10]
601; OUTLINE-NEXT:    ret
602;
603; LSE-LABEL: fetch_and_umin:
604; LSE:       // %bb.0:
605; LSE-NEXT:    ldp x4, x5, [x0]
606; LSE-NEXT:  .LBB10_1: // %atomicrmw.start
607; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
608; LSE-NEXT:    mov x7, x5
609; LSE-NEXT:    mov x6, x4
610; LSE-NEXT:    cmp x2, x4
611; LSE-NEXT:    sbcs xzr, x3, x7
612; LSE-NEXT:    csel x9, x7, x3, hs
613; LSE-NEXT:    csel x8, x4, x2, hs
614; LSE-NEXT:    mov x4, x6
615; LSE-NEXT:    mov x5, x7
616; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
617; LSE-NEXT:    cmp x5, x7
618; LSE-NEXT:    ccmp x4, x6, #0, eq
619; LSE-NEXT:    b.ne .LBB10_1
620; LSE-NEXT:  // %bb.2: // %atomicrmw.end
621; LSE-NEXT:    adrp x8, :got:var
622; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
623; LSE-NEXT:    stp x4, x5, [x8]
624; LSE-NEXT:    ret
625  %val = atomicrmw umin ptr %p, i128 %bits seq_cst
626  store i128 %val, ptr @var, align 16
627  ret void
628}
629
; 128-bit atomicrmw umax (unsigned) with seq_cst ordering; the value returned
; by the atomicrmw is stored to @var.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop; cmp + sbcs compares
;                           %bits against the loaded pair and csel on "lo"
;                           picks the value to store
;   LSE                   - plain ldp, then the same cmp/sbcs/csel selection
;                           inside a caspal compare-and-swap retry loop
630define void @fetch_and_umax(ptr %p, i128 %bits) {
631; NOOUTLINE-LABEL: fetch_and_umax:
632; NOOUTLINE:       // %bb.0:
633; NOOUTLINE-NEXT:  .LBB11_1: // %atomicrmw.start
634; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
635; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
636; NOOUTLINE-NEXT:    cmp x2, x9
637; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
638; NOOUTLINE-NEXT:    csel x10, x8, x3, lo
639; NOOUTLINE-NEXT:    csel x11, x9, x2, lo
640; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
641; NOOUTLINE-NEXT:    cbnz w12, .LBB11_1
642; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
643; NOOUTLINE-NEXT:    adrp x10, :got:var
644; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
645; NOOUTLINE-NEXT:    stp x9, x8, [x10]
646; NOOUTLINE-NEXT:    ret
647;
648; OUTLINE-LABEL: fetch_and_umax:
649; OUTLINE:       // %bb.0:
650; OUTLINE-NEXT:  .LBB11_1: // %atomicrmw.start
651; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
652; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
653; OUTLINE-NEXT:    cmp x2, x9
654; OUTLINE-NEXT:    sbcs xzr, x3, x8
655; OUTLINE-NEXT:    csel x10, x8, x3, lo
656; OUTLINE-NEXT:    csel x11, x9, x2, lo
657; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
658; OUTLINE-NEXT:    cbnz w12, .LBB11_1
659; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
660; OUTLINE-NEXT:    adrp x10, :got:var
661; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
662; OUTLINE-NEXT:    stp x9, x8, [x10]
663; OUTLINE-NEXT:    ret
664;
665; LSE-LABEL: fetch_and_umax:
666; LSE:       // %bb.0:
667; LSE-NEXT:    ldp x4, x5, [x0]
668; LSE-NEXT:  .LBB11_1: // %atomicrmw.start
669; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
670; LSE-NEXT:    mov x7, x5
671; LSE-NEXT:    mov x6, x4
672; LSE-NEXT:    cmp x2, x4
673; LSE-NEXT:    sbcs xzr, x3, x7
674; LSE-NEXT:    csel x9, x7, x3, lo
675; LSE-NEXT:    csel x8, x4, x2, lo
676; LSE-NEXT:    mov x4, x6
677; LSE-NEXT:    mov x5, x7
678; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
679; LSE-NEXT:    cmp x5, x7
680; LSE-NEXT:    ccmp x4, x6, #0, eq
681; LSE-NEXT:    b.ne .LBB11_1
682; LSE-NEXT:  // %bb.2: // %atomicrmw.end
683; LSE-NEXT:    adrp x8, :got:var
684; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
685; LSE-NEXT:    stp x4, x5, [x8]
686; LSE-NEXT:    ret
687  %val = atomicrmw umax ptr %p, i128 %bits seq_cst
688  store i128 %val, ptr @var, align 16
689  ret void
690}
691
; 128-bit atomic load with seq_cst ordering and 16-byte alignment; the loaded
; value is returned.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop that writes the loaded
;                           pair straight back
;   LSE                   - x2/x3 are zeroed and used as both the compare pair
;                           and the new pair of a single caspal, whose result
;                           is then moved to x0/x1
692define i128 @atomic_load_seq_cst(ptr %p) {
693; NOOUTLINE-LABEL: atomic_load_seq_cst:
694; NOOUTLINE:       // %bb.0:
695; NOOUTLINE-NEXT:    mov x8, x0
696; NOOUTLINE-NEXT:  .LBB12_1: // %atomicrmw.start
697; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
698; NOOUTLINE-NEXT:    ldaxp x0, x1, [x8]
699; NOOUTLINE-NEXT:    stlxp w9, x0, x1, [x8]
700; NOOUTLINE-NEXT:    cbnz w9, .LBB12_1
701; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
702; NOOUTLINE-NEXT:    ret
703;
704; OUTLINE-LABEL: atomic_load_seq_cst:
705; OUTLINE:       // %bb.0:
706; OUTLINE-NEXT:    mov x8, x0
707; OUTLINE-NEXT:  .LBB12_1: // %atomicrmw.start
708; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
709; OUTLINE-NEXT:    ldaxp x0, x1, [x8]
710; OUTLINE-NEXT:    stlxp w9, x0, x1, [x8]
711; OUTLINE-NEXT:    cbnz w9, .LBB12_1
712; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
713; OUTLINE-NEXT:    ret
714;
715; LSE-LABEL: atomic_load_seq_cst:
716; LSE:       // %bb.0:
717; LSE-NEXT:    mov x2, #0
718; LSE-NEXT:    mov x3, #0
719; LSE-NEXT:    caspal x2, x3, x2, x3, [x0]
720; LSE-NEXT:    mov x0, x2
721; LSE-NEXT:    mov x1, x3
722; LSE-NEXT:    ret
723   %r = load atomic i128, ptr %p seq_cst, align 16
724   ret i128 %r
725}
726
; 128-bit atomic load with monotonic ordering and 16-byte alignment; the
; loaded value is returned. Two unnamed i64 parameters precede %p, and every
; expected sequence below addresses memory through x2.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldxp/stxp loop that writes the loaded
;                           pair straight back
;   LSE                   - x0/x1 are zeroed and used as both the compare pair
;                           and the new pair of a single casp
727define i128 @atomic_load_relaxed(i64, i64, ptr %p) {
728; NOOUTLINE-LABEL: atomic_load_relaxed:
729; NOOUTLINE:       // %bb.0:
730; NOOUTLINE-NEXT:  .LBB13_1: // %atomicrmw.start
731; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
732; NOOUTLINE-NEXT:    ldxp x0, x1, [x2]
733; NOOUTLINE-NEXT:    stxp w8, x0, x1, [x2]
734; NOOUTLINE-NEXT:    cbnz w8, .LBB13_1
735; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
736; NOOUTLINE-NEXT:    ret
737;
738; OUTLINE-LABEL: atomic_load_relaxed:
739; OUTLINE:       // %bb.0:
740; OUTLINE-NEXT:  .LBB13_1: // %atomicrmw.start
741; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
742; OUTLINE-NEXT:    ldxp x0, x1, [x2]
743; OUTLINE-NEXT:    stxp w8, x0, x1, [x2]
744; OUTLINE-NEXT:    cbnz w8, .LBB13_1
745; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
746; OUTLINE-NEXT:    ret
747;
748; LSE-LABEL: atomic_load_relaxed:
749; LSE:       // %bb.0:
750; LSE-NEXT:    mov x0, #0
751; LSE-NEXT:    mov x1, #0
752; LSE-NEXT:    casp x0, x1, x0, x1, [x2]
753; LSE-NEXT:    ret
754    %r = load atomic i128, ptr %p monotonic, align 16
755    ret i128 %r
756}
757
758
; 128-bit atomic store of %in with seq_cst ordering and 16-byte alignment.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldaxp/stlxp loop; the ldaxp targets
;                           xzr and x8 and its result is not used, the stlxp
;                           writes x0/x1
;   LSE                   - plain ldp of the current value, then a caspal
;                           compare-and-swap loop that retries until the value
;                           returned by caspal equals the one it expected
759define void @atomic_store_seq_cst(i128 %in, ptr %p) {
760; NOOUTLINE-LABEL: atomic_store_seq_cst:
761; NOOUTLINE:       // %bb.0:
762; NOOUTLINE-NEXT:  .LBB14_1: // %atomicrmw.start
763; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
764; NOOUTLINE-NEXT:    ldaxp xzr, x8, [x2]
765; NOOUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
766; NOOUTLINE-NEXT:    cbnz w8, .LBB14_1
767; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
768; NOOUTLINE-NEXT:    ret
769;
770; OUTLINE-LABEL: atomic_store_seq_cst:
771; OUTLINE:       // %bb.0:
772; OUTLINE-NEXT:  .LBB14_1: // %atomicrmw.start
773; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
774; OUTLINE-NEXT:    ldaxp xzr, x8, [x2]
775; OUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
776; OUTLINE-NEXT:    cbnz w8, .LBB14_1
777; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
778; OUTLINE-NEXT:    ret
779;
780; LSE-LABEL: atomic_store_seq_cst:
781; LSE:       // %bb.0:
782; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
783; LSE-NEXT:    ldp x4, x5, [x2]
784; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
785; LSE-NEXT:  .LBB14_1: // %atomicrmw.start
786; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
787; LSE-NEXT:    mov x6, x4
788; LSE-NEXT:    mov x7, x5
789; LSE-NEXT:    caspal x6, x7, x0, x1, [x2]
790; LSE-NEXT:    cmp x7, x5
791; LSE-NEXT:    ccmp x6, x4, #0, eq
792; LSE-NEXT:    mov x4, x6
793; LSE-NEXT:    mov x5, x7
794; LSE-NEXT:    b.ne .LBB14_1
795; LSE-NEXT:  // %bb.2: // %atomicrmw.end
796; LSE-NEXT:    ret
797   store atomic i128 %in, ptr %p seq_cst, align 16
798   ret void
799}
800
; 128-bit atomic store of %in with release ordering and 16-byte alignment.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldxp/stlxp loop; the ldxp targets xzr
;                           and x8 and its result is not used, the stlxp
;                           writes x0/x1
;   LSE                   - plain ldp of the current value, then a caspl
;                           compare-and-swap loop that retries until the value
;                           returned by caspl equals the one it expected
801define void @atomic_store_release(i128 %in, ptr %p) {
802; NOOUTLINE-LABEL: atomic_store_release:
803; NOOUTLINE:       // %bb.0:
804; NOOUTLINE-NEXT:  .LBB15_1: // %atomicrmw.start
805; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
806; NOOUTLINE-NEXT:    ldxp xzr, x8, [x2]
807; NOOUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
808; NOOUTLINE-NEXT:    cbnz w8, .LBB15_1
809; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
810; NOOUTLINE-NEXT:    ret
811;
812; OUTLINE-LABEL: atomic_store_release:
813; OUTLINE:       // %bb.0:
814; OUTLINE-NEXT:  .LBB15_1: // %atomicrmw.start
815; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
816; OUTLINE-NEXT:    ldxp xzr, x8, [x2]
817; OUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
818; OUTLINE-NEXT:    cbnz w8, .LBB15_1
819; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
820; OUTLINE-NEXT:    ret
821;
822; LSE-LABEL: atomic_store_release:
823; LSE:       // %bb.0:
824; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
825; LSE-NEXT:    ldp x4, x5, [x2]
826; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
827; LSE-NEXT:  .LBB15_1: // %atomicrmw.start
828; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
829; LSE-NEXT:    mov x6, x4
830; LSE-NEXT:    mov x7, x5
831; LSE-NEXT:    caspl x6, x7, x0, x1, [x2]
832; LSE-NEXT:    cmp x7, x5
833; LSE-NEXT:    ccmp x6, x4, #0, eq
834; LSE-NEXT:    mov x4, x6
835; LSE-NEXT:    mov x5, x7
836; LSE-NEXT:    b.ne .LBB15_1
837; LSE-NEXT:  // %bb.2: // %atomicrmw.end
838; LSE-NEXT:    ret
839   store atomic i128 %in, ptr %p release, align 16
840   ret void
841}
842
; 128-bit atomic store of %in with 16-byte alignment. Despite the "relaxed"
; in the function name, the ordering written in the IR is `unordered`.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE and OUTLINE - identical ldxp/stxp loop; the ldxp targets xzr
;                           and x8 and its result is not used, the stxp
;                           writes x0/x1
;   LSE                   - plain ldp of the current value, then a casp
;                           compare-and-swap loop that retries until the value
;                           returned by casp equals the one it expected
843define void @atomic_store_relaxed(i128 %in, ptr %p) {
844; NOOUTLINE-LABEL: atomic_store_relaxed:
845; NOOUTLINE:       // %bb.0:
846; NOOUTLINE-NEXT:  .LBB16_1: // %atomicrmw.start
847; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
848; NOOUTLINE-NEXT:    ldxp xzr, x8, [x2]
849; NOOUTLINE-NEXT:    stxp w8, x0, x1, [x2]
850; NOOUTLINE-NEXT:    cbnz w8, .LBB16_1
851; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
852; NOOUTLINE-NEXT:    ret
853;
854; OUTLINE-LABEL: atomic_store_relaxed:
855; OUTLINE:       // %bb.0:
856; OUTLINE-NEXT:  .LBB16_1: // %atomicrmw.start
857; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
858; OUTLINE-NEXT:    ldxp xzr, x8, [x2]
859; OUTLINE-NEXT:    stxp w8, x0, x1, [x2]
860; OUTLINE-NEXT:    cbnz w8, .LBB16_1
861; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
862; OUTLINE-NEXT:    ret
863;
864; LSE-LABEL: atomic_store_relaxed:
865; LSE:       // %bb.0:
866; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
867; LSE-NEXT:    ldp x4, x5, [x2]
868; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
869; LSE-NEXT:  .LBB16_1: // %atomicrmw.start
870; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
871; LSE-NEXT:    mov x6, x4
872; LSE-NEXT:    mov x7, x5
873; LSE-NEXT:    casp x6, x7, x0, x1, [x2]
874; LSE-NEXT:    cmp x7, x5
875; LSE-NEXT:    ccmp x6, x4, #0, eq
876; LSE-NEXT:    mov x4, x6
877; LSE-NEXT:    mov x5, x7
878; LSE-NEXT:    b.ne .LBB16_1
879; LSE-NEXT:  // %bb.2: // %atomicrmw.end
880; LSE-NEXT:    ret
881   store atomic i128 %in, ptr %p unordered, align 16
882   ret void
883}
884
885; Since we store the original value to ensure no tearing for the unsuccessful
886; case, the register used must not be xzr.
; 128-bit cmpxchg (monotonic success and failure ordering) whose result is
; never used; the function returns void.
; Lowering pinned by the assertions below, per check prefix:
;   NOOUTLINE - ldxp/stxp retry loop; the load targets x8/x9, and the
;               mismatch path stores that same pair back with stxp
;   OUTLINE   - call to __aarch64_cas16_relax
;   LSE       - a single casp
887define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) {
888; NOOUTLINE-LABEL: cmpxchg_dead:
889; NOOUTLINE:       // %bb.0:
890; NOOUTLINE-NEXT:  .LBB17_1: // =>This Inner Loop Header: Depth=1
891; NOOUTLINE-NEXT:    ldxp x8, x9, [x0]
892; NOOUTLINE-NEXT:    cmp x8, x2
893; NOOUTLINE-NEXT:    cset w10, ne
894; NOOUTLINE-NEXT:    cmp x9, x3
895; NOOUTLINE-NEXT:    cinc w10, w10, ne
896; NOOUTLINE-NEXT:    cbz w10, .LBB17_3
897; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB17_1 Depth=1
898; NOOUTLINE-NEXT:    stxp w10, x8, x9, [x0]
899; NOOUTLINE-NEXT:    cbnz w10, .LBB17_1
900; NOOUTLINE-NEXT:    b .LBB17_4
901; NOOUTLINE-NEXT:  .LBB17_3: // in Loop: Header=BB17_1 Depth=1
902; NOOUTLINE-NEXT:    stxp w10, x4, x5, [x0]
903; NOOUTLINE-NEXT:    cbnz w10, .LBB17_1
904; NOOUTLINE-NEXT:  .LBB17_4:
905; NOOUTLINE-NEXT:    ret
906;
907; OUTLINE-LABEL: cmpxchg_dead:
908; OUTLINE:       // %bb.0:
909; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
910; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
911; OUTLINE-NEXT:    .cfi_offset w30, -16
912; OUTLINE-NEXT:    mov x1, x3
913; OUTLINE-NEXT:    mov x8, x0
914; OUTLINE-NEXT:    mov x0, x2
915; OUTLINE-NEXT:    mov x2, x4
916; OUTLINE-NEXT:    mov x3, x5
917; OUTLINE-NEXT:    mov x4, x8
918; OUTLINE-NEXT:    bl __aarch64_cas16_relax
919; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
920; OUTLINE-NEXT:    ret
921;
922; LSE-LABEL: cmpxchg_dead:
923; LSE:       // %bb.0:
924; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
925; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
926; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
927; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
928; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
929; LSE-NEXT:    ret
930  cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic
931  ret void
932}
933