xref: /llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll (revision 1729e6e742ba9f6f210550000ace4bec72530c2e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=NOLSE %s
3; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=LSE %s
4; RUN: llc -mtriple=aarch64-linux-gnu -mattr=-lse,-fp-armv8 -O1 < %s | FileCheck -check-prefix=SOFTFP-NOLSE %s
5
; Scalar half: seq_cst atomicrmw fadd on a 2-byte-aligned pointer; the function
; returns the atomicrmw result. The check lines are autogenerated (see the file
; header) - regenerate them with the update script rather than editing by hand.
;   * NOLSE run        - ldaxrh/stlxrh retry loop; the add is done in single
;                        precision (fcvt to s-reg, fadd, fcvt back to h-reg).
;   * LSE run          - same arithmetic, but the update is a casalh
;                        compare-and-swap loop seeded by a plain ldr h0.
;   * SOFTFP-NOLSE run - no FP registers: libcalls __gnu_h2f_ieee, __addsf3 and
;                        __gnu_f2h_ieee, then an ldaxrh/stlxrh cmpxchg loop.
; Attribute group #0 is defined outside this part of the file.
6define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
7; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
8; NOLSE:       // %bb.0:
9; NOLSE-NEXT:    fcvt s1, h0
10; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
11; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
12; NOLSE-NEXT:    ldaxrh w8, [x0]
13; NOLSE-NEXT:    fmov s0, w8
14; NOLSE-NEXT:    fcvt s2, h0
15; NOLSE-NEXT:    fadd s2, s2, s1
16; NOLSE-NEXT:    fcvt h2, s2
17; NOLSE-NEXT:    fmov w8, s2
18; NOLSE-NEXT:    stlxrh w9, w8, [x0]
19; NOLSE-NEXT:    cbnz w9, .LBB0_1
20; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
21; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
22; NOLSE-NEXT:    ret
23;
24; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
25; LSE:       // %bb.0:
26; LSE-NEXT:    fcvt s1, h0
27; LSE-NEXT:    ldr h0, [x0]
28; LSE-NEXT:  .LBB0_1: // %atomicrmw.start
29; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
30; LSE-NEXT:    fcvt s2, h0
31; LSE-NEXT:    fmov w8, s0
32; LSE-NEXT:    mov w10, w8
33; LSE-NEXT:    fadd s2, s2, s1
34; LSE-NEXT:    fcvt h2, s2
35; LSE-NEXT:    fmov w9, s2
36; LSE-NEXT:    casalh w10, w9, [x0]
37; LSE-NEXT:    fmov s0, w10
38; LSE-NEXT:    cmp w10, w8, uxth
39; LSE-NEXT:    b.ne .LBB0_1
40; LSE-NEXT:  // %bb.2: // %atomicrmw.end
41; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
42; LSE-NEXT:    ret
43;
44; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
45; SOFTFP-NOLSE:       // %bb.0:
46; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
47; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
48; SOFTFP-NOLSE-NEXT:    mov x19, x0
49; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
50; SOFTFP-NOLSE-NEXT:    mov w20, w1
51; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
52; SOFTFP-NOLSE-NEXT:    b .LBB0_2
53; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
54; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
55; SOFTFP-NOLSE-NEXT:    mov w8, wzr
56; SOFTFP-NOLSE-NEXT:    clrex
57; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
58; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
59; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
60; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
61; SOFTFP-NOLSE-NEXT:    mov w22, w0
62; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
63; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
64; SOFTFP-NOLSE-NEXT:    mov w21, w0
65; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
66; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
67; SOFTFP-NOLSE-NEXT:    mov w1, w21
68; SOFTFP-NOLSE-NEXT:    bl __addsf3
69; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
70; SOFTFP-NOLSE-NEXT:    mov w8, w0
71; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
72; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
73; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
74; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
75; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
76; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
77; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
78; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
79; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
80; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
81; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
82; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
83; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
84; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
85; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
86; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
87; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
88; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
89; SOFTFP-NOLSE-NEXT:    ret
90  %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
91  ret half %res
92}
93
; Scalar half with an over-aligned pointer: seq_cst atomicrmw fadd, align 4.
; The access width in the checked code is still a halfword in every
; configuration (ldaxrh/stlxrh without LSE, casalh with LSE), i.e. the extra
; alignment does not widen the atomic access. The soft-float configuration
; goes through __gnu_h2f_ieee, __addsf3 and __gnu_f2h_ieee before its
; ldaxrh/stlxrh cmpxchg loop.
; The check lines are autogenerated (see the file header); regenerate them
; with the update script rather than editing by hand.
94define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
95; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
96; NOLSE:       // %bb.0:
97; NOLSE-NEXT:    fcvt s1, h0
98; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
99; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
100; NOLSE-NEXT:    ldaxrh w8, [x0]
101; NOLSE-NEXT:    fmov s0, w8
102; NOLSE-NEXT:    fcvt s2, h0
103; NOLSE-NEXT:    fadd s2, s2, s1
104; NOLSE-NEXT:    fcvt h2, s2
105; NOLSE-NEXT:    fmov w8, s2
106; NOLSE-NEXT:    stlxrh w9, w8, [x0]
107; NOLSE-NEXT:    cbnz w9, .LBB1_1
108; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
109; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
110; NOLSE-NEXT:    ret
111;
112; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
113; LSE:       // %bb.0:
114; LSE-NEXT:    fcvt s1, h0
115; LSE-NEXT:    ldr h0, [x0]
116; LSE-NEXT:  .LBB1_1: // %atomicrmw.start
117; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
118; LSE-NEXT:    fcvt s2, h0
119; LSE-NEXT:    fmov w8, s0
120; LSE-NEXT:    mov w10, w8
121; LSE-NEXT:    fadd s2, s2, s1
122; LSE-NEXT:    fcvt h2, s2
123; LSE-NEXT:    fmov w9, s2
124; LSE-NEXT:    casalh w10, w9, [x0]
125; LSE-NEXT:    fmov s0, w10
126; LSE-NEXT:    cmp w10, w8, uxth
127; LSE-NEXT:    b.ne .LBB1_1
128; LSE-NEXT:  // %bb.2: // %atomicrmw.end
129; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
130; LSE-NEXT:    ret
131;
132; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
133; SOFTFP-NOLSE:       // %bb.0:
134; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
135; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
136; SOFTFP-NOLSE-NEXT:    mov x19, x0
137; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
138; SOFTFP-NOLSE-NEXT:    mov w20, w1
139; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
140; SOFTFP-NOLSE-NEXT:    b .LBB1_2
141; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
142; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
143; SOFTFP-NOLSE-NEXT:    mov w8, wzr
144; SOFTFP-NOLSE-NEXT:    clrex
145; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
146; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
147; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
148; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
149; SOFTFP-NOLSE-NEXT:    mov w22, w0
150; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
151; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
152; SOFTFP-NOLSE-NEXT:    mov w21, w0
153; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
154; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
155; SOFTFP-NOLSE-NEXT:    mov w1, w21
156; SOFTFP-NOLSE-NEXT:    bl __addsf3
157; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
158; SOFTFP-NOLSE-NEXT:    mov w8, w0
159; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
160; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
161; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
162; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
163; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
164; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
165; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
166; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
167; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
168; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
169; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
170; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
171; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
172; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
173; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
174; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
175; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
176; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
177; SOFTFP-NOLSE-NEXT:    ret
178  %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 4
179  ret half %res
180}
181
; Scalar bfloat: seq_cst atomicrmw fadd on a 2-byte-aligned pointer; the
; function returns the atomicrmw result.
;   * NOLSE run        - ldaxrh/stlxrh retry loop. Both operands are widened
;                        with shll #16, added with a single-precision fadd, and
;                        narrowed with an integer sequence (ubfx of bit 16,
;                        add 0x7fff, lsr #16) - presumably round-to-nearest-even;
;                        confirm against the bf16 lowering code.
;   * LSE run          - same arithmetic, committed with a casalh loop.
;   * SOFTFP-NOLSE run - operands shifted left by 16 in GPRs, then libcalls
;                        __addsf3 and __truncsfbf2, then an ldaxrh/stlxrh
;                        cmpxchg loop.
; The check lines are autogenerated (see the file header); regenerate them
; with the update script rather than editing by hand.
182define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
183; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
184; NOLSE:       // %bb.0:
185; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $d0
186; NOLSE-NEXT:    shll v1.4s, v0.4h, #16
187; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
188; NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
189; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
190; NOLSE-NEXT:    ldaxrh w9, [x0]
191; NOLSE-NEXT:    fmov s0, w9
192; NOLSE-NEXT:    shll v2.4s, v0.4h, #16
193; NOLSE-NEXT:    fadd s2, s2, s1
194; NOLSE-NEXT:    fmov w9, s2
195; NOLSE-NEXT:    ubfx w10, w9, #16, #1
196; NOLSE-NEXT:    add w9, w9, w8
197; NOLSE-NEXT:    add w9, w10, w9
198; NOLSE-NEXT:    lsr w9, w9, #16
199; NOLSE-NEXT:    stlxrh w10, w9, [x0]
200; NOLSE-NEXT:    cbnz w10, .LBB2_1
201; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
202; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
203; NOLSE-NEXT:    ret
204;
205; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
206; LSE:       // %bb.0:
207; LSE-NEXT:    // kill: def $h0 killed $h0 def $d0
208; LSE-NEXT:    shll v1.4s, v0.4h, #16
209; LSE-NEXT:    mov w8, #32767 // =0x7fff
210; LSE-NEXT:    ldr h0, [x0]
211; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
212; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
213; LSE-NEXT:    shll v2.4s, v0.4h, #16
214; LSE-NEXT:    fadd s2, s2, s1
215; LSE-NEXT:    fmov w9, s2
216; LSE-NEXT:    ubfx w10, w9, #16, #1
217; LSE-NEXT:    add w9, w9, w8
218; LSE-NEXT:    add w9, w10, w9
219; LSE-NEXT:    lsr w9, w9, #16
220; LSE-NEXT:    fmov s2, w9
221; LSE-NEXT:    fmov w9, s0
222; LSE-NEXT:    fmov w10, s2
223; LSE-NEXT:    mov w11, w9
224; LSE-NEXT:    casalh w11, w10, [x0]
225; LSE-NEXT:    fmov s0, w11
226; LSE-NEXT:    cmp w11, w9, uxth
227; LSE-NEXT:    b.ne .LBB2_1
228; LSE-NEXT:  // %bb.2: // %atomicrmw.end
229; LSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
230; LSE-NEXT:    ret
231;
232; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
233; SOFTFP-NOLSE:       // %bb.0:
234; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
235; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
236; SOFTFP-NOLSE-NEXT:    mov x19, x0
237; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
238; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
239; SOFTFP-NOLSE-NEXT:    b .LBB2_2
240; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
241; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
242; SOFTFP-NOLSE-NEXT:    mov w8, wzr
243; SOFTFP-NOLSE-NEXT:    clrex
244; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
245; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
246; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
247; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
248; SOFTFP-NOLSE-NEXT:    mov w21, w0
249; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
250; SOFTFP-NOLSE-NEXT:    mov w1, w20
251; SOFTFP-NOLSE-NEXT:    bl __addsf3
252; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
253; SOFTFP-NOLSE-NEXT:    mov w8, w0
254; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
255; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
256; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
257; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
258; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
259; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
260; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
261; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
262; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
263; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
264; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
265; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
266; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
267; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
268; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
269; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
270; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
271; SOFTFP-NOLSE-NEXT:    ret
272  %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
273  ret bfloat %res
274}
275
; Scalar bfloat with an over-aligned pointer: seq_cst atomicrmw fadd, align 4.
; The checked code still performs halfword atomic accesses in every
; configuration (ldaxrh/stlxrh without LSE, casalh with LSE). The FP
; configurations widen with shll #16, use a single-precision fadd, and narrow
; with the ubfx / add 0x7fff / lsr #16 integer sequence; the soft-float
; configuration uses the __addsf3 and __truncsfbf2 libcalls instead.
; The check lines are autogenerated (see the file header); regenerate them
; with the update script rather than editing by hand.
276define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
277; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
278; NOLSE:       // %bb.0:
279; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $d0
280; NOLSE-NEXT:    shll v1.4s, v0.4h, #16
281; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
282; NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
283; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
284; NOLSE-NEXT:    ldaxrh w9, [x0]
285; NOLSE-NEXT:    fmov s0, w9
286; NOLSE-NEXT:    shll v2.4s, v0.4h, #16
287; NOLSE-NEXT:    fadd s2, s2, s1
288; NOLSE-NEXT:    fmov w9, s2
289; NOLSE-NEXT:    ubfx w10, w9, #16, #1
290; NOLSE-NEXT:    add w9, w9, w8
291; NOLSE-NEXT:    add w9, w10, w9
292; NOLSE-NEXT:    lsr w9, w9, #16
293; NOLSE-NEXT:    stlxrh w10, w9, [x0]
294; NOLSE-NEXT:    cbnz w10, .LBB3_1
295; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
296; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
297; NOLSE-NEXT:    ret
298;
299; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
300; LSE:       // %bb.0:
301; LSE-NEXT:    // kill: def $h0 killed $h0 def $d0
302; LSE-NEXT:    shll v1.4s, v0.4h, #16
303; LSE-NEXT:    mov w8, #32767 // =0x7fff
304; LSE-NEXT:    ldr h0, [x0]
305; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
306; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
307; LSE-NEXT:    shll v2.4s, v0.4h, #16
308; LSE-NEXT:    fadd s2, s2, s1
309; LSE-NEXT:    fmov w9, s2
310; LSE-NEXT:    ubfx w10, w9, #16, #1
311; LSE-NEXT:    add w9, w9, w8
312; LSE-NEXT:    add w9, w10, w9
313; LSE-NEXT:    lsr w9, w9, #16
314; LSE-NEXT:    fmov s2, w9
315; LSE-NEXT:    fmov w9, s0
316; LSE-NEXT:    fmov w10, s2
317; LSE-NEXT:    mov w11, w9
318; LSE-NEXT:    casalh w11, w10, [x0]
319; LSE-NEXT:    fmov s0, w11
320; LSE-NEXT:    cmp w11, w9, uxth
321; LSE-NEXT:    b.ne .LBB3_1
322; LSE-NEXT:  // %bb.2: // %atomicrmw.end
323; LSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
324; LSE-NEXT:    ret
325;
326; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
327; SOFTFP-NOLSE:       // %bb.0:
328; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
329; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
330; SOFTFP-NOLSE-NEXT:    mov x19, x0
331; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
332; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
333; SOFTFP-NOLSE-NEXT:    b .LBB3_2
334; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
335; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
336; SOFTFP-NOLSE-NEXT:    mov w8, wzr
337; SOFTFP-NOLSE-NEXT:    clrex
338; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
339; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
340; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
341; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
342; SOFTFP-NOLSE-NEXT:    mov w21, w0
343; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
344; SOFTFP-NOLSE-NEXT:    mov w1, w20
345; SOFTFP-NOLSE-NEXT:    bl __addsf3
346; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
347; SOFTFP-NOLSE-NEXT:    mov w8, w0
348; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
349; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
350; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
351; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
352; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
353; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
354; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
355; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
356; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
357; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
358; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
359; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
360; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
361; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
362; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
363; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
364; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
365; SOFTFP-NOLSE-NEXT:    ret
366  %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4
367  ret bfloat %res
368}
369
; Scalar float: seq_cst atomicrmw fadd on a 4-byte-aligned pointer; the
; function returns the atomicrmw result.
;   * NOLSE run        - 32-bit ldaxr/stlxr retry loop with the fadd done in
;                        s-registers between the exclusive load and store.
;   * LSE run          - plain ldr of the initial value, then a casal
;                        compare-and-swap loop on w-registers.
;   * SOFTFP-NOLSE run - the add is the __addsf3 libcall, followed by a 32-bit
;                        ldaxr/stlxr cmpxchg loop.
; The check lines are autogenerated (see the file header); regenerate them
; with the update script rather than editing by hand.
370define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0 {
371; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4:
372; NOLSE:       // %bb.0:
373; NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
374; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
375; NOLSE-NEXT:    ldaxr w8, [x0]
376; NOLSE-NEXT:    fmov s1, w8
377; NOLSE-NEXT:    fadd s2, s1, s0
378; NOLSE-NEXT:    fmov w8, s2
379; NOLSE-NEXT:    stlxr w9, w8, [x0]
380; NOLSE-NEXT:    cbnz w9, .LBB4_1
381; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
382; NOLSE-NEXT:    fmov s0, s1
383; NOLSE-NEXT:    ret
384;
385; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4:
386; LSE:       // %bb.0:
387; LSE-NEXT:    ldr s1, [x0]
388; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
389; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
390; LSE-NEXT:    fadd s2, s1, s0
391; LSE-NEXT:    fmov w8, s1
392; LSE-NEXT:    mov w10, w8
393; LSE-NEXT:    fmov w9, s2
394; LSE-NEXT:    casal w10, w9, [x0]
395; LSE-NEXT:    fmov s1, w10
396; LSE-NEXT:    cmp w10, w8
397; LSE-NEXT:    b.ne .LBB4_1
398; LSE-NEXT:  // %bb.2: // %atomicrmw.end
399; LSE-NEXT:    fmov s0, s1
400; LSE-NEXT:    ret
401;
402; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4:
403; SOFTFP-NOLSE:       // %bb.0:
404; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
405; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
406; SOFTFP-NOLSE-NEXT:    mov x19, x0
407; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
408; SOFTFP-NOLSE-NEXT:    mov w20, w1
409; SOFTFP-NOLSE-NEXT:    b .LBB4_2
410; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
411; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
412; SOFTFP-NOLSE-NEXT:    mov w8, wzr
413; SOFTFP-NOLSE-NEXT:    clrex
414; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
415; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
416; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
417; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
418; SOFTFP-NOLSE-NEXT:    mov w1, w20
419; SOFTFP-NOLSE-NEXT:    mov w21, w0
420; SOFTFP-NOLSE-NEXT:    bl __addsf3
421; SOFTFP-NOLSE-NEXT:    mov w8, w0
422; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
423; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
424; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
425; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
426; SOFTFP-NOLSE-NEXT:    cmp w0, w21
427; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
428; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
429; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
430; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
431; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
432; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
433; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
434; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
435; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
436; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
437; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
438; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
439; SOFTFP-NOLSE-NEXT:    ret
440  %res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
441  ret float %res
442}
443
; Scalar double: seq_cst atomicrmw fadd on an 8-byte-aligned pointer; the
; function returns the atomicrmw result.
; NOTE(review) - the function name says "f32" but the operand and return type
; are double (the checked code uses d-registers and __adddf3). Renaming to
; "f64" would change the symbol and every -LABEL line, so it should be done
; together with a regeneration of the check lines.
;   * NOLSE run        - 64-bit ldaxr/stlxr retry loop around a d-register fadd.
;   * LSE run          - plain ldr of the initial value, then a casal
;                        compare-and-swap loop on x-registers.
;   * SOFTFP-NOLSE run - the add is the __adddf3 libcall, followed by a 64-bit
;                        ldaxr/stlxr cmpxchg loop.
444define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #0 {
445; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
446; NOLSE:       // %bb.0:
447; NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
448; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
449; NOLSE-NEXT:    ldaxr x8, [x0]
450; NOLSE-NEXT:    fmov d1, x8
451; NOLSE-NEXT:    fadd d2, d1, d0
452; NOLSE-NEXT:    fmov x8, d2
453; NOLSE-NEXT:    stlxr w9, x8, [x0]
454; NOLSE-NEXT:    cbnz w9, .LBB5_1
455; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
456; NOLSE-NEXT:    fmov d0, d1
457; NOLSE-NEXT:    ret
458;
459; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
460; LSE:       // %bb.0:
461; LSE-NEXT:    ldr d1, [x0]
462; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
463; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
464; LSE-NEXT:    fadd d2, d1, d0
465; LSE-NEXT:    fmov x8, d1
466; LSE-NEXT:    mov x10, x8
467; LSE-NEXT:    fmov x9, d2
468; LSE-NEXT:    casal x10, x9, [x0]
469; LSE-NEXT:    fmov d1, x10
470; LSE-NEXT:    cmp x10, x8
471; LSE-NEXT:    b.ne .LBB5_1
472; LSE-NEXT:  // %bb.2: // %atomicrmw.end
473; LSE-NEXT:    fmov d0, d1
474; LSE-NEXT:    ret
475;
476; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
477; SOFTFP-NOLSE:       // %bb.0:
478; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
479; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
480; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
481; SOFTFP-NOLSE-NEXT:    mov x19, x0
482; SOFTFP-NOLSE-NEXT:    mov x20, x1
483; SOFTFP-NOLSE-NEXT:    b .LBB5_2
484; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
485; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
486; SOFTFP-NOLSE-NEXT:    mov w9, wzr
487; SOFTFP-NOLSE-NEXT:    clrex
488; SOFTFP-NOLSE-NEXT:    mov x21, x8
489; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
490; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
491; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
492; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
493; SOFTFP-NOLSE-NEXT:    mov x0, x21
494; SOFTFP-NOLSE-NEXT:    mov x1, x20
495; SOFTFP-NOLSE-NEXT:    bl __adddf3
496; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
497; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
498; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
499; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
500; SOFTFP-NOLSE-NEXT:    cmp x8, x21
501; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
502; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
503; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
504; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
505; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
506; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
507; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
508; SOFTFP-NOLSE-NEXT:    mov x21, x8
509; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
510; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
511; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
512; SOFTFP-NOLSE-NEXT:    mov x0, x21
513; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
514; SOFTFP-NOLSE-NEXT:    ret
515  %res = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
516  ret double %res
517}
518
; fp128: seq_cst atomicrmw fadd on a 16-byte-aligned pointer; the function
; returns the atomicrmw result. The add itself is the __addtf3 libcall in all
; three configurations; they differ in how the 128-bit update is committed.
;   * NOLSE run        - value kept in q-registers and moved to GPR pairs
;                        through stack slots; commit is an ldaxp/stlxp loop
;                        that stores back either the new or the just-loaded
;                        pair depending on the comparison.
;   * LSE run          - same stack shuffling, commit is a single caspal on
;                        the x4/x5 and x0/x1 register pairs per iteration.
;   * SOFTFP-NOLSE run - operand arrives in x2/x3 and the result leaves in
;                        x0/x1, so no stack shuffling; commit is the same
;                        ldaxp/stlxp loop shape as the NOLSE configuration.
; The check lines are autogenerated (see the file header); regenerate them
; with the update script rather than editing by hand.
519define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
520; NOLSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:
521; NOLSE:       // %bb.0:
522; NOLSE-NEXT:    sub sp, sp, #96
523; NOLSE-NEXT:    ldr q1, [x0]
524; NOLSE-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
525; NOLSE-NEXT:    mov x19, x0
526; NOLSE-NEXT:    str q0, [sp] // 16-byte Folded Spill
527; NOLSE-NEXT:    b .LBB6_2
528; NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
529; NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=1
530; NOLSE-NEXT:    stp x12, x13, [sp, #32]
531; NOLSE-NEXT:    cmp x13, x10
532; NOLSE-NEXT:    ldr q1, [sp, #32]
533; NOLSE-NEXT:    ccmp x12, x11, #0, eq
534; NOLSE-NEXT:    b.eq .LBB6_6
535; NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
536; NOLSE-NEXT:    // =>This Loop Header: Depth=1
537; NOLSE-NEXT:    // Child Loop BB6_3 Depth 2
538; NOLSE-NEXT:    mov v0.16b, v1.16b
539; NOLSE-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
540; NOLSE-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
541; NOLSE-NEXT:    bl __addtf3
542; NOLSE-NEXT:    str q0, [sp, #48]
543; NOLSE-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
544; NOLSE-NEXT:    ldp x9, x8, [sp, #48]
545; NOLSE-NEXT:    str q0, [sp, #64]
546; NOLSE-NEXT:    ldp x11, x10, [sp, #64]
547; NOLSE-NEXT:  .LBB6_3: // %atomicrmw.start
548; NOLSE-NEXT:    // Parent Loop BB6_2 Depth=1
549; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
550; NOLSE-NEXT:    ldaxp x12, x13, [x19]
551; NOLSE-NEXT:    cmp x12, x11
552; NOLSE-NEXT:    cset w14, ne
553; NOLSE-NEXT:    cmp x13, x10
554; NOLSE-NEXT:    cinc w14, w14, ne
555; NOLSE-NEXT:    cbz w14, .LBB6_5
556; NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
557; NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
558; NOLSE-NEXT:    stlxp w14, x12, x13, [x19]
559; NOLSE-NEXT:    cbnz w14, .LBB6_3
560; NOLSE-NEXT:    b .LBB6_1
561; NOLSE-NEXT:  .LBB6_5: // %atomicrmw.start
562; NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
563; NOLSE-NEXT:    stlxp w14, x9, x8, [x19]
564; NOLSE-NEXT:    cbnz w14, .LBB6_3
565; NOLSE-NEXT:    b .LBB6_1
566; NOLSE-NEXT:  .LBB6_6: // %atomicrmw.end
567; NOLSE-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
568; NOLSE-NEXT:    mov v0.16b, v1.16b
569; NOLSE-NEXT:    add sp, sp, #96
570; NOLSE-NEXT:    ret
571;
572; LSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:
573; LSE:       // %bb.0:
574; LSE-NEXT:    sub sp, sp, #96
575; LSE-NEXT:    ldr q1, [x0]
576; LSE-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
577; LSE-NEXT:    mov x19, x0
578; LSE-NEXT:    str q0, [sp] // 16-byte Folded Spill
579; LSE-NEXT:  .LBB6_1: // %atomicrmw.start
580; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
581; LSE-NEXT:    mov v0.16b, v1.16b
582; LSE-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
583; LSE-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
584; LSE-NEXT:    bl __addtf3
585; LSE-NEXT:    str q0, [sp, #48]
586; LSE-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
587; LSE-NEXT:    ldp x0, x1, [sp, #48]
588; LSE-NEXT:    str q0, [sp, #64]
589; LSE-NEXT:    ldp x2, x3, [sp, #64]
590; LSE-NEXT:    mov x4, x2
591; LSE-NEXT:    mov x5, x3
592; LSE-NEXT:    caspal x4, x5, x0, x1, [x19]
593; LSE-NEXT:    stp x4, x5, [sp, #32]
594; LSE-NEXT:    cmp x5, x3
595; LSE-NEXT:    ldr q1, [sp, #32]
596; LSE-NEXT:    ccmp x4, x2, #0, eq
597; LSE-NEXT:    b.ne .LBB6_1
598; LSE-NEXT:  // %bb.2: // %atomicrmw.end
599; LSE-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
600; LSE-NEXT:    mov v0.16b, v1.16b
601; LSE-NEXT:    add sp, sp, #96
602; LSE-NEXT:    ret
603;
604; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:
605; SOFTFP-NOLSE:       // %bb.0:
606; SOFTFP-NOLSE-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
607; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
608; SOFTFP-NOLSE-NEXT:    mov x20, x0
609; SOFTFP-NOLSE-NEXT:    mov x19, x3
610; SOFTFP-NOLSE-NEXT:    ldp x0, x1, [x0]
611; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
612; SOFTFP-NOLSE-NEXT:    mov x21, x2
613; SOFTFP-NOLSE-NEXT:    b .LBB6_2
614; SOFTFP-NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
615; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=1
616; SOFTFP-NOLSE-NEXT:    cmp x1, x22
617; SOFTFP-NOLSE-NEXT:    ccmp x0, x23, #0, eq
618; SOFTFP-NOLSE-NEXT:    b.eq .LBB6_6
619; SOFTFP-NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
620; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
621; SOFTFP-NOLSE-NEXT:    // Child Loop BB6_3 Depth 2
622; SOFTFP-NOLSE-NEXT:    mov x2, x21
623; SOFTFP-NOLSE-NEXT:    mov x3, x19
624; SOFTFP-NOLSE-NEXT:    mov x22, x1
625; SOFTFP-NOLSE-NEXT:    mov x23, x0
626; SOFTFP-NOLSE-NEXT:    bl __addtf3
627; SOFTFP-NOLSE-NEXT:    mov x8, x0
628; SOFTFP-NOLSE-NEXT:    mov x9, x1
629; SOFTFP-NOLSE-NEXT:  .LBB6_3: // %atomicrmw.start
630; SOFTFP-NOLSE-NEXT:    // Parent Loop BB6_2 Depth=1
631; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
632; SOFTFP-NOLSE-NEXT:    ldaxp x0, x1, [x20]
633; SOFTFP-NOLSE-NEXT:    cmp x0, x23
634; SOFTFP-NOLSE-NEXT:    cset w10, ne
635; SOFTFP-NOLSE-NEXT:    cmp x1, x22
636; SOFTFP-NOLSE-NEXT:    cinc w10, w10, ne
637; SOFTFP-NOLSE-NEXT:    cbz w10, .LBB6_5
638; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
639; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
640; SOFTFP-NOLSE-NEXT:    stlxp w10, x0, x1, [x20]
641; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB6_3
642; SOFTFP-NOLSE-NEXT:    b .LBB6_1
643; SOFTFP-NOLSE-NEXT:  .LBB6_5: // %atomicrmw.start
644; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
645; SOFTFP-NOLSE-NEXT:    stlxp w10, x8, x9, [x20]
646; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB6_3
647; SOFTFP-NOLSE-NEXT:    b .LBB6_1
648; SOFTFP-NOLSE-NEXT:  .LBB6_6: // %atomicrmw.end
649; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
650; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
651; SOFTFP-NOLSE-NEXT:    ldp x30, x23, [sp], #48 // 16-byte Folded Reload
652; SOFTFP-NOLSE-NEXT:    ret
653  %res = atomicrmw fadd ptr %ptr, fp128 %value seq_cst, align 16
654  ret fp128 %res
655}
656
; <2 x half> vector: seq_cst atomicrmw fadd on a 4-byte-aligned pointer; the
; function returns the atomicrmw result. Both lanes are updated through one
; 32-bit atomic access.
;   * NOLSE run        - 32-bit ldaxr/stlxr retry loop; lanes are widened with
;                        fcvtl, added as a .4s vector fadd, narrowed with fcvtn.
;   * LSE run          - same vector arithmetic, committed with a casal loop
;                        on w-registers.
;   * SOFTFP-NOLSE run - each lane is processed separately via __gnu_h2f_ieee,
;                        __addsf3 and __gnu_f2h_ieee; the two halves are packed
;                        with bfi and committed with a 32-bit ldaxr/stlxr
;                        cmpxchg loop. The lanes are returned in w0 and w1.
; The check lines are autogenerated (see the file header); regenerate them
; with the update script rather than editing by hand.
657define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 {
658; NOLSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4:
659; NOLSE:       // %bb.0:
660; NOLSE-NEXT:    fcvtl v0.4s, v0.4h
661; NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
662; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
663; NOLSE-NEXT:    ldaxr w8, [x0]
664; NOLSE-NEXT:    fmov s1, w8
665; NOLSE-NEXT:    fcvtl v1.4s, v1.4h
666; NOLSE-NEXT:    fadd v1.4s, v1.4s, v0.4s
667; NOLSE-NEXT:    fcvtn v1.4h, v1.4s
668; NOLSE-NEXT:    fmov w9, s1
669; NOLSE-NEXT:    stlxr w10, w9, [x0]
670; NOLSE-NEXT:    cbnz w10, .LBB7_1
671; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
672; NOLSE-NEXT:    fmov d0, x8
673; NOLSE-NEXT:    ret
674;
675; LSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4:
676; LSE:       // %bb.0:
677; LSE-NEXT:    fcvtl v1.4s, v0.4h
678; LSE-NEXT:    ldr s0, [x0]
679; LSE-NEXT:  .LBB7_1: // %atomicrmw.start
680; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
681; LSE-NEXT:    fcvtl v2.4s, v0.4h
682; LSE-NEXT:    fmov w8, s0
683; LSE-NEXT:    mov w10, w8
684; LSE-NEXT:    fadd v2.4s, v2.4s, v1.4s
685; LSE-NEXT:    fcvtn v2.4h, v2.4s
686; LSE-NEXT:    fmov w9, s2
687; LSE-NEXT:    casal w10, w9, [x0]
688; LSE-NEXT:    fmov s0, w10
689; LSE-NEXT:    cmp w10, w8
690; LSE-NEXT:    b.ne .LBB7_1
691; LSE-NEXT:  // %bb.2: // %atomicrmw.end
692; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
693; LSE-NEXT:    ret
694;
695; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4:
696; SOFTFP-NOLSE:       // %bb.0:
697; SOFTFP-NOLSE-NEXT:    stp x30, x25, [sp, #-64]! // 16-byte Folded Spill
698; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
699; SOFTFP-NOLSE-NEXT:    ldrh w23, [x0, #2]
700; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
701; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
702; SOFTFP-NOLSE-NEXT:    mov w21, w1
703; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
704; SOFTFP-NOLSE-NEXT:    mov w19, w2
705; SOFTFP-NOLSE-NEXT:    mov x20, x0
706; SOFTFP-NOLSE-NEXT:    b .LBB7_2
707; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
708; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
709; SOFTFP-NOLSE-NEXT:    mov w8, wzr
710; SOFTFP-NOLSE-NEXT:    clrex
711; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
712; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_6
713; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
714; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
715; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_3 Depth 2
716; SOFTFP-NOLSE-NEXT:    and w0, w19, #0xffff
717; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
718; SOFTFP-NOLSE-NEXT:    mov w24, w0
719; SOFTFP-NOLSE-NEXT:    and w0, w23, #0xffff
720; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
721; SOFTFP-NOLSE-NEXT:    mov w1, w24
722; SOFTFP-NOLSE-NEXT:    bl __addsf3
723; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
724; SOFTFP-NOLSE-NEXT:    mov w24, w0
725; SOFTFP-NOLSE-NEXT:    and w0, w21, #0xffff
726; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
727; SOFTFP-NOLSE-NEXT:    mov w25, w0
728; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
729; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
730; SOFTFP-NOLSE-NEXT:    mov w1, w25
731; SOFTFP-NOLSE-NEXT:    bl __addsf3
732; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
733; SOFTFP-NOLSE-NEXT:    mov w8, w22
734; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
735; SOFTFP-NOLSE-NEXT:    bfi w8, w23, #16, #16
736; SOFTFP-NOLSE-NEXT:  .LBB7_3: // %cmpxchg.start
737; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_2 Depth=1
738; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
739; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x20]
740; SOFTFP-NOLSE-NEXT:    cmp w22, w8
741; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_1
742; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
743; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_3 Depth=2
744; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
745; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB7_3
746; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB7_2 Depth=1
747; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
748; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
749; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_2
750; SOFTFP-NOLSE-NEXT:  .LBB7_6: // %atomicrmw.end
751; SOFTFP-NOLSE-NEXT:    mov w0, w22
752; SOFTFP-NOLSE-NEXT:    mov w1, w23
753; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
754; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
755; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
756; SOFTFP-NOLSE-NEXT:    ldp x30, x25, [sp], #64 // 16-byte Folded Reload
757; SOFTFP-NOLSE-NEXT:    ret
758  %res = atomicrmw fadd ptr %ptr, <2 x half> %value seq_cst, align 4
759  ret <2 x half> %res
760}
761
; Test: seq_cst `atomicrmw fadd` of a <2 x bfloat> value at a pointer declared
; `align 4`; the result of the atomicrmw (%res) is returned.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file) -- regenerate them with the update script rather than hand-editing.
; What each check prefix pins down for this function:
;   * NOLSE prefix (RUN line without +lse) - a single ldaxr/stlxr retry loop;
;     the lanes are widened with shll #16, added with a .4s fadd, and narrowed
;     again by the ushr/and/add/addhn sequence.
;   * LSE prefix (RUN line with +lse) - a plain ldr of the initial value, then
;     a casal loop that repeats while the value handed back by casal differs
;     from the expected one (cmp w10, w8 / b.ne).
;   * SOFTFP-NOLSE prefix (RUN line with -lse,-fp-armv8) - every lane goes
;     through a __addsf3 call followed by __truncsfbf2; the packed 32-bit
;     result is stored by a ldaxr/cmp/stlxr loop, with clrex on the
;     compare-failure path.
762define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
763; NOLSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
764; NOLSE:       // %bb.0:
765; NOLSE-NEXT:    movi v1.4s, #1
766; NOLSE-NEXT:    movi v2.4s, #127, msl #8
767; NOLSE-NEXT:    shll v0.4s, v0.4h, #16
768; NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
769; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
770; NOLSE-NEXT:    ldaxr w8, [x0]
771; NOLSE-NEXT:    fmov s3, w8
772; NOLSE-NEXT:    shll v3.4s, v3.4h, #16
773; NOLSE-NEXT:    fadd v3.4s, v3.4s, v0.4s
774; NOLSE-NEXT:    ushr v4.4s, v3.4s, #16
775; NOLSE-NEXT:    and v4.16b, v4.16b, v1.16b
776; NOLSE-NEXT:    add v3.4s, v4.4s, v3.4s
777; NOLSE-NEXT:    addhn v3.4h, v3.4s, v2.4s
778; NOLSE-NEXT:    fmov w9, s3
779; NOLSE-NEXT:    stlxr w10, w9, [x0]
780; NOLSE-NEXT:    cbnz w10, .LBB8_1
781; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
782; NOLSE-NEXT:    fmov d0, x8
783; NOLSE-NEXT:    ret
784;
785; LSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
786; LSE:       // %bb.0:
787; LSE-NEXT:    movi v1.4s, #1
788; LSE-NEXT:    movi v2.4s, #127, msl #8
789; LSE-NEXT:    shll v3.4s, v0.4h, #16
790; LSE-NEXT:    ldr s0, [x0]
791; LSE-NEXT:  .LBB8_1: // %atomicrmw.start
792; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
793; LSE-NEXT:    shll v4.4s, v0.4h, #16
794; LSE-NEXT:    fmov w8, s0
795; LSE-NEXT:    fadd v4.4s, v4.4s, v3.4s
796; LSE-NEXT:    mov w10, w8
797; LSE-NEXT:    ushr v5.4s, v4.4s, #16
798; LSE-NEXT:    and v5.16b, v5.16b, v1.16b
799; LSE-NEXT:    add v4.4s, v5.4s, v4.4s
800; LSE-NEXT:    addhn v4.4h, v4.4s, v2.4s
801; LSE-NEXT:    fmov w9, s4
802; LSE-NEXT:    casal w10, w9, [x0]
803; LSE-NEXT:    fmov s0, w10
804; LSE-NEXT:    cmp w10, w8
805; LSE-NEXT:    b.ne .LBB8_1
806; LSE-NEXT:  // %bb.2: // %atomicrmw.end
807; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
808; LSE-NEXT:    ret
809;
810; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
811; SOFTFP-NOLSE:       // %bb.0:
812; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
813; SOFTFP-NOLSE-NEXT:    mov w8, w1
814; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
815; SOFTFP-NOLSE-NEXT:    ldrh w1, [x0, #2]
816; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
817; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
818; SOFTFP-NOLSE-NEXT:    lsl w20, w2, #16
819; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
820; SOFTFP-NOLSE-NEXT:    mov x19, x0
821; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
822; SOFTFP-NOLSE-NEXT:    b .LBB8_2
823; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %cmpxchg.nostore
824; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=1
825; SOFTFP-NOLSE-NEXT:    mov w8, wzr
826; SOFTFP-NOLSE-NEXT:    clrex
827; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
828; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_6
829; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
830; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
831; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_3 Depth 2
832; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
833; SOFTFP-NOLSE-NEXT:    mov w1, w20
834; SOFTFP-NOLSE-NEXT:    mov w0, w23
835; SOFTFP-NOLSE-NEXT:    bl __addsf3
836; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
837; SOFTFP-NOLSE-NEXT:    mov w24, w0
838; SOFTFP-NOLSE-NEXT:    lsl w0, w22, #16
839; SOFTFP-NOLSE-NEXT:    mov w1, w21
840; SOFTFP-NOLSE-NEXT:    bl __addsf3
841; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
842; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
843; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
844; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
845; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_2 Depth=1
846; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
847; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x19]
848; SOFTFP-NOLSE-NEXT:    cmp w22, w23
849; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_1
850; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
851; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_3 Depth=2
852; SOFTFP-NOLSE-NEXT:    stlxr w8, w0, [x19]
853; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_3
854; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB8_2 Depth=1
855; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
856; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
857; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_2
858; SOFTFP-NOLSE-NEXT:  .LBB8_6: // %atomicrmw.end
859; SOFTFP-NOLSE-NEXT:    mov w0, w22
860; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
861; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
862; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
863; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
864; SOFTFP-NOLSE-NEXT:    ret
865  %res = atomicrmw fadd ptr %ptr, <2 x bfloat> %value seq_cst, align 4
866  ret <2 x bfloat> %res
867}
868
; Test: seq_cst `atomicrmw fadd` of a <2 x float> value at a pointer declared
; `align 8`; the result of the atomicrmw (%res) is returned.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file) -- regenerate them with the update script rather than hand-editing.
; What each check prefix pins down for this function:
;   * NOLSE prefix (RUN line without +lse) - a 64-bit ldaxr/stlxr retry loop
;     around one `fadd` on .2s vectors.
;   * LSE prefix (RUN line with +lse) - a plain ldr of d1 before the loop, then
;     casal on x registers, repeated while the value handed back by casal
;     differs from the expected one (cmp x10, x8 / b.ne).
;   * SOFTFP-NOLSE prefix (RUN line with -lse,-fp-armv8) - one __addsf3 call
;     per lane, the two 32-bit results packed with `orr ..., lsl #32`, then a
;     ldaxr/cmp/stlxr loop with clrex on the compare-failure path.
869define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x float> %value) #0 {
870; NOLSE-LABEL: test_atomicrmw_fadd_v2f32_seq_cst_align8:
871; NOLSE:       // %bb.0:
872; NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
873; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
874; NOLSE-NEXT:    ldaxr x8, [x0]
875; NOLSE-NEXT:    fmov d1, x8
876; NOLSE-NEXT:    fadd v2.2s, v1.2s, v0.2s
877; NOLSE-NEXT:    fmov x8, d2
878; NOLSE-NEXT:    stlxr w9, x8, [x0]
879; NOLSE-NEXT:    cbnz w9, .LBB9_1
880; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
881; NOLSE-NEXT:    fmov d0, d1
882; NOLSE-NEXT:    ret
883;
884; LSE-LABEL: test_atomicrmw_fadd_v2f32_seq_cst_align8:
885; LSE:       // %bb.0:
886; LSE-NEXT:    ldr d1, [x0]
887; LSE-NEXT:  .LBB9_1: // %atomicrmw.start
888; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
889; LSE-NEXT:    fadd v2.2s, v1.2s, v0.2s
890; LSE-NEXT:    fmov x8, d1
891; LSE-NEXT:    mov x10, x8
892; LSE-NEXT:    fmov x9, d2
893; LSE-NEXT:    casal x10, x9, [x0]
894; LSE-NEXT:    fmov d1, x10
895; LSE-NEXT:    cmp x10, x8
896; LSE-NEXT:    b.ne .LBB9_1
897; LSE-NEXT:  // %bb.2: // %atomicrmw.end
898; LSE-NEXT:    fmov d0, d1
899; LSE-NEXT:    ret
900;
901; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f32_seq_cst_align8:
902; SOFTFP-NOLSE:       // %bb.0:
903; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
904; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
905; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
906; SOFTFP-NOLSE-NEXT:    mov w21, w1
907; SOFTFP-NOLSE-NEXT:    ldp w22, w23, [x0]
908; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
909; SOFTFP-NOLSE-NEXT:    mov w19, w2
910; SOFTFP-NOLSE-NEXT:    mov x20, x0
911; SOFTFP-NOLSE-NEXT:    b .LBB9_2
912; SOFTFP-NOLSE-NEXT:  .LBB9_1: // %cmpxchg.nostore
913; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=1
914; SOFTFP-NOLSE-NEXT:    mov w8, wzr
915; SOFTFP-NOLSE-NEXT:    clrex
916; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
917; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB9_6
918; SOFTFP-NOLSE-NEXT:  .LBB9_2: // %atomicrmw.start
919; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
920; SOFTFP-NOLSE-NEXT:    // Child Loop BB9_3 Depth 2
921; SOFTFP-NOLSE-NEXT:    mov w0, w23
922; SOFTFP-NOLSE-NEXT:    mov w1, w19
923; SOFTFP-NOLSE-NEXT:    bl __addsf3
924; SOFTFP-NOLSE-NEXT:    mov w24, w0
925; SOFTFP-NOLSE-NEXT:    mov w0, w22
926; SOFTFP-NOLSE-NEXT:    mov w1, w21
927; SOFTFP-NOLSE-NEXT:    bl __addsf3
928; SOFTFP-NOLSE-NEXT:    mov w8, w0
929; SOFTFP-NOLSE-NEXT:    mov w9, w22
930; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
931; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
932; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
933; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
934; SOFTFP-NOLSE-NEXT:    // Parent Loop BB9_2 Depth=1
935; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
936; SOFTFP-NOLSE-NEXT:    ldaxr x22, [x20]
937; SOFTFP-NOLSE-NEXT:    cmp x22, x9
938; SOFTFP-NOLSE-NEXT:    b.ne .LBB9_1
939; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
940; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_3 Depth=2
941; SOFTFP-NOLSE-NEXT:    stlxr w10, x8, [x20]
942; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB9_3
943; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB9_2 Depth=1
944; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
945; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
946; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_2
947; SOFTFP-NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
948; SOFTFP-NOLSE-NEXT:    mov w0, w22
949; SOFTFP-NOLSE-NEXT:    mov w1, w23
950; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
951; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
952; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
953; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
954; SOFTFP-NOLSE-NEXT:    ret
955  %res = atomicrmw fadd ptr %ptr, <2 x float> %value seq_cst, align 8
956  ret <2 x float> %res
957}
958
; Test: seq_cst `atomicrmw fadd` of a <2 x double> value; the result of the
; atomicrmw (%res) is returned.
; NOTE(review): the function name ends in `align8`, but the atomicrmw in the
; body is written with `align 16`. The name is left untouched because the
; autogenerated label checks embed it -- confirm which alignment was intended.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file) -- regenerate them with the update script rather than hand-editing.
; What each check prefix pins down for this function:
;   * NOLSE prefix (RUN line without +lse) - a ldaxp/stlxp register-pair retry
;     loop around one `fadd` on .2d vectors.
;   * LSE prefix (RUN line with +lse) - a plain ldr of q1 before the loop, then
;     caspal with x6/x7 holding the expected pair and x4/x5 the new pair; both
;     halves of the returned pair are compared via cmp + ccmp.
;   * SOFTFP-NOLSE prefix (RUN line with -lse,-fp-armv8) - one __adddf3 call
;     per lane and a ldaxp/stlxp inner loop. When the loaded pair differs from
;     the expected one, the loaded pair itself is written back
;     (stlxp w9, x0, x1) before the outer loop compares again.
959define <2 x double> @test_atomicrmw_fadd_v2f64_seq_cst_align8(ptr %ptr, <2 x double> %value) #0 {
960; NOLSE-LABEL: test_atomicrmw_fadd_v2f64_seq_cst_align8:
961; NOLSE:       // %bb.0:
962; NOLSE-NEXT:  .LBB10_1: // %atomicrmw.start
963; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
964; NOLSE-NEXT:    ldaxp x8, x9, [x0]
965; NOLSE-NEXT:    fmov d1, x8
966; NOLSE-NEXT:    mov v1.d[1], x9
967; NOLSE-NEXT:    fadd v2.2d, v1.2d, v0.2d
968; NOLSE-NEXT:    mov x8, v2.d[1]
969; NOLSE-NEXT:    fmov x9, d2
970; NOLSE-NEXT:    stlxp w10, x9, x8, [x0]
971; NOLSE-NEXT:    cbnz w10, .LBB10_1
972; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
973; NOLSE-NEXT:    mov v0.16b, v1.16b
974; NOLSE-NEXT:    ret
975;
976; LSE-LABEL: test_atomicrmw_fadd_v2f64_seq_cst_align8:
977; LSE:       // %bb.0:
978; LSE-NEXT:    ldr q1, [x0]
979; LSE-NEXT:  .LBB10_1: // %atomicrmw.start
980; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
981; LSE-NEXT:    fadd v2.2d, v1.2d, v0.2d
982; LSE-NEXT:    mov x3, v1.d[1]
983; LSE-NEXT:    fmov x2, d1
984; LSE-NEXT:    mov x7, x3
985; LSE-NEXT:    mov x5, v2.d[1]
986; LSE-NEXT:    mov x6, x2
987; LSE-NEXT:    fmov x4, d2
988; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
989; LSE-NEXT:    fmov d1, x6
990; LSE-NEXT:    cmp x7, x3
991; LSE-NEXT:    ccmp x6, x2, #0, eq
992; LSE-NEXT:    mov v1.d[1], x7
993; LSE-NEXT:    b.ne .LBB10_1
994; LSE-NEXT:  // %bb.2: // %atomicrmw.end
995; LSE-NEXT:    mov v0.16b, v1.16b
996; LSE-NEXT:    ret
997;
998; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f64_seq_cst_align8:
999; SOFTFP-NOLSE:       // %bb.0:
1000; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
1001; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
1002; SOFTFP-NOLSE-NEXT:    mov x20, x0
1003; SOFTFP-NOLSE-NEXT:    mov x19, x3
1004; SOFTFP-NOLSE-NEXT:    ldp x0, x1, [x0]
1005; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
1006; SOFTFP-NOLSE-NEXT:    mov x21, x2
1007; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
1008; SOFTFP-NOLSE-NEXT:    b .LBB10_2
1009; SOFTFP-NOLSE-NEXT:  .LBB10_1: // %atomicrmw.start
1010; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB10_2 Depth=1
1011; SOFTFP-NOLSE-NEXT:    cmp x1, x22
1012; SOFTFP-NOLSE-NEXT:    ccmp x0, x23, #0, eq
1013; SOFTFP-NOLSE-NEXT:    b.eq .LBB10_6
1014; SOFTFP-NOLSE-NEXT:  .LBB10_2: // %atomicrmw.start
1015; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
1016; SOFTFP-NOLSE-NEXT:    // Child Loop BB10_3 Depth 2
1017; SOFTFP-NOLSE-NEXT:    mov x22, x1
1018; SOFTFP-NOLSE-NEXT:    mov x23, x0
1019; SOFTFP-NOLSE-NEXT:    mov x0, x1
1020; SOFTFP-NOLSE-NEXT:    mov x1, x19
1021; SOFTFP-NOLSE-NEXT:    bl __adddf3
1022; SOFTFP-NOLSE-NEXT:    mov x24, x0
1023; SOFTFP-NOLSE-NEXT:    mov x0, x23
1024; SOFTFP-NOLSE-NEXT:    mov x1, x21
1025; SOFTFP-NOLSE-NEXT:    bl __adddf3
1026; SOFTFP-NOLSE-NEXT:    mov x8, x0
1027; SOFTFP-NOLSE-NEXT:  .LBB10_3: // %atomicrmw.start
1028; SOFTFP-NOLSE-NEXT:    // Parent Loop BB10_2 Depth=1
1029; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
1030; SOFTFP-NOLSE-NEXT:    ldaxp x0, x1, [x20]
1031; SOFTFP-NOLSE-NEXT:    cmp x0, x23
1032; SOFTFP-NOLSE-NEXT:    cset w9, ne
1033; SOFTFP-NOLSE-NEXT:    cmp x1, x22
1034; SOFTFP-NOLSE-NEXT:    cinc w9, w9, ne
1035; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB10_5
1036; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
1037; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB10_3 Depth=2
1038; SOFTFP-NOLSE-NEXT:    stlxp w9, x0, x1, [x20]
1039; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB10_3
1040; SOFTFP-NOLSE-NEXT:    b .LBB10_1
1041; SOFTFP-NOLSE-NEXT:  .LBB10_5: // %atomicrmw.start
1042; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB10_3 Depth=2
1043; SOFTFP-NOLSE-NEXT:    stlxp w9, x8, x24, [x20]
1044; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB10_3
1045; SOFTFP-NOLSE-NEXT:    b .LBB10_1
1046; SOFTFP-NOLSE-NEXT:  .LBB10_6: // %atomicrmw.end
1047; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
1048; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
1049; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
1050; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
1051; SOFTFP-NOLSE-NEXT:    ret
1052  %res = atomicrmw fadd ptr %ptr, <2 x double> %value seq_cst, align 16
1053  ret <2 x double> %res
1054}
1055
; Attribute group #0, referenced by the test functions above: marks them nounwind.
1056attributes #0 = { nounwind }
1057