xref: /llvm-project/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll (revision 1729e6e742ba9f6f210550000ace4bec72530c2e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=NOLSE %s
3; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=LSE %s
4; RUN: llc -mtriple=aarch64-linux-gnu -mattr=-lse,-fp-armv8 -O1 < %s | FileCheck -check-prefix=SOFTFP-NOLSE %s
5
6; FIXME: Restore test of fp128 case
7
; Codegen test: seq_cst `atomicrmw fmax` on a `half` in memory, align 2.
; The function returns %res, the result of the atomicrmw instruction.
;
; Expected lowering per RUN-line prefix, as captured by the checks below:
;   NOLSE        - ldaxrh/stlxrh retry loop. Both operands are widened to
;                  single precision with fcvt, combined with fmaxnm, and the
;                  result is narrowed back to half before the store.
;   LSE          - plain `ldr h0` of the initial value, then a casalh loop
;                  that retries (b.ne) while the value returned by casalh
;                  differs from the expected one.
;   SOFTFP-NOLSE - no FP instructions: __gnu_h2f_ieee, fmaxf and
;                  __gnu_f2h_ieee libcalls compute the new value, followed by
;                  an ldaxrh/stlxrh compare-exchange loop.
;
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
8define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
9; NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
10; NOLSE:       // %bb.0:
11; NOLSE-NEXT:    fcvt s1, h0
12; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
13; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
14; NOLSE-NEXT:    ldaxrh w8, [x0]
15; NOLSE-NEXT:    fmov s0, w8
16; NOLSE-NEXT:    fcvt s2, h0
17; NOLSE-NEXT:    fmaxnm s2, s2, s1
18; NOLSE-NEXT:    fcvt h2, s2
19; NOLSE-NEXT:    fmov w8, s2
20; NOLSE-NEXT:    stlxrh w9, w8, [x0]
21; NOLSE-NEXT:    cbnz w9, .LBB0_1
22; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
23; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
24; NOLSE-NEXT:    ret
25;
26; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
27; LSE:       // %bb.0:
28; LSE-NEXT:    fcvt s1, h0
29; LSE-NEXT:    ldr h0, [x0]
30; LSE-NEXT:  .LBB0_1: // %atomicrmw.start
31; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
32; LSE-NEXT:    fcvt s2, h0
33; LSE-NEXT:    fmov w8, s0
34; LSE-NEXT:    mov w10, w8
35; LSE-NEXT:    fmaxnm s2, s2, s1
36; LSE-NEXT:    fcvt h2, s2
37; LSE-NEXT:    fmov w9, s2
38; LSE-NEXT:    casalh w10, w9, [x0]
39; LSE-NEXT:    fmov s0, w10
40; LSE-NEXT:    cmp w10, w8, uxth
41; LSE-NEXT:    b.ne .LBB0_1
42; LSE-NEXT:  // %bb.2: // %atomicrmw.end
43; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
44; LSE-NEXT:    ret
45;
46; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
47; SOFTFP-NOLSE:       // %bb.0:
48; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
49; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
50; SOFTFP-NOLSE-NEXT:    mov x19, x0
51; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
52; SOFTFP-NOLSE-NEXT:    mov w20, w1
53; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
54; SOFTFP-NOLSE-NEXT:    b .LBB0_2
55; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
56; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
57; SOFTFP-NOLSE-NEXT:    mov w8, wzr
58; SOFTFP-NOLSE-NEXT:    clrex
59; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
60; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
61; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
62; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
63; SOFTFP-NOLSE-NEXT:    mov w22, w0
64; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
65; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
66; SOFTFP-NOLSE-NEXT:    mov w21, w0
67; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
68; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
69; SOFTFP-NOLSE-NEXT:    mov w1, w21
70; SOFTFP-NOLSE-NEXT:    bl fmaxf
71; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
72; SOFTFP-NOLSE-NEXT:    mov w8, w0
73; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
74; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
75; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
76; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
77; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
78; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
79; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
80; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
81; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
82; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
83; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
84; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
85; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
86; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
87; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
88; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
89; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
90; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
91; SOFTFP-NOLSE-NEXT:    ret
92  %res = atomicrmw fmax ptr %ptr, half %value seq_cst, align 2
93  ret half %res
94}
95
; Codegen test: seq_cst `atomicrmw fmax` on a `half` in memory, with the
; alignment raised to 4. The function returns %res, the atomicrmw result.
;
; Despite the larger alignment, the checks still expect halfword-sized
; atomics for every prefix:
;   NOLSE        - ldaxrh/stlxrh retry loop around fcvt + fmaxnm + fcvt.
;   LSE          - initial `ldr h0`, then a casalh loop that retries on
;                  mismatch (cmp ..., uxth / b.ne).
;   SOFTFP-NOLSE - __gnu_h2f_ieee / fmaxf / __gnu_f2h_ieee libcalls feeding
;                  an ldaxrh/stlxrh compare-exchange loop.
;
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
96define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
97; NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
98; NOLSE:       // %bb.0:
99; NOLSE-NEXT:    fcvt s1, h0
100; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
101; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
102; NOLSE-NEXT:    ldaxrh w8, [x0]
103; NOLSE-NEXT:    fmov s0, w8
104; NOLSE-NEXT:    fcvt s2, h0
105; NOLSE-NEXT:    fmaxnm s2, s2, s1
106; NOLSE-NEXT:    fcvt h2, s2
107; NOLSE-NEXT:    fmov w8, s2
108; NOLSE-NEXT:    stlxrh w9, w8, [x0]
109; NOLSE-NEXT:    cbnz w9, .LBB1_1
110; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
111; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
112; NOLSE-NEXT:    ret
113;
114; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
115; LSE:       // %bb.0:
116; LSE-NEXT:    fcvt s1, h0
117; LSE-NEXT:    ldr h0, [x0]
118; LSE-NEXT:  .LBB1_1: // %atomicrmw.start
119; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
120; LSE-NEXT:    fcvt s2, h0
121; LSE-NEXT:    fmov w8, s0
122; LSE-NEXT:    mov w10, w8
123; LSE-NEXT:    fmaxnm s2, s2, s1
124; LSE-NEXT:    fcvt h2, s2
125; LSE-NEXT:    fmov w9, s2
126; LSE-NEXT:    casalh w10, w9, [x0]
127; LSE-NEXT:    fmov s0, w10
128; LSE-NEXT:    cmp w10, w8, uxth
129; LSE-NEXT:    b.ne .LBB1_1
130; LSE-NEXT:  // %bb.2: // %atomicrmw.end
131; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
132; LSE-NEXT:    ret
133;
134; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
135; SOFTFP-NOLSE:       // %bb.0:
136; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
137; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
138; SOFTFP-NOLSE-NEXT:    mov x19, x0
139; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
140; SOFTFP-NOLSE-NEXT:    mov w20, w1
141; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
142; SOFTFP-NOLSE-NEXT:    b .LBB1_2
143; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
144; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
145; SOFTFP-NOLSE-NEXT:    mov w8, wzr
146; SOFTFP-NOLSE-NEXT:    clrex
147; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
148; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
149; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
150; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
151; SOFTFP-NOLSE-NEXT:    mov w22, w0
152; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
153; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
154; SOFTFP-NOLSE-NEXT:    mov w21, w0
155; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
156; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
157; SOFTFP-NOLSE-NEXT:    mov w1, w21
158; SOFTFP-NOLSE-NEXT:    bl fmaxf
159; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
160; SOFTFP-NOLSE-NEXT:    mov w8, w0
161; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
162; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
163; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
164; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
165; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
166; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
167; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
168; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
169; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
170; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
171; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
172; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
173; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
174; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
175; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
176; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
177; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
178; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
179; SOFTFP-NOLSE-NEXT:    ret
180  %res = atomicrmw fmax ptr %ptr, half %value seq_cst, align 4
181  ret half %res
182}
183
; Codegen test: seq_cst `atomicrmw fmax` on a `bfloat` in memory, align 2.
; The function returns %res, the result of the atomicrmw instruction.
;
; Expected lowering per RUN-line prefix, as captured by the checks below:
;   NOLSE        - ldaxrh/stlxrh retry loop. Operands are widened with
;                  `shll ..., #16`, combined with a single-precision fmaxnm,
;                  and narrowed back with an integer sequence
;                  (ubfx bit 16, add 0x7fff, add, lsr #16).
;   LSE          - same widen / fmaxnm / narrow sequence, but the update is a
;                  casalh loop seeded by an initial `ldr h0`.
;   SOFTFP-NOLSE - operands widened with `lsl ..., #16`, then fmaxf and
;                  __truncsfbf2 libcalls, followed by an ldaxrh/stlxrh
;                  compare-exchange loop.
;
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
184define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
185; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
186; NOLSE:       // %bb.0:
187; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $d0
188; NOLSE-NEXT:    shll v1.4s, v0.4h, #16
189; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
190; NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
191; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
192; NOLSE-NEXT:    ldaxrh w9, [x0]
193; NOLSE-NEXT:    fmov s0, w9
194; NOLSE-NEXT:    shll v2.4s, v0.4h, #16
195; NOLSE-NEXT:    fmaxnm s2, s2, s1
196; NOLSE-NEXT:    fmov w9, s2
197; NOLSE-NEXT:    ubfx w10, w9, #16, #1
198; NOLSE-NEXT:    add w9, w9, w8
199; NOLSE-NEXT:    add w9, w10, w9
200; NOLSE-NEXT:    lsr w9, w9, #16
201; NOLSE-NEXT:    stlxrh w10, w9, [x0]
202; NOLSE-NEXT:    cbnz w10, .LBB2_1
203; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
204; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
205; NOLSE-NEXT:    ret
206;
207; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
208; LSE:       // %bb.0:
209; LSE-NEXT:    // kill: def $h0 killed $h0 def $d0
210; LSE-NEXT:    shll v1.4s, v0.4h, #16
211; LSE-NEXT:    mov w8, #32767 // =0x7fff
212; LSE-NEXT:    ldr h0, [x0]
213; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
214; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
215; LSE-NEXT:    shll v2.4s, v0.4h, #16
216; LSE-NEXT:    fmaxnm s2, s2, s1
217; LSE-NEXT:    fmov w9, s2
218; LSE-NEXT:    ubfx w10, w9, #16, #1
219; LSE-NEXT:    add w9, w9, w8
220; LSE-NEXT:    add w9, w10, w9
221; LSE-NEXT:    lsr w9, w9, #16
222; LSE-NEXT:    fmov s2, w9
223; LSE-NEXT:    fmov w9, s0
224; LSE-NEXT:    fmov w10, s2
225; LSE-NEXT:    mov w11, w9
226; LSE-NEXT:    casalh w11, w10, [x0]
227; LSE-NEXT:    fmov s0, w11
228; LSE-NEXT:    cmp w11, w9, uxth
229; LSE-NEXT:    b.ne .LBB2_1
230; LSE-NEXT:  // %bb.2: // %atomicrmw.end
231; LSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
232; LSE-NEXT:    ret
233;
234; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
235; SOFTFP-NOLSE:       // %bb.0:
236; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
237; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
238; SOFTFP-NOLSE-NEXT:    mov x19, x0
239; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
240; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
241; SOFTFP-NOLSE-NEXT:    b .LBB2_2
242; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
243; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
244; SOFTFP-NOLSE-NEXT:    mov w8, wzr
245; SOFTFP-NOLSE-NEXT:    clrex
246; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
247; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
248; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
249; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
250; SOFTFP-NOLSE-NEXT:    mov w21, w0
251; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
252; SOFTFP-NOLSE-NEXT:    mov w1, w20
253; SOFTFP-NOLSE-NEXT:    bl fmaxf
254; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
255; SOFTFP-NOLSE-NEXT:    mov w8, w0
256; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
257; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
258; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
259; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
260; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
261; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
262; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
263; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
264; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
265; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
266; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
267; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
268; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
269; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
270; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
271; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
272; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
273; SOFTFP-NOLSE-NEXT:    ret
274  %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 2
275  ret bfloat %res
276}
277
; Codegen test: seq_cst `atomicrmw fmax` on a `bfloat` in memory, with the
; alignment raised to 4. The function returns %res, the atomicrmw result.
;
; Despite the larger alignment, the checks still expect halfword-sized
; atomics for every prefix:
;   NOLSE        - ldaxrh/stlxrh retry loop; operands widened with
;                  `shll ..., #16`, fmaxnm in single precision, result
;                  narrowed with ubfx / add 0x7fff / add / lsr #16.
;   LSE          - same arithmetic, with a casalh loop seeded by `ldr h0`.
;   SOFTFP-NOLSE - `lsl ..., #16` widening, fmaxf and __truncsfbf2 libcalls,
;                  then an ldaxrh/stlxrh compare-exchange loop.
;
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
278define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
279; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
280; NOLSE:       // %bb.0:
281; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $d0
282; NOLSE-NEXT:    shll v1.4s, v0.4h, #16
283; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
284; NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
285; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
286; NOLSE-NEXT:    ldaxrh w9, [x0]
287; NOLSE-NEXT:    fmov s0, w9
288; NOLSE-NEXT:    shll v2.4s, v0.4h, #16
289; NOLSE-NEXT:    fmaxnm s2, s2, s1
290; NOLSE-NEXT:    fmov w9, s2
291; NOLSE-NEXT:    ubfx w10, w9, #16, #1
292; NOLSE-NEXT:    add w9, w9, w8
293; NOLSE-NEXT:    add w9, w10, w9
294; NOLSE-NEXT:    lsr w9, w9, #16
295; NOLSE-NEXT:    stlxrh w10, w9, [x0]
296; NOLSE-NEXT:    cbnz w10, .LBB3_1
297; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
298; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
299; NOLSE-NEXT:    ret
300;
301; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
302; LSE:       // %bb.0:
303; LSE-NEXT:    // kill: def $h0 killed $h0 def $d0
304; LSE-NEXT:    shll v1.4s, v0.4h, #16
305; LSE-NEXT:    mov w8, #32767 // =0x7fff
306; LSE-NEXT:    ldr h0, [x0]
307; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
308; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
309; LSE-NEXT:    shll v2.4s, v0.4h, #16
310; LSE-NEXT:    fmaxnm s2, s2, s1
311; LSE-NEXT:    fmov w9, s2
312; LSE-NEXT:    ubfx w10, w9, #16, #1
313; LSE-NEXT:    add w9, w9, w8
314; LSE-NEXT:    add w9, w10, w9
315; LSE-NEXT:    lsr w9, w9, #16
316; LSE-NEXT:    fmov s2, w9
317; LSE-NEXT:    fmov w9, s0
318; LSE-NEXT:    fmov w10, s2
319; LSE-NEXT:    mov w11, w9
320; LSE-NEXT:    casalh w11, w10, [x0]
321; LSE-NEXT:    fmov s0, w11
322; LSE-NEXT:    cmp w11, w9, uxth
323; LSE-NEXT:    b.ne .LBB3_1
324; LSE-NEXT:  // %bb.2: // %atomicrmw.end
325; LSE-NEXT:    // kill: def $h0 killed $h0 killed $d0
326; LSE-NEXT:    ret
327;
328; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
329; SOFTFP-NOLSE:       // %bb.0:
330; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
331; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
332; SOFTFP-NOLSE-NEXT:    mov x19, x0
333; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
334; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
335; SOFTFP-NOLSE-NEXT:    b .LBB3_2
336; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
337; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
338; SOFTFP-NOLSE-NEXT:    mov w8, wzr
339; SOFTFP-NOLSE-NEXT:    clrex
340; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
341; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
342; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
343; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
344; SOFTFP-NOLSE-NEXT:    mov w21, w0
345; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
346; SOFTFP-NOLSE-NEXT:    mov w1, w20
347; SOFTFP-NOLSE-NEXT:    bl fmaxf
348; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
349; SOFTFP-NOLSE-NEXT:    mov w8, w0
350; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
351; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
352; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
353; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
354; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
355; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
356; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
357; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
358; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
359; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
360; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
361; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
362; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
363; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
364; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
365; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
366; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
367; SOFTFP-NOLSE-NEXT:    ret
368  %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 4
369  ret bfloat %res
370}
371
; Codegen test: seq_cst `atomicrmw fmax` on a `float` in memory, align 4.
; The function returns %res, the result of the atomicrmw instruction.
;
; Expected lowering per RUN-line prefix, as captured by the checks below:
;   NOLSE        - 32-bit ldaxr/stlxr retry loop with a direct fmaxnm on the
;                  s registers (no widening or narrowing needed).
;   LSE          - initial `ldr s1`, then a 32-bit casal loop that retries
;                  (b.ne) while the returned value differs from the expected.
;   SOFTFP-NOLSE - fmaxf libcall computes the new value, followed by a 32-bit
;                  ldaxr/stlxr compare-exchange loop.
;
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
372define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0 {
373; NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align4:
374; NOLSE:       // %bb.0:
375; NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
376; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
377; NOLSE-NEXT:    ldaxr w8, [x0]
378; NOLSE-NEXT:    fmov s1, w8
379; NOLSE-NEXT:    fmaxnm s2, s1, s0
380; NOLSE-NEXT:    fmov w8, s2
381; NOLSE-NEXT:    stlxr w9, w8, [x0]
382; NOLSE-NEXT:    cbnz w9, .LBB4_1
383; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
384; NOLSE-NEXT:    fmov s0, s1
385; NOLSE-NEXT:    ret
386;
387; LSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align4:
388; LSE:       // %bb.0:
389; LSE-NEXT:    ldr s1, [x0]
390; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
391; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
392; LSE-NEXT:    fmaxnm s2, s1, s0
393; LSE-NEXT:    fmov w8, s1
394; LSE-NEXT:    mov w10, w8
395; LSE-NEXT:    fmov w9, s2
396; LSE-NEXT:    casal w10, w9, [x0]
397; LSE-NEXT:    fmov s1, w10
398; LSE-NEXT:    cmp w10, w8
399; LSE-NEXT:    b.ne .LBB4_1
400; LSE-NEXT:  // %bb.2: // %atomicrmw.end
401; LSE-NEXT:    fmov s0, s1
402; LSE-NEXT:    ret
403;
404; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align4:
405; SOFTFP-NOLSE:       // %bb.0:
406; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
407; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
408; SOFTFP-NOLSE-NEXT:    mov x19, x0
409; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
410; SOFTFP-NOLSE-NEXT:    mov w20, w1
411; SOFTFP-NOLSE-NEXT:    b .LBB4_2
412; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
413; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
414; SOFTFP-NOLSE-NEXT:    mov w8, wzr
415; SOFTFP-NOLSE-NEXT:    clrex
416; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
417; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
418; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
419; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
420; SOFTFP-NOLSE-NEXT:    mov w1, w20
421; SOFTFP-NOLSE-NEXT:    mov w21, w0
422; SOFTFP-NOLSE-NEXT:    bl fmaxf
423; SOFTFP-NOLSE-NEXT:    mov w8, w0
424; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
425; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
426; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
427; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
428; SOFTFP-NOLSE-NEXT:    cmp w0, w21
429; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
430; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
431; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
432; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
433; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
434; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
435; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
436; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
437; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
438; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
439; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
440; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
441; SOFTFP-NOLSE-NEXT:    ret
442  %res = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
443  ret float %res
444}
445
; Codegen test: seq_cst `atomicrmw fmax` on a `double` in memory, align 8.
; The function returns %res, the result of the atomicrmw instruction.
;
; NOTE(review): the function name says "f32", but the operand and return type
; are `double`; "f64" looks like the intended spelling. Confirm before
; renaming, since the symbol and all three -LABEL checks depend on it.
;
; Expected lowering per RUN-line prefix, as captured by the checks below:
;   NOLSE        - 64-bit ldaxr/stlxr retry loop with fmaxnm on d registers.
;   LSE          - initial `ldr d1`, then a 64-bit casal loop that retries
;                  (b.ne) while the returned value differs from the expected.
;   SOFTFP-NOLSE - fmax libcall computes the new value, followed by a 64-bit
;                  ldaxr/stlxr compare-exchange loop.
;
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
446define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #0 {
447; NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
448; NOLSE:       // %bb.0:
449; NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
450; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
451; NOLSE-NEXT:    ldaxr x8, [x0]
452; NOLSE-NEXT:    fmov d1, x8
453; NOLSE-NEXT:    fmaxnm d2, d1, d0
454; NOLSE-NEXT:    fmov x8, d2
455; NOLSE-NEXT:    stlxr w9, x8, [x0]
456; NOLSE-NEXT:    cbnz w9, .LBB5_1
457; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
458; NOLSE-NEXT:    fmov d0, d1
459; NOLSE-NEXT:    ret
460;
461; LSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
462; LSE:       // %bb.0:
463; LSE-NEXT:    ldr d1, [x0]
464; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
465; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
466; LSE-NEXT:    fmaxnm d2, d1, d0
467; LSE-NEXT:    fmov x8, d1
468; LSE-NEXT:    mov x10, x8
469; LSE-NEXT:    fmov x9, d2
470; LSE-NEXT:    casal x10, x9, [x0]
471; LSE-NEXT:    fmov d1, x10
472; LSE-NEXT:    cmp x10, x8
473; LSE-NEXT:    b.ne .LBB5_1
474; LSE-NEXT:  // %bb.2: // %atomicrmw.end
475; LSE-NEXT:    fmov d0, d1
476; LSE-NEXT:    ret
477;
478; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
479; SOFTFP-NOLSE:       // %bb.0:
480; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
481; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
482; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
483; SOFTFP-NOLSE-NEXT:    mov x19, x0
484; SOFTFP-NOLSE-NEXT:    mov x20, x1
485; SOFTFP-NOLSE-NEXT:    b .LBB5_2
486; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
487; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
488; SOFTFP-NOLSE-NEXT:    mov w9, wzr
489; SOFTFP-NOLSE-NEXT:    clrex
490; SOFTFP-NOLSE-NEXT:    mov x21, x8
491; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
492; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
493; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
494; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
495; SOFTFP-NOLSE-NEXT:    mov x0, x21
496; SOFTFP-NOLSE-NEXT:    mov x1, x20
497; SOFTFP-NOLSE-NEXT:    bl fmax
498; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
499; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
500; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
501; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
502; SOFTFP-NOLSE-NEXT:    cmp x8, x21
503; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
504; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
505; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
506; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
507; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
508; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
509; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
510; SOFTFP-NOLSE-NEXT:    mov x21, x8
511; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
512; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
513; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
514; SOFTFP-NOLSE-NEXT:    mov x0, x21
515; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
516; SOFTFP-NOLSE-NEXT:    ret
517  %res = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
518  ret double %res
519}
520
; Codegen test: seq_cst `atomicrmw fmax` on a `<2 x half>` vector in memory,
; align 4. The function returns %res, the result of the atomicrmw.
;
; Expected lowering per RUN-line prefix, as captured by the checks below:
;   NOLSE        - a single 32-bit ldaxr/stlxr retry loop covers both lanes.
;                  Each lane is extracted (`mov h, v.h[1]`), widened with
;                  fcvt, combined with a scalar fmaxnm, narrowed back, and the
;                  lanes are re-packed with `mov v.h[1], v.h[0]`.
;   LSE          - same per-lane arithmetic, with a 32-bit casal loop seeded
;                  by an initial `ldr s0`.
;   SOFTFP-NOLSE - per lane: __gnu_h2f_ieee on both operands, fmaxf, then
;                  __gnu_f2h_ieee. The two 16-bit results are merged with bfi
;                  and written by a 32-bit ldaxr/stlxr compare-exchange loop;
;                  the result is handed back in w0 and w1.
;
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
521define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 {
522; NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
523; NOLSE:       // %bb.0:
524; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
525; NOLSE-NEXT:    mov h1, v0.h[1]
526; NOLSE-NEXT:    fcvt s0, h0
527; NOLSE-NEXT:    fcvt s1, h1
528; NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
529; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
530; NOLSE-NEXT:    ldaxr w8, [x0]
531; NOLSE-NEXT:    fmov s2, w8
532; NOLSE-NEXT:    mov h3, v2.h[1]
533; NOLSE-NEXT:    fcvt s2, h2
534; NOLSE-NEXT:    fcvt s3, h3
535; NOLSE-NEXT:    fmaxnm s2, s2, s0
536; NOLSE-NEXT:    fmaxnm s3, s3, s1
537; NOLSE-NEXT:    fcvt h2, s2
538; NOLSE-NEXT:    fcvt h3, s3
539; NOLSE-NEXT:    mov v2.h[1], v3.h[0]
540; NOLSE-NEXT:    fmov w9, s2
541; NOLSE-NEXT:    stlxr w10, w9, [x0]
542; NOLSE-NEXT:    cbnz w10, .LBB6_1
543; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
544; NOLSE-NEXT:    fmov d0, x8
545; NOLSE-NEXT:    ret
546;
547; LSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
548; LSE:       // %bb.0:
549; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
550; LSE-NEXT:    mov h1, v0.h[1]
551; LSE-NEXT:    fcvt s2, h0
552; LSE-NEXT:    ldr s0, [x0]
553; LSE-NEXT:    fcvt s1, h1
554; LSE-NEXT:  .LBB6_1: // %atomicrmw.start
555; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
556; LSE-NEXT:    mov h3, v0.h[1]
557; LSE-NEXT:    fcvt s4, h0
558; LSE-NEXT:    fmov w8, s0
559; LSE-NEXT:    mov w10, w8
560; LSE-NEXT:    fcvt s3, h3
561; LSE-NEXT:    fmaxnm s4, s4, s2
562; LSE-NEXT:    fmaxnm s3, s3, s1
563; LSE-NEXT:    fcvt h4, s4
564; LSE-NEXT:    fcvt h3, s3
565; LSE-NEXT:    mov v4.h[1], v3.h[0]
566; LSE-NEXT:    fmov w9, s4
567; LSE-NEXT:    casal w10, w9, [x0]
568; LSE-NEXT:    fmov s0, w10
569; LSE-NEXT:    cmp w10, w8
570; LSE-NEXT:    b.ne .LBB6_1
571; LSE-NEXT:  // %bb.2: // %atomicrmw.end
572; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
573; LSE-NEXT:    ret
574;
575; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
576; SOFTFP-NOLSE:       // %bb.0:
577; SOFTFP-NOLSE-NEXT:    stp x30, x25, [sp, #-64]! // 16-byte Folded Spill
578; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
579; SOFTFP-NOLSE-NEXT:    ldrh w23, [x0, #2]
580; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
581; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
582; SOFTFP-NOLSE-NEXT:    mov w21, w1
583; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
584; SOFTFP-NOLSE-NEXT:    mov w19, w2
585; SOFTFP-NOLSE-NEXT:    mov x20, x0
586; SOFTFP-NOLSE-NEXT:    b .LBB6_2
587; SOFTFP-NOLSE-NEXT:  .LBB6_1: // %cmpxchg.nostore
588; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=1
589; SOFTFP-NOLSE-NEXT:    mov w8, wzr
590; SOFTFP-NOLSE-NEXT:    clrex
591; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
592; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB6_6
593; SOFTFP-NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
594; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
595; SOFTFP-NOLSE-NEXT:    // Child Loop BB6_3 Depth 2
596; SOFTFP-NOLSE-NEXT:    and w0, w19, #0xffff
597; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
598; SOFTFP-NOLSE-NEXT:    mov w24, w0
599; SOFTFP-NOLSE-NEXT:    and w0, w23, #0xffff
600; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
601; SOFTFP-NOLSE-NEXT:    mov w1, w24
602; SOFTFP-NOLSE-NEXT:    bl fmaxf
603; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
604; SOFTFP-NOLSE-NEXT:    mov w24, w0
605; SOFTFP-NOLSE-NEXT:    and w0, w21, #0xffff
606; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
607; SOFTFP-NOLSE-NEXT:    mov w25, w0
608; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
609; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
610; SOFTFP-NOLSE-NEXT:    mov w1, w25
611; SOFTFP-NOLSE-NEXT:    bl fmaxf
612; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
613; SOFTFP-NOLSE-NEXT:    mov w8, w22
614; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
615; SOFTFP-NOLSE-NEXT:    bfi w8, w23, #16, #16
616; SOFTFP-NOLSE-NEXT:  .LBB6_3: // %cmpxchg.start
617; SOFTFP-NOLSE-NEXT:    // Parent Loop BB6_2 Depth=1
618; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
619; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x20]
620; SOFTFP-NOLSE-NEXT:    cmp w22, w8
621; SOFTFP-NOLSE-NEXT:    b.ne .LBB6_1
622; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
623; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
624; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
625; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB6_3
626; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB6_2 Depth=1
627; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
628; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
629; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_2
630; SOFTFP-NOLSE-NEXT:  .LBB6_6: // %atomicrmw.end
631; SOFTFP-NOLSE-NEXT:    mov w0, w22
632; SOFTFP-NOLSE-NEXT:    mov w1, w23
633; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
634; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
635; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
636; SOFTFP-NOLSE-NEXT:    ldp x30, x25, [sp], #64 // 16-byte Folded Reload
637; SOFTFP-NOLSE-NEXT:    ret
638  %res = atomicrmw fmax ptr %ptr, <2 x half> %value seq_cst, align 4
639  ret <2 x half> %res
640}
641
; Codegen test: seq_cst `atomicrmw fmax` on a `<2 x bfloat>` vector in
; memory, align 4. The function returns %res, the result of the atomicrmw.
;
; Expected lowering per RUN-line prefix, as captured by the checks below:
;   NOLSE        - a single 32-bit ldaxr/stlxr retry loop covers both lanes.
;                  Lane 1 is broadcast with dup, both lanes are widened with
;                  `shll ..., #16`, combined with scalar fmaxnm, narrowed with
;                  the integer sequence ubfx / add 0x7fff / add / lsr #16, and
;                  re-packed with `mov v.h[1], v.h[0]`.
;   LSE          - same per-lane arithmetic, with a 32-bit casal loop seeded
;                  by an initial `ldr s0`.
;   SOFTFP-NOLSE - per lane: `lsl ..., #16` widening, fmaxf, __truncsfbf2.
;                  The new value is packed with bfi and the expected value
;                  with bfxil, then a 32-bit ldaxr/stlxr compare-exchange loop
;                  performs the update.
;
; The CHECK lines are autogenerated; regenerate them instead of hand-editing.
642define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
643; NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
644; NOLSE:       // %bb.0:
645; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
646; NOLSE-NEXT:    dup v1.4h, v0.h[1]
647; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
648; NOLSE-NEXT:    shll v0.4s, v0.4h, #16
649; NOLSE-NEXT:    shll v1.4s, v1.4h, #16
650; NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
651; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
652; NOLSE-NEXT:    ldaxr w9, [x0]
653; NOLSE-NEXT:    fmov s2, w9
654; NOLSE-NEXT:    dup v3.4h, v2.h[1]
655; NOLSE-NEXT:    shll v2.4s, v2.4h, #16
656; NOLSE-NEXT:    fmaxnm s2, s2, s0
657; NOLSE-NEXT:    shll v3.4s, v3.4h, #16
658; NOLSE-NEXT:    fmaxnm s3, s3, s1
659; NOLSE-NEXT:    fmov w11, s2
660; NOLSE-NEXT:    ubfx w13, w11, #16, #1
661; NOLSE-NEXT:    add w11, w11, w8
662; NOLSE-NEXT:    fmov w10, s3
663; NOLSE-NEXT:    add w11, w13, w11
664; NOLSE-NEXT:    lsr w11, w11, #16
665; NOLSE-NEXT:    ubfx w12, w10, #16, #1
666; NOLSE-NEXT:    add w10, w10, w8
667; NOLSE-NEXT:    fmov s3, w11
668; NOLSE-NEXT:    add w10, w12, w10
669; NOLSE-NEXT:    lsr w10, w10, #16
670; NOLSE-NEXT:    fmov s2, w10
671; NOLSE-NEXT:    mov v3.h[1], v2.h[0]
672; NOLSE-NEXT:    fmov w10, s3
673; NOLSE-NEXT:    stlxr w11, w10, [x0]
674; NOLSE-NEXT:    cbnz w11, .LBB7_1
675; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
676; NOLSE-NEXT:    fmov d0, x9
677; NOLSE-NEXT:    ret
678;
679; LSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
680; LSE:       // %bb.0:
681; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
682; LSE-NEXT:    dup v1.4h, v0.h[1]
683; LSE-NEXT:    shll v2.4s, v0.4h, #16
684; LSE-NEXT:    mov w8, #32767 // =0x7fff
685; LSE-NEXT:    ldr s0, [x0]
686; LSE-NEXT:    shll v1.4s, v1.4h, #16
687; LSE-NEXT:  .LBB7_1: // %atomicrmw.start
688; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
689; LSE-NEXT:    dup v3.4h, v0.h[1]
690; LSE-NEXT:    shll v4.4s, v0.4h, #16
691; LSE-NEXT:    fmaxnm s4, s4, s2
692; LSE-NEXT:    shll v3.4s, v3.4h, #16
693; LSE-NEXT:    fmaxnm s3, s3, s1
694; LSE-NEXT:    fmov w10, s4
695; LSE-NEXT:    ubfx w12, w10, #16, #1
696; LSE-NEXT:    add w10, w10, w8
697; LSE-NEXT:    fmov w9, s3
698; LSE-NEXT:    add w10, w12, w10
699; LSE-NEXT:    lsr w10, w10, #16
700; LSE-NEXT:    ubfx w11, w9, #16, #1
701; LSE-NEXT:    add w9, w9, w8
702; LSE-NEXT:    fmov s4, w10
703; LSE-NEXT:    add w9, w11, w9
704; LSE-NEXT:    lsr w9, w9, #16
705; LSE-NEXT:    fmov s3, w9
706; LSE-NEXT:    fmov w9, s0
707; LSE-NEXT:    mov v4.h[1], v3.h[0]
708; LSE-NEXT:    mov w11, w9
709; LSE-NEXT:    fmov w10, s4
710; LSE-NEXT:    casal w11, w10, [x0]
711; LSE-NEXT:    fmov s0, w11
712; LSE-NEXT:    cmp w11, w9
713; LSE-NEXT:    b.ne .LBB7_1
714; LSE-NEXT:  // %bb.2: // %atomicrmw.end
715; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
716; LSE-NEXT:    ret
717;
718; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
719; SOFTFP-NOLSE:       // %bb.0:
720; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
721; SOFTFP-NOLSE-NEXT:    mov w8, w1
722; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
723; SOFTFP-NOLSE-NEXT:    ldrh w1, [x0, #2]
724; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
725; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
726; SOFTFP-NOLSE-NEXT:    lsl w20, w2, #16
727; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
728; SOFTFP-NOLSE-NEXT:    mov x19, x0
729; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
730; SOFTFP-NOLSE-NEXT:    b .LBB7_2
731; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
732; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
733; SOFTFP-NOLSE-NEXT:    mov w8, wzr
734; SOFTFP-NOLSE-NEXT:    clrex
735; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
736; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_6
737; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
738; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
739; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_3 Depth 2
740; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
741; SOFTFP-NOLSE-NEXT:    mov w1, w20
742; SOFTFP-NOLSE-NEXT:    mov w0, w23
743; SOFTFP-NOLSE-NEXT:    bl fmaxf
744; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
745; SOFTFP-NOLSE-NEXT:    mov w24, w0
746; SOFTFP-NOLSE-NEXT:    lsl w0, w22, #16
747; SOFTFP-NOLSE-NEXT:    mov w1, w21
748; SOFTFP-NOLSE-NEXT:    bl fmaxf
749; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
750; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
751; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
752; SOFTFP-NOLSE-NEXT:  .LBB7_3: // %cmpxchg.start
753; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_2 Depth=1
754; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
755; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x19]
756; SOFTFP-NOLSE-NEXT:    cmp w22, w23
757; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_1
758; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
759; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_3 Depth=2
760; SOFTFP-NOLSE-NEXT:    stlxr w8, w0, [x19]
761; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_3
762; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB7_2 Depth=1
763; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
764; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
765; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_2
766; SOFTFP-NOLSE-NEXT:  .LBB7_6: // %atomicrmw.end
767; SOFTFP-NOLSE-NEXT:    mov w0, w22
768; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
769; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
770; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
771; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
772; SOFTFP-NOLSE-NEXT:    ret
773  %res = atomicrmw fmax ptr %ptr, <2 x bfloat> %value seq_cst, align 4
774  ret <2 x bfloat> %res
775}
776
; Codegen test: `atomicrmw fmax` on a <2 x float> value with seq_cst ordering
; and 8-byte alignment; the function returns the result of the atomicrmw.
;
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. One block of checks exists per RUN-line prefix:
;   NOLSE        - default features: a single ldaxr/stlxr retry loop with the
;                  maximum computed by a vector fmaxnm (v2.2s).
;   LSE          - -mattr=+lse: an initial plain load, then a casal
;                  compare-and-swap loop that retries until the value read
;                  back equals the expected one.
;   SOFTFP-NOLSE - -mattr=-lse,-fp-armv8: each lane goes through a call to
;                  fmaxf, the two 32-bit results are packed into one 64-bit
;                  register, and an ldaxr/cmp/stlxr compare-exchange loop
;                  publishes it (clrex on the no-store path).
777define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x float> %value) #0 {
778; NOLSE-LABEL: test_atomicrmw_fmax_v2f32_seq_cst_align8:
779; NOLSE:       // %bb.0:
780; NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
781; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
782; NOLSE-NEXT:    ldaxr x8, [x0]
783; NOLSE-NEXT:    fmov d1, x8
784; NOLSE-NEXT:    fmaxnm v2.2s, v1.2s, v0.2s
785; NOLSE-NEXT:    fmov x8, d2
786; NOLSE-NEXT:    stlxr w9, x8, [x0]
787; NOLSE-NEXT:    cbnz w9, .LBB8_1
788; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
789; NOLSE-NEXT:    fmov d0, d1
790; NOLSE-NEXT:    ret
791;
792; LSE-LABEL: test_atomicrmw_fmax_v2f32_seq_cst_align8:
793; LSE:       // %bb.0:
794; LSE-NEXT:    ldr d1, [x0]
795; LSE-NEXT:  .LBB8_1: // %atomicrmw.start
796; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
797; LSE-NEXT:    fmaxnm v2.2s, v1.2s, v0.2s
798; LSE-NEXT:    fmov x8, d1
799; LSE-NEXT:    mov x10, x8
800; LSE-NEXT:    fmov x9, d2
801; LSE-NEXT:    casal x10, x9, [x0]
802; LSE-NEXT:    fmov d1, x10
803; LSE-NEXT:    cmp x10, x8
804; LSE-NEXT:    b.ne .LBB8_1
805; LSE-NEXT:  // %bb.2: // %atomicrmw.end
806; LSE-NEXT:    fmov d0, d1
807; LSE-NEXT:    ret
808;
809; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f32_seq_cst_align8:
810; SOFTFP-NOLSE:       // %bb.0:
811; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
812; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
813; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
814; SOFTFP-NOLSE-NEXT:    mov w21, w1
815; SOFTFP-NOLSE-NEXT:    ldp w22, w23, [x0]
816; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
817; SOFTFP-NOLSE-NEXT:    mov w19, w2
818; SOFTFP-NOLSE-NEXT:    mov x20, x0
819; SOFTFP-NOLSE-NEXT:    b .LBB8_2
820; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %cmpxchg.nostore
821; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=1
822; SOFTFP-NOLSE-NEXT:    mov w8, wzr
823; SOFTFP-NOLSE-NEXT:    clrex
824; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
825; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_6
826; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
827; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
828; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_3 Depth 2
829; SOFTFP-NOLSE-NEXT:    mov w0, w23
830; SOFTFP-NOLSE-NEXT:    mov w1, w19
831; SOFTFP-NOLSE-NEXT:    bl fmaxf
832; SOFTFP-NOLSE-NEXT:    mov w24, w0
833; SOFTFP-NOLSE-NEXT:    mov w0, w22
834; SOFTFP-NOLSE-NEXT:    mov w1, w21
835; SOFTFP-NOLSE-NEXT:    bl fmaxf
836; SOFTFP-NOLSE-NEXT:    mov w8, w0
837; SOFTFP-NOLSE-NEXT:    mov w9, w22
838; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
839; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
840; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
841; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
842; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_2 Depth=1
843; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
844; SOFTFP-NOLSE-NEXT:    ldaxr x22, [x20]
845; SOFTFP-NOLSE-NEXT:    cmp x22, x9
846; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_1
847; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
848; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_3 Depth=2
849; SOFTFP-NOLSE-NEXT:    stlxr w10, x8, [x20]
850; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB8_3
851; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB8_2 Depth=1
852; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
853; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
854; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_2
855; SOFTFP-NOLSE-NEXT:  .LBB8_6: // %atomicrmw.end
856; SOFTFP-NOLSE-NEXT:    mov w0, w22
857; SOFTFP-NOLSE-NEXT:    mov w1, w23
858; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
859; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
860; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
861; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
862; SOFTFP-NOLSE-NEXT:    ret
863  %res = atomicrmw fmax ptr %ptr, <2 x float> %value seq_cst, align 8
864  ret <2 x float> %res
865}
866
; Codegen test: `atomicrmw fmax` on a <2 x double> value (128 bits) with
; seq_cst ordering; the function returns the result of the atomicrmw.
;
; NOTE(review): the function name ends in "align8" but the atomicrmw below is
; written with `align 16` - confirm which one is intended. Changing the
; alignment or the name requires regenerating every check line in this block.
;
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. One block of checks exists per RUN-line prefix:
;   NOLSE        - default features: a single ldaxp/stlxp retry loop with the
;                  maximum computed by a vector fmaxnm (v2.2d).
;   LSE          - -mattr=+lse: an initial plain 128-bit load, then a caspal
;                  pair compare-and-swap loop; both halves are compared
;                  (cmp + ccmp) to decide whether to retry.
;   SOFTFP-NOLSE - -mattr=-lse,-fp-armv8: each lane goes through a call to
;                  fmax, then an ldaxp/stlxp loop stores the new pair when
;                  the loaded pair matches the expected one and otherwise
;                  stores the loaded pair back before the outer loop retries.
867define <2 x double> @test_atomicrmw_fmax_v2f64_seq_cst_align8(ptr %ptr, <2 x double> %value) #0 {
868; NOLSE-LABEL: test_atomicrmw_fmax_v2f64_seq_cst_align8:
869; NOLSE:       // %bb.0:
870; NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
871; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
872; NOLSE-NEXT:    ldaxp x8, x9, [x0]
873; NOLSE-NEXT:    fmov d1, x8
874; NOLSE-NEXT:    mov v1.d[1], x9
875; NOLSE-NEXT:    fmaxnm v2.2d, v1.2d, v0.2d
876; NOLSE-NEXT:    mov x8, v2.d[1]
877; NOLSE-NEXT:    fmov x9, d2
878; NOLSE-NEXT:    stlxp w10, x9, x8, [x0]
879; NOLSE-NEXT:    cbnz w10, .LBB9_1
880; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
881; NOLSE-NEXT:    mov v0.16b, v1.16b
882; NOLSE-NEXT:    ret
883;
884; LSE-LABEL: test_atomicrmw_fmax_v2f64_seq_cst_align8:
885; LSE:       // %bb.0:
886; LSE-NEXT:    ldr q1, [x0]
887; LSE-NEXT:  .LBB9_1: // %atomicrmw.start
888; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
889; LSE-NEXT:    fmaxnm v2.2d, v1.2d, v0.2d
890; LSE-NEXT:    mov x3, v1.d[1]
891; LSE-NEXT:    fmov x2, d1
892; LSE-NEXT:    mov x7, x3
893; LSE-NEXT:    mov x5, v2.d[1]
894; LSE-NEXT:    mov x6, x2
895; LSE-NEXT:    fmov x4, d2
896; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
897; LSE-NEXT:    fmov d1, x6
898; LSE-NEXT:    cmp x7, x3
899; LSE-NEXT:    ccmp x6, x2, #0, eq
900; LSE-NEXT:    mov v1.d[1], x7
901; LSE-NEXT:    b.ne .LBB9_1
902; LSE-NEXT:  // %bb.2: // %atomicrmw.end
903; LSE-NEXT:    mov v0.16b, v1.16b
904; LSE-NEXT:    ret
905;
906; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f64_seq_cst_align8:
907; SOFTFP-NOLSE:       // %bb.0:
908; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
909; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
910; SOFTFP-NOLSE-NEXT:    mov x20, x0
911; SOFTFP-NOLSE-NEXT:    mov x19, x3
912; SOFTFP-NOLSE-NEXT:    ldp x0, x1, [x0]
913; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
914; SOFTFP-NOLSE-NEXT:    mov x21, x2
915; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
916; SOFTFP-NOLSE-NEXT:    b .LBB9_2
917; SOFTFP-NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
918; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=1
919; SOFTFP-NOLSE-NEXT:    cmp x1, x22
920; SOFTFP-NOLSE-NEXT:    ccmp x0, x23, #0, eq
921; SOFTFP-NOLSE-NEXT:    b.eq .LBB9_6
922; SOFTFP-NOLSE-NEXT:  .LBB9_2: // %atomicrmw.start
923; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
924; SOFTFP-NOLSE-NEXT:    // Child Loop BB9_3 Depth 2
925; SOFTFP-NOLSE-NEXT:    mov x22, x1
926; SOFTFP-NOLSE-NEXT:    mov x23, x0
927; SOFTFP-NOLSE-NEXT:    mov x0, x1
928; SOFTFP-NOLSE-NEXT:    mov x1, x19
929; SOFTFP-NOLSE-NEXT:    bl fmax
930; SOFTFP-NOLSE-NEXT:    mov x24, x0
931; SOFTFP-NOLSE-NEXT:    mov x0, x23
932; SOFTFP-NOLSE-NEXT:    mov x1, x21
933; SOFTFP-NOLSE-NEXT:    bl fmax
934; SOFTFP-NOLSE-NEXT:    mov x8, x0
935; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %atomicrmw.start
936; SOFTFP-NOLSE-NEXT:    // Parent Loop BB9_2 Depth=1
937; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
938; SOFTFP-NOLSE-NEXT:    ldaxp x0, x1, [x20]
939; SOFTFP-NOLSE-NEXT:    cmp x0, x23
940; SOFTFP-NOLSE-NEXT:    cset w9, ne
941; SOFTFP-NOLSE-NEXT:    cmp x1, x22
942; SOFTFP-NOLSE-NEXT:    cinc w9, w9, ne
943; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB9_5
944; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
945; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_3 Depth=2
946; SOFTFP-NOLSE-NEXT:    stlxp w9, x0, x1, [x20]
947; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB9_3
948; SOFTFP-NOLSE-NEXT:    b .LBB9_1
949; SOFTFP-NOLSE-NEXT:  .LBB9_5: // %atomicrmw.start
950; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_3 Depth=2
951; SOFTFP-NOLSE-NEXT:    stlxp w9, x8, x24, [x20]
952; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB9_3
953; SOFTFP-NOLSE-NEXT:    b .LBB9_1
954; SOFTFP-NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
955; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
956; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
957; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
958; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
959; SOFTFP-NOLSE-NEXT:    ret
960  %res = atomicrmw fmax ptr %ptr, <2 x double> %value seq_cst, align 16
961  ret <2 x double> %res
962}
963
; Attribute group 0: referenced by the trailing group reference on the test
; functions' define lines; it marks them nounwind.
964attributes #0 = { nounwind }
965