1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F
3; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D
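; This file checks the expansion of atomicrmw fadd/fsub/fmin/fmax on float and
; double with various memory orderings, both with +f only (LA64F) and with +d
; (LA64D). The float cases expand to ll.w/sc.w compare-and-swap loops with a
; dbar on the failure path, while the double cases (which only guarantee
; "align 4") are lowered to __atomic_compare_exchange libcalls; LA64F
; additionally calls __adddf3/fmin/fmax for the double arithmetic itself.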
4
5define float @float_fadd_acquire(ptr %p) nounwind {
6; LA64F-LABEL: float_fadd_acquire:
7; LA64F:       # %bb.0:
8; LA64F-NEXT:    fld.s $fa0, $a0, 0
9; LA64F-NEXT:    addi.w $a1, $zero, 1
10; LA64F-NEXT:    movgr2fr.w $fa1, $a1
11; LA64F-NEXT:    ffint.s.w $fa1, $fa1
12; LA64F-NEXT:    .p2align 4, , 16
13; LA64F-NEXT:  .LBB0_1: # %atomicrmw.start
14; LA64F-NEXT:    # =>This Loop Header: Depth=1
15; LA64F-NEXT:    # Child Loop BB0_3 Depth 2
16; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
17; LA64F-NEXT:    movfr2gr.s $a1, $fa2
18; LA64F-NEXT:    movfr2gr.s $a2, $fa0
19; LA64F-NEXT:  .LBB0_3: # %atomicrmw.start
20; LA64F-NEXT:    # Parent Loop BB0_1 Depth=1
21; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
22; LA64F-NEXT:    ll.w $a3, $a0, 0
23; LA64F-NEXT:    bne $a3, $a2, .LBB0_5
24; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
25; LA64F-NEXT:    # in Loop: Header=BB0_3 Depth=2
26; LA64F-NEXT:    move $a4, $a1
27; LA64F-NEXT:    sc.w $a4, $a0, 0
28; LA64F-NEXT:    beqz $a4, .LBB0_3
29; LA64F-NEXT:    b .LBB0_6
30; LA64F-NEXT:  .LBB0_5: # %atomicrmw.start
31; LA64F-NEXT:    # in Loop: Header=BB0_1 Depth=1
32; LA64F-NEXT:    dbar 20
33; LA64F-NEXT:  .LBB0_6: # %atomicrmw.start
34; LA64F-NEXT:    # in Loop: Header=BB0_1 Depth=1
35; LA64F-NEXT:    movgr2fr.w $fa0, $a3
36; LA64F-NEXT:    bne $a3, $a2, .LBB0_1
37; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
38; LA64F-NEXT:    ret
39;
40; LA64D-LABEL: float_fadd_acquire:
41; LA64D:       # %bb.0:
42; LA64D-NEXT:    fld.s $fa0, $a0, 0
43; LA64D-NEXT:    vldi $vr1, -1168
44; LA64D-NEXT:    .p2align 4, , 16
45; LA64D-NEXT:  .LBB0_1: # %atomicrmw.start
46; LA64D-NEXT:    # =>This Loop Header: Depth=1
47; LA64D-NEXT:    # Child Loop BB0_3 Depth 2
48; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
49; LA64D-NEXT:    movfr2gr.s $a1, $fa2
50; LA64D-NEXT:    movfr2gr.s $a2, $fa0
51; LA64D-NEXT:  .LBB0_3: # %atomicrmw.start
52; LA64D-NEXT:    # Parent Loop BB0_1 Depth=1
53; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
54; LA64D-NEXT:    ll.w $a3, $a0, 0
55; LA64D-NEXT:    bne $a3, $a2, .LBB0_5
56; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
57; LA64D-NEXT:    # in Loop: Header=BB0_3 Depth=2
58; LA64D-NEXT:    move $a4, $a1
59; LA64D-NEXT:    sc.w $a4, $a0, 0
60; LA64D-NEXT:    beqz $a4, .LBB0_3
61; LA64D-NEXT:    b .LBB0_6
62; LA64D-NEXT:  .LBB0_5: # %atomicrmw.start
63; LA64D-NEXT:    # in Loop: Header=BB0_1 Depth=1
64; LA64D-NEXT:    dbar 20
65; LA64D-NEXT:  .LBB0_6: # %atomicrmw.start
66; LA64D-NEXT:    # in Loop: Header=BB0_1 Depth=1
67; LA64D-NEXT:    movgr2fr.w $fa0, $a3
68; LA64D-NEXT:    bne $a3, $a2, .LBB0_1
69; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
70; LA64D-NEXT:    ret
71  %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4
72  ret float %v
73}
74
75define float @float_fsub_acquire(ptr %p) nounwind {
76; LA64F-LABEL: float_fsub_acquire:
77; LA64F:       # %bb.0:
78; LA64F-NEXT:    fld.s $fa0, $a0, 0
79; LA64F-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
80; LA64F-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI1_0)
81; LA64F-NEXT:    .p2align 4, , 16
82; LA64F-NEXT:  .LBB1_1: # %atomicrmw.start
83; LA64F-NEXT:    # =>This Loop Header: Depth=1
84; LA64F-NEXT:    # Child Loop BB1_3 Depth 2
85; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
86; LA64F-NEXT:    movfr2gr.s $a1, $fa2
87; LA64F-NEXT:    movfr2gr.s $a2, $fa0
88; LA64F-NEXT:  .LBB1_3: # %atomicrmw.start
89; LA64F-NEXT:    # Parent Loop BB1_1 Depth=1
90; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
91; LA64F-NEXT:    ll.w $a3, $a0, 0
92; LA64F-NEXT:    bne $a3, $a2, .LBB1_5
93; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
94; LA64F-NEXT:    # in Loop: Header=BB1_3 Depth=2
95; LA64F-NEXT:    move $a4, $a1
96; LA64F-NEXT:    sc.w $a4, $a0, 0
97; LA64F-NEXT:    beqz $a4, .LBB1_3
98; LA64F-NEXT:    b .LBB1_6
99; LA64F-NEXT:  .LBB1_5: # %atomicrmw.start
100; LA64F-NEXT:    # in Loop: Header=BB1_1 Depth=1
101; LA64F-NEXT:    dbar 20
102; LA64F-NEXT:  .LBB1_6: # %atomicrmw.start
103; LA64F-NEXT:    # in Loop: Header=BB1_1 Depth=1
104; LA64F-NEXT:    movgr2fr.w $fa0, $a3
105; LA64F-NEXT:    bne $a3, $a2, .LBB1_1
106; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
107; LA64F-NEXT:    ret
108;
109; LA64D-LABEL: float_fsub_acquire:
110; LA64D:       # %bb.0:
111; LA64D-NEXT:    fld.s $fa0, $a0, 0
112; LA64D-NEXT:    vldi $vr1, -1040
113; LA64D-NEXT:    .p2align 4, , 16
114; LA64D-NEXT:  .LBB1_1: # %atomicrmw.start
115; LA64D-NEXT:    # =>This Loop Header: Depth=1
116; LA64D-NEXT:    # Child Loop BB1_3 Depth 2
117; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
118; LA64D-NEXT:    movfr2gr.s $a1, $fa2
119; LA64D-NEXT:    movfr2gr.s $a2, $fa0
120; LA64D-NEXT:  .LBB1_3: # %atomicrmw.start
121; LA64D-NEXT:    # Parent Loop BB1_1 Depth=1
122; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
123; LA64D-NEXT:    ll.w $a3, $a0, 0
124; LA64D-NEXT:    bne $a3, $a2, .LBB1_5
125; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
126; LA64D-NEXT:    # in Loop: Header=BB1_3 Depth=2
127; LA64D-NEXT:    move $a4, $a1
128; LA64D-NEXT:    sc.w $a4, $a0, 0
129; LA64D-NEXT:    beqz $a4, .LBB1_3
130; LA64D-NEXT:    b .LBB1_6
131; LA64D-NEXT:  .LBB1_5: # %atomicrmw.start
132; LA64D-NEXT:    # in Loop: Header=BB1_1 Depth=1
133; LA64D-NEXT:    dbar 20
134; LA64D-NEXT:  .LBB1_6: # %atomicrmw.start
135; LA64D-NEXT:    # in Loop: Header=BB1_1 Depth=1
136; LA64D-NEXT:    movgr2fr.w $fa0, $a3
137; LA64D-NEXT:    bne $a3, $a2, .LBB1_1
138; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
139; LA64D-NEXT:    ret
140  %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4
141  ret float %v
142}
143
144define float @float_fmin_acquire(ptr %p) nounwind {
145; LA64F-LABEL: float_fmin_acquire:
146; LA64F:       # %bb.0:
147; LA64F-NEXT:    fld.s $fa0, $a0, 0
148; LA64F-NEXT:    addi.w $a1, $zero, 1
149; LA64F-NEXT:    movgr2fr.w $fa1, $a1
150; LA64F-NEXT:    ffint.s.w $fa1, $fa1
151; LA64F-NEXT:    .p2align 4, , 16
152; LA64F-NEXT:  .LBB2_1: # %atomicrmw.start
153; LA64F-NEXT:    # =>This Loop Header: Depth=1
154; LA64F-NEXT:    # Child Loop BB2_3 Depth 2
155; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
156; LA64F-NEXT:    fmin.s $fa2, $fa2, $fa1
157; LA64F-NEXT:    movfr2gr.s $a1, $fa2
158; LA64F-NEXT:    movfr2gr.s $a2, $fa0
159; LA64F-NEXT:  .LBB2_3: # %atomicrmw.start
160; LA64F-NEXT:    # Parent Loop BB2_1 Depth=1
161; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
162; LA64F-NEXT:    ll.w $a3, $a0, 0
163; LA64F-NEXT:    bne $a3, $a2, .LBB2_5
164; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
165; LA64F-NEXT:    # in Loop: Header=BB2_3 Depth=2
166; LA64F-NEXT:    move $a4, $a1
167; LA64F-NEXT:    sc.w $a4, $a0, 0
168; LA64F-NEXT:    beqz $a4, .LBB2_3
169; LA64F-NEXT:    b .LBB2_6
170; LA64F-NEXT:  .LBB2_5: # %atomicrmw.start
171; LA64F-NEXT:    # in Loop: Header=BB2_1 Depth=1
172; LA64F-NEXT:    dbar 20
173; LA64F-NEXT:  .LBB2_6: # %atomicrmw.start
174; LA64F-NEXT:    # in Loop: Header=BB2_1 Depth=1
175; LA64F-NEXT:    movgr2fr.w $fa0, $a3
176; LA64F-NEXT:    bne $a3, $a2, .LBB2_1
177; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
178; LA64F-NEXT:    ret
179;
180; LA64D-LABEL: float_fmin_acquire:
181; LA64D:       # %bb.0:
182; LA64D-NEXT:    fld.s $fa0, $a0, 0
183; LA64D-NEXT:    vldi $vr1, -1168
184; LA64D-NEXT:    .p2align 4, , 16
185; LA64D-NEXT:  .LBB2_1: # %atomicrmw.start
186; LA64D-NEXT:    # =>This Loop Header: Depth=1
187; LA64D-NEXT:    # Child Loop BB2_3 Depth 2
188; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
189; LA64D-NEXT:    fmin.s $fa2, $fa2, $fa1
190; LA64D-NEXT:    movfr2gr.s $a1, $fa2
191; LA64D-NEXT:    movfr2gr.s $a2, $fa0
192; LA64D-NEXT:  .LBB2_3: # %atomicrmw.start
193; LA64D-NEXT:    # Parent Loop BB2_1 Depth=1
194; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
195; LA64D-NEXT:    ll.w $a3, $a0, 0
196; LA64D-NEXT:    bne $a3, $a2, .LBB2_5
197; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
198; LA64D-NEXT:    # in Loop: Header=BB2_3 Depth=2
199; LA64D-NEXT:    move $a4, $a1
200; LA64D-NEXT:    sc.w $a4, $a0, 0
201; LA64D-NEXT:    beqz $a4, .LBB2_3
202; LA64D-NEXT:    b .LBB2_6
203; LA64D-NEXT:  .LBB2_5: # %atomicrmw.start
204; LA64D-NEXT:    # in Loop: Header=BB2_1 Depth=1
205; LA64D-NEXT:    dbar 20
206; LA64D-NEXT:  .LBB2_6: # %atomicrmw.start
207; LA64D-NEXT:    # in Loop: Header=BB2_1 Depth=1
208; LA64D-NEXT:    movgr2fr.w $fa0, $a3
209; LA64D-NEXT:    bne $a3, $a2, .LBB2_1
210; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
211; LA64D-NEXT:    ret
212  %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4
213  ret float %v
214}
215
216define float @float_fmax_acquire(ptr %p) nounwind {
217; LA64F-LABEL: float_fmax_acquire:
218; LA64F:       # %bb.0:
219; LA64F-NEXT:    fld.s $fa0, $a0, 0
220; LA64F-NEXT:    addi.w $a1, $zero, 1
221; LA64F-NEXT:    movgr2fr.w $fa1, $a1
222; LA64F-NEXT:    ffint.s.w $fa1, $fa1
223; LA64F-NEXT:    .p2align 4, , 16
224; LA64F-NEXT:  .LBB3_1: # %atomicrmw.start
225; LA64F-NEXT:    # =>This Loop Header: Depth=1
226; LA64F-NEXT:    # Child Loop BB3_3 Depth 2
227; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
228; LA64F-NEXT:    fmax.s $fa2, $fa2, $fa1
229; LA64F-NEXT:    movfr2gr.s $a1, $fa2
230; LA64F-NEXT:    movfr2gr.s $a2, $fa0
231; LA64F-NEXT:  .LBB3_3: # %atomicrmw.start
232; LA64F-NEXT:    # Parent Loop BB3_1 Depth=1
233; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
234; LA64F-NEXT:    ll.w $a3, $a0, 0
235; LA64F-NEXT:    bne $a3, $a2, .LBB3_5
236; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
237; LA64F-NEXT:    # in Loop: Header=BB3_3 Depth=2
238; LA64F-NEXT:    move $a4, $a1
239; LA64F-NEXT:    sc.w $a4, $a0, 0
240; LA64F-NEXT:    beqz $a4, .LBB3_3
241; LA64F-NEXT:    b .LBB3_6
242; LA64F-NEXT:  .LBB3_5: # %atomicrmw.start
243; LA64F-NEXT:    # in Loop: Header=BB3_1 Depth=1
244; LA64F-NEXT:    dbar 20
245; LA64F-NEXT:  .LBB3_6: # %atomicrmw.start
246; LA64F-NEXT:    # in Loop: Header=BB3_1 Depth=1
247; LA64F-NEXT:    movgr2fr.w $fa0, $a3
248; LA64F-NEXT:    bne $a3, $a2, .LBB3_1
249; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
250; LA64F-NEXT:    ret
251;
252; LA64D-LABEL: float_fmax_acquire:
253; LA64D:       # %bb.0:
254; LA64D-NEXT:    fld.s $fa0, $a0, 0
255; LA64D-NEXT:    vldi $vr1, -1168
256; LA64D-NEXT:    .p2align 4, , 16
257; LA64D-NEXT:  .LBB3_1: # %atomicrmw.start
258; LA64D-NEXT:    # =>This Loop Header: Depth=1
259; LA64D-NEXT:    # Child Loop BB3_3 Depth 2
260; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
261; LA64D-NEXT:    fmax.s $fa2, $fa2, $fa1
262; LA64D-NEXT:    movfr2gr.s $a1, $fa2
263; LA64D-NEXT:    movfr2gr.s $a2, $fa0
264; LA64D-NEXT:  .LBB3_3: # %atomicrmw.start
265; LA64D-NEXT:    # Parent Loop BB3_1 Depth=1
266; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
267; LA64D-NEXT:    ll.w $a3, $a0, 0
268; LA64D-NEXT:    bne $a3, $a2, .LBB3_5
269; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
270; LA64D-NEXT:    # in Loop: Header=BB3_3 Depth=2
271; LA64D-NEXT:    move $a4, $a1
272; LA64D-NEXT:    sc.w $a4, $a0, 0
273; LA64D-NEXT:    beqz $a4, .LBB3_3
274; LA64D-NEXT:    b .LBB3_6
275; LA64D-NEXT:  .LBB3_5: # %atomicrmw.start
276; LA64D-NEXT:    # in Loop: Header=BB3_1 Depth=1
277; LA64D-NEXT:    dbar 20
278; LA64D-NEXT:  .LBB3_6: # %atomicrmw.start
279; LA64D-NEXT:    # in Loop: Header=BB3_1 Depth=1
280; LA64D-NEXT:    movgr2fr.w $fa0, $a3
281; LA64D-NEXT:    bne $a3, $a2, .LBB3_1
282; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
283; LA64D-NEXT:    ret
284  %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4
285  ret float %v
286}
287
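; The double cases below only guarantee 4-byte alignment, so they are presumably
; expanded via the __atomic_compare_exchange libcall rather than a native 8-byte
; LL/SC sequence.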
288define double @double_fadd_acquire(ptr %p) nounwind {
289; LA64F-LABEL: double_fadd_acquire:
290; LA64F:       # %bb.0:
291; LA64F-NEXT:    addi.d $sp, $sp, -48
292; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
293; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
294; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
295; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
296; LA64F-NEXT:    move $fp, $a0
297; LA64F-NEXT:    ld.d $s1, $a0, 0
298; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
299; LA64F-NEXT:    .p2align 4, , 16
300; LA64F-NEXT:  .LBB4_1: # %atomicrmw.start
301; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
302; LA64F-NEXT:    move $a0, $s1
303; LA64F-NEXT:    move $a1, $s0
304; LA64F-NEXT:    bl %plt(__adddf3)
305; LA64F-NEXT:    st.d $s1, $sp, 8
306; LA64F-NEXT:    st.d $a0, $sp, 0
307; LA64F-NEXT:    ori $a0, $zero, 8
308; LA64F-NEXT:    addi.d $a2, $sp, 8
309; LA64F-NEXT:    addi.d $a3, $sp, 0
310; LA64F-NEXT:    ori $a4, $zero, 2
311; LA64F-NEXT:    ori $a5, $zero, 2
312; LA64F-NEXT:    move $a1, $fp
313; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
314; LA64F-NEXT:    ld.d $s1, $sp, 8
315; LA64F-NEXT:    beqz $a0, .LBB4_1
316; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
317; LA64F-NEXT:    move $a0, $s1
318; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
319; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
320; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
321; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
322; LA64F-NEXT:    addi.d $sp, $sp, 48
323; LA64F-NEXT:    ret
324;
325; LA64D-LABEL: double_fadd_acquire:
326; LA64D:       # %bb.0:
327; LA64D-NEXT:    addi.d $sp, $sp, -32
328; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
329; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
330; LA64D-NEXT:    move $fp, $a0
331; LA64D-NEXT:    fld.d $fa0, $a0, 0
332; LA64D-NEXT:    .p2align 4, , 16
333; LA64D-NEXT:  .LBB4_1: # %atomicrmw.start
334; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
335; LA64D-NEXT:    vldi $vr1, -912
336; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
337; LA64D-NEXT:    fst.d $fa0, $sp, 8
338; LA64D-NEXT:    fst.d $fa1, $sp, 0
339; LA64D-NEXT:    ori $a0, $zero, 8
340; LA64D-NEXT:    addi.d $a2, $sp, 8
341; LA64D-NEXT:    addi.d $a3, $sp, 0
342; LA64D-NEXT:    ori $a4, $zero, 2
343; LA64D-NEXT:    ori $a5, $zero, 2
344; LA64D-NEXT:    move $a1, $fp
345; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
346; LA64D-NEXT:    fld.d $fa0, $sp, 8
347; LA64D-NEXT:    beqz $a0, .LBB4_1
348; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
349; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
350; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
351; LA64D-NEXT:    addi.d $sp, $sp, 32
352; LA64D-NEXT:    ret
353  %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4
354  ret double %v
355}
356
357define double @double_fsub_acquire(ptr %p) nounwind {
358; LA64F-LABEL: double_fsub_acquire:
359; LA64F:       # %bb.0:
360; LA64F-NEXT:    addi.d $sp, $sp, -48
361; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
362; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
363; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
364; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
365; LA64F-NEXT:    move $fp, $a0
366; LA64F-NEXT:    ld.d $s1, $a0, 0
367; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
368; LA64F-NEXT:    .p2align 4, , 16
369; LA64F-NEXT:  .LBB5_1: # %atomicrmw.start
370; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
371; LA64F-NEXT:    move $a0, $s1
372; LA64F-NEXT:    move $a1, $s0
373; LA64F-NEXT:    bl %plt(__adddf3)
374; LA64F-NEXT:    st.d $s1, $sp, 8
375; LA64F-NEXT:    st.d $a0, $sp, 0
376; LA64F-NEXT:    ori $a0, $zero, 8
377; LA64F-NEXT:    addi.d $a2, $sp, 8
378; LA64F-NEXT:    addi.d $a3, $sp, 0
379; LA64F-NEXT:    ori $a4, $zero, 2
380; LA64F-NEXT:    ori $a5, $zero, 2
381; LA64F-NEXT:    move $a1, $fp
382; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
383; LA64F-NEXT:    ld.d $s1, $sp, 8
384; LA64F-NEXT:    beqz $a0, .LBB5_1
385; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
386; LA64F-NEXT:    move $a0, $s1
387; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
388; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
389; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
390; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
391; LA64F-NEXT:    addi.d $sp, $sp, 48
392; LA64F-NEXT:    ret
393;
394; LA64D-LABEL: double_fsub_acquire:
395; LA64D:       # %bb.0:
396; LA64D-NEXT:    addi.d $sp, $sp, -32
397; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
398; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
399; LA64D-NEXT:    move $fp, $a0
400; LA64D-NEXT:    fld.d $fa0, $a0, 0
401; LA64D-NEXT:    .p2align 4, , 16
402; LA64D-NEXT:  .LBB5_1: # %atomicrmw.start
403; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
404; LA64D-NEXT:    vldi $vr1, -784
405; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
406; LA64D-NEXT:    fst.d $fa0, $sp, 8
407; LA64D-NEXT:    fst.d $fa1, $sp, 0
408; LA64D-NEXT:    ori $a0, $zero, 8
409; LA64D-NEXT:    addi.d $a2, $sp, 8
410; LA64D-NEXT:    addi.d $a3, $sp, 0
411; LA64D-NEXT:    ori $a4, $zero, 2
412; LA64D-NEXT:    ori $a5, $zero, 2
413; LA64D-NEXT:    move $a1, $fp
414; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
415; LA64D-NEXT:    fld.d $fa0, $sp, 8
416; LA64D-NEXT:    beqz $a0, .LBB5_1
417; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
418; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
419; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
420; LA64D-NEXT:    addi.d $sp, $sp, 32
421; LA64D-NEXT:    ret
422  %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4
423  ret double %v
424}
425
426define double @double_fmin_acquire(ptr %p) nounwind {
427; LA64F-LABEL: double_fmin_acquire:
428; LA64F:       # %bb.0:
429; LA64F-NEXT:    addi.d $sp, $sp, -48
430; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
431; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
432; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
433; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
434; LA64F-NEXT:    move $fp, $a0
435; LA64F-NEXT:    ld.d $s1, $a0, 0
436; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
437; LA64F-NEXT:    .p2align 4, , 16
438; LA64F-NEXT:  .LBB6_1: # %atomicrmw.start
439; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
440; LA64F-NEXT:    move $a0, $s1
441; LA64F-NEXT:    move $a1, $s0
442; LA64F-NEXT:    bl %plt(fmin)
443; LA64F-NEXT:    st.d $s1, $sp, 8
444; LA64F-NEXT:    st.d $a0, $sp, 0
445; LA64F-NEXT:    ori $a0, $zero, 8
446; LA64F-NEXT:    addi.d $a2, $sp, 8
447; LA64F-NEXT:    addi.d $a3, $sp, 0
448; LA64F-NEXT:    ori $a4, $zero, 2
449; LA64F-NEXT:    ori $a5, $zero, 2
450; LA64F-NEXT:    move $a1, $fp
451; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
452; LA64F-NEXT:    ld.d $s1, $sp, 8
453; LA64F-NEXT:    beqz $a0, .LBB6_1
454; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
455; LA64F-NEXT:    move $a0, $s1
456; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
457; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
458; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
459; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
460; LA64F-NEXT:    addi.d $sp, $sp, 48
461; LA64F-NEXT:    ret
462;
463; LA64D-LABEL: double_fmin_acquire:
464; LA64D:       # %bb.0:
465; LA64D-NEXT:    addi.d $sp, $sp, -32
466; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
467; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
468; LA64D-NEXT:    move $fp, $a0
469; LA64D-NEXT:    fld.d $fa0, $a0, 0
470; LA64D-NEXT:    .p2align 4, , 16
471; LA64D-NEXT:  .LBB6_1: # %atomicrmw.start
472; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
473; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
474; LA64D-NEXT:    vldi $vr2, -912
475; LA64D-NEXT:    fmin.d $fa1, $fa1, $fa2
476; LA64D-NEXT:    fst.d $fa0, $sp, 8
477; LA64D-NEXT:    fst.d $fa1, $sp, 0
478; LA64D-NEXT:    ori $a0, $zero, 8
479; LA64D-NEXT:    addi.d $a2, $sp, 8
480; LA64D-NEXT:    addi.d $a3, $sp, 0
481; LA64D-NEXT:    ori $a4, $zero, 2
482; LA64D-NEXT:    ori $a5, $zero, 2
483; LA64D-NEXT:    move $a1, $fp
484; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
485; LA64D-NEXT:    fld.d $fa0, $sp, 8
486; LA64D-NEXT:    beqz $a0, .LBB6_1
487; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
488; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
489; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
490; LA64D-NEXT:    addi.d $sp, $sp, 32
491; LA64D-NEXT:    ret
492  %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4
493  ret double %v
494}
495
496define double @double_fmax_acquire(ptr %p) nounwind {
497; LA64F-LABEL: double_fmax_acquire:
498; LA64F:       # %bb.0:
499; LA64F-NEXT:    addi.d $sp, $sp, -48
500; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
501; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
502; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
503; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
504; LA64F-NEXT:    move $fp, $a0
505; LA64F-NEXT:    ld.d $s1, $a0, 0
506; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
507; LA64F-NEXT:    .p2align 4, , 16
508; LA64F-NEXT:  .LBB7_1: # %atomicrmw.start
509; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
510; LA64F-NEXT:    move $a0, $s1
511; LA64F-NEXT:    move $a1, $s0
512; LA64F-NEXT:    bl %plt(fmax)
513; LA64F-NEXT:    st.d $s1, $sp, 8
514; LA64F-NEXT:    st.d $a0, $sp, 0
515; LA64F-NEXT:    ori $a0, $zero, 8
516; LA64F-NEXT:    addi.d $a2, $sp, 8
517; LA64F-NEXT:    addi.d $a3, $sp, 0
518; LA64F-NEXT:    ori $a4, $zero, 2
519; LA64F-NEXT:    ori $a5, $zero, 2
520; LA64F-NEXT:    move $a1, $fp
521; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
522; LA64F-NEXT:    ld.d $s1, $sp, 8
523; LA64F-NEXT:    beqz $a0, .LBB7_1
524; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
525; LA64F-NEXT:    move $a0, $s1
526; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
527; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
528; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
529; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
530; LA64F-NEXT:    addi.d $sp, $sp, 48
531; LA64F-NEXT:    ret
532;
533; LA64D-LABEL: double_fmax_acquire:
534; LA64D:       # %bb.0:
535; LA64D-NEXT:    addi.d $sp, $sp, -32
536; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
537; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
538; LA64D-NEXT:    move $fp, $a0
539; LA64D-NEXT:    fld.d $fa0, $a0, 0
540; LA64D-NEXT:    .p2align 4, , 16
541; LA64D-NEXT:  .LBB7_1: # %atomicrmw.start
542; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
543; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
544; LA64D-NEXT:    vldi $vr2, -912
545; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa2
546; LA64D-NEXT:    fst.d $fa0, $sp, 8
547; LA64D-NEXT:    fst.d $fa1, $sp, 0
548; LA64D-NEXT:    ori $a0, $zero, 8
549; LA64D-NEXT:    addi.d $a2, $sp, 8
550; LA64D-NEXT:    addi.d $a3, $sp, 0
551; LA64D-NEXT:    ori $a4, $zero, 2
552; LA64D-NEXT:    ori $a5, $zero, 2
553; LA64D-NEXT:    move $a1, $fp
554; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
555; LA64D-NEXT:    fld.d $fa0, $sp, 8
556; LA64D-NEXT:    beqz $a0, .LBB7_1
557; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
558; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
559; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
560; LA64D-NEXT:    addi.d $sp, $sp, 32
561; LA64D-NEXT:    ret
562  %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4
563  ret double %v
564}
565
566define float @float_fadd_release(ptr %p) nounwind {
567; LA64F-LABEL: float_fadd_release:
568; LA64F:       # %bb.0:
569; LA64F-NEXT:    fld.s $fa0, $a0, 0
570; LA64F-NEXT:    addi.w $a1, $zero, 1
571; LA64F-NEXT:    movgr2fr.w $fa1, $a1
572; LA64F-NEXT:    ffint.s.w $fa1, $fa1
573; LA64F-NEXT:    .p2align 4, , 16
574; LA64F-NEXT:  .LBB8_1: # %atomicrmw.start
575; LA64F-NEXT:    # =>This Loop Header: Depth=1
576; LA64F-NEXT:    # Child Loop BB8_3 Depth 2
577; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
578; LA64F-NEXT:    movfr2gr.s $a1, $fa2
579; LA64F-NEXT:    movfr2gr.s $a2, $fa0
580; LA64F-NEXT:  .LBB8_3: # %atomicrmw.start
581; LA64F-NEXT:    # Parent Loop BB8_1 Depth=1
582; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
583; LA64F-NEXT:    ll.w $a3, $a0, 0
584; LA64F-NEXT:    bne $a3, $a2, .LBB8_5
585; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
586; LA64F-NEXT:    # in Loop: Header=BB8_3 Depth=2
587; LA64F-NEXT:    move $a4, $a1
588; LA64F-NEXT:    sc.w $a4, $a0, 0
589; LA64F-NEXT:    beqz $a4, .LBB8_3
590; LA64F-NEXT:    b .LBB8_6
591; LA64F-NEXT:  .LBB8_5: # %atomicrmw.start
592; LA64F-NEXT:    # in Loop: Header=BB8_1 Depth=1
593; LA64F-NEXT:    dbar 1792
594; LA64F-NEXT:  .LBB8_6: # %atomicrmw.start
595; LA64F-NEXT:    # in Loop: Header=BB8_1 Depth=1
596; LA64F-NEXT:    movgr2fr.w $fa0, $a3
597; LA64F-NEXT:    bne $a3, $a2, .LBB8_1
598; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
599; LA64F-NEXT:    ret
600;
601; LA64D-LABEL: float_fadd_release:
602; LA64D:       # %bb.0:
603; LA64D-NEXT:    fld.s $fa0, $a0, 0
604; LA64D-NEXT:    vldi $vr1, -1168
605; LA64D-NEXT:    .p2align 4, , 16
606; LA64D-NEXT:  .LBB8_1: # %atomicrmw.start
607; LA64D-NEXT:    # =>This Loop Header: Depth=1
608; LA64D-NEXT:    # Child Loop BB8_3 Depth 2
609; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
610; LA64D-NEXT:    movfr2gr.s $a1, $fa2
611; LA64D-NEXT:    movfr2gr.s $a2, $fa0
612; LA64D-NEXT:  .LBB8_3: # %atomicrmw.start
613; LA64D-NEXT:    # Parent Loop BB8_1 Depth=1
614; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
615; LA64D-NEXT:    ll.w $a3, $a0, 0
616; LA64D-NEXT:    bne $a3, $a2, .LBB8_5
617; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
618; LA64D-NEXT:    # in Loop: Header=BB8_3 Depth=2
619; LA64D-NEXT:    move $a4, $a1
620; LA64D-NEXT:    sc.w $a4, $a0, 0
621; LA64D-NEXT:    beqz $a4, .LBB8_3
622; LA64D-NEXT:    b .LBB8_6
623; LA64D-NEXT:  .LBB8_5: # %atomicrmw.start
624; LA64D-NEXT:    # in Loop: Header=BB8_1 Depth=1
625; LA64D-NEXT:    dbar 1792
626; LA64D-NEXT:  .LBB8_6: # %atomicrmw.start
627; LA64D-NEXT:    # in Loop: Header=BB8_1 Depth=1
628; LA64D-NEXT:    movgr2fr.w $fa0, $a3
629; LA64D-NEXT:    bne $a3, $a2, .LBB8_1
630; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
631; LA64D-NEXT:    ret
632  %v = atomicrmw fadd ptr %p, float 1.0 release, align 4
633  ret float %v
634}
635
636define float @float_fsub_release(ptr %p) nounwind {
637; LA64F-LABEL: float_fsub_release:
638; LA64F:       # %bb.0:
639; LA64F-NEXT:    fld.s $fa0, $a0, 0
640; LA64F-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI9_0)
641; LA64F-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI9_0)
642; LA64F-NEXT:    .p2align 4, , 16
643; LA64F-NEXT:  .LBB9_1: # %atomicrmw.start
644; LA64F-NEXT:    # =>This Loop Header: Depth=1
645; LA64F-NEXT:    # Child Loop BB9_3 Depth 2
646; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
647; LA64F-NEXT:    movfr2gr.s $a1, $fa2
648; LA64F-NEXT:    movfr2gr.s $a2, $fa0
649; LA64F-NEXT:  .LBB9_3: # %atomicrmw.start
650; LA64F-NEXT:    # Parent Loop BB9_1 Depth=1
651; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
652; LA64F-NEXT:    ll.w $a3, $a0, 0
653; LA64F-NEXT:    bne $a3, $a2, .LBB9_5
654; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
655; LA64F-NEXT:    # in Loop: Header=BB9_3 Depth=2
656; LA64F-NEXT:    move $a4, $a1
657; LA64F-NEXT:    sc.w $a4, $a0, 0
658; LA64F-NEXT:    beqz $a4, .LBB9_3
659; LA64F-NEXT:    b .LBB9_6
660; LA64F-NEXT:  .LBB9_5: # %atomicrmw.start
661; LA64F-NEXT:    # in Loop: Header=BB9_1 Depth=1
662; LA64F-NEXT:    dbar 1792
663; LA64F-NEXT:  .LBB9_6: # %atomicrmw.start
664; LA64F-NEXT:    # in Loop: Header=BB9_1 Depth=1
665; LA64F-NEXT:    movgr2fr.w $fa0, $a3
666; LA64F-NEXT:    bne $a3, $a2, .LBB9_1
667; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
668; LA64F-NEXT:    ret
669;
670; LA64D-LABEL: float_fsub_release:
671; LA64D:       # %bb.0:
672; LA64D-NEXT:    fld.s $fa0, $a0, 0
673; LA64D-NEXT:    vldi $vr1, -1040
674; LA64D-NEXT:    .p2align 4, , 16
675; LA64D-NEXT:  .LBB9_1: # %atomicrmw.start
676; LA64D-NEXT:    # =>This Loop Header: Depth=1
677; LA64D-NEXT:    # Child Loop BB9_3 Depth 2
678; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
679; LA64D-NEXT:    movfr2gr.s $a1, $fa2
680; LA64D-NEXT:    movfr2gr.s $a2, $fa0
681; LA64D-NEXT:  .LBB9_3: # %atomicrmw.start
682; LA64D-NEXT:    # Parent Loop BB9_1 Depth=1
683; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
684; LA64D-NEXT:    ll.w $a3, $a0, 0
685; LA64D-NEXT:    bne $a3, $a2, .LBB9_5
686; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
687; LA64D-NEXT:    # in Loop: Header=BB9_3 Depth=2
688; LA64D-NEXT:    move $a4, $a1
689; LA64D-NEXT:    sc.w $a4, $a0, 0
690; LA64D-NEXT:    beqz $a4, .LBB9_3
691; LA64D-NEXT:    b .LBB9_6
692; LA64D-NEXT:  .LBB9_5: # %atomicrmw.start
693; LA64D-NEXT:    # in Loop: Header=BB9_1 Depth=1
694; LA64D-NEXT:    dbar 1792
695; LA64D-NEXT:  .LBB9_6: # %atomicrmw.start
696; LA64D-NEXT:    # in Loop: Header=BB9_1 Depth=1
697; LA64D-NEXT:    movgr2fr.w $fa0, $a3
698; LA64D-NEXT:    bne $a3, $a2, .LBB9_1
699; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
700; LA64D-NEXT:    ret
701  %v = atomicrmw fsub ptr %p, float 1.0 release, align 4
702  ret float %v
703}
704
705define float @float_fmin_release(ptr %p) nounwind {
706; LA64F-LABEL: float_fmin_release:
707; LA64F:       # %bb.0:
708; LA64F-NEXT:    fld.s $fa0, $a0, 0
709; LA64F-NEXT:    addi.w $a1, $zero, 1
710; LA64F-NEXT:    movgr2fr.w $fa1, $a1
711; LA64F-NEXT:    ffint.s.w $fa1, $fa1
712; LA64F-NEXT:    .p2align 4, , 16
713; LA64F-NEXT:  .LBB10_1: # %atomicrmw.start
714; LA64F-NEXT:    # =>This Loop Header: Depth=1
715; LA64F-NEXT:    # Child Loop BB10_3 Depth 2
716; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
717; LA64F-NEXT:    fmin.s $fa2, $fa2, $fa1
718; LA64F-NEXT:    movfr2gr.s $a1, $fa2
719; LA64F-NEXT:    movfr2gr.s $a2, $fa0
720; LA64F-NEXT:  .LBB10_3: # %atomicrmw.start
721; LA64F-NEXT:    # Parent Loop BB10_1 Depth=1
722; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
723; LA64F-NEXT:    ll.w $a3, $a0, 0
724; LA64F-NEXT:    bne $a3, $a2, .LBB10_5
725; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
726; LA64F-NEXT:    # in Loop: Header=BB10_3 Depth=2
727; LA64F-NEXT:    move $a4, $a1
728; LA64F-NEXT:    sc.w $a4, $a0, 0
729; LA64F-NEXT:    beqz $a4, .LBB10_3
730; LA64F-NEXT:    b .LBB10_6
731; LA64F-NEXT:  .LBB10_5: # %atomicrmw.start
732; LA64F-NEXT:    # in Loop: Header=BB10_1 Depth=1
733; LA64F-NEXT:    dbar 1792
734; LA64F-NEXT:  .LBB10_6: # %atomicrmw.start
735; LA64F-NEXT:    # in Loop: Header=BB10_1 Depth=1
736; LA64F-NEXT:    movgr2fr.w $fa0, $a3
737; LA64F-NEXT:    bne $a3, $a2, .LBB10_1
738; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
739; LA64F-NEXT:    ret
740;
741; LA64D-LABEL: float_fmin_release:
742; LA64D:       # %bb.0:
743; LA64D-NEXT:    fld.s $fa0, $a0, 0
744; LA64D-NEXT:    vldi $vr1, -1168
745; LA64D-NEXT:    .p2align 4, , 16
746; LA64D-NEXT:  .LBB10_1: # %atomicrmw.start
747; LA64D-NEXT:    # =>This Loop Header: Depth=1
748; LA64D-NEXT:    # Child Loop BB10_3 Depth 2
749; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
750; LA64D-NEXT:    fmin.s $fa2, $fa2, $fa1
751; LA64D-NEXT:    movfr2gr.s $a1, $fa2
752; LA64D-NEXT:    movfr2gr.s $a2, $fa0
753; LA64D-NEXT:  .LBB10_3: # %atomicrmw.start
754; LA64D-NEXT:    # Parent Loop BB10_1 Depth=1
755; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
756; LA64D-NEXT:    ll.w $a3, $a0, 0
757; LA64D-NEXT:    bne $a3, $a2, .LBB10_5
758; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
759; LA64D-NEXT:    # in Loop: Header=BB10_3 Depth=2
760; LA64D-NEXT:    move $a4, $a1
761; LA64D-NEXT:    sc.w $a4, $a0, 0
762; LA64D-NEXT:    beqz $a4, .LBB10_3
763; LA64D-NEXT:    b .LBB10_6
764; LA64D-NEXT:  .LBB10_5: # %atomicrmw.start
765; LA64D-NEXT:    # in Loop: Header=BB10_1 Depth=1
766; LA64D-NEXT:    dbar 1792
767; LA64D-NEXT:  .LBB10_6: # %atomicrmw.start
768; LA64D-NEXT:    # in Loop: Header=BB10_1 Depth=1
769; LA64D-NEXT:    movgr2fr.w $fa0, $a3
770; LA64D-NEXT:    bne $a3, $a2, .LBB10_1
771; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
772; LA64D-NEXT:    ret
773  %v = atomicrmw fmin ptr %p, float 1.0 release, align 4
774  ret float %v
775}
776
777define float @float_fmax_release(ptr %p) nounwind {
778; LA64F-LABEL: float_fmax_release:
779; LA64F:       # %bb.0:
780; LA64F-NEXT:    fld.s $fa0, $a0, 0
781; LA64F-NEXT:    addi.w $a1, $zero, 1
782; LA64F-NEXT:    movgr2fr.w $fa1, $a1
783; LA64F-NEXT:    ffint.s.w $fa1, $fa1
784; LA64F-NEXT:    .p2align 4, , 16
785; LA64F-NEXT:  .LBB11_1: # %atomicrmw.start
786; LA64F-NEXT:    # =>This Loop Header: Depth=1
787; LA64F-NEXT:    # Child Loop BB11_3 Depth 2
788; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
789; LA64F-NEXT:    fmax.s $fa2, $fa2, $fa1
790; LA64F-NEXT:    movfr2gr.s $a1, $fa2
791; LA64F-NEXT:    movfr2gr.s $a2, $fa0
792; LA64F-NEXT:  .LBB11_3: # %atomicrmw.start
793; LA64F-NEXT:    # Parent Loop BB11_1 Depth=1
794; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
795; LA64F-NEXT:    ll.w $a3, $a0, 0
796; LA64F-NEXT:    bne $a3, $a2, .LBB11_5
797; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
798; LA64F-NEXT:    # in Loop: Header=BB11_3 Depth=2
799; LA64F-NEXT:    move $a4, $a1
800; LA64F-NEXT:    sc.w $a4, $a0, 0
801; LA64F-NEXT:    beqz $a4, .LBB11_3
802; LA64F-NEXT:    b .LBB11_6
803; LA64F-NEXT:  .LBB11_5: # %atomicrmw.start
804; LA64F-NEXT:    # in Loop: Header=BB11_1 Depth=1
805; LA64F-NEXT:    dbar 1792
806; LA64F-NEXT:  .LBB11_6: # %atomicrmw.start
807; LA64F-NEXT:    # in Loop: Header=BB11_1 Depth=1
808; LA64F-NEXT:    movgr2fr.w $fa0, $a3
809; LA64F-NEXT:    bne $a3, $a2, .LBB11_1
810; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
811; LA64F-NEXT:    ret
812;
813; LA64D-LABEL: float_fmax_release:
814; LA64D:       # %bb.0:
815; LA64D-NEXT:    fld.s $fa0, $a0, 0
816; LA64D-NEXT:    vldi $vr1, -1168
817; LA64D-NEXT:    .p2align 4, , 16
818; LA64D-NEXT:  .LBB11_1: # %atomicrmw.start
819; LA64D-NEXT:    # =>This Loop Header: Depth=1
820; LA64D-NEXT:    # Child Loop BB11_3 Depth 2
821; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
822; LA64D-NEXT:    fmax.s $fa2, $fa2, $fa1
823; LA64D-NEXT:    movfr2gr.s $a1, $fa2
824; LA64D-NEXT:    movfr2gr.s $a2, $fa0
825; LA64D-NEXT:  .LBB11_3: # %atomicrmw.start
826; LA64D-NEXT:    # Parent Loop BB11_1 Depth=1
827; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
828; LA64D-NEXT:    ll.w $a3, $a0, 0
829; LA64D-NEXT:    bne $a3, $a2, .LBB11_5
830; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
831; LA64D-NEXT:    # in Loop: Header=BB11_3 Depth=2
832; LA64D-NEXT:    move $a4, $a1
833; LA64D-NEXT:    sc.w $a4, $a0, 0
834; LA64D-NEXT:    beqz $a4, .LBB11_3
835; LA64D-NEXT:    b .LBB11_6
836; LA64D-NEXT:  .LBB11_5: # %atomicrmw.start
837; LA64D-NEXT:    # in Loop: Header=BB11_1 Depth=1
838; LA64D-NEXT:    dbar 1792
839; LA64D-NEXT:  .LBB11_6: # %atomicrmw.start
840; LA64D-NEXT:    # in Loop: Header=BB11_1 Depth=1
841; LA64D-NEXT:    movgr2fr.w $fa0, $a3
842; LA64D-NEXT:    bne $a3, $a2, .LBB11_1
843; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
844; LA64D-NEXT:    ret
845  %v = atomicrmw fmax ptr %p, float 1.0 release, align 4
846  ret float %v
847}
848
849define double @double_fadd_release(ptr %p) nounwind {
850; LA64F-LABEL: double_fadd_release:
851; LA64F:       # %bb.0:
852; LA64F-NEXT:    addi.d $sp, $sp, -48
853; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
854; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
855; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
856; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
857; LA64F-NEXT:    move $fp, $a0
858; LA64F-NEXT:    ld.d $s1, $a0, 0
859; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
860; LA64F-NEXT:    .p2align 4, , 16
861; LA64F-NEXT:  .LBB12_1: # %atomicrmw.start
862; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
863; LA64F-NEXT:    move $a0, $s1
864; LA64F-NEXT:    move $a1, $s0
865; LA64F-NEXT:    bl %plt(__adddf3)
866; LA64F-NEXT:    st.d $s1, $sp, 8
867; LA64F-NEXT:    st.d $a0, $sp, 0
868; LA64F-NEXT:    ori $a0, $zero, 8
869; LA64F-NEXT:    addi.d $a2, $sp, 8
870; LA64F-NEXT:    addi.d $a3, $sp, 0
871; LA64F-NEXT:    ori $a4, $zero, 3
872; LA64F-NEXT:    move $a1, $fp
873; LA64F-NEXT:    move $a5, $zero
874; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
875; LA64F-NEXT:    ld.d $s1, $sp, 8
876; LA64F-NEXT:    beqz $a0, .LBB12_1
877; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
878; LA64F-NEXT:    move $a0, $s1
879; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
880; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
881; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
882; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
883; LA64F-NEXT:    addi.d $sp, $sp, 48
884; LA64F-NEXT:    ret
885;
886; LA64D-LABEL: double_fadd_release:
887; LA64D:       # %bb.0:
888; LA64D-NEXT:    addi.d $sp, $sp, -32
889; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
890; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
891; LA64D-NEXT:    move $fp, $a0
892; LA64D-NEXT:    fld.d $fa0, $a0, 0
893; LA64D-NEXT:    .p2align 4, , 16
894; LA64D-NEXT:  .LBB12_1: # %atomicrmw.start
895; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
896; LA64D-NEXT:    vldi $vr1, -912
897; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
898; LA64D-NEXT:    fst.d $fa0, $sp, 8
899; LA64D-NEXT:    fst.d $fa1, $sp, 0
900; LA64D-NEXT:    ori $a0, $zero, 8
901; LA64D-NEXT:    addi.d $a2, $sp, 8
902; LA64D-NEXT:    addi.d $a3, $sp, 0
903; LA64D-NEXT:    ori $a4, $zero, 3
904; LA64D-NEXT:    move $a1, $fp
905; LA64D-NEXT:    move $a5, $zero
906; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
907; LA64D-NEXT:    fld.d $fa0, $sp, 8
908; LA64D-NEXT:    beqz $a0, .LBB12_1
909; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
910; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
911; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
912; LA64D-NEXT:    addi.d $sp, $sp, 32
913; LA64D-NEXT:    ret
914  %v = atomicrmw fadd ptr %p, double 1.0 release, align 4
915  ret double %v
916}
917
918define double @double_fsub_release(ptr %p) nounwind {
919; LA64F-LABEL: double_fsub_release:
920; LA64F:       # %bb.0:
921; LA64F-NEXT:    addi.d $sp, $sp, -48
922; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
923; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
924; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
925; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
926; LA64F-NEXT:    move $fp, $a0
927; LA64F-NEXT:    ld.d $s1, $a0, 0
928; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
929; LA64F-NEXT:    .p2align 4, , 16
930; LA64F-NEXT:  .LBB13_1: # %atomicrmw.start
931; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
932; LA64F-NEXT:    move $a0, $s1
933; LA64F-NEXT:    move $a1, $s0
934; LA64F-NEXT:    bl %plt(__adddf3)
935; LA64F-NEXT:    st.d $s1, $sp, 8
936; LA64F-NEXT:    st.d $a0, $sp, 0
937; LA64F-NEXT:    ori $a0, $zero, 8
938; LA64F-NEXT:    addi.d $a2, $sp, 8
939; LA64F-NEXT:    addi.d $a3, $sp, 0
940; LA64F-NEXT:    ori $a4, $zero, 3
941; LA64F-NEXT:    move $a1, $fp
942; LA64F-NEXT:    move $a5, $zero
943; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
944; LA64F-NEXT:    ld.d $s1, $sp, 8
945; LA64F-NEXT:    beqz $a0, .LBB13_1
946; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
947; LA64F-NEXT:    move $a0, $s1
948; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
949; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
950; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
951; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
952; LA64F-NEXT:    addi.d $sp, $sp, 48
953; LA64F-NEXT:    ret
954;
955; LA64D-LABEL: double_fsub_release:
956; LA64D:       # %bb.0:
957; LA64D-NEXT:    addi.d $sp, $sp, -32
958; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
959; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
960; LA64D-NEXT:    move $fp, $a0
961; LA64D-NEXT:    fld.d $fa0, $a0, 0
962; LA64D-NEXT:    .p2align 4, , 16
963; LA64D-NEXT:  .LBB13_1: # %atomicrmw.start
964; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
965; LA64D-NEXT:    vldi $vr1, -784
966; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
967; LA64D-NEXT:    fst.d $fa0, $sp, 8
968; LA64D-NEXT:    fst.d $fa1, $sp, 0
969; LA64D-NEXT:    ori $a0, $zero, 8
970; LA64D-NEXT:    addi.d $a2, $sp, 8
971; LA64D-NEXT:    addi.d $a3, $sp, 0
972; LA64D-NEXT:    ori $a4, $zero, 3
973; LA64D-NEXT:    move $a1, $fp
974; LA64D-NEXT:    move $a5, $zero
975; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
976; LA64D-NEXT:    fld.d $fa0, $sp, 8
977; LA64D-NEXT:    beqz $a0, .LBB13_1
978; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
979; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
980; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
981; LA64D-NEXT:    addi.d $sp, $sp, 32
982; LA64D-NEXT:    ret
983  %v = atomicrmw fsub ptr %p, double 1.0 release, align 4
984  ret double %v
985}
986
987define double @double_fmin_release(ptr %p) nounwind {
988; LA64F-LABEL: double_fmin_release:
989; LA64F:       # %bb.0:
990; LA64F-NEXT:    addi.d $sp, $sp, -48
991; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
992; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
993; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
994; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
995; LA64F-NEXT:    move $fp, $a0
996; LA64F-NEXT:    ld.d $s1, $a0, 0
997; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
998; LA64F-NEXT:    .p2align 4, , 16
999; LA64F-NEXT:  .LBB14_1: # %atomicrmw.start
1000; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
1001; LA64F-NEXT:    move $a0, $s1
1002; LA64F-NEXT:    move $a1, $s0
1003; LA64F-NEXT:    bl %plt(fmin)
1004; LA64F-NEXT:    st.d $s1, $sp, 8
1005; LA64F-NEXT:    st.d $a0, $sp, 0
1006; LA64F-NEXT:    ori $a0, $zero, 8
1007; LA64F-NEXT:    addi.d $a2, $sp, 8
1008; LA64F-NEXT:    addi.d $a3, $sp, 0
1009; LA64F-NEXT:    ori $a4, $zero, 3
1010; LA64F-NEXT:    move $a1, $fp
1011; LA64F-NEXT:    move $a5, $zero
1012; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
1013; LA64F-NEXT:    ld.d $s1, $sp, 8
1014; LA64F-NEXT:    beqz $a0, .LBB14_1
1015; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1016; LA64F-NEXT:    move $a0, $s1
1017; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
1018; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
1019; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
1020; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
1021; LA64F-NEXT:    addi.d $sp, $sp, 48
1022; LA64F-NEXT:    ret
1023;
1024; LA64D-LABEL: double_fmin_release:
1025; LA64D:       # %bb.0:
1026; LA64D-NEXT:    addi.d $sp, $sp, -32
1027; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
1028; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
1029; LA64D-NEXT:    move $fp, $a0
1030; LA64D-NEXT:    fld.d $fa0, $a0, 0
1031; LA64D-NEXT:    .p2align 4, , 16
1032; LA64D-NEXT:  .LBB14_1: # %atomicrmw.start
1033; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
1034; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
1035; LA64D-NEXT:    vldi $vr2, -912
1036; LA64D-NEXT:    fmin.d $fa1, $fa1, $fa2
1037; LA64D-NEXT:    fst.d $fa0, $sp, 8
1038; LA64D-NEXT:    fst.d $fa1, $sp, 0
1039; LA64D-NEXT:    ori $a0, $zero, 8
1040; LA64D-NEXT:    addi.d $a2, $sp, 8
1041; LA64D-NEXT:    addi.d $a3, $sp, 0
1042; LA64D-NEXT:    ori $a4, $zero, 3
1043; LA64D-NEXT:    move $a1, $fp
1044; LA64D-NEXT:    move $a5, $zero
1045; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
1046; LA64D-NEXT:    fld.d $fa0, $sp, 8
1047; LA64D-NEXT:    beqz $a0, .LBB14_1
1048; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1049; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
1050; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
1051; LA64D-NEXT:    addi.d $sp, $sp, 32
1052; LA64D-NEXT:    ret
1053  %v = atomicrmw fmin ptr %p, double 1.0 release, align 4
1054  ret double %v
1055}
1056
1057define double @double_fmax_release(ptr %p) nounwind {
1058; LA64F-LABEL: double_fmax_release:
1059; LA64F:       # %bb.0:
1060; LA64F-NEXT:    addi.d $sp, $sp, -48
1061; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
1062; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
1063; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
1064; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
1065; LA64F-NEXT:    move $fp, $a0
1066; LA64F-NEXT:    ld.d $s1, $a0, 0
1067; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
1068; LA64F-NEXT:    .p2align 4, , 16
1069; LA64F-NEXT:  .LBB15_1: # %atomicrmw.start
1070; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
1071; LA64F-NEXT:    move $a0, $s1
1072; LA64F-NEXT:    move $a1, $s0
1073; LA64F-NEXT:    bl %plt(fmax)
1074; LA64F-NEXT:    st.d $s1, $sp, 8
1075; LA64F-NEXT:    st.d $a0, $sp, 0
1076; LA64F-NEXT:    ori $a0, $zero, 8
1077; LA64F-NEXT:    addi.d $a2, $sp, 8
1078; LA64F-NEXT:    addi.d $a3, $sp, 0
1079; LA64F-NEXT:    ori $a4, $zero, 3
1080; LA64F-NEXT:    move $a1, $fp
1081; LA64F-NEXT:    move $a5, $zero
1082; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
1083; LA64F-NEXT:    ld.d $s1, $sp, 8
1084; LA64F-NEXT:    beqz $a0, .LBB15_1
1085; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1086; LA64F-NEXT:    move $a0, $s1
1087; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
1088; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
1089; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
1090; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
1091; LA64F-NEXT:    addi.d $sp, $sp, 48
1092; LA64F-NEXT:    ret
1093;
1094; LA64D-LABEL: double_fmax_release:
1095; LA64D:       # %bb.0:
1096; LA64D-NEXT:    addi.d $sp, $sp, -32
1097; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
1098; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
1099; LA64D-NEXT:    move $fp, $a0
1100; LA64D-NEXT:    fld.d $fa0, $a0, 0
1101; LA64D-NEXT:    .p2align 4, , 16
1102; LA64D-NEXT:  .LBB15_1: # %atomicrmw.start
1103; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
1104; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
1105; LA64D-NEXT:    vldi $vr2, -912
1106; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa2
1107; LA64D-NEXT:    fst.d $fa0, $sp, 8
1108; LA64D-NEXT:    fst.d $fa1, $sp, 0
1109; LA64D-NEXT:    ori $a0, $zero, 8
1110; LA64D-NEXT:    addi.d $a2, $sp, 8
1111; LA64D-NEXT:    addi.d $a3, $sp, 0
1112; LA64D-NEXT:    ori $a4, $zero, 3
1113; LA64D-NEXT:    move $a1, $fp
1114; LA64D-NEXT:    move $a5, $zero
1115; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
1116; LA64D-NEXT:    fld.d $fa0, $sp, 8
1117; LA64D-NEXT:    beqz $a0, .LBB15_1
1118; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1119; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
1120; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
1121; LA64D-NEXT:    addi.d $sp, $sp, 32
1122; LA64D-NEXT:    ret
1123  %v = atomicrmw fmax ptr %p, double 1.0 release, align 4
1124  ret double %v
1125}
1126
1127define float @float_fadd_acq_rel(ptr %p) nounwind {
1128; LA64F-LABEL: float_fadd_acq_rel:
1129; LA64F:       # %bb.0:
1130; LA64F-NEXT:    fld.s $fa0, $a0, 0
1131; LA64F-NEXT:    addi.w $a1, $zero, 1
1132; LA64F-NEXT:    movgr2fr.w $fa1, $a1
1133; LA64F-NEXT:    ffint.s.w $fa1, $fa1
1134; LA64F-NEXT:    .p2align 4, , 16
1135; LA64F-NEXT:  .LBB16_1: # %atomicrmw.start
1136; LA64F-NEXT:    # =>This Loop Header: Depth=1
1137; LA64F-NEXT:    # Child Loop BB16_3 Depth 2
1138; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
1139; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1140; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1141; LA64F-NEXT:  .LBB16_3: # %atomicrmw.start
1142; LA64F-NEXT:    # Parent Loop BB16_1 Depth=1
1143; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1144; LA64F-NEXT:    ll.w $a3, $a0, 0
1145; LA64F-NEXT:    bne $a3, $a2, .LBB16_5
1146; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1147; LA64F-NEXT:    # in Loop: Header=BB16_3 Depth=2
1148; LA64F-NEXT:    move $a4, $a1
1149; LA64F-NEXT:    sc.w $a4, $a0, 0
1150; LA64F-NEXT:    beqz $a4, .LBB16_3
1151; LA64F-NEXT:    b .LBB16_6
1152; LA64F-NEXT:  .LBB16_5: # %atomicrmw.start
1153; LA64F-NEXT:    # in Loop: Header=BB16_1 Depth=1
1154; LA64F-NEXT:    dbar 20
1155; LA64F-NEXT:  .LBB16_6: # %atomicrmw.start
1156; LA64F-NEXT:    # in Loop: Header=BB16_1 Depth=1
1157; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1158; LA64F-NEXT:    bne $a3, $a2, .LBB16_1
1159; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1160; LA64F-NEXT:    ret
1161;
1162; LA64D-LABEL: float_fadd_acq_rel:
1163; LA64D:       # %bb.0:
1164; LA64D-NEXT:    fld.s $fa0, $a0, 0
1165; LA64D-NEXT:    vldi $vr1, -1168
1166; LA64D-NEXT:    .p2align 4, , 16
1167; LA64D-NEXT:  .LBB16_1: # %atomicrmw.start
1168; LA64D-NEXT:    # =>This Loop Header: Depth=1
1169; LA64D-NEXT:    # Child Loop BB16_3 Depth 2
1170; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
1171; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1172; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1173; LA64D-NEXT:  .LBB16_3: # %atomicrmw.start
1174; LA64D-NEXT:    # Parent Loop BB16_1 Depth=1
1175; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1176; LA64D-NEXT:    ll.w $a3, $a0, 0
1177; LA64D-NEXT:    bne $a3, $a2, .LBB16_5
1178; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1179; LA64D-NEXT:    # in Loop: Header=BB16_3 Depth=2
1180; LA64D-NEXT:    move $a4, $a1
1181; LA64D-NEXT:    sc.w $a4, $a0, 0
1182; LA64D-NEXT:    beqz $a4, .LBB16_3
1183; LA64D-NEXT:    b .LBB16_6
1184; LA64D-NEXT:  .LBB16_5: # %atomicrmw.start
1185; LA64D-NEXT:    # in Loop: Header=BB16_1 Depth=1
1186; LA64D-NEXT:    dbar 20
1187; LA64D-NEXT:  .LBB16_6: # %atomicrmw.start
1188; LA64D-NEXT:    # in Loop: Header=BB16_1 Depth=1
1189; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1190; LA64D-NEXT:    bne $a3, $a2, .LBB16_1
1191; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1192; LA64D-NEXT:    ret
1193  %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4
1194  ret float %v
1195}
1196
1197define float @float_fsub_acq_rel(ptr %p) nounwind {
1198; LA64F-LABEL: float_fsub_acq_rel:
1199; LA64F:       # %bb.0:
1200; LA64F-NEXT:    fld.s $fa0, $a0, 0
1201; LA64F-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI17_0)
1202; LA64F-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI17_0)
1203; LA64F-NEXT:    .p2align 4, , 16
1204; LA64F-NEXT:  .LBB17_1: # %atomicrmw.start
1205; LA64F-NEXT:    # =>This Loop Header: Depth=1
1206; LA64F-NEXT:    # Child Loop BB17_3 Depth 2
1207; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
1208; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1209; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1210; LA64F-NEXT:  .LBB17_3: # %atomicrmw.start
1211; LA64F-NEXT:    # Parent Loop BB17_1 Depth=1
1212; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1213; LA64F-NEXT:    ll.w $a3, $a0, 0
1214; LA64F-NEXT:    bne $a3, $a2, .LBB17_5
1215; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1216; LA64F-NEXT:    # in Loop: Header=BB17_3 Depth=2
1217; LA64F-NEXT:    move $a4, $a1
1218; LA64F-NEXT:    sc.w $a4, $a0, 0
1219; LA64F-NEXT:    beqz $a4, .LBB17_3
1220; LA64F-NEXT:    b .LBB17_6
1221; LA64F-NEXT:  .LBB17_5: # %atomicrmw.start
1222; LA64F-NEXT:    # in Loop: Header=BB17_1 Depth=1
1223; LA64F-NEXT:    dbar 20
1224; LA64F-NEXT:  .LBB17_6: # %atomicrmw.start
1225; LA64F-NEXT:    # in Loop: Header=BB17_1 Depth=1
1226; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1227; LA64F-NEXT:    bne $a3, $a2, .LBB17_1
1228; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1229; LA64F-NEXT:    ret
1230;
1231; LA64D-LABEL: float_fsub_acq_rel:
1232; LA64D:       # %bb.0:
1233; LA64D-NEXT:    fld.s $fa0, $a0, 0
1234; LA64D-NEXT:    vldi $vr1, -1040
1235; LA64D-NEXT:    .p2align 4, , 16
1236; LA64D-NEXT:  .LBB17_1: # %atomicrmw.start
1237; LA64D-NEXT:    # =>This Loop Header: Depth=1
1238; LA64D-NEXT:    # Child Loop BB17_3 Depth 2
1239; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
1240; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1241; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1242; LA64D-NEXT:  .LBB17_3: # %atomicrmw.start
1243; LA64D-NEXT:    # Parent Loop BB17_1 Depth=1
1244; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1245; LA64D-NEXT:    ll.w $a3, $a0, 0
1246; LA64D-NEXT:    bne $a3, $a2, .LBB17_5
1247; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1248; LA64D-NEXT:    # in Loop: Header=BB17_3 Depth=2
1249; LA64D-NEXT:    move $a4, $a1
1250; LA64D-NEXT:    sc.w $a4, $a0, 0
1251; LA64D-NEXT:    beqz $a4, .LBB17_3
1252; LA64D-NEXT:    b .LBB17_6
1253; LA64D-NEXT:  .LBB17_5: # %atomicrmw.start
1254; LA64D-NEXT:    # in Loop: Header=BB17_1 Depth=1
1255; LA64D-NEXT:    dbar 20
1256; LA64D-NEXT:  .LBB17_6: # %atomicrmw.start
1257; LA64D-NEXT:    # in Loop: Header=BB17_1 Depth=1
1258; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1259; LA64D-NEXT:    bne $a3, $a2, .LBB17_1
1260; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1261; LA64D-NEXT:    ret
1262  %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4
1263  ret float %v
1264}
1265
1266define float @float_fmin_acq_rel(ptr %p) nounwind {
1267; LA64F-LABEL: float_fmin_acq_rel:
1268; LA64F:       # %bb.0:
1269; LA64F-NEXT:    fld.s $fa0, $a0, 0
1270; LA64F-NEXT:    addi.w $a1, $zero, 1
1271; LA64F-NEXT:    movgr2fr.w $fa1, $a1
1272; LA64F-NEXT:    ffint.s.w $fa1, $fa1
1273; LA64F-NEXT:    .p2align 4, , 16
1274; LA64F-NEXT:  .LBB18_1: # %atomicrmw.start
1275; LA64F-NEXT:    # =>This Loop Header: Depth=1
1276; LA64F-NEXT:    # Child Loop BB18_3 Depth 2
1277; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
1278; LA64F-NEXT:    fmin.s $fa2, $fa2, $fa1
1279; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1280; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1281; LA64F-NEXT:  .LBB18_3: # %atomicrmw.start
1282; LA64F-NEXT:    # Parent Loop BB18_1 Depth=1
1283; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1284; LA64F-NEXT:    ll.w $a3, $a0, 0
1285; LA64F-NEXT:    bne $a3, $a2, .LBB18_5
1286; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1287; LA64F-NEXT:    # in Loop: Header=BB18_3 Depth=2
1288; LA64F-NEXT:    move $a4, $a1
1289; LA64F-NEXT:    sc.w $a4, $a0, 0
1290; LA64F-NEXT:    beqz $a4, .LBB18_3
1291; LA64F-NEXT:    b .LBB18_6
1292; LA64F-NEXT:  .LBB18_5: # %atomicrmw.start
1293; LA64F-NEXT:    # in Loop: Header=BB18_1 Depth=1
1294; LA64F-NEXT:    dbar 20
1295; LA64F-NEXT:  .LBB18_6: # %atomicrmw.start
1296; LA64F-NEXT:    # in Loop: Header=BB18_1 Depth=1
1297; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1298; LA64F-NEXT:    bne $a3, $a2, .LBB18_1
1299; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1300; LA64F-NEXT:    ret
1301;
1302; LA64D-LABEL: float_fmin_acq_rel:
1303; LA64D:       # %bb.0:
1304; LA64D-NEXT:    fld.s $fa0, $a0, 0
1305; LA64D-NEXT:    vldi $vr1, -1168
1306; LA64D-NEXT:    .p2align 4, , 16
1307; LA64D-NEXT:  .LBB18_1: # %atomicrmw.start
1308; LA64D-NEXT:    # =>This Loop Header: Depth=1
1309; LA64D-NEXT:    # Child Loop BB18_3 Depth 2
1310; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
1311; LA64D-NEXT:    fmin.s $fa2, $fa2, $fa1
1312; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1313; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1314; LA64D-NEXT:  .LBB18_3: # %atomicrmw.start
1315; LA64D-NEXT:    # Parent Loop BB18_1 Depth=1
1316; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1317; LA64D-NEXT:    ll.w $a3, $a0, 0
1318; LA64D-NEXT:    bne $a3, $a2, .LBB18_5
1319; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1320; LA64D-NEXT:    # in Loop: Header=BB18_3 Depth=2
1321; LA64D-NEXT:    move $a4, $a1
1322; LA64D-NEXT:    sc.w $a4, $a0, 0
1323; LA64D-NEXT:    beqz $a4, .LBB18_3
1324; LA64D-NEXT:    b .LBB18_6
1325; LA64D-NEXT:  .LBB18_5: # %atomicrmw.start
1326; LA64D-NEXT:    # in Loop: Header=BB18_1 Depth=1
1327; LA64D-NEXT:    dbar 20
1328; LA64D-NEXT:  .LBB18_6: # %atomicrmw.start
1329; LA64D-NEXT:    # in Loop: Header=BB18_1 Depth=1
1330; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1331; LA64D-NEXT:    bne $a3, $a2, .LBB18_1
1332; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1333; LA64D-NEXT:    ret
1334  %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4
1335  ret float %v
1336}
1337
1338define float @float_fmax_acq_rel(ptr %p) nounwind {
1339; LA64F-LABEL: float_fmax_acq_rel:
1340; LA64F:       # %bb.0:
1341; LA64F-NEXT:    fld.s $fa0, $a0, 0
1342; LA64F-NEXT:    addi.w $a1, $zero, 1
1343; LA64F-NEXT:    movgr2fr.w $fa1, $a1
1344; LA64F-NEXT:    ffint.s.w $fa1, $fa1
1345; LA64F-NEXT:    .p2align 4, , 16
1346; LA64F-NEXT:  .LBB19_1: # %atomicrmw.start
1347; LA64F-NEXT:    # =>This Loop Header: Depth=1
1348; LA64F-NEXT:    # Child Loop BB19_3 Depth 2
1349; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
1350; LA64F-NEXT:    fmax.s $fa2, $fa2, $fa1
1351; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1352; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1353; LA64F-NEXT:  .LBB19_3: # %atomicrmw.start
1354; LA64F-NEXT:    # Parent Loop BB19_1 Depth=1
1355; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1356; LA64F-NEXT:    ll.w $a3, $a0, 0
1357; LA64F-NEXT:    bne $a3, $a2, .LBB19_5
1358; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1359; LA64F-NEXT:    # in Loop: Header=BB19_3 Depth=2
1360; LA64F-NEXT:    move $a4, $a1
1361; LA64F-NEXT:    sc.w $a4, $a0, 0
1362; LA64F-NEXT:    beqz $a4, .LBB19_3
1363; LA64F-NEXT:    b .LBB19_6
1364; LA64F-NEXT:  .LBB19_5: # %atomicrmw.start
1365; LA64F-NEXT:    # in Loop: Header=BB19_1 Depth=1
1366; LA64F-NEXT:    dbar 20
1367; LA64F-NEXT:  .LBB19_6: # %atomicrmw.start
1368; LA64F-NEXT:    # in Loop: Header=BB19_1 Depth=1
1369; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1370; LA64F-NEXT:    bne $a3, $a2, .LBB19_1
1371; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1372; LA64F-NEXT:    ret
1373;
1374; LA64D-LABEL: float_fmax_acq_rel:
1375; LA64D:       # %bb.0:
1376; LA64D-NEXT:    fld.s $fa0, $a0, 0
1377; LA64D-NEXT:    vldi $vr1, -1168
1378; LA64D-NEXT:    .p2align 4, , 16
1379; LA64D-NEXT:  .LBB19_1: # %atomicrmw.start
1380; LA64D-NEXT:    # =>This Loop Header: Depth=1
1381; LA64D-NEXT:    # Child Loop BB19_3 Depth 2
1382; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
1383; LA64D-NEXT:    fmax.s $fa2, $fa2, $fa1
1384; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1385; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1386; LA64D-NEXT:  .LBB19_3: # %atomicrmw.start
1387; LA64D-NEXT:    # Parent Loop BB19_1 Depth=1
1388; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1389; LA64D-NEXT:    ll.w $a3, $a0, 0
1390; LA64D-NEXT:    bne $a3, $a2, .LBB19_5
1391; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1392; LA64D-NEXT:    # in Loop: Header=BB19_3 Depth=2
1393; LA64D-NEXT:    move $a4, $a1
1394; LA64D-NEXT:    sc.w $a4, $a0, 0
1395; LA64D-NEXT:    beqz $a4, .LBB19_3
1396; LA64D-NEXT:    b .LBB19_6
1397; LA64D-NEXT:  .LBB19_5: # %atomicrmw.start
1398; LA64D-NEXT:    # in Loop: Header=BB19_1 Depth=1
1399; LA64D-NEXT:    dbar 20
1400; LA64D-NEXT:  .LBB19_6: # %atomicrmw.start
1401; LA64D-NEXT:    # in Loop: Header=BB19_1 Depth=1
1402; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1403; LA64D-NEXT:    bne $a3, $a2, .LBB19_1
1404; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1405; LA64D-NEXT:    ret
1406  %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4
1407  ret float %v
1408}
1409
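; Note: the double tests are only 4-byte aligned, below the natural 8-byte
; alignment, so they are expanded to __atomic_compare_exchange libcall loops
; rather than inline ll.d/sc.d sequences; on LA64F the FP arithmetic itself is
; also a libcall (__adddf3, fmin, fmax). For these acq_rel tests the ordering
; arguments in $a4/$a5 are 4 (acq_rel) and 2 (acquire).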
1410define double @double_fadd_acq_rel(ptr %p) nounwind {
1411; LA64F-LABEL: double_fadd_acq_rel:
1412; LA64F:       # %bb.0:
1413; LA64F-NEXT:    addi.d $sp, $sp, -48
1414; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
1415; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
1416; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
1417; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
1418; LA64F-NEXT:    move $fp, $a0
1419; LA64F-NEXT:    ld.d $s1, $a0, 0
1420; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
1421; LA64F-NEXT:    .p2align 4, , 16
1422; LA64F-NEXT:  .LBB20_1: # %atomicrmw.start
1423; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
1424; LA64F-NEXT:    move $a0, $s1
1425; LA64F-NEXT:    move $a1, $s0
1426; LA64F-NEXT:    bl %plt(__adddf3)
1427; LA64F-NEXT:    st.d $s1, $sp, 8
1428; LA64F-NEXT:    st.d $a0, $sp, 0
1429; LA64F-NEXT:    ori $a0, $zero, 8
1430; LA64F-NEXT:    addi.d $a2, $sp, 8
1431; LA64F-NEXT:    addi.d $a3, $sp, 0
1432; LA64F-NEXT:    ori $a4, $zero, 4
1433; LA64F-NEXT:    ori $a5, $zero, 2
1434; LA64F-NEXT:    move $a1, $fp
1435; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
1436; LA64F-NEXT:    ld.d $s1, $sp, 8
1437; LA64F-NEXT:    beqz $a0, .LBB20_1
1438; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1439; LA64F-NEXT:    move $a0, $s1
1440; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
1441; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
1442; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
1443; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
1444; LA64F-NEXT:    addi.d $sp, $sp, 48
1445; LA64F-NEXT:    ret
1446;
1447; LA64D-LABEL: double_fadd_acq_rel:
1448; LA64D:       # %bb.0:
1449; LA64D-NEXT:    addi.d $sp, $sp, -32
1450; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
1451; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
1452; LA64D-NEXT:    move $fp, $a0
1453; LA64D-NEXT:    fld.d $fa0, $a0, 0
1454; LA64D-NEXT:    .p2align 4, , 16
1455; LA64D-NEXT:  .LBB20_1: # %atomicrmw.start
1456; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
1457; LA64D-NEXT:    vldi $vr1, -912
1458; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
1459; LA64D-NEXT:    fst.d $fa0, $sp, 8
1460; LA64D-NEXT:    fst.d $fa1, $sp, 0
1461; LA64D-NEXT:    ori $a0, $zero, 8
1462; LA64D-NEXT:    addi.d $a2, $sp, 8
1463; LA64D-NEXT:    addi.d $a3, $sp, 0
1464; LA64D-NEXT:    ori $a4, $zero, 4
1465; LA64D-NEXT:    ori $a5, $zero, 2
1466; LA64D-NEXT:    move $a1, $fp
1467; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
1468; LA64D-NEXT:    fld.d $fa0, $sp, 8
1469; LA64D-NEXT:    beqz $a0, .LBB20_1
1470; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1471; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
1472; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
1473; LA64D-NEXT:    addi.d $sp, $sp, 32
1474; LA64D-NEXT:    ret
1475  %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4
1476  ret double %v
1477}
1478
1479define double @double_fsub_acq_rel(ptr %p) nounwind {
1480; LA64F-LABEL: double_fsub_acq_rel:
1481; LA64F:       # %bb.0:
1482; LA64F-NEXT:    addi.d $sp, $sp, -48
1483; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
1484; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
1485; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
1486; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
1487; LA64F-NEXT:    move $fp, $a0
1488; LA64F-NEXT:    ld.d $s1, $a0, 0
1489; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
1490; LA64F-NEXT:    .p2align 4, , 16
1491; LA64F-NEXT:  .LBB21_1: # %atomicrmw.start
1492; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
1493; LA64F-NEXT:    move $a0, $s1
1494; LA64F-NEXT:    move $a1, $s0
1495; LA64F-NEXT:    bl %plt(__adddf3)
1496; LA64F-NEXT:    st.d $s1, $sp, 8
1497; LA64F-NEXT:    st.d $a0, $sp, 0
1498; LA64F-NEXT:    ori $a0, $zero, 8
1499; LA64F-NEXT:    addi.d $a2, $sp, 8
1500; LA64F-NEXT:    addi.d $a3, $sp, 0
1501; LA64F-NEXT:    ori $a4, $zero, 4
1502; LA64F-NEXT:    ori $a5, $zero, 2
1503; LA64F-NEXT:    move $a1, $fp
1504; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
1505; LA64F-NEXT:    ld.d $s1, $sp, 8
1506; LA64F-NEXT:    beqz $a0, .LBB21_1
1507; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1508; LA64F-NEXT:    move $a0, $s1
1509; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
1510; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
1511; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
1512; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
1513; LA64F-NEXT:    addi.d $sp, $sp, 48
1514; LA64F-NEXT:    ret
1515;
1516; LA64D-LABEL: double_fsub_acq_rel:
1517; LA64D:       # %bb.0:
1518; LA64D-NEXT:    addi.d $sp, $sp, -32
1519; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
1520; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
1521; LA64D-NEXT:    move $fp, $a0
1522; LA64D-NEXT:    fld.d $fa0, $a0, 0
1523; LA64D-NEXT:    .p2align 4, , 16
1524; LA64D-NEXT:  .LBB21_1: # %atomicrmw.start
1525; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
1526; LA64D-NEXT:    vldi $vr1, -784
1527; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
1528; LA64D-NEXT:    fst.d $fa0, $sp, 8
1529; LA64D-NEXT:    fst.d $fa1, $sp, 0
1530; LA64D-NEXT:    ori $a0, $zero, 8
1531; LA64D-NEXT:    addi.d $a2, $sp, 8
1532; LA64D-NEXT:    addi.d $a3, $sp, 0
1533; LA64D-NEXT:    ori $a4, $zero, 4
1534; LA64D-NEXT:    ori $a5, $zero, 2
1535; LA64D-NEXT:    move $a1, $fp
1536; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
1537; LA64D-NEXT:    fld.d $fa0, $sp, 8
1538; LA64D-NEXT:    beqz $a0, .LBB21_1
1539; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1540; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
1541; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
1542; LA64D-NEXT:    addi.d $sp, $sp, 32
1543; LA64D-NEXT:    ret
1544  %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4
1545  ret double %v
1546}
1547
1548define double @double_fmin_acq_rel(ptr %p) nounwind {
1549; LA64F-LABEL: double_fmin_acq_rel:
1550; LA64F:       # %bb.0:
1551; LA64F-NEXT:    addi.d $sp, $sp, -48
1552; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
1553; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
1554; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
1555; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
1556; LA64F-NEXT:    move $fp, $a0
1557; LA64F-NEXT:    ld.d $s1, $a0, 0
1558; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
1559; LA64F-NEXT:    .p2align 4, , 16
1560; LA64F-NEXT:  .LBB22_1: # %atomicrmw.start
1561; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
1562; LA64F-NEXT:    move $a0, $s1
1563; LA64F-NEXT:    move $a1, $s0
1564; LA64F-NEXT:    bl %plt(fmin)
1565; LA64F-NEXT:    st.d $s1, $sp, 8
1566; LA64F-NEXT:    st.d $a0, $sp, 0
1567; LA64F-NEXT:    ori $a0, $zero, 8
1568; LA64F-NEXT:    addi.d $a2, $sp, 8
1569; LA64F-NEXT:    addi.d $a3, $sp, 0
1570; LA64F-NEXT:    ori $a4, $zero, 4
1571; LA64F-NEXT:    ori $a5, $zero, 2
1572; LA64F-NEXT:    move $a1, $fp
1573; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
1574; LA64F-NEXT:    ld.d $s1, $sp, 8
1575; LA64F-NEXT:    beqz $a0, .LBB22_1
1576; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1577; LA64F-NEXT:    move $a0, $s1
1578; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
1579; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
1580; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
1581; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
1582; LA64F-NEXT:    addi.d $sp, $sp, 48
1583; LA64F-NEXT:    ret
1584;
1585; LA64D-LABEL: double_fmin_acq_rel:
1586; LA64D:       # %bb.0:
1587; LA64D-NEXT:    addi.d $sp, $sp, -32
1588; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
1589; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
1590; LA64D-NEXT:    move $fp, $a0
1591; LA64D-NEXT:    fld.d $fa0, $a0, 0
1592; LA64D-NEXT:    .p2align 4, , 16
1593; LA64D-NEXT:  .LBB22_1: # %atomicrmw.start
1594; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
1595; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
1596; LA64D-NEXT:    vldi $vr2, -912
1597; LA64D-NEXT:    fmin.d $fa1, $fa1, $fa2
1598; LA64D-NEXT:    fst.d $fa0, $sp, 8
1599; LA64D-NEXT:    fst.d $fa1, $sp, 0
1600; LA64D-NEXT:    ori $a0, $zero, 8
1601; LA64D-NEXT:    addi.d $a2, $sp, 8
1602; LA64D-NEXT:    addi.d $a3, $sp, 0
1603; LA64D-NEXT:    ori $a4, $zero, 4
1604; LA64D-NEXT:    ori $a5, $zero, 2
1605; LA64D-NEXT:    move $a1, $fp
1606; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
1607; LA64D-NEXT:    fld.d $fa0, $sp, 8
1608; LA64D-NEXT:    beqz $a0, .LBB22_1
1609; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1610; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
1611; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
1612; LA64D-NEXT:    addi.d $sp, $sp, 32
1613; LA64D-NEXT:    ret
1614  %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4
1615  ret double %v
1616}
1617
1618define double @double_fmax_acq_rel(ptr %p) nounwind {
1619; LA64F-LABEL: double_fmax_acq_rel:
1620; LA64F:       # %bb.0:
1621; LA64F-NEXT:    addi.d $sp, $sp, -48
1622; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
1623; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
1624; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
1625; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
1626; LA64F-NEXT:    move $fp, $a0
1627; LA64F-NEXT:    ld.d $s1, $a0, 0
1628; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
1629; LA64F-NEXT:    .p2align 4, , 16
1630; LA64F-NEXT:  .LBB23_1: # %atomicrmw.start
1631; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
1632; LA64F-NEXT:    move $a0, $s1
1633; LA64F-NEXT:    move $a1, $s0
1634; LA64F-NEXT:    bl %plt(fmax)
1635; LA64F-NEXT:    st.d $s1, $sp, 8
1636; LA64F-NEXT:    st.d $a0, $sp, 0
1637; LA64F-NEXT:    ori $a0, $zero, 8
1638; LA64F-NEXT:    addi.d $a2, $sp, 8
1639; LA64F-NEXT:    addi.d $a3, $sp, 0
1640; LA64F-NEXT:    ori $a4, $zero, 4
1641; LA64F-NEXT:    ori $a5, $zero, 2
1642; LA64F-NEXT:    move $a1, $fp
1643; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
1644; LA64F-NEXT:    ld.d $s1, $sp, 8
1645; LA64F-NEXT:    beqz $a0, .LBB23_1
1646; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1647; LA64F-NEXT:    move $a0, $s1
1648; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
1649; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
1650; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
1651; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
1652; LA64F-NEXT:    addi.d $sp, $sp, 48
1653; LA64F-NEXT:    ret
1654;
1655; LA64D-LABEL: double_fmax_acq_rel:
1656; LA64D:       # %bb.0:
1657; LA64D-NEXT:    addi.d $sp, $sp, -32
1658; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
1659; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
1660; LA64D-NEXT:    move $fp, $a0
1661; LA64D-NEXT:    fld.d $fa0, $a0, 0
1662; LA64D-NEXT:    .p2align 4, , 16
1663; LA64D-NEXT:  .LBB23_1: # %atomicrmw.start
1664; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
1665; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
1666; LA64D-NEXT:    vldi $vr2, -912
1667; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa2
1668; LA64D-NEXT:    fst.d $fa0, $sp, 8
1669; LA64D-NEXT:    fst.d $fa1, $sp, 0
1670; LA64D-NEXT:    ori $a0, $zero, 8
1671; LA64D-NEXT:    addi.d $a2, $sp, 8
1672; LA64D-NEXT:    addi.d $a3, $sp, 0
1673; LA64D-NEXT:    ori $a4, $zero, 4
1674; LA64D-NEXT:    ori $a5, $zero, 2
1675; LA64D-NEXT:    move $a1, $fp
1676; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
1677; LA64D-NEXT:    fld.d $fa0, $sp, 8
1678; LA64D-NEXT:    beqz $a0, .LBB23_1
1679; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1680; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
1681; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
1682; LA64D-NEXT:    addi.d $sp, $sp, 32
1683; LA64D-NEXT:    ret
1684  %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4
1685  ret double %v
1686}
1687
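; Note: the seq_cst float tests lower to the same LL/SC loop as the acquire and
; acq_rel cases above, with "dbar 20" on the compare-failure path.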
1688define float @float_fadd_seq_cst(ptr %p) nounwind {
1689; LA64F-LABEL: float_fadd_seq_cst:
1690; LA64F:       # %bb.0:
1691; LA64F-NEXT:    fld.s $fa0, $a0, 0
1692; LA64F-NEXT:    addi.w $a1, $zero, 1
1693; LA64F-NEXT:    movgr2fr.w $fa1, $a1
1694; LA64F-NEXT:    ffint.s.w $fa1, $fa1
1695; LA64F-NEXT:    .p2align 4, , 16
1696; LA64F-NEXT:  .LBB24_1: # %atomicrmw.start
1697; LA64F-NEXT:    # =>This Loop Header: Depth=1
1698; LA64F-NEXT:    # Child Loop BB24_3 Depth 2
1699; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
1700; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1701; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1702; LA64F-NEXT:  .LBB24_3: # %atomicrmw.start
1703; LA64F-NEXT:    # Parent Loop BB24_1 Depth=1
1704; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1705; LA64F-NEXT:    ll.w $a3, $a0, 0
1706; LA64F-NEXT:    bne $a3, $a2, .LBB24_5
1707; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1708; LA64F-NEXT:    # in Loop: Header=BB24_3 Depth=2
1709; LA64F-NEXT:    move $a4, $a1
1710; LA64F-NEXT:    sc.w $a4, $a0, 0
1711; LA64F-NEXT:    beqz $a4, .LBB24_3
1712; LA64F-NEXT:    b .LBB24_6
1713; LA64F-NEXT:  .LBB24_5: # %atomicrmw.start
1714; LA64F-NEXT:    # in Loop: Header=BB24_1 Depth=1
1715; LA64F-NEXT:    dbar 20
1716; LA64F-NEXT:  .LBB24_6: # %atomicrmw.start
1717; LA64F-NEXT:    # in Loop: Header=BB24_1 Depth=1
1718; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1719; LA64F-NEXT:    bne $a3, $a2, .LBB24_1
1720; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1721; LA64F-NEXT:    ret
1722;
1723; LA64D-LABEL: float_fadd_seq_cst:
1724; LA64D:       # %bb.0:
1725; LA64D-NEXT:    fld.s $fa0, $a0, 0
1726; LA64D-NEXT:    vldi $vr1, -1168
1727; LA64D-NEXT:    .p2align 4, , 16
1728; LA64D-NEXT:  .LBB24_1: # %atomicrmw.start
1729; LA64D-NEXT:    # =>This Loop Header: Depth=1
1730; LA64D-NEXT:    # Child Loop BB24_3 Depth 2
1731; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
1732; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1733; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1734; LA64D-NEXT:  .LBB24_3: # %atomicrmw.start
1735; LA64D-NEXT:    # Parent Loop BB24_1 Depth=1
1736; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1737; LA64D-NEXT:    ll.w $a3, $a0, 0
1738; LA64D-NEXT:    bne $a3, $a2, .LBB24_5
1739; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1740; LA64D-NEXT:    # in Loop: Header=BB24_3 Depth=2
1741; LA64D-NEXT:    move $a4, $a1
1742; LA64D-NEXT:    sc.w $a4, $a0, 0
1743; LA64D-NEXT:    beqz $a4, .LBB24_3
1744; LA64D-NEXT:    b .LBB24_6
1745; LA64D-NEXT:  .LBB24_5: # %atomicrmw.start
1746; LA64D-NEXT:    # in Loop: Header=BB24_1 Depth=1
1747; LA64D-NEXT:    dbar 20
1748; LA64D-NEXT:  .LBB24_6: # %atomicrmw.start
1749; LA64D-NEXT:    # in Loop: Header=BB24_1 Depth=1
1750; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1751; LA64D-NEXT:    bne $a3, $a2, .LBB24_1
1752; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1753; LA64D-NEXT:    ret
1754  %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4
1755  ret float %v
1756}
1757
1758define float @float_fsub_seq_cst(ptr %p) nounwind {
1759; LA64F-LABEL: float_fsub_seq_cst:
1760; LA64F:       # %bb.0:
1761; LA64F-NEXT:    fld.s $fa0, $a0, 0
1762; LA64F-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI25_0)
1763; LA64F-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI25_0)
1764; LA64F-NEXT:    .p2align 4, , 16
1765; LA64F-NEXT:  .LBB25_1: # %atomicrmw.start
1766; LA64F-NEXT:    # =>This Loop Header: Depth=1
1767; LA64F-NEXT:    # Child Loop BB25_3 Depth 2
1768; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
1769; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1770; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1771; LA64F-NEXT:  .LBB25_3: # %atomicrmw.start
1772; LA64F-NEXT:    # Parent Loop BB25_1 Depth=1
1773; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1774; LA64F-NEXT:    ll.w $a3, $a0, 0
1775; LA64F-NEXT:    bne $a3, $a2, .LBB25_5
1776; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1777; LA64F-NEXT:    # in Loop: Header=BB25_3 Depth=2
1778; LA64F-NEXT:    move $a4, $a1
1779; LA64F-NEXT:    sc.w $a4, $a0, 0
1780; LA64F-NEXT:    beqz $a4, .LBB25_3
1781; LA64F-NEXT:    b .LBB25_6
1782; LA64F-NEXT:  .LBB25_5: # %atomicrmw.start
1783; LA64F-NEXT:    # in Loop: Header=BB25_1 Depth=1
1784; LA64F-NEXT:    dbar 20
1785; LA64F-NEXT:  .LBB25_6: # %atomicrmw.start
1786; LA64F-NEXT:    # in Loop: Header=BB25_1 Depth=1
1787; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1788; LA64F-NEXT:    bne $a3, $a2, .LBB25_1
1789; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1790; LA64F-NEXT:    ret
1791;
1792; LA64D-LABEL: float_fsub_seq_cst:
1793; LA64D:       # %bb.0:
1794; LA64D-NEXT:    fld.s $fa0, $a0, 0
1795; LA64D-NEXT:    vldi $vr1, -1040
1796; LA64D-NEXT:    .p2align 4, , 16
1797; LA64D-NEXT:  .LBB25_1: # %atomicrmw.start
1798; LA64D-NEXT:    # =>This Loop Header: Depth=1
1799; LA64D-NEXT:    # Child Loop BB25_3 Depth 2
1800; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
1801; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1802; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1803; LA64D-NEXT:  .LBB25_3: # %atomicrmw.start
1804; LA64D-NEXT:    # Parent Loop BB25_1 Depth=1
1805; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1806; LA64D-NEXT:    ll.w $a3, $a0, 0
1807; LA64D-NEXT:    bne $a3, $a2, .LBB25_5
1808; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1809; LA64D-NEXT:    # in Loop: Header=BB25_3 Depth=2
1810; LA64D-NEXT:    move $a4, $a1
1811; LA64D-NEXT:    sc.w $a4, $a0, 0
1812; LA64D-NEXT:    beqz $a4, .LBB25_3
1813; LA64D-NEXT:    b .LBB25_6
1814; LA64D-NEXT:  .LBB25_5: # %atomicrmw.start
1815; LA64D-NEXT:    # in Loop: Header=BB25_1 Depth=1
1816; LA64D-NEXT:    dbar 20
1817; LA64D-NEXT:  .LBB25_6: # %atomicrmw.start
1818; LA64D-NEXT:    # in Loop: Header=BB25_1 Depth=1
1819; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1820; LA64D-NEXT:    bne $a3, $a2, .LBB25_1
1821; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1822; LA64D-NEXT:    ret
1823  %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4
1824  ret float %v
1825}
1826
1827define float @float_fmin_seq_cst(ptr %p) nounwind {
1828; LA64F-LABEL: float_fmin_seq_cst:
1829; LA64F:       # %bb.0:
1830; LA64F-NEXT:    fld.s $fa0, $a0, 0
1831; LA64F-NEXT:    addi.w $a1, $zero, 1
1832; LA64F-NEXT:    movgr2fr.w $fa1, $a1
1833; LA64F-NEXT:    ffint.s.w $fa1, $fa1
1834; LA64F-NEXT:    .p2align 4, , 16
1835; LA64F-NEXT:  .LBB26_1: # %atomicrmw.start
1836; LA64F-NEXT:    # =>This Loop Header: Depth=1
1837; LA64F-NEXT:    # Child Loop BB26_3 Depth 2
1838; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
1839; LA64F-NEXT:    fmin.s $fa2, $fa2, $fa1
1840; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1841; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1842; LA64F-NEXT:  .LBB26_3: # %atomicrmw.start
1843; LA64F-NEXT:    # Parent Loop BB26_1 Depth=1
1844; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1845; LA64F-NEXT:    ll.w $a3, $a0, 0
1846; LA64F-NEXT:    bne $a3, $a2, .LBB26_5
1847; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1848; LA64F-NEXT:    # in Loop: Header=BB26_3 Depth=2
1849; LA64F-NEXT:    move $a4, $a1
1850; LA64F-NEXT:    sc.w $a4, $a0, 0
1851; LA64F-NEXT:    beqz $a4, .LBB26_3
1852; LA64F-NEXT:    b .LBB26_6
1853; LA64F-NEXT:  .LBB26_5: # %atomicrmw.start
1854; LA64F-NEXT:    # in Loop: Header=BB26_1 Depth=1
1855; LA64F-NEXT:    dbar 20
1856; LA64F-NEXT:  .LBB26_6: # %atomicrmw.start
1857; LA64F-NEXT:    # in Loop: Header=BB26_1 Depth=1
1858; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1859; LA64F-NEXT:    bne $a3, $a2, .LBB26_1
1860; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1861; LA64F-NEXT:    ret
1862;
1863; LA64D-LABEL: float_fmin_seq_cst:
1864; LA64D:       # %bb.0:
1865; LA64D-NEXT:    fld.s $fa0, $a0, 0
1866; LA64D-NEXT:    vldi $vr1, -1168
1867; LA64D-NEXT:    .p2align 4, , 16
1868; LA64D-NEXT:  .LBB26_1: # %atomicrmw.start
1869; LA64D-NEXT:    # =>This Loop Header: Depth=1
1870; LA64D-NEXT:    # Child Loop BB26_3 Depth 2
1871; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
1872; LA64D-NEXT:    fmin.s $fa2, $fa2, $fa1
1873; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1874; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1875; LA64D-NEXT:  .LBB26_3: # %atomicrmw.start
1876; LA64D-NEXT:    # Parent Loop BB26_1 Depth=1
1877; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1878; LA64D-NEXT:    ll.w $a3, $a0, 0
1879; LA64D-NEXT:    bne $a3, $a2, .LBB26_5
1880; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1881; LA64D-NEXT:    # in Loop: Header=BB26_3 Depth=2
1882; LA64D-NEXT:    move $a4, $a1
1883; LA64D-NEXT:    sc.w $a4, $a0, 0
1884; LA64D-NEXT:    beqz $a4, .LBB26_3
1885; LA64D-NEXT:    b .LBB26_6
1886; LA64D-NEXT:  .LBB26_5: # %atomicrmw.start
1887; LA64D-NEXT:    # in Loop: Header=BB26_1 Depth=1
1888; LA64D-NEXT:    dbar 20
1889; LA64D-NEXT:  .LBB26_6: # %atomicrmw.start
1890; LA64D-NEXT:    # in Loop: Header=BB26_1 Depth=1
1891; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1892; LA64D-NEXT:    bne $a3, $a2, .LBB26_1
1893; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1894; LA64D-NEXT:    ret
1895  %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4
1896  ret float %v
1897}
1898
1899define float @float_fmax_seq_cst(ptr %p) nounwind {
1900; LA64F-LABEL: float_fmax_seq_cst:
1901; LA64F:       # %bb.0:
1902; LA64F-NEXT:    fld.s $fa0, $a0, 0
1903; LA64F-NEXT:    addi.w $a1, $zero, 1
1904; LA64F-NEXT:    movgr2fr.w $fa1, $a1
1905; LA64F-NEXT:    ffint.s.w $fa1, $fa1
1906; LA64F-NEXT:    .p2align 4, , 16
1907; LA64F-NEXT:  .LBB27_1: # %atomicrmw.start
1908; LA64F-NEXT:    # =>This Loop Header: Depth=1
1909; LA64F-NEXT:    # Child Loop BB27_3 Depth 2
1910; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
1911; LA64F-NEXT:    fmax.s $fa2, $fa2, $fa1
1912; LA64F-NEXT:    movfr2gr.s $a1, $fa2
1913; LA64F-NEXT:    movfr2gr.s $a2, $fa0
1914; LA64F-NEXT:  .LBB27_3: # %atomicrmw.start
1915; LA64F-NEXT:    # Parent Loop BB27_1 Depth=1
1916; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
1917; LA64F-NEXT:    ll.w $a3, $a0, 0
1918; LA64F-NEXT:    bne $a3, $a2, .LBB27_5
1919; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
1920; LA64F-NEXT:    # in Loop: Header=BB27_3 Depth=2
1921; LA64F-NEXT:    move $a4, $a1
1922; LA64F-NEXT:    sc.w $a4, $a0, 0
1923; LA64F-NEXT:    beqz $a4, .LBB27_3
1924; LA64F-NEXT:    b .LBB27_6
1925; LA64F-NEXT:  .LBB27_5: # %atomicrmw.start
1926; LA64F-NEXT:    # in Loop: Header=BB27_1 Depth=1
1927; LA64F-NEXT:    dbar 20
1928; LA64F-NEXT:  .LBB27_6: # %atomicrmw.start
1929; LA64F-NEXT:    # in Loop: Header=BB27_1 Depth=1
1930; LA64F-NEXT:    movgr2fr.w $fa0, $a3
1931; LA64F-NEXT:    bne $a3, $a2, .LBB27_1
1932; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
1933; LA64F-NEXT:    ret
1934;
1935; LA64D-LABEL: float_fmax_seq_cst:
1936; LA64D:       # %bb.0:
1937; LA64D-NEXT:    fld.s $fa0, $a0, 0
1938; LA64D-NEXT:    vldi $vr1, -1168
1939; LA64D-NEXT:    .p2align 4, , 16
1940; LA64D-NEXT:  .LBB27_1: # %atomicrmw.start
1941; LA64D-NEXT:    # =>This Loop Header: Depth=1
1942; LA64D-NEXT:    # Child Loop BB27_3 Depth 2
1943; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
1944; LA64D-NEXT:    fmax.s $fa2, $fa2, $fa1
1945; LA64D-NEXT:    movfr2gr.s $a1, $fa2
1946; LA64D-NEXT:    movfr2gr.s $a2, $fa0
1947; LA64D-NEXT:  .LBB27_3: # %atomicrmw.start
1948; LA64D-NEXT:    # Parent Loop BB27_1 Depth=1
1949; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
1950; LA64D-NEXT:    ll.w $a3, $a0, 0
1951; LA64D-NEXT:    bne $a3, $a2, .LBB27_5
1952; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
1953; LA64D-NEXT:    # in Loop: Header=BB27_3 Depth=2
1954; LA64D-NEXT:    move $a4, $a1
1955; LA64D-NEXT:    sc.w $a4, $a0, 0
1956; LA64D-NEXT:    beqz $a4, .LBB27_3
1957; LA64D-NEXT:    b .LBB27_6
1958; LA64D-NEXT:  .LBB27_5: # %atomicrmw.start
1959; LA64D-NEXT:    # in Loop: Header=BB27_1 Depth=1
1960; LA64D-NEXT:    dbar 20
1961; LA64D-NEXT:  .LBB27_6: # %atomicrmw.start
1962; LA64D-NEXT:    # in Loop: Header=BB27_1 Depth=1
1963; LA64D-NEXT:    movgr2fr.w $fa0, $a3
1964; LA64D-NEXT:    bne $a3, $a2, .LBB27_1
1965; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
1966; LA64D-NEXT:    ret
1967  %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4
1968  ret float %v
1969}
1970
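; Note: for the seq_cst double tests both ordering arguments passed to
; __atomic_compare_exchange are 5 (seq_cst).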
1971define double @double_fadd_seq_cst(ptr %p) nounwind {
1972; LA64F-LABEL: double_fadd_seq_cst:
1973; LA64F:       # %bb.0:
1974; LA64F-NEXT:    addi.d $sp, $sp, -48
1975; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
1976; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
1977; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
1978; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
1979; LA64F-NEXT:    move $fp, $a0
1980; LA64F-NEXT:    ld.d $s1, $a0, 0
1981; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
1982; LA64F-NEXT:    .p2align 4, , 16
1983; LA64F-NEXT:  .LBB28_1: # %atomicrmw.start
1984; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
1985; LA64F-NEXT:    move $a0, $s1
1986; LA64F-NEXT:    move $a1, $s0
1987; LA64F-NEXT:    bl %plt(__adddf3)
1988; LA64F-NEXT:    st.d $s1, $sp, 8
1989; LA64F-NEXT:    st.d $a0, $sp, 0
1990; LA64F-NEXT:    ori $a0, $zero, 8
1991; LA64F-NEXT:    addi.d $a2, $sp, 8
1992; LA64F-NEXT:    addi.d $a3, $sp, 0
1993; LA64F-NEXT:    ori $a4, $zero, 5
1994; LA64F-NEXT:    ori $a5, $zero, 5
1995; LA64F-NEXT:    move $a1, $fp
1996; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
1997; LA64F-NEXT:    ld.d $s1, $sp, 8
1998; LA64F-NEXT:    beqz $a0, .LBB28_1
1999; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2000; LA64F-NEXT:    move $a0, $s1
2001; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2002; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2003; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2004; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2005; LA64F-NEXT:    addi.d $sp, $sp, 48
2006; LA64F-NEXT:    ret
2007;
2008; LA64D-LABEL: double_fadd_seq_cst:
2009; LA64D:       # %bb.0:
2010; LA64D-NEXT:    addi.d $sp, $sp, -32
2011; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2012; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2013; LA64D-NEXT:    move $fp, $a0
2014; LA64D-NEXT:    fld.d $fa0, $a0, 0
2015; LA64D-NEXT:    .p2align 4, , 16
2016; LA64D-NEXT:  .LBB28_1: # %atomicrmw.start
2017; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2018; LA64D-NEXT:    vldi $vr1, -912
2019; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
2020; LA64D-NEXT:    fst.d $fa0, $sp, 8
2021; LA64D-NEXT:    fst.d $fa1, $sp, 0
2022; LA64D-NEXT:    ori $a0, $zero, 8
2023; LA64D-NEXT:    addi.d $a2, $sp, 8
2024; LA64D-NEXT:    addi.d $a3, $sp, 0
2025; LA64D-NEXT:    ori $a4, $zero, 5
2026; LA64D-NEXT:    ori $a5, $zero, 5
2027; LA64D-NEXT:    move $a1, $fp
2028; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2029; LA64D-NEXT:    fld.d $fa0, $sp, 8
2030; LA64D-NEXT:    beqz $a0, .LBB28_1
2031; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2032; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2033; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2034; LA64D-NEXT:    addi.d $sp, $sp, 32
2035; LA64D-NEXT:    ret
2036  %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4
2037  ret double %v
2038}
2039
2040define double @double_fsub_seq_cst(ptr %p) nounwind {
2041; LA64F-LABEL: double_fsub_seq_cst:
2042; LA64F:       # %bb.0:
2043; LA64F-NEXT:    addi.d $sp, $sp, -48
2044; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
2045; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
2046; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
2047; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
2048; LA64F-NEXT:    move $fp, $a0
2049; LA64F-NEXT:    ld.d $s1, $a0, 0
2050; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
2051; LA64F-NEXT:    .p2align 4, , 16
2052; LA64F-NEXT:  .LBB29_1: # %atomicrmw.start
2053; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
2054; LA64F-NEXT:    move $a0, $s1
2055; LA64F-NEXT:    move $a1, $s0
2056; LA64F-NEXT:    bl %plt(__adddf3)
2057; LA64F-NEXT:    st.d $s1, $sp, 8
2058; LA64F-NEXT:    st.d $a0, $sp, 0
2059; LA64F-NEXT:    ori $a0, $zero, 8
2060; LA64F-NEXT:    addi.d $a2, $sp, 8
2061; LA64F-NEXT:    addi.d $a3, $sp, 0
2062; LA64F-NEXT:    ori $a4, $zero, 5
2063; LA64F-NEXT:    ori $a5, $zero, 5
2064; LA64F-NEXT:    move $a1, $fp
2065; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
2066; LA64F-NEXT:    ld.d $s1, $sp, 8
2067; LA64F-NEXT:    beqz $a0, .LBB29_1
2068; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2069; LA64F-NEXT:    move $a0, $s1
2070; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2071; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2072; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2073; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2074; LA64F-NEXT:    addi.d $sp, $sp, 48
2075; LA64F-NEXT:    ret
2076;
2077; LA64D-LABEL: double_fsub_seq_cst:
2078; LA64D:       # %bb.0:
2079; LA64D-NEXT:    addi.d $sp, $sp, -32
2080; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2081; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2082; LA64D-NEXT:    move $fp, $a0
2083; LA64D-NEXT:    fld.d $fa0, $a0, 0
2084; LA64D-NEXT:    .p2align 4, , 16
2085; LA64D-NEXT:  .LBB29_1: # %atomicrmw.start
2086; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2087; LA64D-NEXT:    vldi $vr1, -784
2088; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
2089; LA64D-NEXT:    fst.d $fa0, $sp, 8
2090; LA64D-NEXT:    fst.d $fa1, $sp, 0
2091; LA64D-NEXT:    ori $a0, $zero, 8
2092; LA64D-NEXT:    addi.d $a2, $sp, 8
2093; LA64D-NEXT:    addi.d $a3, $sp, 0
2094; LA64D-NEXT:    ori $a4, $zero, 5
2095; LA64D-NEXT:    ori $a5, $zero, 5
2096; LA64D-NEXT:    move $a1, $fp
2097; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2098; LA64D-NEXT:    fld.d $fa0, $sp, 8
2099; LA64D-NEXT:    beqz $a0, .LBB29_1
2100; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2101; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2102; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2103; LA64D-NEXT:    addi.d $sp, $sp, 32
2104; LA64D-NEXT:    ret
2105  %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4
2106  ret double %v
2107}
2108
2109define double @double_fmin_seq_cst(ptr %p) nounwind {
2110; LA64F-LABEL: double_fmin_seq_cst:
2111; LA64F:       # %bb.0:
2112; LA64F-NEXT:    addi.d $sp, $sp, -48
2113; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
2114; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
2115; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
2116; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
2117; LA64F-NEXT:    move $fp, $a0
2118; LA64F-NEXT:    ld.d $s1, $a0, 0
2119; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
2120; LA64F-NEXT:    .p2align 4, , 16
2121; LA64F-NEXT:  .LBB30_1: # %atomicrmw.start
2122; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
2123; LA64F-NEXT:    move $a0, $s1
2124; LA64F-NEXT:    move $a1, $s0
2125; LA64F-NEXT:    bl %plt(fmin)
2126; LA64F-NEXT:    st.d $s1, $sp, 8
2127; LA64F-NEXT:    st.d $a0, $sp, 0
2128; LA64F-NEXT:    ori $a0, $zero, 8
2129; LA64F-NEXT:    addi.d $a2, $sp, 8
2130; LA64F-NEXT:    addi.d $a3, $sp, 0
2131; LA64F-NEXT:    ori $a4, $zero, 5
2132; LA64F-NEXT:    ori $a5, $zero, 5
2133; LA64F-NEXT:    move $a1, $fp
2134; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
2135; LA64F-NEXT:    ld.d $s1, $sp, 8
2136; LA64F-NEXT:    beqz $a0, .LBB30_1
2137; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2138; LA64F-NEXT:    move $a0, $s1
2139; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2140; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2141; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2142; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2143; LA64F-NEXT:    addi.d $sp, $sp, 48
2144; LA64F-NEXT:    ret
2145;
2146; LA64D-LABEL: double_fmin_seq_cst:
2147; LA64D:       # %bb.0:
2148; LA64D-NEXT:    addi.d $sp, $sp, -32
2149; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2150; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2151; LA64D-NEXT:    move $fp, $a0
2152; LA64D-NEXT:    fld.d $fa0, $a0, 0
2153; LA64D-NEXT:    .p2align 4, , 16
2154; LA64D-NEXT:  .LBB30_1: # %atomicrmw.start
2155; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2156; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
2157; LA64D-NEXT:    vldi $vr2, -912
2158; LA64D-NEXT:    fmin.d $fa1, $fa1, $fa2
2159; LA64D-NEXT:    fst.d $fa0, $sp, 8
2160; LA64D-NEXT:    fst.d $fa1, $sp, 0
2161; LA64D-NEXT:    ori $a0, $zero, 8
2162; LA64D-NEXT:    addi.d $a2, $sp, 8
2163; LA64D-NEXT:    addi.d $a3, $sp, 0
2164; LA64D-NEXT:    ori $a4, $zero, 5
2165; LA64D-NEXT:    ori $a5, $zero, 5
2166; LA64D-NEXT:    move $a1, $fp
2167; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2168; LA64D-NEXT:    fld.d $fa0, $sp, 8
2169; LA64D-NEXT:    beqz $a0, .LBB30_1
2170; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2171; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2172; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2173; LA64D-NEXT:    addi.d $sp, $sp, 32
2174; LA64D-NEXT:    ret
2175  %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4
2176  ret double %v
2177}
2178
2179define double @double_fmax_seq_cst(ptr %p) nounwind {
2180; LA64F-LABEL: double_fmax_seq_cst:
2181; LA64F:       # %bb.0:
2182; LA64F-NEXT:    addi.d $sp, $sp, -48
2183; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
2184; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
2185; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
2186; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
2187; LA64F-NEXT:    move $fp, $a0
2188; LA64F-NEXT:    ld.d $s1, $a0, 0
2189; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
2190; LA64F-NEXT:    .p2align 4, , 16
2191; LA64F-NEXT:  .LBB31_1: # %atomicrmw.start
2192; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
2193; LA64F-NEXT:    move $a0, $s1
2194; LA64F-NEXT:    move $a1, $s0
2195; LA64F-NEXT:    bl %plt(fmax)
2196; LA64F-NEXT:    st.d $s1, $sp, 8
2197; LA64F-NEXT:    st.d $a0, $sp, 0
2198; LA64F-NEXT:    ori $a0, $zero, 8
2199; LA64F-NEXT:    addi.d $a2, $sp, 8
2200; LA64F-NEXT:    addi.d $a3, $sp, 0
2201; LA64F-NEXT:    ori $a4, $zero, 5
2202; LA64F-NEXT:    ori $a5, $zero, 5
2203; LA64F-NEXT:    move $a1, $fp
2204; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
2205; LA64F-NEXT:    ld.d $s1, $sp, 8
2206; LA64F-NEXT:    beqz $a0, .LBB31_1
2207; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2208; LA64F-NEXT:    move $a0, $s1
2209; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2210; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2211; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2212; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2213; LA64F-NEXT:    addi.d $sp, $sp, 48
2214; LA64F-NEXT:    ret
2215;
2216; LA64D-LABEL: double_fmax_seq_cst:
2217; LA64D:       # %bb.0:
2218; LA64D-NEXT:    addi.d $sp, $sp, -32
2219; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2220; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2221; LA64D-NEXT:    move $fp, $a0
2222; LA64D-NEXT:    fld.d $fa0, $a0, 0
2223; LA64D-NEXT:    .p2align 4, , 16
2224; LA64D-NEXT:  .LBB31_1: # %atomicrmw.start
2225; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2226; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
2227; LA64D-NEXT:    vldi $vr2, -912
2228; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa2
2229; LA64D-NEXT:    fst.d $fa0, $sp, 8
2230; LA64D-NEXT:    fst.d $fa1, $sp, 0
2231; LA64D-NEXT:    ori $a0, $zero, 8
2232; LA64D-NEXT:    addi.d $a2, $sp, 8
2233; LA64D-NEXT:    addi.d $a3, $sp, 0
2234; LA64D-NEXT:    ori $a4, $zero, 5
2235; LA64D-NEXT:    ori $a5, $zero, 5
2236; LA64D-NEXT:    move $a1, $fp
2237; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2238; LA64D-NEXT:    fld.d $fa0, $sp, 8
2239; LA64D-NEXT:    beqz $a0, .LBB31_1
2240; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2241; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2242; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2243; LA64D-NEXT:    addi.d $sp, $sp, 32
2244; LA64D-NEXT:    ret
2245  %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4
2246  ret double %v
2247}
2248
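; Note: for monotonic orderings the compare-failure path uses "dbar 1792"
; instead of the "dbar 20" emitted for the acquiring orderings above.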
2249define float @float_fadd_monotonic(ptr %p) nounwind {
2250; LA64F-LABEL: float_fadd_monotonic:
2251; LA64F:       # %bb.0:
2252; LA64F-NEXT:    fld.s $fa0, $a0, 0
2253; LA64F-NEXT:    addi.w $a1, $zero, 1
2254; LA64F-NEXT:    movgr2fr.w $fa1, $a1
2255; LA64F-NEXT:    ffint.s.w $fa1, $fa1
2256; LA64F-NEXT:    .p2align 4, , 16
2257; LA64F-NEXT:  .LBB32_1: # %atomicrmw.start
2258; LA64F-NEXT:    # =>This Loop Header: Depth=1
2259; LA64F-NEXT:    # Child Loop BB32_3 Depth 2
2260; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
2261; LA64F-NEXT:    movfr2gr.s $a1, $fa2
2262; LA64F-NEXT:    movfr2gr.s $a2, $fa0
2263; LA64F-NEXT:  .LBB32_3: # %atomicrmw.start
2264; LA64F-NEXT:    # Parent Loop BB32_1 Depth=1
2265; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
2266; LA64F-NEXT:    ll.w $a3, $a0, 0
2267; LA64F-NEXT:    bne $a3, $a2, .LBB32_5
2268; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
2269; LA64F-NEXT:    # in Loop: Header=BB32_3 Depth=2
2270; LA64F-NEXT:    move $a4, $a1
2271; LA64F-NEXT:    sc.w $a4, $a0, 0
2272; LA64F-NEXT:    beqz $a4, .LBB32_3
2273; LA64F-NEXT:    b .LBB32_6
2274; LA64F-NEXT:  .LBB32_5: # %atomicrmw.start
2275; LA64F-NEXT:    # in Loop: Header=BB32_1 Depth=1
2276; LA64F-NEXT:    dbar 1792
2277; LA64F-NEXT:  .LBB32_6: # %atomicrmw.start
2278; LA64F-NEXT:    # in Loop: Header=BB32_1 Depth=1
2279; LA64F-NEXT:    movgr2fr.w $fa0, $a3
2280; LA64F-NEXT:    bne $a3, $a2, .LBB32_1
2281; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2282; LA64F-NEXT:    ret
2283;
2284; LA64D-LABEL: float_fadd_monotonic:
2285; LA64D:       # %bb.0:
2286; LA64D-NEXT:    fld.s $fa0, $a0, 0
2287; LA64D-NEXT:    vldi $vr1, -1168
2288; LA64D-NEXT:    .p2align 4, , 16
2289; LA64D-NEXT:  .LBB32_1: # %atomicrmw.start
2290; LA64D-NEXT:    # =>This Loop Header: Depth=1
2291; LA64D-NEXT:    # Child Loop BB32_3 Depth 2
2292; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
2293; LA64D-NEXT:    movfr2gr.s $a1, $fa2
2294; LA64D-NEXT:    movfr2gr.s $a2, $fa0
2295; LA64D-NEXT:  .LBB32_3: # %atomicrmw.start
2296; LA64D-NEXT:    # Parent Loop BB32_1 Depth=1
2297; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
2298; LA64D-NEXT:    ll.w $a3, $a0, 0
2299; LA64D-NEXT:    bne $a3, $a2, .LBB32_5
2300; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
2301; LA64D-NEXT:    # in Loop: Header=BB32_3 Depth=2
2302; LA64D-NEXT:    move $a4, $a1
2303; LA64D-NEXT:    sc.w $a4, $a0, 0
2304; LA64D-NEXT:    beqz $a4, .LBB32_3
2305; LA64D-NEXT:    b .LBB32_6
2306; LA64D-NEXT:  .LBB32_5: # %atomicrmw.start
2307; LA64D-NEXT:    # in Loop: Header=BB32_1 Depth=1
2308; LA64D-NEXT:    dbar 1792
2309; LA64D-NEXT:  .LBB32_6: # %atomicrmw.start
2310; LA64D-NEXT:    # in Loop: Header=BB32_1 Depth=1
2311; LA64D-NEXT:    movgr2fr.w $fa0, $a3
2312; LA64D-NEXT:    bne $a3, $a2, .LBB32_1
2313; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2314; LA64D-NEXT:    ret
2315  %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4
2316  ret float %v
2317}
2318
2319define float @float_fsub_monotonic(ptr %p) nounwind {
2320; LA64F-LABEL: float_fsub_monotonic:
2321; LA64F:       # %bb.0:
2322; LA64F-NEXT:    fld.s $fa0, $a0, 0
2323; LA64F-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI33_0)
2324; LA64F-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI33_0)
2325; LA64F-NEXT:    .p2align 4, , 16
2326; LA64F-NEXT:  .LBB33_1: # %atomicrmw.start
2327; LA64F-NEXT:    # =>This Loop Header: Depth=1
2328; LA64F-NEXT:    # Child Loop BB33_3 Depth 2
2329; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
2330; LA64F-NEXT:    movfr2gr.s $a1, $fa2
2331; LA64F-NEXT:    movfr2gr.s $a2, $fa0
2332; LA64F-NEXT:  .LBB33_3: # %atomicrmw.start
2333; LA64F-NEXT:    # Parent Loop BB33_1 Depth=1
2334; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
2335; LA64F-NEXT:    ll.w $a3, $a0, 0
2336; LA64F-NEXT:    bne $a3, $a2, .LBB33_5
2337; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
2338; LA64F-NEXT:    # in Loop: Header=BB33_3 Depth=2
2339; LA64F-NEXT:    move $a4, $a1
2340; LA64F-NEXT:    sc.w $a4, $a0, 0
2341; LA64F-NEXT:    beqz $a4, .LBB33_3
2342; LA64F-NEXT:    b .LBB33_6
2343; LA64F-NEXT:  .LBB33_5: # %atomicrmw.start
2344; LA64F-NEXT:    # in Loop: Header=BB33_1 Depth=1
2345; LA64F-NEXT:    dbar 1792
2346; LA64F-NEXT:  .LBB33_6: # %atomicrmw.start
2347; LA64F-NEXT:    # in Loop: Header=BB33_1 Depth=1
2348; LA64F-NEXT:    movgr2fr.w $fa0, $a3
2349; LA64F-NEXT:    bne $a3, $a2, .LBB33_1
2350; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2351; LA64F-NEXT:    ret
2352;
2353; LA64D-LABEL: float_fsub_monotonic:
2354; LA64D:       # %bb.0:
2355; LA64D-NEXT:    fld.s $fa0, $a0, 0
2356; LA64D-NEXT:    vldi $vr1, -1040
2357; LA64D-NEXT:    .p2align 4, , 16
2358; LA64D-NEXT:  .LBB33_1: # %atomicrmw.start
2359; LA64D-NEXT:    # =>This Loop Header: Depth=1
2360; LA64D-NEXT:    # Child Loop BB33_3 Depth 2
2361; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
2362; LA64D-NEXT:    movfr2gr.s $a1, $fa2
2363; LA64D-NEXT:    movfr2gr.s $a2, $fa0
2364; LA64D-NEXT:  .LBB33_3: # %atomicrmw.start
2365; LA64D-NEXT:    # Parent Loop BB33_1 Depth=1
2366; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
2367; LA64D-NEXT:    ll.w $a3, $a0, 0
2368; LA64D-NEXT:    bne $a3, $a2, .LBB33_5
2369; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
2370; LA64D-NEXT:    # in Loop: Header=BB33_3 Depth=2
2371; LA64D-NEXT:    move $a4, $a1
2372; LA64D-NEXT:    sc.w $a4, $a0, 0
2373; LA64D-NEXT:    beqz $a4, .LBB33_3
2374; LA64D-NEXT:    b .LBB33_6
2375; LA64D-NEXT:  .LBB33_5: # %atomicrmw.start
2376; LA64D-NEXT:    # in Loop: Header=BB33_1 Depth=1
2377; LA64D-NEXT:    dbar 1792
2378; LA64D-NEXT:  .LBB33_6: # %atomicrmw.start
2379; LA64D-NEXT:    # in Loop: Header=BB33_1 Depth=1
2380; LA64D-NEXT:    movgr2fr.w $fa0, $a3
2381; LA64D-NEXT:    bne $a3, $a2, .LBB33_1
2382; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2383; LA64D-NEXT:    ret
2384  %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4
2385  ret float %v
2386}
2387
2388define float @float_fmin_monotonic(ptr %p) nounwind {
2389; LA64F-LABEL: float_fmin_monotonic:
2390; LA64F:       # %bb.0:
2391; LA64F-NEXT:    fld.s $fa0, $a0, 0
2392; LA64F-NEXT:    addi.w $a1, $zero, 1
2393; LA64F-NEXT:    movgr2fr.w $fa1, $a1
2394; LA64F-NEXT:    ffint.s.w $fa1, $fa1
2395; LA64F-NEXT:    .p2align 4, , 16
2396; LA64F-NEXT:  .LBB34_1: # %atomicrmw.start
2397; LA64F-NEXT:    # =>This Loop Header: Depth=1
2398; LA64F-NEXT:    # Child Loop BB34_3 Depth 2
2399; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
2400; LA64F-NEXT:    fmin.s $fa2, $fa2, $fa1
2401; LA64F-NEXT:    movfr2gr.s $a1, $fa2
2402; LA64F-NEXT:    movfr2gr.s $a2, $fa0
2403; LA64F-NEXT:  .LBB34_3: # %atomicrmw.start
2404; LA64F-NEXT:    # Parent Loop BB34_1 Depth=1
2405; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
2406; LA64F-NEXT:    ll.w $a3, $a0, 0
2407; LA64F-NEXT:    bne $a3, $a2, .LBB34_5
2408; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
2409; LA64F-NEXT:    # in Loop: Header=BB34_3 Depth=2
2410; LA64F-NEXT:    move $a4, $a1
2411; LA64F-NEXT:    sc.w $a4, $a0, 0
2412; LA64F-NEXT:    beqz $a4, .LBB34_3
2413; LA64F-NEXT:    b .LBB34_6
2414; LA64F-NEXT:  .LBB34_5: # %atomicrmw.start
2415; LA64F-NEXT:    # in Loop: Header=BB34_1 Depth=1
2416; LA64F-NEXT:    dbar 1792
2417; LA64F-NEXT:  .LBB34_6: # %atomicrmw.start
2418; LA64F-NEXT:    # in Loop: Header=BB34_1 Depth=1
2419; LA64F-NEXT:    movgr2fr.w $fa0, $a3
2420; LA64F-NEXT:    bne $a3, $a2, .LBB34_1
2421; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2422; LA64F-NEXT:    ret
2423;
2424; LA64D-LABEL: float_fmin_monotonic:
2425; LA64D:       # %bb.0:
2426; LA64D-NEXT:    fld.s $fa0, $a0, 0
2427; LA64D-NEXT:    vldi $vr1, -1168
2428; LA64D-NEXT:    .p2align 4, , 16
2429; LA64D-NEXT:  .LBB34_1: # %atomicrmw.start
2430; LA64D-NEXT:    # =>This Loop Header: Depth=1
2431; LA64D-NEXT:    # Child Loop BB34_3 Depth 2
2432; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
2433; LA64D-NEXT:    fmin.s $fa2, $fa2, $fa1
2434; LA64D-NEXT:    movfr2gr.s $a1, $fa2
2435; LA64D-NEXT:    movfr2gr.s $a2, $fa0
2436; LA64D-NEXT:  .LBB34_3: # %atomicrmw.start
2437; LA64D-NEXT:    # Parent Loop BB34_1 Depth=1
2438; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
2439; LA64D-NEXT:    ll.w $a3, $a0, 0
2440; LA64D-NEXT:    bne $a3, $a2, .LBB34_5
2441; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
2442; LA64D-NEXT:    # in Loop: Header=BB34_3 Depth=2
2443; LA64D-NEXT:    move $a4, $a1
2444; LA64D-NEXT:    sc.w $a4, $a0, 0
2445; LA64D-NEXT:    beqz $a4, .LBB34_3
2446; LA64D-NEXT:    b .LBB34_6
2447; LA64D-NEXT:  .LBB34_5: # %atomicrmw.start
2448; LA64D-NEXT:    # in Loop: Header=BB34_1 Depth=1
2449; LA64D-NEXT:    dbar 1792
2450; LA64D-NEXT:  .LBB34_6: # %atomicrmw.start
2451; LA64D-NEXT:    # in Loop: Header=BB34_1 Depth=1
2452; LA64D-NEXT:    movgr2fr.w $fa0, $a3
2453; LA64D-NEXT:    bne $a3, $a2, .LBB34_1
2454; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2455; LA64D-NEXT:    ret
2456  %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4
2457  ret float %v
2458}
2459
2460define float @float_fmax_monotonic(ptr %p) nounwind {
2461; LA64F-LABEL: float_fmax_monotonic:
2462; LA64F:       # %bb.0:
2463; LA64F-NEXT:    fld.s $fa0, $a0, 0
2464; LA64F-NEXT:    addi.w $a1, $zero, 1
2465; LA64F-NEXT:    movgr2fr.w $fa1, $a1
2466; LA64F-NEXT:    ffint.s.w $fa1, $fa1
2467; LA64F-NEXT:    .p2align 4, , 16
2468; LA64F-NEXT:  .LBB35_1: # %atomicrmw.start
2469; LA64F-NEXT:    # =>This Loop Header: Depth=1
2470; LA64F-NEXT:    # Child Loop BB35_3 Depth 2
2471; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
2472; LA64F-NEXT:    fmax.s $fa2, $fa2, $fa1
2473; LA64F-NEXT:    movfr2gr.s $a1, $fa2
2474; LA64F-NEXT:    movfr2gr.s $a2, $fa0
2475; LA64F-NEXT:  .LBB35_3: # %atomicrmw.start
2476; LA64F-NEXT:    # Parent Loop BB35_1 Depth=1
2477; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
2478; LA64F-NEXT:    ll.w $a3, $a0, 0
2479; LA64F-NEXT:    bne $a3, $a2, .LBB35_5
2480; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
2481; LA64F-NEXT:    # in Loop: Header=BB35_3 Depth=2
2482; LA64F-NEXT:    move $a4, $a1
2483; LA64F-NEXT:    sc.w $a4, $a0, 0
2484; LA64F-NEXT:    beqz $a4, .LBB35_3
2485; LA64F-NEXT:    b .LBB35_6
2486; LA64F-NEXT:  .LBB35_5: # %atomicrmw.start
2487; LA64F-NEXT:    # in Loop: Header=BB35_1 Depth=1
2488; LA64F-NEXT:    dbar 1792
2489; LA64F-NEXT:  .LBB35_6: # %atomicrmw.start
2490; LA64F-NEXT:    # in Loop: Header=BB35_1 Depth=1
2491; LA64F-NEXT:    movgr2fr.w $fa0, $a3
2492; LA64F-NEXT:    bne $a3, $a2, .LBB35_1
2493; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2494; LA64F-NEXT:    ret
2495;
2496; LA64D-LABEL: float_fmax_monotonic:
2497; LA64D:       # %bb.0:
2498; LA64D-NEXT:    fld.s $fa0, $a0, 0
2499; LA64D-NEXT:    vldi $vr1, -1168
2500; LA64D-NEXT:    .p2align 4, , 16
2501; LA64D-NEXT:  .LBB35_1: # %atomicrmw.start
2502; LA64D-NEXT:    # =>This Loop Header: Depth=1
2503; LA64D-NEXT:    # Child Loop BB35_3 Depth 2
2504; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
2505; LA64D-NEXT:    fmax.s $fa2, $fa2, $fa1
2506; LA64D-NEXT:    movfr2gr.s $a1, $fa2
2507; LA64D-NEXT:    movfr2gr.s $a2, $fa0
2508; LA64D-NEXT:  .LBB35_3: # %atomicrmw.start
2509; LA64D-NEXT:    # Parent Loop BB35_1 Depth=1
2510; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
2511; LA64D-NEXT:    ll.w $a3, $a0, 0
2512; LA64D-NEXT:    bne $a3, $a2, .LBB35_5
2513; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
2514; LA64D-NEXT:    # in Loop: Header=BB35_3 Depth=2
2515; LA64D-NEXT:    move $a4, $a1
2516; LA64D-NEXT:    sc.w $a4, $a0, 0
2517; LA64D-NEXT:    beqz $a4, .LBB35_3
2518; LA64D-NEXT:    b .LBB35_6
2519; LA64D-NEXT:  .LBB35_5: # %atomicrmw.start
2520; LA64D-NEXT:    # in Loop: Header=BB35_1 Depth=1
2521; LA64D-NEXT:    dbar 1792
2522; LA64D-NEXT:  .LBB35_6: # %atomicrmw.start
2523; LA64D-NEXT:    # in Loop: Header=BB35_1 Depth=1
2524; LA64D-NEXT:    movgr2fr.w $fa0, $a3
2525; LA64D-NEXT:    bne $a3, $a2, .LBB35_1
2526; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2527; LA64D-NEXT:    ret
2528  %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4
2529  ret float %v
2530}
2531
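; Note: the monotonic double tests pass 0 (relaxed) for both ordering
; arguments, so $a4/$a5 are zeroed with "move" instead of "ori".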
2532define double @double_fadd_monotonic(ptr %p) nounwind {
2533; LA64F-LABEL: double_fadd_monotonic:
2534; LA64F:       # %bb.0:
2535; LA64F-NEXT:    addi.d $sp, $sp, -48
2536; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
2537; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
2538; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
2539; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
2540; LA64F-NEXT:    move $fp, $a0
2541; LA64F-NEXT:    ld.d $s1, $a0, 0
2542; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
2543; LA64F-NEXT:    .p2align 4, , 16
2544; LA64F-NEXT:  .LBB36_1: # %atomicrmw.start
2545; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
2546; LA64F-NEXT:    move $a0, $s1
2547; LA64F-NEXT:    move $a1, $s0
2548; LA64F-NEXT:    bl %plt(__adddf3)
2549; LA64F-NEXT:    st.d $s1, $sp, 8
2550; LA64F-NEXT:    st.d $a0, $sp, 0
2551; LA64F-NEXT:    ori $a0, $zero, 8
2552; LA64F-NEXT:    addi.d $a2, $sp, 8
2553; LA64F-NEXT:    addi.d $a3, $sp, 0
2554; LA64F-NEXT:    move $a1, $fp
2555; LA64F-NEXT:    move $a4, $zero
2556; LA64F-NEXT:    move $a5, $zero
2557; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
2558; LA64F-NEXT:    ld.d $s1, $sp, 8
2559; LA64F-NEXT:    beqz $a0, .LBB36_1
2560; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2561; LA64F-NEXT:    move $a0, $s1
2562; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2563; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2564; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2565; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2566; LA64F-NEXT:    addi.d $sp, $sp, 48
2567; LA64F-NEXT:    ret
2568;
2569; LA64D-LABEL: double_fadd_monotonic:
2570; LA64D:       # %bb.0:
2571; LA64D-NEXT:    addi.d $sp, $sp, -32
2572; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2573; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2574; LA64D-NEXT:    move $fp, $a0
2575; LA64D-NEXT:    fld.d $fa0, $a0, 0
2576; LA64D-NEXT:    .p2align 4, , 16
2577; LA64D-NEXT:  .LBB36_1: # %atomicrmw.start
2578; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2579; LA64D-NEXT:    vldi $vr1, -912
2580; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
2581; LA64D-NEXT:    fst.d $fa0, $sp, 8
2582; LA64D-NEXT:    fst.d $fa1, $sp, 0
2583; LA64D-NEXT:    ori $a0, $zero, 8
2584; LA64D-NEXT:    addi.d $a2, $sp, 8
2585; LA64D-NEXT:    addi.d $a3, $sp, 0
2586; LA64D-NEXT:    move $a1, $fp
2587; LA64D-NEXT:    move $a4, $zero
2588; LA64D-NEXT:    move $a5, $zero
2589; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2590; LA64D-NEXT:    fld.d $fa0, $sp, 8
2591; LA64D-NEXT:    beqz $a0, .LBB36_1
2592; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2593; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2594; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2595; LA64D-NEXT:    addi.d $sp, $sp, 32
2596; LA64D-NEXT:    ret
2597  %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4
2598  ret double %v
2599}
2600
2601define double @double_fsub_monotonic(ptr %p) nounwind {
2602; LA64F-LABEL: double_fsub_monotonic:
2603; LA64F:       # %bb.0:
2604; LA64F-NEXT:    addi.d $sp, $sp, -48
2605; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
2606; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
2607; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
2608; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
2609; LA64F-NEXT:    move $fp, $a0
2610; LA64F-NEXT:    ld.d $s1, $a0, 0
2611; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
2612; LA64F-NEXT:    .p2align 4, , 16
2613; LA64F-NEXT:  .LBB37_1: # %atomicrmw.start
2614; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
2615; LA64F-NEXT:    move $a0, $s1
2616; LA64F-NEXT:    move $a1, $s0
2617; LA64F-NEXT:    bl %plt(__adddf3)
2618; LA64F-NEXT:    st.d $s1, $sp, 8
2619; LA64F-NEXT:    st.d $a0, $sp, 0
2620; LA64F-NEXT:    ori $a0, $zero, 8
2621; LA64F-NEXT:    addi.d $a2, $sp, 8
2622; LA64F-NEXT:    addi.d $a3, $sp, 0
2623; LA64F-NEXT:    move $a1, $fp
2624; LA64F-NEXT:    move $a4, $zero
2625; LA64F-NEXT:    move $a5, $zero
2626; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
2627; LA64F-NEXT:    ld.d $s1, $sp, 8
2628; LA64F-NEXT:    beqz $a0, .LBB37_1
2629; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2630; LA64F-NEXT:    move $a0, $s1
2631; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2632; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2633; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2634; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2635; LA64F-NEXT:    addi.d $sp, $sp, 48
2636; LA64F-NEXT:    ret
2637;
2638; LA64D-LABEL: double_fsub_monotonic:
2639; LA64D:       # %bb.0:
2640; LA64D-NEXT:    addi.d $sp, $sp, -32
2641; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2642; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2643; LA64D-NEXT:    move $fp, $a0
2644; LA64D-NEXT:    fld.d $fa0, $a0, 0
2645; LA64D-NEXT:    .p2align 4, , 16
2646; LA64D-NEXT:  .LBB37_1: # %atomicrmw.start
2647; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2648; LA64D-NEXT:    vldi $vr1, -784
2649; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
2650; LA64D-NEXT:    fst.d $fa0, $sp, 8
2651; LA64D-NEXT:    fst.d $fa1, $sp, 0
2652; LA64D-NEXT:    ori $a0, $zero, 8
2653; LA64D-NEXT:    addi.d $a2, $sp, 8
2654; LA64D-NEXT:    addi.d $a3, $sp, 0
2655; LA64D-NEXT:    move $a1, $fp
2656; LA64D-NEXT:    move $a4, $zero
2657; LA64D-NEXT:    move $a5, $zero
2658; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2659; LA64D-NEXT:    fld.d $fa0, $sp, 8
2660; LA64D-NEXT:    beqz $a0, .LBB37_1
2661; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2662; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2663; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2664; LA64D-NEXT:    addi.d $sp, $sp, 32
2665; LA64D-NEXT:    ret
2666  %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4
2667  ret double %v
2668}
2669
2670define double @double_fmin_monotonic(ptr %p) nounwind {
2671; LA64F-LABEL: double_fmin_monotonic:
2672; LA64F:       # %bb.0:
2673; LA64F-NEXT:    addi.d $sp, $sp, -48
2674; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
2675; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
2676; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
2677; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
2678; LA64F-NEXT:    move $fp, $a0
2679; LA64F-NEXT:    ld.d $s1, $a0, 0
2680; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
2681; LA64F-NEXT:    .p2align 4, , 16
2682; LA64F-NEXT:  .LBB38_1: # %atomicrmw.start
2683; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
2684; LA64F-NEXT:    move $a0, $s1
2685; LA64F-NEXT:    move $a1, $s0
2686; LA64F-NEXT:    bl %plt(fmin)
2687; LA64F-NEXT:    st.d $s1, $sp, 8
2688; LA64F-NEXT:    st.d $a0, $sp, 0
2689; LA64F-NEXT:    ori $a0, $zero, 8
2690; LA64F-NEXT:    addi.d $a2, $sp, 8
2691; LA64F-NEXT:    addi.d $a3, $sp, 0
2692; LA64F-NEXT:    move $a1, $fp
2693; LA64F-NEXT:    move $a4, $zero
2694; LA64F-NEXT:    move $a5, $zero
2695; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
2696; LA64F-NEXT:    ld.d $s1, $sp, 8
2697; LA64F-NEXT:    beqz $a0, .LBB38_1
2698; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2699; LA64F-NEXT:    move $a0, $s1
2700; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2701; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2702; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2703; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2704; LA64F-NEXT:    addi.d $sp, $sp, 48
2705; LA64F-NEXT:    ret
2706;
2707; LA64D-LABEL: double_fmin_monotonic:
2708; LA64D:       # %bb.0:
2709; LA64D-NEXT:    addi.d $sp, $sp, -32
2710; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2711; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2712; LA64D-NEXT:    move $fp, $a0
2713; LA64D-NEXT:    fld.d $fa0, $a0, 0
2714; LA64D-NEXT:    .p2align 4, , 16
2715; LA64D-NEXT:  .LBB38_1: # %atomicrmw.start
2716; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2717; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
2718; LA64D-NEXT:    vldi $vr2, -912
2719; LA64D-NEXT:    fmin.d $fa1, $fa1, $fa2
2720; LA64D-NEXT:    fst.d $fa0, $sp, 8
2721; LA64D-NEXT:    fst.d $fa1, $sp, 0
2722; LA64D-NEXT:    ori $a0, $zero, 8
2723; LA64D-NEXT:    addi.d $a2, $sp, 8
2724; LA64D-NEXT:    addi.d $a3, $sp, 0
2725; LA64D-NEXT:    move $a1, $fp
2726; LA64D-NEXT:    move $a4, $zero
2727; LA64D-NEXT:    move $a5, $zero
2728; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2729; LA64D-NEXT:    fld.d $fa0, $sp, 8
2730; LA64D-NEXT:    beqz $a0, .LBB38_1
2731; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2732; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2733; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2734; LA64D-NEXT:    addi.d $sp, $sp, 32
2735; LA64D-NEXT:    ret
2736  %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4
2737  ret double %v
2738}
2739
2740define double @double_fmax_monotonic(ptr %p) nounwind {
2741; LA64F-LABEL: double_fmax_monotonic:
2742; LA64F:       # %bb.0:
2743; LA64F-NEXT:    addi.d $sp, $sp, -48
2744; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
2745; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
2746; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
2747; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
2748; LA64F-NEXT:    move $fp, $a0
2749; LA64F-NEXT:    ld.d $s1, $a0, 0
2750; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
2751; LA64F-NEXT:    .p2align 4, , 16
2752; LA64F-NEXT:  .LBB39_1: # %atomicrmw.start
2753; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
2754; LA64F-NEXT:    move $a0, $s1
2755; LA64F-NEXT:    move $a1, $s0
2756; LA64F-NEXT:    bl %plt(fmax)
2757; LA64F-NEXT:    st.d $s1, $sp, 8
2758; LA64F-NEXT:    st.d $a0, $sp, 0
2759; LA64F-NEXT:    ori $a0, $zero, 8
2760; LA64F-NEXT:    addi.d $a2, $sp, 8
2761; LA64F-NEXT:    addi.d $a3, $sp, 0
2762; LA64F-NEXT:    move $a1, $fp
2763; LA64F-NEXT:    move $a4, $zero
2764; LA64F-NEXT:    move $a5, $zero
2765; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
2766; LA64F-NEXT:    ld.d $s1, $sp, 8
2767; LA64F-NEXT:    beqz $a0, .LBB39_1
2768; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
2769; LA64F-NEXT:    move $a0, $s1
2770; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
2771; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
2772; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
2773; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
2774; LA64F-NEXT:    addi.d $sp, $sp, 48
2775; LA64F-NEXT:    ret
2776;
2777; LA64D-LABEL: double_fmax_monotonic:
2778; LA64D:       # %bb.0:
2779; LA64D-NEXT:    addi.d $sp, $sp, -32
2780; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
2781; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
2782; LA64D-NEXT:    move $fp, $a0
2783; LA64D-NEXT:    fld.d $fa0, $a0, 0
2784; LA64D-NEXT:    .p2align 4, , 16
2785; LA64D-NEXT:  .LBB39_1: # %atomicrmw.start
2786; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
2787; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
2788; LA64D-NEXT:    vldi $vr2, -912
2789; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa2
2790; LA64D-NEXT:    fst.d $fa0, $sp, 8
2791; LA64D-NEXT:    fst.d $fa1, $sp, 0
2792; LA64D-NEXT:    ori $a0, $zero, 8
2793; LA64D-NEXT:    addi.d $a2, $sp, 8
2794; LA64D-NEXT:    addi.d $a3, $sp, 0
2795; LA64D-NEXT:    move $a1, $fp
2796; LA64D-NEXT:    move $a4, $zero
2797; LA64D-NEXT:    move $a5, $zero
2798; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
2799; LA64D-NEXT:    fld.d $fa0, $sp, 8
2800; LA64D-NEXT:    beqz $a0, .LBB39_1
2801; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
2802; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
2803; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
2804; LA64D-NEXT:    addi.d $sp, $sp, 32
2805; LA64D-NEXT:    ret
2806  %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4
2807  ret double %v
2808}
2809