; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F
; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D

; Tests codegen for atomicrmw fadd/fsub/fmin/fmax on float and double with
; acquire/release orderings. float uses an ll.w/sc.w compare-exchange loop;
; double (align 4, FP64 has no native path here) lowers to libcalls
; (__adddf3/fmin/fmax + __atomic_compare_exchange on LA64F, FP ops +
; __atomic_compare_exchange on LA64D). Do not hand-edit CHECK lines;
; regenerate with update_llc_test_checks.py.

define float @float_fadd_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fadd_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    addi.w $a1, $zero, 1
; LA64F-NEXT:    movgr2fr.w $fa1, $a1
; LA64F-NEXT:    ffint.s.w $fa1, $fa1
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB0_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB0_3 Depth 2
; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB0_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB0_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB0_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB0_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB0_3
; LA64F-NEXT:    b .LBB0_6
; LA64F-NEXT:  .LBB0_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB0_1 Depth=1
; LA64F-NEXT:    dbar 20
; LA64F-NEXT:  .LBB0_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB0_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB0_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fadd_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1168
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB0_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB0_3 Depth 2
; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB0_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB0_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB0_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB0_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB0_3
; LA64D-NEXT:    b .LBB0_6
; LA64D-NEXT:  .LBB0_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB0_1 Depth=1
; LA64D-NEXT:    dbar 20
; LA64D-NEXT:  .LBB0_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB0_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB0_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4
  ret float %v
}

define float @float_fsub_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fsub_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
; LA64F-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI1_0)
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB1_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB1_3 Depth 2
; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB1_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB1_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB1_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB1_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB1_3
; LA64F-NEXT:    b .LBB1_6
; LA64F-NEXT:  .LBB1_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB1_1 Depth=1
; LA64F-NEXT:    dbar 20
; LA64F-NEXT:  .LBB1_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB1_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB1_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fsub_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1040
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB1_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB1_3 Depth 2
; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB1_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB1_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB1_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB1_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB1_3
; LA64D-NEXT:    b .LBB1_6
; LA64D-NEXT:  .LBB1_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB1_1 Depth=1
; LA64D-NEXT:    dbar 20
; LA64D-NEXT:  .LBB1_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB1_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB1_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4
  ret float %v
}

define float @float_fmin_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fmin_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    addi.w $a1, $zero, 1
; LA64F-NEXT:    movgr2fr.w $fa1, $a1
; LA64F-NEXT:    ffint.s.w $fa1, $fa1
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB2_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB2_3 Depth 2
; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64F-NEXT:    fmin.s $fa2, $fa2, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB2_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB2_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB2_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB2_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB2_3
; LA64F-NEXT:    b .LBB2_6
; LA64F-NEXT:  .LBB2_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB2_1 Depth=1
; LA64F-NEXT:    dbar 20
; LA64F-NEXT:  .LBB2_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB2_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB2_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fmin_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1168
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB2_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB2_3 Depth 2
; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64D-NEXT:    fmin.s $fa2, $fa2, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB2_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB2_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB2_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB2_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB2_3
; LA64D-NEXT:    b .LBB2_6
; LA64D-NEXT:  .LBB2_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB2_1 Depth=1
; LA64D-NEXT:    dbar 20
; LA64D-NEXT:  .LBB2_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB2_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB2_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4
  ret float %v
}

define float @float_fmax_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fmax_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    addi.w $a1, $zero, 1
; LA64F-NEXT:    movgr2fr.w $fa1, $a1
; LA64F-NEXT:    ffint.s.w $fa1, $fa1
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB3_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB3_3 Depth 2
; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64F-NEXT:    fmax.s $fa2, $fa2, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB3_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB3_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB3_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB3_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB3_3
; LA64F-NEXT:    b .LBB3_6
; LA64F-NEXT:  .LBB3_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB3_1 Depth=1
; LA64F-NEXT:    dbar 20
; LA64F-NEXT:  .LBB3_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB3_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB3_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fmax_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1168
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB3_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB3_3 Depth 2
; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64D-NEXT:    fmax.s $fa2, $fa2, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB3_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB3_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB3_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB3_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB3_3
; LA64D-NEXT:    b .LBB3_6
; LA64D-NEXT:  .LBB3_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB3_1 Depth=1
; LA64D-NEXT:    dbar 20
; LA64D-NEXT:  .LBB3_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB3_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB3_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4
  ret float %v
}

define double @double_fadd_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    addi.d $sp, $sp, -48
; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT:    move $fp, $a0
; LA64F-NEXT:    ld.d $s1, $a0, 0
; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB4_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    move $a1, $s0
; LA64F-NEXT:    bl %plt(__adddf3)
; LA64F-NEXT:    st.d $s1, $sp, 8
; LA64F-NEXT:    st.d $a0, $sp, 0
; LA64F-NEXT:    ori $a0, $zero, 8
; LA64F-NEXT:    addi.d $a2, $sp, 8
; LA64F-NEXT:    addi.d $a3, $sp, 0
; LA64F-NEXT:    ori $a4, $zero, 2
; LA64F-NEXT:    ori $a5, $zero, 2
; LA64F-NEXT:    move $a1, $fp
; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64F-NEXT:    ld.d $s1, $sp, 8
; LA64F-NEXT:    beqz $a0, .LBB4_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
; LA64F-NEXT:    addi.d $sp, $sp, 48
; LA64F-NEXT:    ret
;
; LA64D-LABEL: double_fadd_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    addi.d $sp, $sp, -32
; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64D-NEXT:    move $fp, $a0
; LA64D-NEXT:    fld.d $fa0, $a0, 0
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB4_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64D-NEXT:    vldi $vr1, -912
; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
; LA64D-NEXT:    fst.d $fa0, $sp, 8
; LA64D-NEXT:    fst.d $fa1, $sp, 0
; LA64D-NEXT:    ori $a0, $zero, 8
; LA64D-NEXT:    addi.d $a2, $sp, 8
; LA64D-NEXT:    addi.d $a3, $sp, 0
; LA64D-NEXT:    ori $a4, $zero, 2
; LA64D-NEXT:    ori $a5, $zero, 2
; LA64D-NEXT:    move $a1, $fp
; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64D-NEXT:    fld.d $fa0, $sp, 8
; LA64D-NEXT:    beqz $a0, .LBB4_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT:    addi.d $sp, $sp, 32
; LA64D-NEXT:    ret
  %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4
  ret double %v
}

define double @double_fsub_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    addi.d $sp, $sp, -48
; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT:    move $fp, $a0
; LA64F-NEXT:    ld.d $s1, $a0, 0
; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB5_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    move $a1, $s0
; LA64F-NEXT:    bl %plt(__adddf3)
; LA64F-NEXT:    st.d $s1, $sp, 8
; LA64F-NEXT:    st.d $a0, $sp, 0
; LA64F-NEXT:    ori $a0, $zero, 8
; LA64F-NEXT:    addi.d $a2, $sp, 8
; LA64F-NEXT:    addi.d $a3, $sp, 0
; LA64F-NEXT:    ori $a4, $zero, 2
; LA64F-NEXT:    ori $a5, $zero, 2
; LA64F-NEXT:    move $a1, $fp
; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64F-NEXT:    ld.d $s1, $sp, 8
; LA64F-NEXT:    beqz $a0, .LBB5_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
; LA64F-NEXT:    addi.d $sp, $sp, 48
; LA64F-NEXT:    ret
;
; LA64D-LABEL: double_fsub_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    addi.d $sp, $sp, -32
; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64D-NEXT:    move $fp, $a0
; LA64D-NEXT:    fld.d $fa0, $a0, 0
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB5_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64D-NEXT:    vldi $vr1, -784
; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
; LA64D-NEXT:    fst.d $fa0, $sp, 8
; LA64D-NEXT:    fst.d $fa1, $sp, 0
; LA64D-NEXT:    ori $a0, $zero, 8
; LA64D-NEXT:    addi.d $a2, $sp, 8
; LA64D-NEXT:    addi.d $a3, $sp, 0
; LA64D-NEXT:    ori $a4, $zero, 2
; LA64D-NEXT:    ori $a5, $zero, 2
; LA64D-NEXT:    move $a1, $fp
; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64D-NEXT:    fld.d $fa0, $sp, 8
; LA64D-NEXT:    beqz $a0, .LBB5_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT:    addi.d $sp, $sp, 32
; LA64D-NEXT:    ret
  %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4
  ret double %v
}

define double @double_fmin_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    addi.d $sp, $sp, -48
; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT:    move $fp, $a0
; LA64F-NEXT:    ld.d $s1, $a0, 0
; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB6_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    move $a1, $s0
; LA64F-NEXT:    bl %plt(fmin)
; LA64F-NEXT:    st.d $s1, $sp, 8
; LA64F-NEXT:    st.d $a0, $sp, 0
; LA64F-NEXT:    ori $a0, $zero, 8
; LA64F-NEXT:    addi.d $a2, $sp, 8
; LA64F-NEXT:    addi.d $a3, $sp, 0
; LA64F-NEXT:    ori $a4, $zero, 2
; LA64F-NEXT:    ori $a5, $zero, 2
; LA64F-NEXT:    move $a1, $fp
; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64F-NEXT:    ld.d $s1, $sp, 8
; LA64F-NEXT:    beqz $a0, .LBB6_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
; LA64F-NEXT:    addi.d $sp, $sp, 48
; LA64F-NEXT:    ret
;
; LA64D-LABEL: double_fmin_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    addi.d $sp, $sp, -32
; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64D-NEXT:    move $fp, $a0
; LA64D-NEXT:    fld.d $fa0, $a0, 0
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB6_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
; LA64D-NEXT:    vldi $vr2, -912
; LA64D-NEXT:    fmin.d $fa1, $fa1, $fa2
; LA64D-NEXT:    fst.d $fa0, $sp, 8
; LA64D-NEXT:    fst.d $fa1, $sp, 0
; LA64D-NEXT:    ori $a0, $zero, 8
; LA64D-NEXT:    addi.d $a2, $sp, 8
; LA64D-NEXT:    addi.d $a3, $sp, 0
; LA64D-NEXT:    ori $a4, $zero, 2
; LA64D-NEXT:    ori $a5, $zero, 2
; LA64D-NEXT:    move $a1, $fp
; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64D-NEXT:    fld.d $fa0, $sp, 8
; LA64D-NEXT:    beqz $a0, .LBB6_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT:    addi.d $sp, $sp, 32
; LA64D-NEXT:    ret
  %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4
  ret double %v
}

define double @double_fmax_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fmax_acquire:
; LA64F:       # %bb.0:
; LA64F-NEXT:    addi.d $sp, $sp, -48
; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT:    move $fp, $a0
; LA64F-NEXT:    ld.d $s1, $a0, 0
; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB7_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    move $a1, $s0
; LA64F-NEXT:    bl %plt(fmax)
; LA64F-NEXT:    st.d $s1, $sp, 8
; LA64F-NEXT:    st.d $a0, $sp, 0
; LA64F-NEXT:    ori $a0, $zero, 8
; LA64F-NEXT:    addi.d $a2, $sp, 8
; LA64F-NEXT:    addi.d $a3, $sp, 0
; LA64F-NEXT:    ori $a4, $zero, 2
; LA64F-NEXT:    ori $a5, $zero, 2
; LA64F-NEXT:    move $a1, $fp
; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64F-NEXT:    ld.d $s1, $sp, 8
; LA64F-NEXT:    beqz $a0, .LBB7_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
; LA64F-NEXT:    addi.d $sp, $sp, 48
; LA64F-NEXT:    ret
;
; LA64D-LABEL: double_fmax_acquire:
; LA64D:       # %bb.0:
; LA64D-NEXT:    addi.d $sp, $sp, -32
; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64D-NEXT:    move $fp, $a0
; LA64D-NEXT:    fld.d $fa0, $a0, 0
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB7_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64D-NEXT:    fmax.d $fa1, $fa0, $fa0
; LA64D-NEXT:    vldi $vr2, -912
; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa2
; LA64D-NEXT:    fst.d $fa0, $sp, 8
; LA64D-NEXT:    fst.d $fa1, $sp, 0
; LA64D-NEXT:    ori $a0, $zero, 8
; LA64D-NEXT:    addi.d $a2, $sp, 8
; LA64D-NEXT:    addi.d $a3, $sp, 0
; LA64D-NEXT:    ori $a4, $zero, 2
; LA64D-NEXT:    ori $a5, $zero, 2
; LA64D-NEXT:    move $a1, $fp
; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64D-NEXT:    fld.d $fa0, $sp, 8
; LA64D-NEXT:    beqz $a0, .LBB7_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT:    addi.d $sp, $sp, 32
; LA64D-NEXT:    ret
  %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4
  ret double %v
}

define float @float_fadd_release(ptr %p) nounwind {
; LA64F-LABEL: float_fadd_release:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    addi.w $a1, $zero, 1
; LA64F-NEXT:    movgr2fr.w $fa1, $a1
; LA64F-NEXT:    ffint.s.w $fa1, $fa1
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB8_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB8_3 Depth 2
; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB8_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB8_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB8_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB8_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB8_3
; LA64F-NEXT:    b .LBB8_6
; LA64F-NEXT:  .LBB8_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB8_1 Depth=1
; LA64F-NEXT:    dbar 1792
; LA64F-NEXT:  .LBB8_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB8_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB8_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fadd_release:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1168
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB8_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB8_3 Depth 2
; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB8_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB8_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB8_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB8_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB8_3
; LA64D-NEXT:    b .LBB8_6
; LA64D-NEXT:  .LBB8_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB8_1 Depth=1
; LA64D-NEXT:    dbar 1792
; LA64D-NEXT:  .LBB8_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB8_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB8_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fadd ptr %p, float 1.0 release, align 4
  ret float %v
}

define float @float_fsub_release(ptr %p) nounwind {
; LA64F-LABEL: float_fsub_release:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI9_0)
; LA64F-NEXT:    fld.s $fa1, $a1, %pc_lo12(.LCPI9_0)
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB9_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB9_3 Depth 2
; LA64F-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB9_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB9_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB9_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB9_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB9_3
; LA64F-NEXT:    b .LBB9_6
; LA64F-NEXT:  .LBB9_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB9_1 Depth=1
; LA64F-NEXT:    dbar 1792
; LA64F-NEXT:  .LBB9_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB9_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB9_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fsub_release:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1040
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB9_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB9_3 Depth 2
; LA64D-NEXT:    fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB9_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB9_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB9_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB9_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB9_3
; LA64D-NEXT:    b .LBB9_6
; LA64D-NEXT:  .LBB9_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB9_1 Depth=1
; LA64D-NEXT:    dbar 1792
; LA64D-NEXT:  .LBB9_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB9_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB9_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fsub ptr %p, float 1.0 release, align 4
  ret float %v
}

define float @float_fmin_release(ptr %p) nounwind {
; LA64F-LABEL: float_fmin_release:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    addi.w $a1, $zero, 1
; LA64F-NEXT:    movgr2fr.w $fa1, $a1
; LA64F-NEXT:    ffint.s.w $fa1, $fa1
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB10_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB10_3 Depth 2
; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64F-NEXT:    fmin.s $fa2, $fa2, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB10_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB10_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB10_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB10_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB10_3
; LA64F-NEXT:    b .LBB10_6
; LA64F-NEXT:  .LBB10_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB10_1 Depth=1
; LA64F-NEXT:    dbar 1792
; LA64F-NEXT:  .LBB10_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB10_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB10_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fmin_release:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1168
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB10_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB10_3 Depth 2
; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64D-NEXT:    fmin.s $fa2, $fa2, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB10_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB10_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB10_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB10_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB10_3
; LA64D-NEXT:    b .LBB10_6
; LA64D-NEXT:  .LBB10_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB10_1 Depth=1
; LA64D-NEXT:    dbar 1792
; LA64D-NEXT:  .LBB10_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB10_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB10_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fmin ptr %p, float 1.0 release, align 4
  ret float %v
}

define float @float_fmax_release(ptr %p) nounwind {
; LA64F-LABEL: float_fmax_release:
; LA64F:       # %bb.0:
; LA64F-NEXT:    fld.s $fa0, $a0, 0
; LA64F-NEXT:    addi.w $a1, $zero, 1
; LA64F-NEXT:    movgr2fr.w $fa1, $a1
; LA64F-NEXT:    ffint.s.w $fa1, $fa1
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB11_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Loop Header: Depth=1
; LA64F-NEXT:    # Child Loop BB11_3 Depth 2
; LA64F-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64F-NEXT:    fmax.s $fa2, $fa2, $fa1
; LA64F-NEXT:    movfr2gr.s $a1, $fa2
; LA64F-NEXT:    movfr2gr.s $a2, $fa0
; LA64F-NEXT:  .LBB11_3: # %atomicrmw.start
; LA64F-NEXT:    # Parent Loop BB11_1 Depth=1
; LA64F-NEXT:    # => This Inner Loop Header: Depth=2
; LA64F-NEXT:    ll.w $a3, $a0, 0
; LA64F-NEXT:    bne $a3, $a2, .LBB11_5
; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB11_3 Depth=2
; LA64F-NEXT:    move $a4, $a1
; LA64F-NEXT:    sc.w $a4, $a0, 0
; LA64F-NEXT:    beqz $a4, .LBB11_3
; LA64F-NEXT:    b .LBB11_6
; LA64F-NEXT:  .LBB11_5: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB11_1 Depth=1
; LA64F-NEXT:    dbar 1792
; LA64F-NEXT:  .LBB11_6: # %atomicrmw.start
; LA64F-NEXT:    # in Loop: Header=BB11_1 Depth=1
; LA64F-NEXT:    movgr2fr.w $fa0, $a3
; LA64F-NEXT:    bne $a3, $a2, .LBB11_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    ret
;
; LA64D-LABEL: float_fmax_release:
; LA64D:       # %bb.0:
; LA64D-NEXT:    fld.s $fa0, $a0, 0
; LA64D-NEXT:    vldi $vr1, -1168
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB11_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Loop Header: Depth=1
; LA64D-NEXT:    # Child Loop BB11_3 Depth 2
; LA64D-NEXT:    fmax.s $fa2, $fa0, $fa0
; LA64D-NEXT:    fmax.s $fa2, $fa2, $fa1
; LA64D-NEXT:    movfr2gr.s $a1, $fa2
; LA64D-NEXT:    movfr2gr.s $a2, $fa0
; LA64D-NEXT:  .LBB11_3: # %atomicrmw.start
; LA64D-NEXT:    # Parent Loop BB11_1 Depth=1
; LA64D-NEXT:    # => This Inner Loop Header: Depth=2
; LA64D-NEXT:    ll.w $a3, $a0, 0
; LA64D-NEXT:    bne $a3, $a2, .LBB11_5
; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB11_3 Depth=2
; LA64D-NEXT:    move $a4, $a1
; LA64D-NEXT:    sc.w $a4, $a0, 0
; LA64D-NEXT:    beqz $a4, .LBB11_3
; LA64D-NEXT:    b .LBB11_6
; LA64D-NEXT:  .LBB11_5: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB11_1 Depth=1
; LA64D-NEXT:    dbar 1792
; LA64D-NEXT:  .LBB11_6: # %atomicrmw.start
; LA64D-NEXT:    # in Loop: Header=BB11_1 Depth=1
; LA64D-NEXT:    movgr2fr.w $fa0, $a3
; LA64D-NEXT:    bne $a3, $a2, .LBB11_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ret
  %v = atomicrmw fmax ptr %p, float 1.0 release, align 4
  ret float %v
}

define double @double_fadd_release(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_release:
; LA64F:       # %bb.0:
; LA64F-NEXT:    addi.d $sp, $sp, -48
; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT:    move $fp, $a0
; LA64F-NEXT:    ld.d $s1, $a0, 0
; LA64F-NEXT:    lu52i.d $s0, $zero, 1023
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB12_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    move $a1, $s0
; LA64F-NEXT:    bl %plt(__adddf3)
; LA64F-NEXT:    st.d $s1, $sp, 8
; LA64F-NEXT:    st.d $a0, $sp, 0
; LA64F-NEXT:    ori $a0, $zero, 8
; LA64F-NEXT:    addi.d $a2, $sp, 8
; LA64F-NEXT:    addi.d $a3, $sp, 0
; LA64F-NEXT:    ori $a4, $zero, 3
; LA64F-NEXT:    move $a1, $fp
; LA64F-NEXT:    move $a5, $zero
; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64F-NEXT:    ld.d $s1, $sp, 8
; LA64F-NEXT:    beqz $a0, .LBB12_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
; LA64F-NEXT:    addi.d $sp, $sp, 48
; LA64F-NEXT:    ret
;
; LA64D-LABEL: double_fadd_release:
; LA64D:       # %bb.0:
; LA64D-NEXT:    addi.d $sp, $sp, -32
; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64D-NEXT:    move $fp, $a0
; LA64D-NEXT:    fld.d $fa0, $a0, 0
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB12_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64D-NEXT:    vldi $vr1, -912
; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
; LA64D-NEXT:    fst.d $fa0, $sp, 8
; LA64D-NEXT:    fst.d $fa1, $sp, 0
; LA64D-NEXT:    ori $a0, $zero, 8
; LA64D-NEXT:    addi.d $a2, $sp, 8
; LA64D-NEXT:    addi.d $a3, $sp, 0
; LA64D-NEXT:    ori $a4, $zero, 3
; LA64D-NEXT:    move $a1, $fp
; LA64D-NEXT:    move $a5, $zero
; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64D-NEXT:    fld.d $fa0, $sp, 8
; LA64D-NEXT:    beqz $a0, .LBB12_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT:    addi.d $sp, $sp, 32
; LA64D-NEXT:    ret
  %v = atomicrmw fadd ptr %p, double 1.0 release, align 4
  ret double %v
}

define double @double_fsub_release(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_release:
; LA64F:       # %bb.0:
; LA64F-NEXT:    addi.d $sp, $sp, -48
; LA64F-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT:    move $fp, $a0
; LA64F-NEXT:    ld.d $s1, $a0, 0
; LA64F-NEXT:    lu52i.d $s0, $zero, -1025
; LA64F-NEXT:    .p2align 4, , 16
; LA64F-NEXT:  .LBB13_1: # %atomicrmw.start
; LA64F-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    move $a1, $s0
; LA64F-NEXT:    bl %plt(__adddf3)
; LA64F-NEXT:    st.d $s1, $sp, 8
; LA64F-NEXT:    st.d $a0, $sp, 0
; LA64F-NEXT:    ori $a0, $zero, 8
; LA64F-NEXT:    addi.d $a2, $sp, 8
; LA64F-NEXT:    addi.d $a3, $sp, 0
; LA64F-NEXT:    ori $a4, $zero, 3
; LA64F-NEXT:    move $a1, $fp
; LA64F-NEXT:    move $a5, $zero
; LA64F-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64F-NEXT:    ld.d $s1, $sp, 8
; LA64F-NEXT:    beqz $a0, .LBB13_1
; LA64F-NEXT:  # %bb.2: # %atomicrmw.end
; LA64F-NEXT:    move $a0, $s1
; LA64F-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
; LA64F-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
; LA64F-NEXT:    addi.d $sp, $sp, 48
; LA64F-NEXT:    ret
;
; LA64D-LABEL: double_fsub_release:
; LA64D:       # %bb.0:
; LA64D-NEXT:    addi.d $sp, $sp, -32
; LA64D-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64D-NEXT:    move $fp, $a0
; LA64D-NEXT:    fld.d $fa0, $a0, 0
; LA64D-NEXT:    .p2align 4, , 16
; LA64D-NEXT:  .LBB13_1: # %atomicrmw.start
; LA64D-NEXT:    # =>This Inner Loop Header: Depth=1
; LA64D-NEXT:    vldi $vr1, -784
; LA64D-NEXT:    fadd.d $fa1, $fa0, $fa1
; LA64D-NEXT:    fst.d $fa0, $sp, 8
; LA64D-NEXT:    fst.d $fa1, $sp, 0
; LA64D-NEXT:    ori $a0, $zero, 8
; LA64D-NEXT:    addi.d $a2, $sp, 8
; LA64D-NEXT:    addi.d $a3, $sp, 0
; LA64D-NEXT:    ori $a4, $zero, 3
; LA64D-NEXT:    move $a1, $fp
; LA64D-NEXT:    move $a5, $zero
; LA64D-NEXT:    bl %plt(__atomic_compare_exchange)
; LA64D-NEXT:    fld.d $fa0, $sp, 8
; LA64D-NEXT:    beqz $a0, .LBB13_1
; LA64D-NEXT:  # %bb.2: # %atomicrmw.end
; LA64D-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64D-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64D-NEXT:    addi.d $sp, $sp, 32
; LA64D-NEXT:    ret
  %v = atomicrmw fsub ptr %p, double 1.0 release, align 4
  ret double %v
}

define double @double_fmin_release(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_release:
; LA64F:       # %bb.0:
;
LA64F-NEXT: addi.d $sp, $sp, -48 991; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 992; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 993; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 994; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 995; LA64F-NEXT: move $fp, $a0 996; LA64F-NEXT: ld.d $s1, $a0, 0 997; LA64F-NEXT: lu52i.d $s0, $zero, 1023 998; LA64F-NEXT: .p2align 4, , 16 999; LA64F-NEXT: .LBB14_1: # %atomicrmw.start 1000; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 1001; LA64F-NEXT: move $a0, $s1 1002; LA64F-NEXT: move $a1, $s0 1003; LA64F-NEXT: bl %plt(fmin) 1004; LA64F-NEXT: st.d $s1, $sp, 8 1005; LA64F-NEXT: st.d $a0, $sp, 0 1006; LA64F-NEXT: ori $a0, $zero, 8 1007; LA64F-NEXT: addi.d $a2, $sp, 8 1008; LA64F-NEXT: addi.d $a3, $sp, 0 1009; LA64F-NEXT: ori $a4, $zero, 3 1010; LA64F-NEXT: move $a1, $fp 1011; LA64F-NEXT: move $a5, $zero 1012; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 1013; LA64F-NEXT: ld.d $s1, $sp, 8 1014; LA64F-NEXT: beqz $a0, .LBB14_1 1015; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1016; LA64F-NEXT: move $a0, $s1 1017; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 1018; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 1019; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 1020; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 1021; LA64F-NEXT: addi.d $sp, $sp, 48 1022; LA64F-NEXT: ret 1023; 1024; LA64D-LABEL: double_fmin_release: 1025; LA64D: # %bb.0: 1026; LA64D-NEXT: addi.d $sp, $sp, -32 1027; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 1028; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 1029; LA64D-NEXT: move $fp, $a0 1030; LA64D-NEXT: fld.d $fa0, $a0, 0 1031; LA64D-NEXT: .p2align 4, , 16 1032; LA64D-NEXT: .LBB14_1: # %atomicrmw.start 1033; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 1034; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 1035; LA64D-NEXT: vldi $vr2, -912 1036; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 1037; LA64D-NEXT: fst.d $fa0, $sp, 8 1038; LA64D-NEXT: fst.d $fa1, $sp, 0 
1039; LA64D-NEXT: ori $a0, $zero, 8 1040; LA64D-NEXT: addi.d $a2, $sp, 8 1041; LA64D-NEXT: addi.d $a3, $sp, 0 1042; LA64D-NEXT: ori $a4, $zero, 3 1043; LA64D-NEXT: move $a1, $fp 1044; LA64D-NEXT: move $a5, $zero 1045; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 1046; LA64D-NEXT: fld.d $fa0, $sp, 8 1047; LA64D-NEXT: beqz $a0, .LBB14_1 1048; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1049; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 1050; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 1051; LA64D-NEXT: addi.d $sp, $sp, 32 1052; LA64D-NEXT: ret 1053 %v = atomicrmw fmin ptr %p, double 1.0 release, align 4 1054 ret double %v 1055} 1056 1057define double @double_fmax_release(ptr %p) nounwind { 1058; LA64F-LABEL: double_fmax_release: 1059; LA64F: # %bb.0: 1060; LA64F-NEXT: addi.d $sp, $sp, -48 1061; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 1062; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 1063; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 1064; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 1065; LA64F-NEXT: move $fp, $a0 1066; LA64F-NEXT: ld.d $s1, $a0, 0 1067; LA64F-NEXT: lu52i.d $s0, $zero, 1023 1068; LA64F-NEXT: .p2align 4, , 16 1069; LA64F-NEXT: .LBB15_1: # %atomicrmw.start 1070; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 1071; LA64F-NEXT: move $a0, $s1 1072; LA64F-NEXT: move $a1, $s0 1073; LA64F-NEXT: bl %plt(fmax) 1074; LA64F-NEXT: st.d $s1, $sp, 8 1075; LA64F-NEXT: st.d $a0, $sp, 0 1076; LA64F-NEXT: ori $a0, $zero, 8 1077; LA64F-NEXT: addi.d $a2, $sp, 8 1078; LA64F-NEXT: addi.d $a3, $sp, 0 1079; LA64F-NEXT: ori $a4, $zero, 3 1080; LA64F-NEXT: move $a1, $fp 1081; LA64F-NEXT: move $a5, $zero 1082; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 1083; LA64F-NEXT: ld.d $s1, $sp, 8 1084; LA64F-NEXT: beqz $a0, .LBB15_1 1085; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1086; LA64F-NEXT: move $a0, $s1 1087; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 1088; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 1089; 
LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 1090; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 1091; LA64F-NEXT: addi.d $sp, $sp, 48 1092; LA64F-NEXT: ret 1093; 1094; LA64D-LABEL: double_fmax_release: 1095; LA64D: # %bb.0: 1096; LA64D-NEXT: addi.d $sp, $sp, -32 1097; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 1098; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 1099; LA64D-NEXT: move $fp, $a0 1100; LA64D-NEXT: fld.d $fa0, $a0, 0 1101; LA64D-NEXT: .p2align 4, , 16 1102; LA64D-NEXT: .LBB15_1: # %atomicrmw.start 1103; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 1104; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 1105; LA64D-NEXT: vldi $vr2, -912 1106; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 1107; LA64D-NEXT: fst.d $fa0, $sp, 8 1108; LA64D-NEXT: fst.d $fa1, $sp, 0 1109; LA64D-NEXT: ori $a0, $zero, 8 1110; LA64D-NEXT: addi.d $a2, $sp, 8 1111; LA64D-NEXT: addi.d $a3, $sp, 0 1112; LA64D-NEXT: ori $a4, $zero, 3 1113; LA64D-NEXT: move $a1, $fp 1114; LA64D-NEXT: move $a5, $zero 1115; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 1116; LA64D-NEXT: fld.d $fa0, $sp, 8 1117; LA64D-NEXT: beqz $a0, .LBB15_1 1118; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1119; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 1120; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 1121; LA64D-NEXT: addi.d $sp, $sp, 32 1122; LA64D-NEXT: ret 1123 %v = atomicrmw fmax ptr %p, double 1.0 release, align 4 1124 ret double %v 1125} 1126 1127define float @float_fadd_acq_rel(ptr %p) nounwind { 1128; LA64F-LABEL: float_fadd_acq_rel: 1129; LA64F: # %bb.0: 1130; LA64F-NEXT: fld.s $fa0, $a0, 0 1131; LA64F-NEXT: addi.w $a1, $zero, 1 1132; LA64F-NEXT: movgr2fr.w $fa1, $a1 1133; LA64F-NEXT: ffint.s.w $fa1, $fa1 1134; LA64F-NEXT: .p2align 4, , 16 1135; LA64F-NEXT: .LBB16_1: # %atomicrmw.start 1136; LA64F-NEXT: # =>This Loop Header: Depth=1 1137; LA64F-NEXT: # Child Loop BB16_3 Depth 2 1138; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 1139; LA64F-NEXT: movfr2gr.s $a1, $fa2 1140; LA64F-NEXT: 
movfr2gr.s $a2, $fa0 1141; LA64F-NEXT: .LBB16_3: # %atomicrmw.start 1142; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 1143; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1144; LA64F-NEXT: ll.w $a3, $a0, 0 1145; LA64F-NEXT: bne $a3, $a2, .LBB16_5 1146; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1147; LA64F-NEXT: # in Loop: Header=BB16_3 Depth=2 1148; LA64F-NEXT: move $a4, $a1 1149; LA64F-NEXT: sc.w $a4, $a0, 0 1150; LA64F-NEXT: beqz $a4, .LBB16_3 1151; LA64F-NEXT: b .LBB16_6 1152; LA64F-NEXT: .LBB16_5: # %atomicrmw.start 1153; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 1154; LA64F-NEXT: dbar 20 1155; LA64F-NEXT: .LBB16_6: # %atomicrmw.start 1156; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 1157; LA64F-NEXT: movgr2fr.w $fa0, $a3 1158; LA64F-NEXT: bne $a3, $a2, .LBB16_1 1159; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1160; LA64F-NEXT: ret 1161; 1162; LA64D-LABEL: float_fadd_acq_rel: 1163; LA64D: # %bb.0: 1164; LA64D-NEXT: fld.s $fa0, $a0, 0 1165; LA64D-NEXT: vldi $vr1, -1168 1166; LA64D-NEXT: .p2align 4, , 16 1167; LA64D-NEXT: .LBB16_1: # %atomicrmw.start 1168; LA64D-NEXT: # =>This Loop Header: Depth=1 1169; LA64D-NEXT: # Child Loop BB16_3 Depth 2 1170; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 1171; LA64D-NEXT: movfr2gr.s $a1, $fa2 1172; LA64D-NEXT: movfr2gr.s $a2, $fa0 1173; LA64D-NEXT: .LBB16_3: # %atomicrmw.start 1174; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 1175; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1176; LA64D-NEXT: ll.w $a3, $a0, 0 1177; LA64D-NEXT: bne $a3, $a2, .LBB16_5 1178; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1179; LA64D-NEXT: # in Loop: Header=BB16_3 Depth=2 1180; LA64D-NEXT: move $a4, $a1 1181; LA64D-NEXT: sc.w $a4, $a0, 0 1182; LA64D-NEXT: beqz $a4, .LBB16_3 1183; LA64D-NEXT: b .LBB16_6 1184; LA64D-NEXT: .LBB16_5: # %atomicrmw.start 1185; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 1186; LA64D-NEXT: dbar 20 1187; LA64D-NEXT: .LBB16_6: # %atomicrmw.start 1188; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 1189; LA64D-NEXT: movgr2fr.w $fa0, $a3 
1190; LA64D-NEXT: bne $a3, $a2, .LBB16_1 1191; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1192; LA64D-NEXT: ret 1193 %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 1194 ret float %v 1195} 1196 1197define float @float_fsub_acq_rel(ptr %p) nounwind { 1198; LA64F-LABEL: float_fsub_acq_rel: 1199; LA64F: # %bb.0: 1200; LA64F-NEXT: fld.s $fa0, $a0, 0 1201; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) 1202; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI17_0) 1203; LA64F-NEXT: .p2align 4, , 16 1204; LA64F-NEXT: .LBB17_1: # %atomicrmw.start 1205; LA64F-NEXT: # =>This Loop Header: Depth=1 1206; LA64F-NEXT: # Child Loop BB17_3 Depth 2 1207; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 1208; LA64F-NEXT: movfr2gr.s $a1, $fa2 1209; LA64F-NEXT: movfr2gr.s $a2, $fa0 1210; LA64F-NEXT: .LBB17_3: # %atomicrmw.start 1211; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 1212; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1213; LA64F-NEXT: ll.w $a3, $a0, 0 1214; LA64F-NEXT: bne $a3, $a2, .LBB17_5 1215; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1216; LA64F-NEXT: # in Loop: Header=BB17_3 Depth=2 1217; LA64F-NEXT: move $a4, $a1 1218; LA64F-NEXT: sc.w $a4, $a0, 0 1219; LA64F-NEXT: beqz $a4, .LBB17_3 1220; LA64F-NEXT: b .LBB17_6 1221; LA64F-NEXT: .LBB17_5: # %atomicrmw.start 1222; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 1223; LA64F-NEXT: dbar 20 1224; LA64F-NEXT: .LBB17_6: # %atomicrmw.start 1225; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 1226; LA64F-NEXT: movgr2fr.w $fa0, $a3 1227; LA64F-NEXT: bne $a3, $a2, .LBB17_1 1228; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1229; LA64F-NEXT: ret 1230; 1231; LA64D-LABEL: float_fsub_acq_rel: 1232; LA64D: # %bb.0: 1233; LA64D-NEXT: fld.s $fa0, $a0, 0 1234; LA64D-NEXT: vldi $vr1, -1040 1235; LA64D-NEXT: .p2align 4, , 16 1236; LA64D-NEXT: .LBB17_1: # %atomicrmw.start 1237; LA64D-NEXT: # =>This Loop Header: Depth=1 1238; LA64D-NEXT: # Child Loop BB17_3 Depth 2 1239; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 1240; LA64D-NEXT: movfr2gr.s $a1, $fa2 1241; LA64D-NEXT: 
movfr2gr.s $a2, $fa0 1242; LA64D-NEXT: .LBB17_3: # %atomicrmw.start 1243; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 1244; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1245; LA64D-NEXT: ll.w $a3, $a0, 0 1246; LA64D-NEXT: bne $a3, $a2, .LBB17_5 1247; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1248; LA64D-NEXT: # in Loop: Header=BB17_3 Depth=2 1249; LA64D-NEXT: move $a4, $a1 1250; LA64D-NEXT: sc.w $a4, $a0, 0 1251; LA64D-NEXT: beqz $a4, .LBB17_3 1252; LA64D-NEXT: b .LBB17_6 1253; LA64D-NEXT: .LBB17_5: # %atomicrmw.start 1254; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 1255; LA64D-NEXT: dbar 20 1256; LA64D-NEXT: .LBB17_6: # %atomicrmw.start 1257; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 1258; LA64D-NEXT: movgr2fr.w $fa0, $a3 1259; LA64D-NEXT: bne $a3, $a2, .LBB17_1 1260; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1261; LA64D-NEXT: ret 1262 %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 1263 ret float %v 1264} 1265 1266define float @float_fmin_acq_rel(ptr %p) nounwind { 1267; LA64F-LABEL: float_fmin_acq_rel: 1268; LA64F: # %bb.0: 1269; LA64F-NEXT: fld.s $fa0, $a0, 0 1270; LA64F-NEXT: addi.w $a1, $zero, 1 1271; LA64F-NEXT: movgr2fr.w $fa1, $a1 1272; LA64F-NEXT: ffint.s.w $fa1, $fa1 1273; LA64F-NEXT: .p2align 4, , 16 1274; LA64F-NEXT: .LBB18_1: # %atomicrmw.start 1275; LA64F-NEXT: # =>This Loop Header: Depth=1 1276; LA64F-NEXT: # Child Loop BB18_3 Depth 2 1277; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 1278; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 1279; LA64F-NEXT: movfr2gr.s $a1, $fa2 1280; LA64F-NEXT: movfr2gr.s $a2, $fa0 1281; LA64F-NEXT: .LBB18_3: # %atomicrmw.start 1282; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 1283; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1284; LA64F-NEXT: ll.w $a3, $a0, 0 1285; LA64F-NEXT: bne $a3, $a2, .LBB18_5 1286; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1287; LA64F-NEXT: # in Loop: Header=BB18_3 Depth=2 1288; LA64F-NEXT: move $a4, $a1 1289; LA64F-NEXT: sc.w $a4, $a0, 0 1290; LA64F-NEXT: beqz $a4, .LBB18_3 1291; LA64F-NEXT: b .LBB18_6 
1292; LA64F-NEXT: .LBB18_5: # %atomicrmw.start 1293; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 1294; LA64F-NEXT: dbar 20 1295; LA64F-NEXT: .LBB18_6: # %atomicrmw.start 1296; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 1297; LA64F-NEXT: movgr2fr.w $fa0, $a3 1298; LA64F-NEXT: bne $a3, $a2, .LBB18_1 1299; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1300; LA64F-NEXT: ret 1301; 1302; LA64D-LABEL: float_fmin_acq_rel: 1303; LA64D: # %bb.0: 1304; LA64D-NEXT: fld.s $fa0, $a0, 0 1305; LA64D-NEXT: vldi $vr1, -1168 1306; LA64D-NEXT: .p2align 4, , 16 1307; LA64D-NEXT: .LBB18_1: # %atomicrmw.start 1308; LA64D-NEXT: # =>This Loop Header: Depth=1 1309; LA64D-NEXT: # Child Loop BB18_3 Depth 2 1310; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 1311; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 1312; LA64D-NEXT: movfr2gr.s $a1, $fa2 1313; LA64D-NEXT: movfr2gr.s $a2, $fa0 1314; LA64D-NEXT: .LBB18_3: # %atomicrmw.start 1315; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 1316; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1317; LA64D-NEXT: ll.w $a3, $a0, 0 1318; LA64D-NEXT: bne $a3, $a2, .LBB18_5 1319; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1320; LA64D-NEXT: # in Loop: Header=BB18_3 Depth=2 1321; LA64D-NEXT: move $a4, $a1 1322; LA64D-NEXT: sc.w $a4, $a0, 0 1323; LA64D-NEXT: beqz $a4, .LBB18_3 1324; LA64D-NEXT: b .LBB18_6 1325; LA64D-NEXT: .LBB18_5: # %atomicrmw.start 1326; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 1327; LA64D-NEXT: dbar 20 1328; LA64D-NEXT: .LBB18_6: # %atomicrmw.start 1329; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 1330; LA64D-NEXT: movgr2fr.w $fa0, $a3 1331; LA64D-NEXT: bne $a3, $a2, .LBB18_1 1332; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1333; LA64D-NEXT: ret 1334 %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 1335 ret float %v 1336} 1337 1338define float @float_fmax_acq_rel(ptr %p) nounwind { 1339; LA64F-LABEL: float_fmax_acq_rel: 1340; LA64F: # %bb.0: 1341; LA64F-NEXT: fld.s $fa0, $a0, 0 1342; LA64F-NEXT: addi.w $a1, $zero, 1 1343; LA64F-NEXT: movgr2fr.w $fa1, $a1 1344; 
LA64F-NEXT: ffint.s.w $fa1, $fa1 1345; LA64F-NEXT: .p2align 4, , 16 1346; LA64F-NEXT: .LBB19_1: # %atomicrmw.start 1347; LA64F-NEXT: # =>This Loop Header: Depth=1 1348; LA64F-NEXT: # Child Loop BB19_3 Depth 2 1349; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 1350; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 1351; LA64F-NEXT: movfr2gr.s $a1, $fa2 1352; LA64F-NEXT: movfr2gr.s $a2, $fa0 1353; LA64F-NEXT: .LBB19_3: # %atomicrmw.start 1354; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 1355; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1356; LA64F-NEXT: ll.w $a3, $a0, 0 1357; LA64F-NEXT: bne $a3, $a2, .LBB19_5 1358; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1359; LA64F-NEXT: # in Loop: Header=BB19_3 Depth=2 1360; LA64F-NEXT: move $a4, $a1 1361; LA64F-NEXT: sc.w $a4, $a0, 0 1362; LA64F-NEXT: beqz $a4, .LBB19_3 1363; LA64F-NEXT: b .LBB19_6 1364; LA64F-NEXT: .LBB19_5: # %atomicrmw.start 1365; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 1366; LA64F-NEXT: dbar 20 1367; LA64F-NEXT: .LBB19_6: # %atomicrmw.start 1368; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 1369; LA64F-NEXT: movgr2fr.w $fa0, $a3 1370; LA64F-NEXT: bne $a3, $a2, .LBB19_1 1371; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1372; LA64F-NEXT: ret 1373; 1374; LA64D-LABEL: float_fmax_acq_rel: 1375; LA64D: # %bb.0: 1376; LA64D-NEXT: fld.s $fa0, $a0, 0 1377; LA64D-NEXT: vldi $vr1, -1168 1378; LA64D-NEXT: .p2align 4, , 16 1379; LA64D-NEXT: .LBB19_1: # %atomicrmw.start 1380; LA64D-NEXT: # =>This Loop Header: Depth=1 1381; LA64D-NEXT: # Child Loop BB19_3 Depth 2 1382; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 1383; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 1384; LA64D-NEXT: movfr2gr.s $a1, $fa2 1385; LA64D-NEXT: movfr2gr.s $a2, $fa0 1386; LA64D-NEXT: .LBB19_3: # %atomicrmw.start 1387; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 1388; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1389; LA64D-NEXT: ll.w $a3, $a0, 0 1390; LA64D-NEXT: bne $a3, $a2, .LBB19_5 1391; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1392; LA64D-NEXT: # in Loop: Header=BB19_3 Depth=2 1393; 
LA64D-NEXT: move $a4, $a1 1394; LA64D-NEXT: sc.w $a4, $a0, 0 1395; LA64D-NEXT: beqz $a4, .LBB19_3 1396; LA64D-NEXT: b .LBB19_6 1397; LA64D-NEXT: .LBB19_5: # %atomicrmw.start 1398; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 1399; LA64D-NEXT: dbar 20 1400; LA64D-NEXT: .LBB19_6: # %atomicrmw.start 1401; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 1402; LA64D-NEXT: movgr2fr.w $fa0, $a3 1403; LA64D-NEXT: bne $a3, $a2, .LBB19_1 1404; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1405; LA64D-NEXT: ret 1406 %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 1407 ret float %v 1408} 1409 1410define double @double_fadd_acq_rel(ptr %p) nounwind { 1411; LA64F-LABEL: double_fadd_acq_rel: 1412; LA64F: # %bb.0: 1413; LA64F-NEXT: addi.d $sp, $sp, -48 1414; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 1415; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 1416; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 1417; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 1418; LA64F-NEXT: move $fp, $a0 1419; LA64F-NEXT: ld.d $s1, $a0, 0 1420; LA64F-NEXT: lu52i.d $s0, $zero, 1023 1421; LA64F-NEXT: .p2align 4, , 16 1422; LA64F-NEXT: .LBB20_1: # %atomicrmw.start 1423; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 1424; LA64F-NEXT: move $a0, $s1 1425; LA64F-NEXT: move $a1, $s0 1426; LA64F-NEXT: bl %plt(__adddf3) 1427; LA64F-NEXT: st.d $s1, $sp, 8 1428; LA64F-NEXT: st.d $a0, $sp, 0 1429; LA64F-NEXT: ori $a0, $zero, 8 1430; LA64F-NEXT: addi.d $a2, $sp, 8 1431; LA64F-NEXT: addi.d $a3, $sp, 0 1432; LA64F-NEXT: ori $a4, $zero, 4 1433; LA64F-NEXT: ori $a5, $zero, 2 1434; LA64F-NEXT: move $a1, $fp 1435; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 1436; LA64F-NEXT: ld.d $s1, $sp, 8 1437; LA64F-NEXT: beqz $a0, .LBB20_1 1438; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1439; LA64F-NEXT: move $a0, $s1 1440; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 1441; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 1442; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 1443; 
LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 1444; LA64F-NEXT: addi.d $sp, $sp, 48 1445; LA64F-NEXT: ret 1446; 1447; LA64D-LABEL: double_fadd_acq_rel: 1448; LA64D: # %bb.0: 1449; LA64D-NEXT: addi.d $sp, $sp, -32 1450; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 1451; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 1452; LA64D-NEXT: move $fp, $a0 1453; LA64D-NEXT: fld.d $fa0, $a0, 0 1454; LA64D-NEXT: .p2align 4, , 16 1455; LA64D-NEXT: .LBB20_1: # %atomicrmw.start 1456; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 1457; LA64D-NEXT: vldi $vr1, -912 1458; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 1459; LA64D-NEXT: fst.d $fa0, $sp, 8 1460; LA64D-NEXT: fst.d $fa1, $sp, 0 1461; LA64D-NEXT: ori $a0, $zero, 8 1462; LA64D-NEXT: addi.d $a2, $sp, 8 1463; LA64D-NEXT: addi.d $a3, $sp, 0 1464; LA64D-NEXT: ori $a4, $zero, 4 1465; LA64D-NEXT: ori $a5, $zero, 2 1466; LA64D-NEXT: move $a1, $fp 1467; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 1468; LA64D-NEXT: fld.d $fa0, $sp, 8 1469; LA64D-NEXT: beqz $a0, .LBB20_1 1470; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1471; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 1472; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 1473; LA64D-NEXT: addi.d $sp, $sp, 32 1474; LA64D-NEXT: ret 1475 %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4 1476 ret double %v 1477} 1478 1479define double @double_fsub_acq_rel(ptr %p) nounwind { 1480; LA64F-LABEL: double_fsub_acq_rel: 1481; LA64F: # %bb.0: 1482; LA64F-NEXT: addi.d $sp, $sp, -48 1483; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 1484; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 1485; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 1486; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 1487; LA64F-NEXT: move $fp, $a0 1488; LA64F-NEXT: ld.d $s1, $a0, 0 1489; LA64F-NEXT: lu52i.d $s0, $zero, -1025 1490; LA64F-NEXT: .p2align 4, , 16 1491; LA64F-NEXT: .LBB21_1: # %atomicrmw.start 1492; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 1493; 
LA64F-NEXT: move $a0, $s1 1494; LA64F-NEXT: move $a1, $s0 1495; LA64F-NEXT: bl %plt(__adddf3) 1496; LA64F-NEXT: st.d $s1, $sp, 8 1497; LA64F-NEXT: st.d $a0, $sp, 0 1498; LA64F-NEXT: ori $a0, $zero, 8 1499; LA64F-NEXT: addi.d $a2, $sp, 8 1500; LA64F-NEXT: addi.d $a3, $sp, 0 1501; LA64F-NEXT: ori $a4, $zero, 4 1502; LA64F-NEXT: ori $a5, $zero, 2 1503; LA64F-NEXT: move $a1, $fp 1504; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 1505; LA64F-NEXT: ld.d $s1, $sp, 8 1506; LA64F-NEXT: beqz $a0, .LBB21_1 1507; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1508; LA64F-NEXT: move $a0, $s1 1509; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 1510; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 1511; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 1512; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 1513; LA64F-NEXT: addi.d $sp, $sp, 48 1514; LA64F-NEXT: ret 1515; 1516; LA64D-LABEL: double_fsub_acq_rel: 1517; LA64D: # %bb.0: 1518; LA64D-NEXT: addi.d $sp, $sp, -32 1519; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 1520; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 1521; LA64D-NEXT: move $fp, $a0 1522; LA64D-NEXT: fld.d $fa0, $a0, 0 1523; LA64D-NEXT: .p2align 4, , 16 1524; LA64D-NEXT: .LBB21_1: # %atomicrmw.start 1525; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 1526; LA64D-NEXT: vldi $vr1, -784 1527; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 1528; LA64D-NEXT: fst.d $fa0, $sp, 8 1529; LA64D-NEXT: fst.d $fa1, $sp, 0 1530; LA64D-NEXT: ori $a0, $zero, 8 1531; LA64D-NEXT: addi.d $a2, $sp, 8 1532; LA64D-NEXT: addi.d $a3, $sp, 0 1533; LA64D-NEXT: ori $a4, $zero, 4 1534; LA64D-NEXT: ori $a5, $zero, 2 1535; LA64D-NEXT: move $a1, $fp 1536; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 1537; LA64D-NEXT: fld.d $fa0, $sp, 8 1538; LA64D-NEXT: beqz $a0, .LBB21_1 1539; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1540; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 1541; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 1542; LA64D-NEXT: addi.d $sp, $sp, 32 1543; 
LA64D-NEXT: ret 1544 %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4 1545 ret double %v 1546} 1547 1548define double @double_fmin_acq_rel(ptr %p) nounwind { 1549; LA64F-LABEL: double_fmin_acq_rel: 1550; LA64F: # %bb.0: 1551; LA64F-NEXT: addi.d $sp, $sp, -48 1552; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 1553; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 1554; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 1555; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 1556; LA64F-NEXT: move $fp, $a0 1557; LA64F-NEXT: ld.d $s1, $a0, 0 1558; LA64F-NEXT: lu52i.d $s0, $zero, 1023 1559; LA64F-NEXT: .p2align 4, , 16 1560; LA64F-NEXT: .LBB22_1: # %atomicrmw.start 1561; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 1562; LA64F-NEXT: move $a0, $s1 1563; LA64F-NEXT: move $a1, $s0 1564; LA64F-NEXT: bl %plt(fmin) 1565; LA64F-NEXT: st.d $s1, $sp, 8 1566; LA64F-NEXT: st.d $a0, $sp, 0 1567; LA64F-NEXT: ori $a0, $zero, 8 1568; LA64F-NEXT: addi.d $a2, $sp, 8 1569; LA64F-NEXT: addi.d $a3, $sp, 0 1570; LA64F-NEXT: ori $a4, $zero, 4 1571; LA64F-NEXT: ori $a5, $zero, 2 1572; LA64F-NEXT: move $a1, $fp 1573; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 1574; LA64F-NEXT: ld.d $s1, $sp, 8 1575; LA64F-NEXT: beqz $a0, .LBB22_1 1576; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1577; LA64F-NEXT: move $a0, $s1 1578; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 1579; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 1580; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 1581; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 1582; LA64F-NEXT: addi.d $sp, $sp, 48 1583; LA64F-NEXT: ret 1584; 1585; LA64D-LABEL: double_fmin_acq_rel: 1586; LA64D: # %bb.0: 1587; LA64D-NEXT: addi.d $sp, $sp, -32 1588; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 1589; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 1590; LA64D-NEXT: move $fp, $a0 1591; LA64D-NEXT: fld.d $fa0, $a0, 0 1592; LA64D-NEXT: .p2align 4, , 16 1593; LA64D-NEXT: .LBB22_1: # %atomicrmw.start 
1594; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 1595; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 1596; LA64D-NEXT: vldi $vr2, -912 1597; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 1598; LA64D-NEXT: fst.d $fa0, $sp, 8 1599; LA64D-NEXT: fst.d $fa1, $sp, 0 1600; LA64D-NEXT: ori $a0, $zero, 8 1601; LA64D-NEXT: addi.d $a2, $sp, 8 1602; LA64D-NEXT: addi.d $a3, $sp, 0 1603; LA64D-NEXT: ori $a4, $zero, 4 1604; LA64D-NEXT: ori $a5, $zero, 2 1605; LA64D-NEXT: move $a1, $fp 1606; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 1607; LA64D-NEXT: fld.d $fa0, $sp, 8 1608; LA64D-NEXT: beqz $a0, .LBB22_1 1609; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1610; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 1611; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 1612; LA64D-NEXT: addi.d $sp, $sp, 32 1613; LA64D-NEXT: ret 1614 %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 1615 ret double %v 1616} 1617 1618define double @double_fmax_acq_rel(ptr %p) nounwind { 1619; LA64F-LABEL: double_fmax_acq_rel: 1620; LA64F: # %bb.0: 1621; LA64F-NEXT: addi.d $sp, $sp, -48 1622; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 1623; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 1624; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 1625; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 1626; LA64F-NEXT: move $fp, $a0 1627; LA64F-NEXT: ld.d $s1, $a0, 0 1628; LA64F-NEXT: lu52i.d $s0, $zero, 1023 1629; LA64F-NEXT: .p2align 4, , 16 1630; LA64F-NEXT: .LBB23_1: # %atomicrmw.start 1631; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 1632; LA64F-NEXT: move $a0, $s1 1633; LA64F-NEXT: move $a1, $s0 1634; LA64F-NEXT: bl %plt(fmax) 1635; LA64F-NEXT: st.d $s1, $sp, 8 1636; LA64F-NEXT: st.d $a0, $sp, 0 1637; LA64F-NEXT: ori $a0, $zero, 8 1638; LA64F-NEXT: addi.d $a2, $sp, 8 1639; LA64F-NEXT: addi.d $a3, $sp, 0 1640; LA64F-NEXT: ori $a4, $zero, 4 1641; LA64F-NEXT: ori $a5, $zero, 2 1642; LA64F-NEXT: move $a1, $fp 1643; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 1644; LA64F-NEXT: ld.d $s1, 
$sp, 8 1645; LA64F-NEXT: beqz $a0, .LBB23_1 1646; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1647; LA64F-NEXT: move $a0, $s1 1648; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 1649; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 1650; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 1651; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 1652; LA64F-NEXT: addi.d $sp, $sp, 48 1653; LA64F-NEXT: ret 1654; 1655; LA64D-LABEL: double_fmax_acq_rel: 1656; LA64D: # %bb.0: 1657; LA64D-NEXT: addi.d $sp, $sp, -32 1658; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 1659; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 1660; LA64D-NEXT: move $fp, $a0 1661; LA64D-NEXT: fld.d $fa0, $a0, 0 1662; LA64D-NEXT: .p2align 4, , 16 1663; LA64D-NEXT: .LBB23_1: # %atomicrmw.start 1664; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 1665; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 1666; LA64D-NEXT: vldi $vr2, -912 1667; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 1668; LA64D-NEXT: fst.d $fa0, $sp, 8 1669; LA64D-NEXT: fst.d $fa1, $sp, 0 1670; LA64D-NEXT: ori $a0, $zero, 8 1671; LA64D-NEXT: addi.d $a2, $sp, 8 1672; LA64D-NEXT: addi.d $a3, $sp, 0 1673; LA64D-NEXT: ori $a4, $zero, 4 1674; LA64D-NEXT: ori $a5, $zero, 2 1675; LA64D-NEXT: move $a1, $fp 1676; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 1677; LA64D-NEXT: fld.d $fa0, $sp, 8 1678; LA64D-NEXT: beqz $a0, .LBB23_1 1679; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1680; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 1681; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 1682; LA64D-NEXT: addi.d $sp, $sp, 32 1683; LA64D-NEXT: ret 1684 %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 1685 ret double %v 1686} 1687 1688define float @float_fadd_seq_cst(ptr %p) nounwind { 1689; LA64F-LABEL: float_fadd_seq_cst: 1690; LA64F: # %bb.0: 1691; LA64F-NEXT: fld.s $fa0, $a0, 0 1692; LA64F-NEXT: addi.w $a1, $zero, 1 1693; LA64F-NEXT: movgr2fr.w $fa1, $a1 1694; LA64F-NEXT: ffint.s.w $fa1, $fa1 1695; LA64F-NEXT: .p2align 4, , 16 
1696; LA64F-NEXT: .LBB24_1: # %atomicrmw.start 1697; LA64F-NEXT: # =>This Loop Header: Depth=1 1698; LA64F-NEXT: # Child Loop BB24_3 Depth 2 1699; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 1700; LA64F-NEXT: movfr2gr.s $a1, $fa2 1701; LA64F-NEXT: movfr2gr.s $a2, $fa0 1702; LA64F-NEXT: .LBB24_3: # %atomicrmw.start 1703; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 1704; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1705; LA64F-NEXT: ll.w $a3, $a0, 0 1706; LA64F-NEXT: bne $a3, $a2, .LBB24_5 1707; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1708; LA64F-NEXT: # in Loop: Header=BB24_3 Depth=2 1709; LA64F-NEXT: move $a4, $a1 1710; LA64F-NEXT: sc.w $a4, $a0, 0 1711; LA64F-NEXT: beqz $a4, .LBB24_3 1712; LA64F-NEXT: b .LBB24_6 1713; LA64F-NEXT: .LBB24_5: # %atomicrmw.start 1714; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 1715; LA64F-NEXT: dbar 20 1716; LA64F-NEXT: .LBB24_6: # %atomicrmw.start 1717; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 1718; LA64F-NEXT: movgr2fr.w $fa0, $a3 1719; LA64F-NEXT: bne $a3, $a2, .LBB24_1 1720; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1721; LA64F-NEXT: ret 1722; 1723; LA64D-LABEL: float_fadd_seq_cst: 1724; LA64D: # %bb.0: 1725; LA64D-NEXT: fld.s $fa0, $a0, 0 1726; LA64D-NEXT: vldi $vr1, -1168 1727; LA64D-NEXT: .p2align 4, , 16 1728; LA64D-NEXT: .LBB24_1: # %atomicrmw.start 1729; LA64D-NEXT: # =>This Loop Header: Depth=1 1730; LA64D-NEXT: # Child Loop BB24_3 Depth 2 1731; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 1732; LA64D-NEXT: movfr2gr.s $a1, $fa2 1733; LA64D-NEXT: movfr2gr.s $a2, $fa0 1734; LA64D-NEXT: .LBB24_3: # %atomicrmw.start 1735; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 1736; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1737; LA64D-NEXT: ll.w $a3, $a0, 0 1738; LA64D-NEXT: bne $a3, $a2, .LBB24_5 1739; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1740; LA64D-NEXT: # in Loop: Header=BB24_3 Depth=2 1741; LA64D-NEXT: move $a4, $a1 1742; LA64D-NEXT: sc.w $a4, $a0, 0 1743; LA64D-NEXT: beqz $a4, .LBB24_3 1744; LA64D-NEXT: b .LBB24_6 1745; LA64D-NEXT: 
.LBB24_5: # %atomicrmw.start 1746; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 1747; LA64D-NEXT: dbar 20 1748; LA64D-NEXT: .LBB24_6: # %atomicrmw.start 1749; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 1750; LA64D-NEXT: movgr2fr.w $fa0, $a3 1751; LA64D-NEXT: bne $a3, $a2, .LBB24_1 1752; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1753; LA64D-NEXT: ret 1754 %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 1755 ret float %v 1756} 1757 1758define float @float_fsub_seq_cst(ptr %p) nounwind { 1759; LA64F-LABEL: float_fsub_seq_cst: 1760; LA64F: # %bb.0: 1761; LA64F-NEXT: fld.s $fa0, $a0, 0 1762; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) 1763; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI25_0) 1764; LA64F-NEXT: .p2align 4, , 16 1765; LA64F-NEXT: .LBB25_1: # %atomicrmw.start 1766; LA64F-NEXT: # =>This Loop Header: Depth=1 1767; LA64F-NEXT: # Child Loop BB25_3 Depth 2 1768; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 1769; LA64F-NEXT: movfr2gr.s $a1, $fa2 1770; LA64F-NEXT: movfr2gr.s $a2, $fa0 1771; LA64F-NEXT: .LBB25_3: # %atomicrmw.start 1772; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 1773; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1774; LA64F-NEXT: ll.w $a3, $a0, 0 1775; LA64F-NEXT: bne $a3, $a2, .LBB25_5 1776; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1777; LA64F-NEXT: # in Loop: Header=BB25_3 Depth=2 1778; LA64F-NEXT: move $a4, $a1 1779; LA64F-NEXT: sc.w $a4, $a0, 0 1780; LA64F-NEXT: beqz $a4, .LBB25_3 1781; LA64F-NEXT: b .LBB25_6 1782; LA64F-NEXT: .LBB25_5: # %atomicrmw.start 1783; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 1784; LA64F-NEXT: dbar 20 1785; LA64F-NEXT: .LBB25_6: # %atomicrmw.start 1786; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 1787; LA64F-NEXT: movgr2fr.w $fa0, $a3 1788; LA64F-NEXT: bne $a3, $a2, .LBB25_1 1789; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1790; LA64F-NEXT: ret 1791; 1792; LA64D-LABEL: float_fsub_seq_cst: 1793; LA64D: # %bb.0: 1794; LA64D-NEXT: fld.s $fa0, $a0, 0 1795; LA64D-NEXT: vldi $vr1, -1040 1796; LA64D-NEXT: .p2align 4, , 16 1797; 
LA64D-NEXT: .LBB25_1: # %atomicrmw.start 1798; LA64D-NEXT: # =>This Loop Header: Depth=1 1799; LA64D-NEXT: # Child Loop BB25_3 Depth 2 1800; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 1801; LA64D-NEXT: movfr2gr.s $a1, $fa2 1802; LA64D-NEXT: movfr2gr.s $a2, $fa0 1803; LA64D-NEXT: .LBB25_3: # %atomicrmw.start 1804; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 1805; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1806; LA64D-NEXT: ll.w $a3, $a0, 0 1807; LA64D-NEXT: bne $a3, $a2, .LBB25_5 1808; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1809; LA64D-NEXT: # in Loop: Header=BB25_3 Depth=2 1810; LA64D-NEXT: move $a4, $a1 1811; LA64D-NEXT: sc.w $a4, $a0, 0 1812; LA64D-NEXT: beqz $a4, .LBB25_3 1813; LA64D-NEXT: b .LBB25_6 1814; LA64D-NEXT: .LBB25_5: # %atomicrmw.start 1815; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 1816; LA64D-NEXT: dbar 20 1817; LA64D-NEXT: .LBB25_6: # %atomicrmw.start 1818; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 1819; LA64D-NEXT: movgr2fr.w $fa0, $a3 1820; LA64D-NEXT: bne $a3, $a2, .LBB25_1 1821; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1822; LA64D-NEXT: ret 1823 %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 1824 ret float %v 1825} 1826 1827define float @float_fmin_seq_cst(ptr %p) nounwind { 1828; LA64F-LABEL: float_fmin_seq_cst: 1829; LA64F: # %bb.0: 1830; LA64F-NEXT: fld.s $fa0, $a0, 0 1831; LA64F-NEXT: addi.w $a1, $zero, 1 1832; LA64F-NEXT: movgr2fr.w $fa1, $a1 1833; LA64F-NEXT: ffint.s.w $fa1, $fa1 1834; LA64F-NEXT: .p2align 4, , 16 1835; LA64F-NEXT: .LBB26_1: # %atomicrmw.start 1836; LA64F-NEXT: # =>This Loop Header: Depth=1 1837; LA64F-NEXT: # Child Loop BB26_3 Depth 2 1838; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 1839; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 1840; LA64F-NEXT: movfr2gr.s $a1, $fa2 1841; LA64F-NEXT: movfr2gr.s $a2, $fa0 1842; LA64F-NEXT: .LBB26_3: # %atomicrmw.start 1843; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 1844; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1845; LA64F-NEXT: ll.w $a3, $a0, 0 1846; LA64F-NEXT: bne $a3, $a2, 
.LBB26_5 1847; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1848; LA64F-NEXT: # in Loop: Header=BB26_3 Depth=2 1849; LA64F-NEXT: move $a4, $a1 1850; LA64F-NEXT: sc.w $a4, $a0, 0 1851; LA64F-NEXT: beqz $a4, .LBB26_3 1852; LA64F-NEXT: b .LBB26_6 1853; LA64F-NEXT: .LBB26_5: # %atomicrmw.start 1854; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 1855; LA64F-NEXT: dbar 20 1856; LA64F-NEXT: .LBB26_6: # %atomicrmw.start 1857; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 1858; LA64F-NEXT: movgr2fr.w $fa0, $a3 1859; LA64F-NEXT: bne $a3, $a2, .LBB26_1 1860; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1861; LA64F-NEXT: ret 1862; 1863; LA64D-LABEL: float_fmin_seq_cst: 1864; LA64D: # %bb.0: 1865; LA64D-NEXT: fld.s $fa0, $a0, 0 1866; LA64D-NEXT: vldi $vr1, -1168 1867; LA64D-NEXT: .p2align 4, , 16 1868; LA64D-NEXT: .LBB26_1: # %atomicrmw.start 1869; LA64D-NEXT: # =>This Loop Header: Depth=1 1870; LA64D-NEXT: # Child Loop BB26_3 Depth 2 1871; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 1872; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 1873; LA64D-NEXT: movfr2gr.s $a1, $fa2 1874; LA64D-NEXT: movfr2gr.s $a2, $fa0 1875; LA64D-NEXT: .LBB26_3: # %atomicrmw.start 1876; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 1877; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1878; LA64D-NEXT: ll.w $a3, $a0, 0 1879; LA64D-NEXT: bne $a3, $a2, .LBB26_5 1880; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1881; LA64D-NEXT: # in Loop: Header=BB26_3 Depth=2 1882; LA64D-NEXT: move $a4, $a1 1883; LA64D-NEXT: sc.w $a4, $a0, 0 1884; LA64D-NEXT: beqz $a4, .LBB26_3 1885; LA64D-NEXT: b .LBB26_6 1886; LA64D-NEXT: .LBB26_5: # %atomicrmw.start 1887; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 1888; LA64D-NEXT: dbar 20 1889; LA64D-NEXT: .LBB26_6: # %atomicrmw.start 1890; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 1891; LA64D-NEXT: movgr2fr.w $fa0, $a3 1892; LA64D-NEXT: bne $a3, $a2, .LBB26_1 1893; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1894; LA64D-NEXT: ret 1895 %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 1896 ret float %v 1897} 1898 
1899define float @float_fmax_seq_cst(ptr %p) nounwind { 1900; LA64F-LABEL: float_fmax_seq_cst: 1901; LA64F: # %bb.0: 1902; LA64F-NEXT: fld.s $fa0, $a0, 0 1903; LA64F-NEXT: addi.w $a1, $zero, 1 1904; LA64F-NEXT: movgr2fr.w $fa1, $a1 1905; LA64F-NEXT: ffint.s.w $fa1, $fa1 1906; LA64F-NEXT: .p2align 4, , 16 1907; LA64F-NEXT: .LBB27_1: # %atomicrmw.start 1908; LA64F-NEXT: # =>This Loop Header: Depth=1 1909; LA64F-NEXT: # Child Loop BB27_3 Depth 2 1910; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 1911; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 1912; LA64F-NEXT: movfr2gr.s $a1, $fa2 1913; LA64F-NEXT: movfr2gr.s $a2, $fa0 1914; LA64F-NEXT: .LBB27_3: # %atomicrmw.start 1915; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 1916; LA64F-NEXT: # => This Inner Loop Header: Depth=2 1917; LA64F-NEXT: ll.w $a3, $a0, 0 1918; LA64F-NEXT: bne $a3, $a2, .LBB27_5 1919; LA64F-NEXT: # %bb.4: # %atomicrmw.start 1920; LA64F-NEXT: # in Loop: Header=BB27_3 Depth=2 1921; LA64F-NEXT: move $a4, $a1 1922; LA64F-NEXT: sc.w $a4, $a0, 0 1923; LA64F-NEXT: beqz $a4, .LBB27_3 1924; LA64F-NEXT: b .LBB27_6 1925; LA64F-NEXT: .LBB27_5: # %atomicrmw.start 1926; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 1927; LA64F-NEXT: dbar 20 1928; LA64F-NEXT: .LBB27_6: # %atomicrmw.start 1929; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 1930; LA64F-NEXT: movgr2fr.w $fa0, $a3 1931; LA64F-NEXT: bne $a3, $a2, .LBB27_1 1932; LA64F-NEXT: # %bb.2: # %atomicrmw.end 1933; LA64F-NEXT: ret 1934; 1935; LA64D-LABEL: float_fmax_seq_cst: 1936; LA64D: # %bb.0: 1937; LA64D-NEXT: fld.s $fa0, $a0, 0 1938; LA64D-NEXT: vldi $vr1, -1168 1939; LA64D-NEXT: .p2align 4, , 16 1940; LA64D-NEXT: .LBB27_1: # %atomicrmw.start 1941; LA64D-NEXT: # =>This Loop Header: Depth=1 1942; LA64D-NEXT: # Child Loop BB27_3 Depth 2 1943; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 1944; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 1945; LA64D-NEXT: movfr2gr.s $a1, $fa2 1946; LA64D-NEXT: movfr2gr.s $a2, $fa0 1947; LA64D-NEXT: .LBB27_3: # %atomicrmw.start 1948; LA64D-NEXT: # Parent Loop BB27_1 
Depth=1 1949; LA64D-NEXT: # => This Inner Loop Header: Depth=2 1950; LA64D-NEXT: ll.w $a3, $a0, 0 1951; LA64D-NEXT: bne $a3, $a2, .LBB27_5 1952; LA64D-NEXT: # %bb.4: # %atomicrmw.start 1953; LA64D-NEXT: # in Loop: Header=BB27_3 Depth=2 1954; LA64D-NEXT: move $a4, $a1 1955; LA64D-NEXT: sc.w $a4, $a0, 0 1956; LA64D-NEXT: beqz $a4, .LBB27_3 1957; LA64D-NEXT: b .LBB27_6 1958; LA64D-NEXT: .LBB27_5: # %atomicrmw.start 1959; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 1960; LA64D-NEXT: dbar 20 1961; LA64D-NEXT: .LBB27_6: # %atomicrmw.start 1962; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 1963; LA64D-NEXT: movgr2fr.w $fa0, $a3 1964; LA64D-NEXT: bne $a3, $a2, .LBB27_1 1965; LA64D-NEXT: # %bb.2: # %atomicrmw.end 1966; LA64D-NEXT: ret 1967 %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 1968 ret float %v 1969} 1970 1971define double @double_fadd_seq_cst(ptr %p) nounwind { 1972; LA64F-LABEL: double_fadd_seq_cst: 1973; LA64F: # %bb.0: 1974; LA64F-NEXT: addi.d $sp, $sp, -48 1975; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 1976; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 1977; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 1978; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 1979; LA64F-NEXT: move $fp, $a0 1980; LA64F-NEXT: ld.d $s1, $a0, 0 1981; LA64F-NEXT: lu52i.d $s0, $zero, 1023 1982; LA64F-NEXT: .p2align 4, , 16 1983; LA64F-NEXT: .LBB28_1: # %atomicrmw.start 1984; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 1985; LA64F-NEXT: move $a0, $s1 1986; LA64F-NEXT: move $a1, $s0 1987; LA64F-NEXT: bl %plt(__adddf3) 1988; LA64F-NEXT: st.d $s1, $sp, 8 1989; LA64F-NEXT: st.d $a0, $sp, 0 1990; LA64F-NEXT: ori $a0, $zero, 8 1991; LA64F-NEXT: addi.d $a2, $sp, 8 1992; LA64F-NEXT: addi.d $a3, $sp, 0 1993; LA64F-NEXT: ori $a4, $zero, 5 1994; LA64F-NEXT: ori $a5, $zero, 5 1995; LA64F-NEXT: move $a1, $fp 1996; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 1997; LA64F-NEXT: ld.d $s1, $sp, 8 1998; LA64F-NEXT: beqz $a0, .LBB28_1 1999; LA64F-NEXT: # %bb.2: 
# %atomicrmw.end 2000; LA64F-NEXT: move $a0, $s1 2001; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2002; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2003; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 2004; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2005; LA64F-NEXT: addi.d $sp, $sp, 48 2006; LA64F-NEXT: ret 2007; 2008; LA64D-LABEL: double_fadd_seq_cst: 2009; LA64D: # %bb.0: 2010; LA64D-NEXT: addi.d $sp, $sp, -32 2011; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 2012; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2013; LA64D-NEXT: move $fp, $a0 2014; LA64D-NEXT: fld.d $fa0, $a0, 0 2015; LA64D-NEXT: .p2align 4, , 16 2016; LA64D-NEXT: .LBB28_1: # %atomicrmw.start 2017; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2018; LA64D-NEXT: vldi $vr1, -912 2019; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 2020; LA64D-NEXT: fst.d $fa0, $sp, 8 2021; LA64D-NEXT: fst.d $fa1, $sp, 0 2022; LA64D-NEXT: ori $a0, $zero, 8 2023; LA64D-NEXT: addi.d $a2, $sp, 8 2024; LA64D-NEXT: addi.d $a3, $sp, 0 2025; LA64D-NEXT: ori $a4, $zero, 5 2026; LA64D-NEXT: ori $a5, $zero, 5 2027; LA64D-NEXT: move $a1, $fp 2028; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2029; LA64D-NEXT: fld.d $fa0, $sp, 8 2030; LA64D-NEXT: beqz $a0, .LBB28_1 2031; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2032; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2033; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2034; LA64D-NEXT: addi.d $sp, $sp, 32 2035; LA64D-NEXT: ret 2036 %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 2037 ret double %v 2038} 2039 2040define double @double_fsub_seq_cst(ptr %p) nounwind { 2041; LA64F-LABEL: double_fsub_seq_cst: 2042; LA64F: # %bb.0: 2043; LA64F-NEXT: addi.d $sp, $sp, -48 2044; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 2045; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 2046; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 2047; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 2048; LA64F-NEXT: move $fp, $a0 
2049; LA64F-NEXT: ld.d $s1, $a0, 0 2050; LA64F-NEXT: lu52i.d $s0, $zero, -1025 2051; LA64F-NEXT: .p2align 4, , 16 2052; LA64F-NEXT: .LBB29_1: # %atomicrmw.start 2053; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 2054; LA64F-NEXT: move $a0, $s1 2055; LA64F-NEXT: move $a1, $s0 2056; LA64F-NEXT: bl %plt(__adddf3) 2057; LA64F-NEXT: st.d $s1, $sp, 8 2058; LA64F-NEXT: st.d $a0, $sp, 0 2059; LA64F-NEXT: ori $a0, $zero, 8 2060; LA64F-NEXT: addi.d $a2, $sp, 8 2061; LA64F-NEXT: addi.d $a3, $sp, 0 2062; LA64F-NEXT: ori $a4, $zero, 5 2063; LA64F-NEXT: ori $a5, $zero, 5 2064; LA64F-NEXT: move $a1, $fp 2065; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 2066; LA64F-NEXT: ld.d $s1, $sp, 8 2067; LA64F-NEXT: beqz $a0, .LBB29_1 2068; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2069; LA64F-NEXT: move $a0, $s1 2070; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2071; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2072; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 2073; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2074; LA64F-NEXT: addi.d $sp, $sp, 48 2075; LA64F-NEXT: ret 2076; 2077; LA64D-LABEL: double_fsub_seq_cst: 2078; LA64D: # %bb.0: 2079; LA64D-NEXT: addi.d $sp, $sp, -32 2080; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 2081; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2082; LA64D-NEXT: move $fp, $a0 2083; LA64D-NEXT: fld.d $fa0, $a0, 0 2084; LA64D-NEXT: .p2align 4, , 16 2085; LA64D-NEXT: .LBB29_1: # %atomicrmw.start 2086; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2087; LA64D-NEXT: vldi $vr1, -784 2088; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 2089; LA64D-NEXT: fst.d $fa0, $sp, 8 2090; LA64D-NEXT: fst.d $fa1, $sp, 0 2091; LA64D-NEXT: ori $a0, $zero, 8 2092; LA64D-NEXT: addi.d $a2, $sp, 8 2093; LA64D-NEXT: addi.d $a3, $sp, 0 2094; LA64D-NEXT: ori $a4, $zero, 5 2095; LA64D-NEXT: ori $a5, $zero, 5 2096; LA64D-NEXT: move $a1, $fp 2097; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2098; LA64D-NEXT: fld.d $fa0, $sp, 8 2099; LA64D-NEXT: 
beqz $a0, .LBB29_1 2100; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2101; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2102; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2103; LA64D-NEXT: addi.d $sp, $sp, 32 2104; LA64D-NEXT: ret 2105 %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 2106 ret double %v 2107} 2108 2109define double @double_fmin_seq_cst(ptr %p) nounwind { 2110; LA64F-LABEL: double_fmin_seq_cst: 2111; LA64F: # %bb.0: 2112; LA64F-NEXT: addi.d $sp, $sp, -48 2113; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 2114; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 2115; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 2116; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 2117; LA64F-NEXT: move $fp, $a0 2118; LA64F-NEXT: ld.d $s1, $a0, 0 2119; LA64F-NEXT: lu52i.d $s0, $zero, 1023 2120; LA64F-NEXT: .p2align 4, , 16 2121; LA64F-NEXT: .LBB30_1: # %atomicrmw.start 2122; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 2123; LA64F-NEXT: move $a0, $s1 2124; LA64F-NEXT: move $a1, $s0 2125; LA64F-NEXT: bl %plt(fmin) 2126; LA64F-NEXT: st.d $s1, $sp, 8 2127; LA64F-NEXT: st.d $a0, $sp, 0 2128; LA64F-NEXT: ori $a0, $zero, 8 2129; LA64F-NEXT: addi.d $a2, $sp, 8 2130; LA64F-NEXT: addi.d $a3, $sp, 0 2131; LA64F-NEXT: ori $a4, $zero, 5 2132; LA64F-NEXT: ori $a5, $zero, 5 2133; LA64F-NEXT: move $a1, $fp 2134; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 2135; LA64F-NEXT: ld.d $s1, $sp, 8 2136; LA64F-NEXT: beqz $a0, .LBB30_1 2137; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2138; LA64F-NEXT: move $a0, $s1 2139; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2140; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2141; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 2142; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2143; LA64F-NEXT: addi.d $sp, $sp, 48 2144; LA64F-NEXT: ret 2145; 2146; LA64D-LABEL: double_fmin_seq_cst: 2147; LA64D: # %bb.0: 2148; LA64D-NEXT: addi.d $sp, $sp, -32 2149; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte 
Folded Spill 2150; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2151; LA64D-NEXT: move $fp, $a0 2152; LA64D-NEXT: fld.d $fa0, $a0, 0 2153; LA64D-NEXT: .p2align 4, , 16 2154; LA64D-NEXT: .LBB30_1: # %atomicrmw.start 2155; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2156; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 2157; LA64D-NEXT: vldi $vr2, -912 2158; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 2159; LA64D-NEXT: fst.d $fa0, $sp, 8 2160; LA64D-NEXT: fst.d $fa1, $sp, 0 2161; LA64D-NEXT: ori $a0, $zero, 8 2162; LA64D-NEXT: addi.d $a2, $sp, 8 2163; LA64D-NEXT: addi.d $a3, $sp, 0 2164; LA64D-NEXT: ori $a4, $zero, 5 2165; LA64D-NEXT: ori $a5, $zero, 5 2166; LA64D-NEXT: move $a1, $fp 2167; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2168; LA64D-NEXT: fld.d $fa0, $sp, 8 2169; LA64D-NEXT: beqz $a0, .LBB30_1 2170; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2171; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2172; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2173; LA64D-NEXT: addi.d $sp, $sp, 32 2174; LA64D-NEXT: ret 2175 %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 2176 ret double %v 2177} 2178 2179define double @double_fmax_seq_cst(ptr %p) nounwind { 2180; LA64F-LABEL: double_fmax_seq_cst: 2181; LA64F: # %bb.0: 2182; LA64F-NEXT: addi.d $sp, $sp, -48 2183; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 2184; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 2185; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 2186; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 2187; LA64F-NEXT: move $fp, $a0 2188; LA64F-NEXT: ld.d $s1, $a0, 0 2189; LA64F-NEXT: lu52i.d $s0, $zero, 1023 2190; LA64F-NEXT: .p2align 4, , 16 2191; LA64F-NEXT: .LBB31_1: # %atomicrmw.start 2192; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 2193; LA64F-NEXT: move $a0, $s1 2194; LA64F-NEXT: move $a1, $s0 2195; LA64F-NEXT: bl %plt(fmax) 2196; LA64F-NEXT: st.d $s1, $sp, 8 2197; LA64F-NEXT: st.d $a0, $sp, 0 2198; LA64F-NEXT: ori $a0, $zero, 8 2199; LA64F-NEXT: addi.d $a2, $sp, 8 
2200; LA64F-NEXT: addi.d $a3, $sp, 0 2201; LA64F-NEXT: ori $a4, $zero, 5 2202; LA64F-NEXT: ori $a5, $zero, 5 2203; LA64F-NEXT: move $a1, $fp 2204; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 2205; LA64F-NEXT: ld.d $s1, $sp, 8 2206; LA64F-NEXT: beqz $a0, .LBB31_1 2207; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2208; LA64F-NEXT: move $a0, $s1 2209; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2210; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2211; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 2212; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2213; LA64F-NEXT: addi.d $sp, $sp, 48 2214; LA64F-NEXT: ret 2215; 2216; LA64D-LABEL: double_fmax_seq_cst: 2217; LA64D: # %bb.0: 2218; LA64D-NEXT: addi.d $sp, $sp, -32 2219; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 2220; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2221; LA64D-NEXT: move $fp, $a0 2222; LA64D-NEXT: fld.d $fa0, $a0, 0 2223; LA64D-NEXT: .p2align 4, , 16 2224; LA64D-NEXT: .LBB31_1: # %atomicrmw.start 2225; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2226; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 2227; LA64D-NEXT: vldi $vr2, -912 2228; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 2229; LA64D-NEXT: fst.d $fa0, $sp, 8 2230; LA64D-NEXT: fst.d $fa1, $sp, 0 2231; LA64D-NEXT: ori $a0, $zero, 8 2232; LA64D-NEXT: addi.d $a2, $sp, 8 2233; LA64D-NEXT: addi.d $a3, $sp, 0 2234; LA64D-NEXT: ori $a4, $zero, 5 2235; LA64D-NEXT: ori $a5, $zero, 5 2236; LA64D-NEXT: move $a1, $fp 2237; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2238; LA64D-NEXT: fld.d $fa0, $sp, 8 2239; LA64D-NEXT: beqz $a0, .LBB31_1 2240; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2241; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2242; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2243; LA64D-NEXT: addi.d $sp, $sp, 32 2244; LA64D-NEXT: ret 2245 %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 2246 ret double %v 2247} 2248 2249define float @float_fadd_monotonic(ptr %p) nounwind { 2250; LA64F-LABEL: 
float_fadd_monotonic: 2251; LA64F: # %bb.0: 2252; LA64F-NEXT: fld.s $fa0, $a0, 0 2253; LA64F-NEXT: addi.w $a1, $zero, 1 2254; LA64F-NEXT: movgr2fr.w $fa1, $a1 2255; LA64F-NEXT: ffint.s.w $fa1, $fa1 2256; LA64F-NEXT: .p2align 4, , 16 2257; LA64F-NEXT: .LBB32_1: # %atomicrmw.start 2258; LA64F-NEXT: # =>This Loop Header: Depth=1 2259; LA64F-NEXT: # Child Loop BB32_3 Depth 2 2260; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 2261; LA64F-NEXT: movfr2gr.s $a1, $fa2 2262; LA64F-NEXT: movfr2gr.s $a2, $fa0 2263; LA64F-NEXT: .LBB32_3: # %atomicrmw.start 2264; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 2265; LA64F-NEXT: # => This Inner Loop Header: Depth=2 2266; LA64F-NEXT: ll.w $a3, $a0, 0 2267; LA64F-NEXT: bne $a3, $a2, .LBB32_5 2268; LA64F-NEXT: # %bb.4: # %atomicrmw.start 2269; LA64F-NEXT: # in Loop: Header=BB32_3 Depth=2 2270; LA64F-NEXT: move $a4, $a1 2271; LA64F-NEXT: sc.w $a4, $a0, 0 2272; LA64F-NEXT: beqz $a4, .LBB32_3 2273; LA64F-NEXT: b .LBB32_6 2274; LA64F-NEXT: .LBB32_5: # %atomicrmw.start 2275; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 2276; LA64F-NEXT: dbar 1792 2277; LA64F-NEXT: .LBB32_6: # %atomicrmw.start 2278; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 2279; LA64F-NEXT: movgr2fr.w $fa0, $a3 2280; LA64F-NEXT: bne $a3, $a2, .LBB32_1 2281; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2282; LA64F-NEXT: ret 2283; 2284; LA64D-LABEL: float_fadd_monotonic: 2285; LA64D: # %bb.0: 2286; LA64D-NEXT: fld.s $fa0, $a0, 0 2287; LA64D-NEXT: vldi $vr1, -1168 2288; LA64D-NEXT: .p2align 4, , 16 2289; LA64D-NEXT: .LBB32_1: # %atomicrmw.start 2290; LA64D-NEXT: # =>This Loop Header: Depth=1 2291; LA64D-NEXT: # Child Loop BB32_3 Depth 2 2292; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 2293; LA64D-NEXT: movfr2gr.s $a1, $fa2 2294; LA64D-NEXT: movfr2gr.s $a2, $fa0 2295; LA64D-NEXT: .LBB32_3: # %atomicrmw.start 2296; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 2297; LA64D-NEXT: # => This Inner Loop Header: Depth=2 2298; LA64D-NEXT: ll.w $a3, $a0, 0 2299; LA64D-NEXT: bne $a3, $a2, .LBB32_5 2300; LA64D-NEXT: 
# %bb.4: # %atomicrmw.start 2301; LA64D-NEXT: # in Loop: Header=BB32_3 Depth=2 2302; LA64D-NEXT: move $a4, $a1 2303; LA64D-NEXT: sc.w $a4, $a0, 0 2304; LA64D-NEXT: beqz $a4, .LBB32_3 2305; LA64D-NEXT: b .LBB32_6 2306; LA64D-NEXT: .LBB32_5: # %atomicrmw.start 2307; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 2308; LA64D-NEXT: dbar 1792 2309; LA64D-NEXT: .LBB32_6: # %atomicrmw.start 2310; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 2311; LA64D-NEXT: movgr2fr.w $fa0, $a3 2312; LA64D-NEXT: bne $a3, $a2, .LBB32_1 2313; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2314; LA64D-NEXT: ret 2315 %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 2316 ret float %v 2317} 2318 2319define float @float_fsub_monotonic(ptr %p) nounwind { 2320; LA64F-LABEL: float_fsub_monotonic: 2321; LA64F: # %bb.0: 2322; LA64F-NEXT: fld.s $fa0, $a0, 0 2323; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) 2324; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI33_0) 2325; LA64F-NEXT: .p2align 4, , 16 2326; LA64F-NEXT: .LBB33_1: # %atomicrmw.start 2327; LA64F-NEXT: # =>This Loop Header: Depth=1 2328; LA64F-NEXT: # Child Loop BB33_3 Depth 2 2329; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 2330; LA64F-NEXT: movfr2gr.s $a1, $fa2 2331; LA64F-NEXT: movfr2gr.s $a2, $fa0 2332; LA64F-NEXT: .LBB33_3: # %atomicrmw.start 2333; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 2334; LA64F-NEXT: # => This Inner Loop Header: Depth=2 2335; LA64F-NEXT: ll.w $a3, $a0, 0 2336; LA64F-NEXT: bne $a3, $a2, .LBB33_5 2337; LA64F-NEXT: # %bb.4: # %atomicrmw.start 2338; LA64F-NEXT: # in Loop: Header=BB33_3 Depth=2 2339; LA64F-NEXT: move $a4, $a1 2340; LA64F-NEXT: sc.w $a4, $a0, 0 2341; LA64F-NEXT: beqz $a4, .LBB33_3 2342; LA64F-NEXT: b .LBB33_6 2343; LA64F-NEXT: .LBB33_5: # %atomicrmw.start 2344; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 2345; LA64F-NEXT: dbar 1792 2346; LA64F-NEXT: .LBB33_6: # %atomicrmw.start 2347; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 2348; LA64F-NEXT: movgr2fr.w $fa0, $a3 2349; LA64F-NEXT: bne $a3, $a2, .LBB33_1 2350; 
LA64F-NEXT: # %bb.2: # %atomicrmw.end 2351; LA64F-NEXT: ret 2352; 2353; LA64D-LABEL: float_fsub_monotonic: 2354; LA64D: # %bb.0: 2355; LA64D-NEXT: fld.s $fa0, $a0, 0 2356; LA64D-NEXT: vldi $vr1, -1040 2357; LA64D-NEXT: .p2align 4, , 16 2358; LA64D-NEXT: .LBB33_1: # %atomicrmw.start 2359; LA64D-NEXT: # =>This Loop Header: Depth=1 2360; LA64D-NEXT: # Child Loop BB33_3 Depth 2 2361; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 2362; LA64D-NEXT: movfr2gr.s $a1, $fa2 2363; LA64D-NEXT: movfr2gr.s $a2, $fa0 2364; LA64D-NEXT: .LBB33_3: # %atomicrmw.start 2365; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 2366; LA64D-NEXT: # => This Inner Loop Header: Depth=2 2367; LA64D-NEXT: ll.w $a3, $a0, 0 2368; LA64D-NEXT: bne $a3, $a2, .LBB33_5 2369; LA64D-NEXT: # %bb.4: # %atomicrmw.start 2370; LA64D-NEXT: # in Loop: Header=BB33_3 Depth=2 2371; LA64D-NEXT: move $a4, $a1 2372; LA64D-NEXT: sc.w $a4, $a0, 0 2373; LA64D-NEXT: beqz $a4, .LBB33_3 2374; LA64D-NEXT: b .LBB33_6 2375; LA64D-NEXT: .LBB33_5: # %atomicrmw.start 2376; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 2377; LA64D-NEXT: dbar 1792 2378; LA64D-NEXT: .LBB33_6: # %atomicrmw.start 2379; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 2380; LA64D-NEXT: movgr2fr.w $fa0, $a3 2381; LA64D-NEXT: bne $a3, $a2, .LBB33_1 2382; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2383; LA64D-NEXT: ret 2384 %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 2385 ret float %v 2386} 2387 2388define float @float_fmin_monotonic(ptr %p) nounwind { 2389; LA64F-LABEL: float_fmin_monotonic: 2390; LA64F: # %bb.0: 2391; LA64F-NEXT: fld.s $fa0, $a0, 0 2392; LA64F-NEXT: addi.w $a1, $zero, 1 2393; LA64F-NEXT: movgr2fr.w $fa1, $a1 2394; LA64F-NEXT: ffint.s.w $fa1, $fa1 2395; LA64F-NEXT: .p2align 4, , 16 2396; LA64F-NEXT: .LBB34_1: # %atomicrmw.start 2397; LA64F-NEXT: # =>This Loop Header: Depth=1 2398; LA64F-NEXT: # Child Loop BB34_3 Depth 2 2399; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 2400; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 2401; LA64F-NEXT: movfr2gr.s $a1, $fa2 2402; 
LA64F-NEXT: movfr2gr.s $a2, $fa0 2403; LA64F-NEXT: .LBB34_3: # %atomicrmw.start 2404; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 2405; LA64F-NEXT: # => This Inner Loop Header: Depth=2 2406; LA64F-NEXT: ll.w $a3, $a0, 0 2407; LA64F-NEXT: bne $a3, $a2, .LBB34_5 2408; LA64F-NEXT: # %bb.4: # %atomicrmw.start 2409; LA64F-NEXT: # in Loop: Header=BB34_3 Depth=2 2410; LA64F-NEXT: move $a4, $a1 2411; LA64F-NEXT: sc.w $a4, $a0, 0 2412; LA64F-NEXT: beqz $a4, .LBB34_3 2413; LA64F-NEXT: b .LBB34_6 2414; LA64F-NEXT: .LBB34_5: # %atomicrmw.start 2415; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 2416; LA64F-NEXT: dbar 1792 2417; LA64F-NEXT: .LBB34_6: # %atomicrmw.start 2418; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 2419; LA64F-NEXT: movgr2fr.w $fa0, $a3 2420; LA64F-NEXT: bne $a3, $a2, .LBB34_1 2421; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2422; LA64F-NEXT: ret 2423; 2424; LA64D-LABEL: float_fmin_monotonic: 2425; LA64D: # %bb.0: 2426; LA64D-NEXT: fld.s $fa0, $a0, 0 2427; LA64D-NEXT: vldi $vr1, -1168 2428; LA64D-NEXT: .p2align 4, , 16 2429; LA64D-NEXT: .LBB34_1: # %atomicrmw.start 2430; LA64D-NEXT: # =>This Loop Header: Depth=1 2431; LA64D-NEXT: # Child Loop BB34_3 Depth 2 2432; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 2433; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 2434; LA64D-NEXT: movfr2gr.s $a1, $fa2 2435; LA64D-NEXT: movfr2gr.s $a2, $fa0 2436; LA64D-NEXT: .LBB34_3: # %atomicrmw.start 2437; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 2438; LA64D-NEXT: # => This Inner Loop Header: Depth=2 2439; LA64D-NEXT: ll.w $a3, $a0, 0 2440; LA64D-NEXT: bne $a3, $a2, .LBB34_5 2441; LA64D-NEXT: # %bb.4: # %atomicrmw.start 2442; LA64D-NEXT: # in Loop: Header=BB34_3 Depth=2 2443; LA64D-NEXT: move $a4, $a1 2444; LA64D-NEXT: sc.w $a4, $a0, 0 2445; LA64D-NEXT: beqz $a4, .LBB34_3 2446; LA64D-NEXT: b .LBB34_6 2447; LA64D-NEXT: .LBB34_5: # %atomicrmw.start 2448; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 2449; LA64D-NEXT: dbar 1792 2450; LA64D-NEXT: .LBB34_6: # %atomicrmw.start 2451; LA64D-NEXT: # in Loop: 
Header=BB34_1 Depth=1 2452; LA64D-NEXT: movgr2fr.w $fa0, $a3 2453; LA64D-NEXT: bne $a3, $a2, .LBB34_1 2454; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2455; LA64D-NEXT: ret 2456 %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 2457 ret float %v 2458} 2459 2460define float @float_fmax_monotonic(ptr %p) nounwind { 2461; LA64F-LABEL: float_fmax_monotonic: 2462; LA64F: # %bb.0: 2463; LA64F-NEXT: fld.s $fa0, $a0, 0 2464; LA64F-NEXT: addi.w $a1, $zero, 1 2465; LA64F-NEXT: movgr2fr.w $fa1, $a1 2466; LA64F-NEXT: ffint.s.w $fa1, $fa1 2467; LA64F-NEXT: .p2align 4, , 16 2468; LA64F-NEXT: .LBB35_1: # %atomicrmw.start 2469; LA64F-NEXT: # =>This Loop Header: Depth=1 2470; LA64F-NEXT: # Child Loop BB35_3 Depth 2 2471; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 2472; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 2473; LA64F-NEXT: movfr2gr.s $a1, $fa2 2474; LA64F-NEXT: movfr2gr.s $a2, $fa0 2475; LA64F-NEXT: .LBB35_3: # %atomicrmw.start 2476; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 2477; LA64F-NEXT: # => This Inner Loop Header: Depth=2 2478; LA64F-NEXT: ll.w $a3, $a0, 0 2479; LA64F-NEXT: bne $a3, $a2, .LBB35_5 2480; LA64F-NEXT: # %bb.4: # %atomicrmw.start 2481; LA64F-NEXT: # in Loop: Header=BB35_3 Depth=2 2482; LA64F-NEXT: move $a4, $a1 2483; LA64F-NEXT: sc.w $a4, $a0, 0 2484; LA64F-NEXT: beqz $a4, .LBB35_3 2485; LA64F-NEXT: b .LBB35_6 2486; LA64F-NEXT: .LBB35_5: # %atomicrmw.start 2487; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 2488; LA64F-NEXT: dbar 1792 2489; LA64F-NEXT: .LBB35_6: # %atomicrmw.start 2490; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 2491; LA64F-NEXT: movgr2fr.w $fa0, $a3 2492; LA64F-NEXT: bne $a3, $a2, .LBB35_1 2493; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2494; LA64F-NEXT: ret 2495; 2496; LA64D-LABEL: float_fmax_monotonic: 2497; LA64D: # %bb.0: 2498; LA64D-NEXT: fld.s $fa0, $a0, 0 2499; LA64D-NEXT: vldi $vr1, -1168 2500; LA64D-NEXT: .p2align 4, , 16 2501; LA64D-NEXT: .LBB35_1: # %atomicrmw.start 2502; LA64D-NEXT: # =>This Loop Header: Depth=1 2503; LA64D-NEXT: # Child Loop 
BB35_3 Depth 2 2504; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 2505; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 2506; LA64D-NEXT: movfr2gr.s $a1, $fa2 2507; LA64D-NEXT: movfr2gr.s $a2, $fa0 2508; LA64D-NEXT: .LBB35_3: # %atomicrmw.start 2509; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 2510; LA64D-NEXT: # => This Inner Loop Header: Depth=2 2511; LA64D-NEXT: ll.w $a3, $a0, 0 2512; LA64D-NEXT: bne $a3, $a2, .LBB35_5 2513; LA64D-NEXT: # %bb.4: # %atomicrmw.start 2514; LA64D-NEXT: # in Loop: Header=BB35_3 Depth=2 2515; LA64D-NEXT: move $a4, $a1 2516; LA64D-NEXT: sc.w $a4, $a0, 0 2517; LA64D-NEXT: beqz $a4, .LBB35_3 2518; LA64D-NEXT: b .LBB35_6 2519; LA64D-NEXT: .LBB35_5: # %atomicrmw.start 2520; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 2521; LA64D-NEXT: dbar 1792 2522; LA64D-NEXT: .LBB35_6: # %atomicrmw.start 2523; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 2524; LA64D-NEXT: movgr2fr.w $fa0, $a3 2525; LA64D-NEXT: bne $a3, $a2, .LBB35_1 2526; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2527; LA64D-NEXT: ret 2528 %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 2529 ret float %v 2530} 2531 2532define double @double_fadd_monotonic(ptr %p) nounwind { 2533; LA64F-LABEL: double_fadd_monotonic: 2534; LA64F: # %bb.0: 2535; LA64F-NEXT: addi.d $sp, $sp, -48 2536; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 2537; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 2538; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 2539; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 2540; LA64F-NEXT: move $fp, $a0 2541; LA64F-NEXT: ld.d $s1, $a0, 0 2542; LA64F-NEXT: lu52i.d $s0, $zero, 1023 2543; LA64F-NEXT: .p2align 4, , 16 2544; LA64F-NEXT: .LBB36_1: # %atomicrmw.start 2545; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 2546; LA64F-NEXT: move $a0, $s1 2547; LA64F-NEXT: move $a1, $s0 2548; LA64F-NEXT: bl %plt(__adddf3) 2549; LA64F-NEXT: st.d $s1, $sp, 8 2550; LA64F-NEXT: st.d $a0, $sp, 0 2551; LA64F-NEXT: ori $a0, $zero, 8 2552; LA64F-NEXT: addi.d $a2, $sp, 8 2553; LA64F-NEXT: 
addi.d $a3, $sp, 0 2554; LA64F-NEXT: move $a1, $fp 2555; LA64F-NEXT: move $a4, $zero 2556; LA64F-NEXT: move $a5, $zero 2557; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 2558; LA64F-NEXT: ld.d $s1, $sp, 8 2559; LA64F-NEXT: beqz $a0, .LBB36_1 2560; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2561; LA64F-NEXT: move $a0, $s1 2562; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2563; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2564; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 2565; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2566; LA64F-NEXT: addi.d $sp, $sp, 48 2567; LA64F-NEXT: ret 2568; 2569; LA64D-LABEL: double_fadd_monotonic: 2570; LA64D: # %bb.0: 2571; LA64D-NEXT: addi.d $sp, $sp, -32 2572; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 2573; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2574; LA64D-NEXT: move $fp, $a0 2575; LA64D-NEXT: fld.d $fa0, $a0, 0 2576; LA64D-NEXT: .p2align 4, , 16 2577; LA64D-NEXT: .LBB36_1: # %atomicrmw.start 2578; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2579; LA64D-NEXT: vldi $vr1, -912 2580; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 2581; LA64D-NEXT: fst.d $fa0, $sp, 8 2582; LA64D-NEXT: fst.d $fa1, $sp, 0 2583; LA64D-NEXT: ori $a0, $zero, 8 2584; LA64D-NEXT: addi.d $a2, $sp, 8 2585; LA64D-NEXT: addi.d $a3, $sp, 0 2586; LA64D-NEXT: move $a1, $fp 2587; LA64D-NEXT: move $a4, $zero 2588; LA64D-NEXT: move $a5, $zero 2589; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2590; LA64D-NEXT: fld.d $fa0, $sp, 8 2591; LA64D-NEXT: beqz $a0, .LBB36_1 2592; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2593; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2594; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2595; LA64D-NEXT: addi.d $sp, $sp, 32 2596; LA64D-NEXT: ret 2597 %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 2598 ret double %v 2599} 2600 2601define double @double_fsub_monotonic(ptr %p) nounwind { 2602; LA64F-LABEL: double_fsub_monotonic: 2603; LA64F: # %bb.0: 2604; LA64F-NEXT: addi.d 
$sp, $sp, -48 2605; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 2606; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 2607; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 2608; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 2609; LA64F-NEXT: move $fp, $a0 2610; LA64F-NEXT: ld.d $s1, $a0, 0 2611; LA64F-NEXT: lu52i.d $s0, $zero, -1025 2612; LA64F-NEXT: .p2align 4, , 16 2613; LA64F-NEXT: .LBB37_1: # %atomicrmw.start 2614; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 2615; LA64F-NEXT: move $a0, $s1 2616; LA64F-NEXT: move $a1, $s0 2617; LA64F-NEXT: bl %plt(__adddf3) 2618; LA64F-NEXT: st.d $s1, $sp, 8 2619; LA64F-NEXT: st.d $a0, $sp, 0 2620; LA64F-NEXT: ori $a0, $zero, 8 2621; LA64F-NEXT: addi.d $a2, $sp, 8 2622; LA64F-NEXT: addi.d $a3, $sp, 0 2623; LA64F-NEXT: move $a1, $fp 2624; LA64F-NEXT: move $a4, $zero 2625; LA64F-NEXT: move $a5, $zero 2626; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 2627; LA64F-NEXT: ld.d $s1, $sp, 8 2628; LA64F-NEXT: beqz $a0, .LBB37_1 2629; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2630; LA64F-NEXT: move $a0, $s1 2631; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2632; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2633; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 2634; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2635; LA64F-NEXT: addi.d $sp, $sp, 48 2636; LA64F-NEXT: ret 2637; 2638; LA64D-LABEL: double_fsub_monotonic: 2639; LA64D: # %bb.0: 2640; LA64D-NEXT: addi.d $sp, $sp, -32 2641; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 2642; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2643; LA64D-NEXT: move $fp, $a0 2644; LA64D-NEXT: fld.d $fa0, $a0, 0 2645; LA64D-NEXT: .p2align 4, , 16 2646; LA64D-NEXT: .LBB37_1: # %atomicrmw.start 2647; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2648; LA64D-NEXT: vldi $vr1, -784 2649; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 2650; LA64D-NEXT: fst.d $fa0, $sp, 8 2651; LA64D-NEXT: fst.d $fa1, $sp, 0 2652; LA64D-NEXT: ori $a0, $zero, 8 2653; 
LA64D-NEXT: addi.d $a2, $sp, 8 2654; LA64D-NEXT: addi.d $a3, $sp, 0 2655; LA64D-NEXT: move $a1, $fp 2656; LA64D-NEXT: move $a4, $zero 2657; LA64D-NEXT: move $a5, $zero 2658; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2659; LA64D-NEXT: fld.d $fa0, $sp, 8 2660; LA64D-NEXT: beqz $a0, .LBB37_1 2661; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2662; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2663; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2664; LA64D-NEXT: addi.d $sp, $sp, 32 2665; LA64D-NEXT: ret 2666 %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 2667 ret double %v 2668} 2669 2670define double @double_fmin_monotonic(ptr %p) nounwind { 2671; LA64F-LABEL: double_fmin_monotonic: 2672; LA64F: # %bb.0: 2673; LA64F-NEXT: addi.d $sp, $sp, -48 2674; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 2675; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 2676; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 2677; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 2678; LA64F-NEXT: move $fp, $a0 2679; LA64F-NEXT: ld.d $s1, $a0, 0 2680; LA64F-NEXT: lu52i.d $s0, $zero, 1023 2681; LA64F-NEXT: .p2align 4, , 16 2682; LA64F-NEXT: .LBB38_1: # %atomicrmw.start 2683; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 2684; LA64F-NEXT: move $a0, $s1 2685; LA64F-NEXT: move $a1, $s0 2686; LA64F-NEXT: bl %plt(fmin) 2687; LA64F-NEXT: st.d $s1, $sp, 8 2688; LA64F-NEXT: st.d $a0, $sp, 0 2689; LA64F-NEXT: ori $a0, $zero, 8 2690; LA64F-NEXT: addi.d $a2, $sp, 8 2691; LA64F-NEXT: addi.d $a3, $sp, 0 2692; LA64F-NEXT: move $a1, $fp 2693; LA64F-NEXT: move $a4, $zero 2694; LA64F-NEXT: move $a5, $zero 2695; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 2696; LA64F-NEXT: ld.d $s1, $sp, 8 2697; LA64F-NEXT: beqz $a0, .LBB38_1 2698; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2699; LA64F-NEXT: move $a0, $s1 2700; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2701; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2702; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded 
Reload 2703; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2704; LA64F-NEXT: addi.d $sp, $sp, 48 2705; LA64F-NEXT: ret 2706; 2707; LA64D-LABEL: double_fmin_monotonic: 2708; LA64D: # %bb.0: 2709; LA64D-NEXT: addi.d $sp, $sp, -32 2710; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 2711; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2712; LA64D-NEXT: move $fp, $a0 2713; LA64D-NEXT: fld.d $fa0, $a0, 0 2714; LA64D-NEXT: .p2align 4, , 16 2715; LA64D-NEXT: .LBB38_1: # %atomicrmw.start 2716; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2717; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 2718; LA64D-NEXT: vldi $vr2, -912 2719; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 2720; LA64D-NEXT: fst.d $fa0, $sp, 8 2721; LA64D-NEXT: fst.d $fa1, $sp, 0 2722; LA64D-NEXT: ori $a0, $zero, 8 2723; LA64D-NEXT: addi.d $a2, $sp, 8 2724; LA64D-NEXT: addi.d $a3, $sp, 0 2725; LA64D-NEXT: move $a1, $fp 2726; LA64D-NEXT: move $a4, $zero 2727; LA64D-NEXT: move $a5, $zero 2728; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2729; LA64D-NEXT: fld.d $fa0, $sp, 8 2730; LA64D-NEXT: beqz $a0, .LBB38_1 2731; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2732; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2733; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2734; LA64D-NEXT: addi.d $sp, $sp, 32 2735; LA64D-NEXT: ret 2736 %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 2737 ret double %v 2738} 2739 2740define double @double_fmax_monotonic(ptr %p) nounwind { 2741; LA64F-LABEL: double_fmax_monotonic: 2742; LA64F: # %bb.0: 2743; LA64F-NEXT: addi.d $sp, $sp, -48 2744; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill 2745; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill 2746; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill 2747; LA64F-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill 2748; LA64F-NEXT: move $fp, $a0 2749; LA64F-NEXT: ld.d $s1, $a0, 0 2750; LA64F-NEXT: lu52i.d $s0, $zero, 1023 2751; LA64F-NEXT: .p2align 4, , 16 2752; LA64F-NEXT: .LBB39_1: # %atomicrmw.start 2753; 
LA64F-NEXT: # =>This Inner Loop Header: Depth=1 2754; LA64F-NEXT: move $a0, $s1 2755; LA64F-NEXT: move $a1, $s0 2756; LA64F-NEXT: bl %plt(fmax) 2757; LA64F-NEXT: st.d $s1, $sp, 8 2758; LA64F-NEXT: st.d $a0, $sp, 0 2759; LA64F-NEXT: ori $a0, $zero, 8 2760; LA64F-NEXT: addi.d $a2, $sp, 8 2761; LA64F-NEXT: addi.d $a3, $sp, 0 2762; LA64F-NEXT: move $a1, $fp 2763; LA64F-NEXT: move $a4, $zero 2764; LA64F-NEXT: move $a5, $zero 2765; LA64F-NEXT: bl %plt(__atomic_compare_exchange) 2766; LA64F-NEXT: ld.d $s1, $sp, 8 2767; LA64F-NEXT: beqz $a0, .LBB39_1 2768; LA64F-NEXT: # %bb.2: # %atomicrmw.end 2769; LA64F-NEXT: move $a0, $s1 2770; LA64F-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload 2771; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload 2772; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload 2773; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload 2774; LA64F-NEXT: addi.d $sp, $sp, 48 2775; LA64F-NEXT: ret 2776; 2777; LA64D-LABEL: double_fmax_monotonic: 2778; LA64D: # %bb.0: 2779; LA64D-NEXT: addi.d $sp, $sp, -32 2780; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill 2781; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill 2782; LA64D-NEXT: move $fp, $a0 2783; LA64D-NEXT: fld.d $fa0, $a0, 0 2784; LA64D-NEXT: .p2align 4, , 16 2785; LA64D-NEXT: .LBB39_1: # %atomicrmw.start 2786; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 2787; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 2788; LA64D-NEXT: vldi $vr2, -912 2789; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 2790; LA64D-NEXT: fst.d $fa0, $sp, 8 2791; LA64D-NEXT: fst.d $fa1, $sp, 0 2792; LA64D-NEXT: ori $a0, $zero, 8 2793; LA64D-NEXT: addi.d $a2, $sp, 8 2794; LA64D-NEXT: addi.d $a3, $sp, 0 2795; LA64D-NEXT: move $a1, $fp 2796; LA64D-NEXT: move $a4, $zero 2797; LA64D-NEXT: move $a5, $zero 2798; LA64D-NEXT: bl %plt(__atomic_compare_exchange) 2799; LA64D-NEXT: fld.d $fa0, $sp, 8 2800; LA64D-NEXT: beqz $a0, .LBB39_1 2801; LA64D-NEXT: # %bb.2: # %atomicrmw.end 2802; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload 2803; 
LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload 2804; LA64D-NEXT: addi.d $sp, $sp, 32 2805; LA64D-NEXT: ret 2806 %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 2807 ret double %v 2808} 2809