1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=aarch64-linux-gnu -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=NOLSE %s 3; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefix=LSE %s 4; RUN: llc -mtriple=aarch64-linux-gnu -mattr=-lse,-fp-armv8 -O1 < %s | FileCheck -check-prefix=SOFTFP-NOLSE %s 5 6define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 { 7; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2: 8; NOLSE: // %bb.0: 9; NOLSE-NEXT: fcvt s1, h0 10; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start 11; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 12; NOLSE-NEXT: ldaxrh w8, [x0] 13; NOLSE-NEXT: fmov s0, w8 14; NOLSE-NEXT: fcvt s2, h0 15; NOLSE-NEXT: fadd s2, s2, s1 16; NOLSE-NEXT: fcvt h2, s2 17; NOLSE-NEXT: fmov w8, s2 18; NOLSE-NEXT: stlxrh w9, w8, [x0] 19; NOLSE-NEXT: cbnz w9, .LBB0_1 20; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 21; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0 22; NOLSE-NEXT: ret 23; 24; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2: 25; LSE: // %bb.0: 26; LSE-NEXT: fcvt s1, h0 27; LSE-NEXT: ldr h0, [x0] 28; LSE-NEXT: .LBB0_1: // %atomicrmw.start 29; LSE-NEXT: // =>This Inner Loop Header: Depth=1 30; LSE-NEXT: fcvt s2, h0 31; LSE-NEXT: fmov w8, s0 32; LSE-NEXT: mov w10, w8 33; LSE-NEXT: fadd s2, s2, s1 34; LSE-NEXT: fcvt h2, s2 35; LSE-NEXT: fmov w9, s2 36; LSE-NEXT: casalh w10, w9, [x0] 37; LSE-NEXT: fmov s0, w10 38; LSE-NEXT: cmp w10, w8, uxth 39; LSE-NEXT: b.ne .LBB0_1 40; LSE-NEXT: // %bb.2: // %atomicrmw.end 41; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0 42; LSE-NEXT: ret 43; 44; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2: 45; SOFTFP-NOLSE: // %bb.0: 46; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! 
// 8-byte Folded Spill 47; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill 48; SOFTFP-NOLSE-NEXT: mov x19, x0 49; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] 50; SOFTFP-NOLSE-NEXT: mov w20, w1 51; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill 52; SOFTFP-NOLSE-NEXT: b .LBB0_2 53; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore 54; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1 55; SOFTFP-NOLSE-NEXT: mov w8, wzr 56; SOFTFP-NOLSE-NEXT: clrex 57; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6 58; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start 59; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 60; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2 61; SOFTFP-NOLSE-NEXT: mov w22, w0 62; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff 63; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 64; SOFTFP-NOLSE-NEXT: mov w21, w0 65; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff 66; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 67; SOFTFP-NOLSE-NEXT: mov w1, w21 68; SOFTFP-NOLSE-NEXT: bl __addsf3 69; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee 70; SOFTFP-NOLSE-NEXT: mov w8, w0 71; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start 72; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1 73; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 74; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] 75; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth 76; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1 77; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 78; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2 79; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] 80; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3 81; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1 82; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 83; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2 84; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end 85; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload 86; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 87; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload 88; SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded 
Reload 89; SOFTFP-NOLSE-NEXT: ret 90 %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2 91 ret half %res 92} 93 94define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 { 95; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4: 96; NOLSE: // %bb.0: 97; NOLSE-NEXT: fcvt s1, h0 98; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start 99; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 100; NOLSE-NEXT: ldaxrh w8, [x0] 101; NOLSE-NEXT: fmov s0, w8 102; NOLSE-NEXT: fcvt s2, h0 103; NOLSE-NEXT: fadd s2, s2, s1 104; NOLSE-NEXT: fcvt h2, s2 105; NOLSE-NEXT: fmov w8, s2 106; NOLSE-NEXT: stlxrh w9, w8, [x0] 107; NOLSE-NEXT: cbnz w9, .LBB1_1 108; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 109; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0 110; NOLSE-NEXT: ret 111; 112; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4: 113; LSE: // %bb.0: 114; LSE-NEXT: fcvt s1, h0 115; LSE-NEXT: ldr h0, [x0] 116; LSE-NEXT: .LBB1_1: // %atomicrmw.start 117; LSE-NEXT: // =>This Inner Loop Header: Depth=1 118; LSE-NEXT: fcvt s2, h0 119; LSE-NEXT: fmov w8, s0 120; LSE-NEXT: mov w10, w8 121; LSE-NEXT: fadd s2, s2, s1 122; LSE-NEXT: fcvt h2, s2 123; LSE-NEXT: fmov w9, s2 124; LSE-NEXT: casalh w10, w9, [x0] 125; LSE-NEXT: fmov s0, w10 126; LSE-NEXT: cmp w10, w8, uxth 127; LSE-NEXT: b.ne .LBB1_1 128; LSE-NEXT: // %bb.2: // %atomicrmw.end 129; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0 130; LSE-NEXT: ret 131; 132; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4: 133; SOFTFP-NOLSE: // %bb.0: 134; SOFTFP-NOLSE-NEXT: str x30, [sp, #-48]! 
// 8-byte Folded Spill 135; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill 136; SOFTFP-NOLSE-NEXT: mov x19, x0 137; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] 138; SOFTFP-NOLSE-NEXT: mov w20, w1 139; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill 140; SOFTFP-NOLSE-NEXT: b .LBB1_2 141; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore 142; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1 143; SOFTFP-NOLSE-NEXT: mov w8, wzr 144; SOFTFP-NOLSE-NEXT: clrex 145; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6 146; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start 147; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 148; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2 149; SOFTFP-NOLSE-NEXT: mov w22, w0 150; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff 151; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 152; SOFTFP-NOLSE-NEXT: mov w21, w0 153; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff 154; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 155; SOFTFP-NOLSE-NEXT: mov w1, w21 156; SOFTFP-NOLSE-NEXT: bl __addsf3 157; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee 158; SOFTFP-NOLSE-NEXT: mov w8, w0 159; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start 160; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1 161; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 162; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] 163; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth 164; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1 165; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 166; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2 167; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] 168; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3 169; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1 170; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 171; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2 172; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end 173; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload 174; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 175; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload 176; 
SOFTFP-NOLSE-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload 177; SOFTFP-NOLSE-NEXT: ret 178 %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 4 179 ret half %res 180} 181 182define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 { 183; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2: 184; NOLSE: // %bb.0: 185; NOLSE-NEXT: // kill: def $h0 killed $h0 def $d0 186; NOLSE-NEXT: shll v1.4s, v0.4h, #16 187; NOLSE-NEXT: mov w8, #32767 // =0x7fff 188; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start 189; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 190; NOLSE-NEXT: ldaxrh w9, [x0] 191; NOLSE-NEXT: fmov s0, w9 192; NOLSE-NEXT: shll v2.4s, v0.4h, #16 193; NOLSE-NEXT: fadd s2, s2, s1 194; NOLSE-NEXT: fmov w9, s2 195; NOLSE-NEXT: ubfx w10, w9, #16, #1 196; NOLSE-NEXT: add w9, w9, w8 197; NOLSE-NEXT: add w9, w10, w9 198; NOLSE-NEXT: lsr w9, w9, #16 199; NOLSE-NEXT: stlxrh w10, w9, [x0] 200; NOLSE-NEXT: cbnz w10, .LBB2_1 201; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 202; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $d0 203; NOLSE-NEXT: ret 204; 205; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2: 206; LSE: // %bb.0: 207; LSE-NEXT: // kill: def $h0 killed $h0 def $d0 208; LSE-NEXT: shll v1.4s, v0.4h, #16 209; LSE-NEXT: mov w8, #32767 // =0x7fff 210; LSE-NEXT: ldr h0, [x0] 211; LSE-NEXT: .LBB2_1: // %atomicrmw.start 212; LSE-NEXT: // =>This Inner Loop Header: Depth=1 213; LSE-NEXT: shll v2.4s, v0.4h, #16 214; LSE-NEXT: fadd s2, s2, s1 215; LSE-NEXT: fmov w9, s2 216; LSE-NEXT: ubfx w10, w9, #16, #1 217; LSE-NEXT: add w9, w9, w8 218; LSE-NEXT: add w9, w10, w9 219; LSE-NEXT: lsr w9, w9, #16 220; LSE-NEXT: fmov s2, w9 221; LSE-NEXT: fmov w9, s0 222; LSE-NEXT: fmov w10, s2 223; LSE-NEXT: mov w11, w9 224; LSE-NEXT: casalh w11, w10, [x0] 225; LSE-NEXT: fmov s0, w11 226; LSE-NEXT: cmp w11, w9, uxth 227; LSE-NEXT: b.ne .LBB2_1 228; LSE-NEXT: // %bb.2: // %atomicrmw.end 229; LSE-NEXT: // kill: def $h0 killed $h0 killed $d0 230; LSE-NEXT: ret 231; 
232; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2: 233; SOFTFP-NOLSE: // %bb.0: 234; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill 235; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill 236; SOFTFP-NOLSE-NEXT: mov x19, x0 237; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] 238; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 239; SOFTFP-NOLSE-NEXT: b .LBB2_2 240; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore 241; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1 242; SOFTFP-NOLSE-NEXT: mov w8, wzr 243; SOFTFP-NOLSE-NEXT: clrex 244; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6 245; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start 246; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 247; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2 248; SOFTFP-NOLSE-NEXT: mov w21, w0 249; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 250; SOFTFP-NOLSE-NEXT: mov w1, w20 251; SOFTFP-NOLSE-NEXT: bl __addsf3 252; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 253; SOFTFP-NOLSE-NEXT: mov w8, w0 254; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start 255; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1 256; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 257; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] 258; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth 259; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1 260; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 261; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2 262; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] 263; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3 264; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1 265; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 266; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2 267; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end 268; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload 269; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 270; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload 271; SOFTFP-NOLSE-NEXT: ret 272 %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2 273 ret bfloat %res 
274} 275 276define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 { 277; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4: 278; NOLSE: // %bb.0: 279; NOLSE-NEXT: // kill: def $h0 killed $h0 def $d0 280; NOLSE-NEXT: shll v1.4s, v0.4h, #16 281; NOLSE-NEXT: mov w8, #32767 // =0x7fff 282; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start 283; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 284; NOLSE-NEXT: ldaxrh w9, [x0] 285; NOLSE-NEXT: fmov s0, w9 286; NOLSE-NEXT: shll v2.4s, v0.4h, #16 287; NOLSE-NEXT: fadd s2, s2, s1 288; NOLSE-NEXT: fmov w9, s2 289; NOLSE-NEXT: ubfx w10, w9, #16, #1 290; NOLSE-NEXT: add w9, w9, w8 291; NOLSE-NEXT: add w9, w10, w9 292; NOLSE-NEXT: lsr w9, w9, #16 293; NOLSE-NEXT: stlxrh w10, w9, [x0] 294; NOLSE-NEXT: cbnz w10, .LBB3_1 295; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 296; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $d0 297; NOLSE-NEXT: ret 298; 299; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4: 300; LSE: // %bb.0: 301; LSE-NEXT: // kill: def $h0 killed $h0 def $d0 302; LSE-NEXT: shll v1.4s, v0.4h, #16 303; LSE-NEXT: mov w8, #32767 // =0x7fff 304; LSE-NEXT: ldr h0, [x0] 305; LSE-NEXT: .LBB3_1: // %atomicrmw.start 306; LSE-NEXT: // =>This Inner Loop Header: Depth=1 307; LSE-NEXT: shll v2.4s, v0.4h, #16 308; LSE-NEXT: fadd s2, s2, s1 309; LSE-NEXT: fmov w9, s2 310; LSE-NEXT: ubfx w10, w9, #16, #1 311; LSE-NEXT: add w9, w9, w8 312; LSE-NEXT: add w9, w10, w9 313; LSE-NEXT: lsr w9, w9, #16 314; LSE-NEXT: fmov s2, w9 315; LSE-NEXT: fmov w9, s0 316; LSE-NEXT: fmov w10, s2 317; LSE-NEXT: mov w11, w9 318; LSE-NEXT: casalh w11, w10, [x0] 319; LSE-NEXT: fmov s0, w11 320; LSE-NEXT: cmp w11, w9, uxth 321; LSE-NEXT: b.ne .LBB3_1 322; LSE-NEXT: // %bb.2: // %atomicrmw.end 323; LSE-NEXT: // kill: def $h0 killed $h0 killed $d0 324; LSE-NEXT: ret 325; 326; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4: 327; SOFTFP-NOLSE: // %bb.0: 328; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill 329; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill 330; SOFTFP-NOLSE-NEXT: mov x19, x0 331; SOFTFP-NOLSE-NEXT: ldrh w0, [x0] 332; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16 333; SOFTFP-NOLSE-NEXT: b .LBB3_2 334; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore 335; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1 336; SOFTFP-NOLSE-NEXT: mov w8, wzr 337; SOFTFP-NOLSE-NEXT: clrex 338; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6 339; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start 340; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 341; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2 342; SOFTFP-NOLSE-NEXT: mov w21, w0 343; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16 344; SOFTFP-NOLSE-NEXT: mov w1, w20 345; SOFTFP-NOLSE-NEXT: bl __addsf3 346; SOFTFP-NOLSE-NEXT: bl __truncsfbf2 347; SOFTFP-NOLSE-NEXT: mov w8, w0 348; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start 349; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1 350; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 351; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19] 352; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth 353; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1 354; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 355; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2 356; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19] 357; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3 358; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1 359; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 360; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2 361; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end 362; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload 363; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 364; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload 365; SOFTFP-NOLSE-NEXT: ret 366 %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4 367 ret bfloat %res 368} 369 370define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0 { 371; NOLSE-LABEL: 
test_atomicrmw_fadd_f32_seq_cst_align4: 372; NOLSE: // %bb.0: 373; NOLSE-NEXT: .LBB4_1: // %atomicrmw.start 374; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 375; NOLSE-NEXT: ldaxr w8, [x0] 376; NOLSE-NEXT: fmov s1, w8 377; NOLSE-NEXT: fadd s2, s1, s0 378; NOLSE-NEXT: fmov w8, s2 379; NOLSE-NEXT: stlxr w9, w8, [x0] 380; NOLSE-NEXT: cbnz w9, .LBB4_1 381; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 382; NOLSE-NEXT: fmov s0, s1 383; NOLSE-NEXT: ret 384; 385; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4: 386; LSE: // %bb.0: 387; LSE-NEXT: ldr s1, [x0] 388; LSE-NEXT: .LBB4_1: // %atomicrmw.start 389; LSE-NEXT: // =>This Inner Loop Header: Depth=1 390; LSE-NEXT: fadd s2, s1, s0 391; LSE-NEXT: fmov w8, s1 392; LSE-NEXT: mov w10, w8 393; LSE-NEXT: fmov w9, s2 394; LSE-NEXT: casal w10, w9, [x0] 395; LSE-NEXT: fmov s1, w10 396; LSE-NEXT: cmp w10, w8 397; LSE-NEXT: b.ne .LBB4_1 398; LSE-NEXT: // %bb.2: // %atomicrmw.end 399; LSE-NEXT: fmov s0, s1 400; LSE-NEXT: ret 401; 402; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align4: 403; SOFTFP-NOLSE: // %bb.0: 404; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill 405; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill 406; SOFTFP-NOLSE-NEXT: mov x19, x0 407; SOFTFP-NOLSE-NEXT: ldr w0, [x0] 408; SOFTFP-NOLSE-NEXT: mov w20, w1 409; SOFTFP-NOLSE-NEXT: b .LBB4_2 410; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore 411; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1 412; SOFTFP-NOLSE-NEXT: mov w8, wzr 413; SOFTFP-NOLSE-NEXT: clrex 414; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6 415; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start 416; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 417; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2 418; SOFTFP-NOLSE-NEXT: mov w1, w20 419; SOFTFP-NOLSE-NEXT: mov w21, w0 420; SOFTFP-NOLSE-NEXT: bl __addsf3 421; SOFTFP-NOLSE-NEXT: mov w8, w0 422; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start 423; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1 424; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 425; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19] 426; SOFTFP-NOLSE-NEXT: cmp w0, w21 427; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1 428; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 429; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2 430; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19] 431; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3 432; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1 433; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1 434; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2 435; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end 436; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload 437; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0 438; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload 439; SOFTFP-NOLSE-NEXT: ret 440 %res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4 441 ret float %res 442} 443 444define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #0 { 445; NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8: 446; NOLSE: // %bb.0: 447; NOLSE-NEXT: .LBB5_1: // %atomicrmw.start 448; NOLSE-NEXT: // 
=>This Inner Loop Header: Depth=1 449; NOLSE-NEXT: ldaxr x8, [x0] 450; NOLSE-NEXT: fmov d1, x8 451; NOLSE-NEXT: fadd d2, d1, d0 452; NOLSE-NEXT: fmov x8, d2 453; NOLSE-NEXT: stlxr w9, x8, [x0] 454; NOLSE-NEXT: cbnz w9, .LBB5_1 455; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 456; NOLSE-NEXT: fmov d0, d1 457; NOLSE-NEXT: ret 458; 459; LSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8: 460; LSE: // %bb.0: 461; LSE-NEXT: ldr d1, [x0] 462; LSE-NEXT: .LBB5_1: // %atomicrmw.start 463; LSE-NEXT: // =>This Inner Loop Header: Depth=1 464; LSE-NEXT: fadd d2, d1, d0 465; LSE-NEXT: fmov x8, d1 466; LSE-NEXT: mov x10, x8 467; LSE-NEXT: fmov x9, d2 468; LSE-NEXT: casal x10, x9, [x0] 469; LSE-NEXT: fmov d1, x10 470; LSE-NEXT: cmp x10, x8 471; LSE-NEXT: b.ne .LBB5_1 472; LSE-NEXT: // %bb.2: // %atomicrmw.end 473; LSE-NEXT: fmov d0, d1 474; LSE-NEXT: ret 475; 476; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8: 477; SOFTFP-NOLSE: // %bb.0: 478; SOFTFP-NOLSE-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill 479; SOFTFP-NOLSE-NEXT: ldr x21, [x0] 480; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill 481; SOFTFP-NOLSE-NEXT: mov x19, x0 482; SOFTFP-NOLSE-NEXT: mov x20, x1 483; SOFTFP-NOLSE-NEXT: b .LBB5_2 484; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore 485; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1 486; SOFTFP-NOLSE-NEXT: mov w9, wzr 487; SOFTFP-NOLSE-NEXT: clrex 488; SOFTFP-NOLSE-NEXT: mov x21, x8 489; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_6 490; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start 491; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 492; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2 493; SOFTFP-NOLSE-NEXT: mov x0, x21 494; SOFTFP-NOLSE-NEXT: mov x1, x20 495; SOFTFP-NOLSE-NEXT: bl __adddf3 496; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start 497; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1 498; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 499; SOFTFP-NOLSE-NEXT: ldaxr x8, [x19] 500; SOFTFP-NOLSE-NEXT: cmp x8, x21 
501; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1 502; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore 503; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2 504; SOFTFP-NOLSE-NEXT: stlxr w9, x0, [x19] 505; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3 506; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1 507; SOFTFP-NOLSE-NEXT: mov w9, #1 // =0x1 508; SOFTFP-NOLSE-NEXT: mov x21, x8 509; SOFTFP-NOLSE-NEXT: cbz w9, .LBB5_2 510; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end 511; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload 512; SOFTFP-NOLSE-NEXT: mov x0, x21 513; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload 514; SOFTFP-NOLSE-NEXT: ret 515 %res = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8 516 ret double %res 517} 518 519define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 { 520; NOLSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16: 521; NOLSE: // %bb.0: 522; NOLSE-NEXT: sub sp, sp, #96 523; NOLSE-NEXT: ldr q1, [x0] 524; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill 525; NOLSE-NEXT: mov x19, x0 526; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill 527; NOLSE-NEXT: b .LBB6_2 528; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start 529; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1 530; NOLSE-NEXT: stp x12, x13, [sp, #32] 531; NOLSE-NEXT: cmp x13, x10 532; NOLSE-NEXT: ldr q1, [sp, #32] 533; NOLSE-NEXT: ccmp x12, x11, #0, eq 534; NOLSE-NEXT: b.eq .LBB6_6 535; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start 536; NOLSE-NEXT: // =>This Loop Header: Depth=1 537; NOLSE-NEXT: // Child Loop BB6_3 Depth 2 538; NOLSE-NEXT: mov v0.16b, v1.16b 539; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 540; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload 541; NOLSE-NEXT: bl __addtf3 542; NOLSE-NEXT: str q0, [sp, #48] 543; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 544; NOLSE-NEXT: ldp x9, x8, [sp, #48] 545; NOLSE-NEXT: str q0, [sp, #64] 546; NOLSE-NEXT: ldp x11, x10, [sp, #64] 547; 
NOLSE-NEXT: .LBB6_3: // %atomicrmw.start 548; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1 549; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 550; NOLSE-NEXT: ldaxp x12, x13, [x19] 551; NOLSE-NEXT: cmp x12, x11 552; NOLSE-NEXT: cset w14, ne 553; NOLSE-NEXT: cmp x13, x10 554; NOLSE-NEXT: cinc w14, w14, ne 555; NOLSE-NEXT: cbz w14, .LBB6_5 556; NOLSE-NEXT: // %bb.4: // %atomicrmw.start 557; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2 558; NOLSE-NEXT: stlxp w14, x12, x13, [x19] 559; NOLSE-NEXT: cbnz w14, .LBB6_3 560; NOLSE-NEXT: b .LBB6_1 561; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start 562; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2 563; NOLSE-NEXT: stlxp w14, x9, x8, [x19] 564; NOLSE-NEXT: cbnz w14, .LBB6_3 565; NOLSE-NEXT: b .LBB6_1 566; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end 567; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload 568; NOLSE-NEXT: mov v0.16b, v1.16b 569; NOLSE-NEXT: add sp, sp, #96 570; NOLSE-NEXT: ret 571; 572; LSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16: 573; LSE: // %bb.0: 574; LSE-NEXT: sub sp, sp, #96 575; LSE-NEXT: ldr q1, [x0] 576; LSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill 577; LSE-NEXT: mov x19, x0 578; LSE-NEXT: str q0, [sp] // 16-byte Folded Spill 579; LSE-NEXT: .LBB6_1: // %atomicrmw.start 580; LSE-NEXT: // =>This Inner Loop Header: Depth=1 581; LSE-NEXT: mov v0.16b, v1.16b 582; LSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 583; LSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload 584; LSE-NEXT: bl __addtf3 585; LSE-NEXT: str q0, [sp, #48] 586; LSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 587; LSE-NEXT: ldp x0, x1, [sp, #48] 588; LSE-NEXT: str q0, [sp, #64] 589; LSE-NEXT: ldp x2, x3, [sp, #64] 590; LSE-NEXT: mov x4, x2 591; LSE-NEXT: mov x5, x3 592; LSE-NEXT: caspal x4, x5, x0, x1, [x19] 593; LSE-NEXT: stp x4, x5, [sp, #32] 594; LSE-NEXT: cmp x5, x3 595; LSE-NEXT: ldr q1, [sp, #32] 596; LSE-NEXT: ccmp x4, x2, #0, eq 597; LSE-NEXT: b.ne .LBB6_1 598; LSE-NEXT: // %bb.2: // %atomicrmw.end 599; 
LSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload 600; LSE-NEXT: mov v0.16b, v1.16b 601; LSE-NEXT: add sp, sp, #96 602; LSE-NEXT: ret 603; 604; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16: 605; SOFTFP-NOLSE: // %bb.0: 606; SOFTFP-NOLSE-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill 607; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill 608; SOFTFP-NOLSE-NEXT: mov x20, x0 609; SOFTFP-NOLSE-NEXT: mov x19, x3 610; SOFTFP-NOLSE-NEXT: ldp x0, x1, [x0] 611; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill 612; SOFTFP-NOLSE-NEXT: mov x21, x2 613; SOFTFP-NOLSE-NEXT: b .LBB6_2 614; SOFTFP-NOLSE-NEXT: .LBB6_1: // %atomicrmw.start 615; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1 616; SOFTFP-NOLSE-NEXT: cmp x1, x22 617; SOFTFP-NOLSE-NEXT: ccmp x0, x23, #0, eq 618; SOFTFP-NOLSE-NEXT: b.eq .LBB6_6 619; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start 620; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 621; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2 622; SOFTFP-NOLSE-NEXT: mov x2, x21 623; SOFTFP-NOLSE-NEXT: mov x3, x19 624; SOFTFP-NOLSE-NEXT: mov x22, x1 625; SOFTFP-NOLSE-NEXT: mov x23, x0 626; SOFTFP-NOLSE-NEXT: bl __addtf3 627; SOFTFP-NOLSE-NEXT: mov x8, x0 628; SOFTFP-NOLSE-NEXT: mov x9, x1 629; SOFTFP-NOLSE-NEXT: .LBB6_3: // %atomicrmw.start 630; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_2 Depth=1 631; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 632; SOFTFP-NOLSE-NEXT: ldaxp x0, x1, [x20] 633; SOFTFP-NOLSE-NEXT: cmp x0, x23 634; SOFTFP-NOLSE-NEXT: cset w10, ne 635; SOFTFP-NOLSE-NEXT: cmp x1, x22 636; SOFTFP-NOLSE-NEXT: cinc w10, w10, ne 637; SOFTFP-NOLSE-NEXT: cbz w10, .LBB6_5 638; SOFTFP-NOLSE-NEXT: // %bb.4: // %atomicrmw.start 639; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2 640; SOFTFP-NOLSE-NEXT: stlxp w10, x0, x1, [x20] 641; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB6_3 642; SOFTFP-NOLSE-NEXT: b .LBB6_1 643; SOFTFP-NOLSE-NEXT: .LBB6_5: // %atomicrmw.start 644; 
SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2 645; SOFTFP-NOLSE-NEXT: stlxp w10, x8, x9, [x20] 646; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB6_3 647; SOFTFP-NOLSE-NEXT: b .LBB6_1 648; SOFTFP-NOLSE-NEXT: .LBB6_6: // %atomicrmw.end 649; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload 650; SOFTFP-NOLSE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload 651; SOFTFP-NOLSE-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload 652; SOFTFP-NOLSE-NEXT: ret 653 %res = atomicrmw fadd ptr %ptr, fp128 %value seq_cst, align 16 654 ret fp128 %res 655} 656 657define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 { 658; NOLSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4: 659; NOLSE: // %bb.0: 660; NOLSE-NEXT: fcvtl v0.4s, v0.4h 661; NOLSE-NEXT: .LBB7_1: // %atomicrmw.start 662; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1 663; NOLSE-NEXT: ldaxr w8, [x0] 664; NOLSE-NEXT: fmov s1, w8 665; NOLSE-NEXT: fcvtl v1.4s, v1.4h 666; NOLSE-NEXT: fadd v1.4s, v1.4s, v0.4s 667; NOLSE-NEXT: fcvtn v1.4h, v1.4s 668; NOLSE-NEXT: fmov w9, s1 669; NOLSE-NEXT: stlxr w10, w9, [x0] 670; NOLSE-NEXT: cbnz w10, .LBB7_1 671; NOLSE-NEXT: // %bb.2: // %atomicrmw.end 672; NOLSE-NEXT: fmov d0, x8 673; NOLSE-NEXT: ret 674; 675; LSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4: 676; LSE: // %bb.0: 677; LSE-NEXT: fcvtl v1.4s, v0.4h 678; LSE-NEXT: ldr s0, [x0] 679; LSE-NEXT: .LBB7_1: // %atomicrmw.start 680; LSE-NEXT: // =>This Inner Loop Header: Depth=1 681; LSE-NEXT: fcvtl v2.4s, v0.4h 682; LSE-NEXT: fmov w8, s0 683; LSE-NEXT: mov w10, w8 684; LSE-NEXT: fadd v2.4s, v2.4s, v1.4s 685; LSE-NEXT: fcvtn v2.4h, v2.4s 686; LSE-NEXT: fmov w9, s2 687; LSE-NEXT: casal w10, w9, [x0] 688; LSE-NEXT: fmov s0, w10 689; LSE-NEXT: cmp w10, w8 690; LSE-NEXT: b.ne .LBB7_1 691; LSE-NEXT: // %bb.2: // %atomicrmw.end 692; LSE-NEXT: // kill: def $d0 killed $d0 killed $q0 693; LSE-NEXT: ret 694; 695; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4: 696; 
SOFTFP-NOLSE: // %bb.0: 697; SOFTFP-NOLSE-NEXT: stp x30, x25, [sp, #-64]! // 16-byte Folded Spill 698; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill 699; SOFTFP-NOLSE-NEXT: ldrh w23, [x0, #2] 700; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill 701; SOFTFP-NOLSE-NEXT: ldrh w22, [x0] 702; SOFTFP-NOLSE-NEXT: mov w21, w1 703; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill 704; SOFTFP-NOLSE-NEXT: mov w19, w2 705; SOFTFP-NOLSE-NEXT: mov x20, x0 706; SOFTFP-NOLSE-NEXT: b .LBB7_2 707; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore 708; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1 709; SOFTFP-NOLSE-NEXT: mov w8, wzr 710; SOFTFP-NOLSE-NEXT: clrex 711; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16 712; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6 713; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start 714; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1 715; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2 716; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff 717; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 718; SOFTFP-NOLSE-NEXT: mov w24, w0 719; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff 720; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 721; SOFTFP-NOLSE-NEXT: mov w1, w24 722; SOFTFP-NOLSE-NEXT: bl __addsf3 723; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee 724; SOFTFP-NOLSE-NEXT: mov w24, w0 725; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff 726; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 727; SOFTFP-NOLSE-NEXT: mov w25, w0 728; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff 729; SOFTFP-NOLSE-NEXT: bl __gnu_h2f_ieee 730; SOFTFP-NOLSE-NEXT: mov w1, w25 731; SOFTFP-NOLSE-NEXT: bl __addsf3 732; SOFTFP-NOLSE-NEXT: bl __gnu_f2h_ieee 733; SOFTFP-NOLSE-NEXT: mov w8, w22 734; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16 735; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16 736; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start 737; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1 738; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2 739; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20] 740; SOFTFP-NOLSE-NEXT: 
cmp w22, w8
; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB7_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB7_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_2
; SOFTFP-NOLSE-NEXT:  .LBB7_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    mov w0, w22
; SOFTFP-NOLSE-NEXT:    mov w1, w23
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x30, x25, [sp], #64 // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fadd ptr %ptr, <2 x half> %value seq_cst, align 4
  ret <2 x half> %res
}

; Test: seq_cst `atomicrmw fadd` on a <2 x bfloat> at a 4-byte-aligned pointer;
; the function returns the value produced by the atomicrmw.
;
; The check lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of the file).
; Regenerate them with that script rather than editing them by hand.
;
; What the recorded output shows for each RUN configuration:
;  * NOLSE run (default attributes): an ldaxr/stlxr retry loop. The lanes are
;    widened with shll #16, added with a .4s fadd, and narrowed again through
;    the ushr/and/add/addhn sequence before the store-exclusive.
;  * LSE run (-mattr=+lse): one plain ldr before the loop, then casal retried
;    until the value it returns compares equal to the expected one.
;  * SOFTFP-NOLSE run (-mattr=-lse,-fp-armv8): one __addsf3 call followed by
;    one __truncsfbf2 call per lane, the two results merged with bfi, then an
;    ldaxr/stlxr compare-and-store loop.
define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:    movi v1.4s, #1
; NOLSE-NEXT:    movi v2.4s, #127, msl #8
; NOLSE-NEXT:    shll v0.4s, v0.4h, #16
; NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxr w8, [x0]
; NOLSE-NEXT:    fmov s3, w8
; NOLSE-NEXT:    shll v3.4s, v3.4h, #16
; NOLSE-NEXT:    fadd v3.4s, v3.4s, v0.4s
; NOLSE-NEXT:    ushr v4.4s, v3.4s, #16
; NOLSE-NEXT:    and v4.16b, v4.16b, v1.16b
; NOLSE-NEXT:    add v3.4s, v4.4s, v3.4s
; NOLSE-NEXT:    addhn v3.4h, v3.4s, v2.4s
; NOLSE-NEXT:    fmov w9, s3
; NOLSE-NEXT:    stlxr w10, w9, [x0]
; NOLSE-NEXT:    cbnz w10, .LBB8_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    fmov d0, x8
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
; LSE:       // %bb.0:
; LSE-NEXT:    movi v1.4s, #1
; LSE-NEXT:    movi v2.4s, #127, msl #8
; LSE-NEXT:    shll v3.4s, v0.4h, #16
; LSE-NEXT:    ldr s0, [x0]
; LSE-NEXT:  .LBB8_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    shll v4.4s, v0.4h, #16
; LSE-NEXT:    fmov w8, s0
; LSE-NEXT:    fadd v4.4s, v4.4s, v3.4s
; LSE-NEXT:    mov w10, w8
; LSE-NEXT:    ushr v5.4s, v4.4s, #16
; LSE-NEXT:    and v5.16b, v5.16b, v1.16b
; LSE-NEXT:    add v4.4s, v5.4s, v4.4s
; LSE-NEXT:    addhn v4.4h, v4.4s, v2.4s
; LSE-NEXT:    fmov w9, s4
; LSE-NEXT:    casal w10, w9, [x0]
; LSE-NEXT:    fmov s0, w10
; LSE-NEXT:    cmp w10, w8
; LSE-NEXT:    b.ne .LBB8_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov w8, w1
; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    ldrh w1, [x0, #2]
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
; SOFTFP-NOLSE-NEXT:    lsl w20, w2, #16
; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT:    mov x19, x0
; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    b .LBB8_2
; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_6
; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_3 Depth 2
; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
; SOFTFP-NOLSE-NEXT:    mov w1, w20
; SOFTFP-NOLSE-NEXT:    mov w0, w23
; SOFTFP-NOLSE-NEXT:    bl __addsf3
; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
; SOFTFP-NOLSE-NEXT:    mov w24, w0
; SOFTFP-NOLSE-NEXT:    lsl w0, w22, #16
; SOFTFP-NOLSE-NEXT:    mov w1, w21
; SOFTFP-NOLSE-NEXT:    bl __addsf3
; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x19]
; SOFTFP-NOLSE-NEXT:    cmp w22, w23
; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxr w8, w0, [x19]
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB8_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_2
; SOFTFP-NOLSE-NEXT:  .LBB8_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    mov w0, w22
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fadd ptr %ptr, <2 x bfloat> %value seq_cst, align 4
  ret <2 x bfloat> %res
}

; Test: seq_cst `atomicrmw fadd` on a <2 x float> at an 8-byte-aligned pointer;
; the function returns the value produced by the atomicrmw.
;
; The check lines inside the function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; What the recorded output shows for each RUN configuration:
;  * NOLSE run: a 64-bit ldaxr/stlxr retry loop around a single .2s fadd.
;  * LSE run: one ldr d1 before the loop, then a 64-bit casal retried until
;    the returned value compares equal to the expected one.
;  * SOFTFP-NOLSE run: two __addsf3 calls (one per lane); old and new values
;    are each packed into a 64-bit register with `orr ..., lsl #32`, then an
;    ldaxr/stlxr compare-and-store loop runs on the packed value.
define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x float> %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fadd_v2f32_seq_cst_align8:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxr x8, [x0]
; NOLSE-NEXT:    fmov d1, x8
; NOLSE-NEXT:    fadd v2.2s, v1.2s, v0.2s
; NOLSE-NEXT:    fmov x8, d2
; NOLSE-NEXT:    stlxr w9, x8, [x0]
; NOLSE-NEXT:    cbnz w9, .LBB9_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    fmov d0, d1
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fadd_v2f32_seq_cst_align8:
; LSE:       // %bb.0:
; LSE-NEXT:    ldr d1, [x0]
; LSE-NEXT:  .LBB9_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    fadd v2.2s, v1.2s, v0.2s
; LSE-NEXT:    fmov x8, d1
; LSE-NEXT:    mov x10, x8
; LSE-NEXT:    fmov x9, d2
; LSE-NEXT:    casal x10, x9, [x0]
; LSE-NEXT:    fmov d1, x10
; LSE-NEXT:    cmp x10, x8
; LSE-NEXT:    b.ne .LBB9_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    fmov d0, d1
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f32_seq_cst_align8:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov w21, w1
; SOFTFP-NOLSE-NEXT:    ldp w22, w23, [x0]
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov w19, w2
; SOFTFP-NOLSE-NEXT:    mov x20, x0
; SOFTFP-NOLSE-NEXT:    b .LBB9_2
; SOFTFP-NOLSE-NEXT:  .LBB9_1: // %cmpxchg.nostore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, wzr
; SOFTFP-NOLSE-NEXT:    clrex
; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB9_6
; SOFTFP-NOLSE-NEXT:  .LBB9_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB9_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov w0, w23
; SOFTFP-NOLSE-NEXT:    mov w1, w19
; SOFTFP-NOLSE-NEXT:    bl __addsf3
; SOFTFP-NOLSE-NEXT:    mov w24, w0
; SOFTFP-NOLSE-NEXT:    mov w0, w22
; SOFTFP-NOLSE-NEXT:    mov w1, w21
; SOFTFP-NOLSE-NEXT:    bl __addsf3
; SOFTFP-NOLSE-NEXT:    mov w8, w0
; SOFTFP-NOLSE-NEXT:    mov w9, w22
; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB9_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxr x22, [x20]
; SOFTFP-NOLSE-NEXT:    cmp x22, x9
; SOFTFP-NOLSE-NEXT:    b.ne .LBB9_1
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxr w10, x8, [x20]
; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB9_3
; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB9_2 Depth=1
; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_2
; SOFTFP-NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    mov w0, w22
; SOFTFP-NOLSE-NEXT:    mov w1, w23
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fadd ptr %ptr, <2 x float> %value seq_cst, align 8
  ret <2 x float> %res
}

; Test: seq_cst `atomicrmw fadd` on a <2 x double>; the function returns the
; value produced by the atomicrmw.
;
; NOTE(review): the function name ends in "align8", but the atomicrmw in the
; body is written with `align 16`. The recorded output below corresponds to
; the IR as written. Confirm which alignment was intended before renaming the
; function or changing the instruction; either change requires regenerating
; the check lines with utils/update_llc_test_checks.py.
;
; What the recorded output shows for each RUN configuration:
;  * NOLSE run: an ldaxp/stlxp (pair exclusive) retry loop around a .2d fadd.
;  * LSE run: one ldr q1 before the loop, then caspal retried until both
;    returned halves compare equal to the expected ones (cmp + ccmp).
;  * SOFTFP-NOLSE run: two __adddf3 calls (one per lane), then an ldaxp/stlxp
;    loop that stores either the new pair or the just-loaded pair back.
define <2 x double> @test_atomicrmw_fadd_v2f64_seq_cst_align8(ptr %ptr, <2 x double> %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fadd_v2f64_seq_cst_align8:
; NOLSE:       // %bb.0:
; NOLSE-NEXT:  .LBB10_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT:    ldaxp x8, x9, [x0]
; NOLSE-NEXT:    fmov d1, x8
; NOLSE-NEXT:    mov v1.d[1], x9
; NOLSE-NEXT:    fadd v2.2d, v1.2d, v0.2d
; NOLSE-NEXT:    mov x8, v2.d[1]
; NOLSE-NEXT:    fmov x9, d2
; NOLSE-NEXT:    stlxp w10, x9, x8, [x0]
; NOLSE-NEXT:    cbnz w10, .LBB10_1
; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
; NOLSE-NEXT:    mov v0.16b, v1.16b
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_atomicrmw_fadd_v2f64_seq_cst_align8:
; LSE:       // %bb.0:
; LSE-NEXT:    ldr q1, [x0]
; LSE-NEXT:  .LBB10_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    fadd v2.2d, v1.2d, v0.2d
; LSE-NEXT:    mov x3, v1.d[1]
; LSE-NEXT:    fmov x2, d1
; LSE-NEXT:    mov x7, x3
; LSE-NEXT:    mov x5, v2.d[1]
; LSE-NEXT:    mov x6, x2
; LSE-NEXT:    fmov x4, d2
; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
; LSE-NEXT:    fmov d1, x6
; LSE-NEXT:    cmp x7, x3
; LSE-NEXT:    ccmp x6, x2, #0, eq
; LSE-NEXT:    mov v1.d[1], x7
; LSE-NEXT:    b.ne .LBB10_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    mov v0.16b, v1.16b
; LSE-NEXT:    ret
;
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f64_seq_cst_align8:
; SOFTFP-NOLSE:       // %bb.0:
; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x20, x0
; SOFTFP-NOLSE-NEXT:    mov x19, x3
; SOFTFP-NOLSE-NEXT:    ldp x0, x1, [x0]
; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    mov x21, x2
; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT:    b .LBB10_2
; SOFTFP-NOLSE-NEXT:  .LBB10_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB10_2 Depth=1
; SOFTFP-NOLSE-NEXT:    cmp x1, x22
; SOFTFP-NOLSE-NEXT:    ccmp x0, x23, #0, eq
; SOFTFP-NOLSE-NEXT:    b.eq .LBB10_6
; SOFTFP-NOLSE-NEXT:  .LBB10_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT:    // Child Loop BB10_3 Depth 2
; SOFTFP-NOLSE-NEXT:    mov x22, x1
; SOFTFP-NOLSE-NEXT:    mov x23, x0
; SOFTFP-NOLSE-NEXT:    mov x0, x1
; SOFTFP-NOLSE-NEXT:    mov x1, x19
; SOFTFP-NOLSE-NEXT:    bl __adddf3
; SOFTFP-NOLSE-NEXT:    mov x24, x0
; SOFTFP-NOLSE-NEXT:    mov x0, x23
; SOFTFP-NOLSE-NEXT:    mov x1, x21
; SOFTFP-NOLSE-NEXT:    bl __adddf3
; SOFTFP-NOLSE-NEXT:    mov x8, x0
; SOFTFP-NOLSE-NEXT:  .LBB10_3: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // Parent Loop BB10_2 Depth=1
; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT:    ldaxp x0, x1, [x20]
; SOFTFP-NOLSE-NEXT:    cmp x0, x23
; SOFTFP-NOLSE-NEXT:    cset w9, ne
; SOFTFP-NOLSE-NEXT:    cmp x1, x22
; SOFTFP-NOLSE-NEXT:    cinc w9, w9, ne
; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB10_5
; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB10_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxp w9, x0, x1, [x20]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB10_3
; SOFTFP-NOLSE-NEXT:    b .LBB10_1
; SOFTFP-NOLSE-NEXT:  .LBB10_5: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB10_3 Depth=2
; SOFTFP-NOLSE-NEXT:    stlxp w9, x8, x24, [x20]
; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB10_3
; SOFTFP-NOLSE-NEXT:    b .LBB10_1
; SOFTFP-NOLSE-NEXT:  .LBB10_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
; SOFTFP-NOLSE-NEXT:    ret
  %res = atomicrmw fadd ptr %ptr, <2 x double> %value seq_cst, align 16
  ret <2 x double> %res
}

; Attribute group referenced as #0 by the test functions above; it marks them
; nounwind.
attributes #0 = { nounwind }