; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s -check-prefixes=NOOUTLINE
; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck %s -check-prefixes=OUTLINE
; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+lse | FileCheck %s -check-prefixes=LSE

; Codegen checks for 128-bit (i128) atomic operations on AArch64: cmpxchg,
; atomicrmw, atomic load and atomic store. Each function is checked under the
; three llc invocations above:
;   * baseline          - exclusive load/store pair loops (ldxp/ldaxp with
;                         stxp/stlxp)
;   * +outline-atomics  - cmpxchg becomes a call to __aarch64_cas16_<order>;
;                         atomicrmw, load and store keep the inline loops
;   * +lse              - casp/caspa/caspl/caspal
; The assertion comments are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Global that receives the old value returned by each atomicrmw test below.
@var = global i128 0

; cmpxchg with acquire/acquire ordering; the loaded value is returned.
; Expected: baseline ldaxp + stxp loop (the mismatch path stores the loaded
; pair back), +outline-atomics __aarch64_cas16_acq, +lse caspa.
define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
; NOOUTLINE-LABEL: val_compare_and_swap:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x8, x1, [x0]
; NOOUTLINE-NEXT:    cmp x8, x2
; NOOUTLINE-NEXT:    cset w9, ne
; NOOUTLINE-NEXT:    cmp x1, x3
; NOOUTLINE-NEXT:    cinc w9, w9, ne
; NOOUTLINE-NEXT:    cbz w9, .LBB0_3
; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB0_1 Depth=1
; NOOUTLINE-NEXT:    stxp w9, x8, x1, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB0_1
; NOOUTLINE-NEXT:    b .LBB0_4
; NOOUTLINE-NEXT:  .LBB0_3: // in Loop: Header=BB0_1 Depth=1
; NOOUTLINE-NEXT:    stxp w9, x4, x5, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB0_1
; NOOUTLINE-NEXT:  .LBB0_4:
; NOOUTLINE-NEXT:    mov x0, x8
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: val_compare_and_swap:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
; OUTLINE-NEXT:    .cfi_offset w30, -16
; OUTLINE-NEXT:    mov x1, x3
; OUTLINE-NEXT:    mov x8, x0
; OUTLINE-NEXT:    mov x0, x2
; OUTLINE-NEXT:    mov x2, x4
; OUTLINE-NEXT:    mov x3, x5
; OUTLINE-NEXT:    mov x4, x8
; OUTLINE-NEXT:    bl __aarch64_cas16_acq
; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: val_compare_and_swap:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT:    caspa x2, x3, x4, x5, [x0]
; LSE-NEXT:    mov x0, x2
; LSE-NEXT:    mov x1, x3
; LSE-NEXT:    ret
  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval acquire acquire
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}

; cmpxchg with seq_cst/seq_cst ordering.
; Expected: baseline ldaxp + stlxp loop, +outline-atomics
; __aarch64_cas16_acq_rel, +lse caspal.
define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) {
; NOOUTLINE-LABEL: val_compare_and_swap_seqcst:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB1_1: // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x8, x1, [x0]
; NOOUTLINE-NEXT:    cmp x8, x2
; NOOUTLINE-NEXT:    cset w9, ne
; NOOUTLINE-NEXT:    cmp x1, x3
; NOOUTLINE-NEXT:    cinc w9, w9, ne
; NOOUTLINE-NEXT:    cbz w9, .LBB1_3
; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB1_1 Depth=1
; NOOUTLINE-NEXT:    stlxp w9, x8, x1, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB1_1
; NOOUTLINE-NEXT:    b .LBB1_4
; NOOUTLINE-NEXT:  .LBB1_3: // in Loop: Header=BB1_1 Depth=1
; NOOUTLINE-NEXT:    stlxp w9, x4, x5, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB1_1
; NOOUTLINE-NEXT:  .LBB1_4:
; NOOUTLINE-NEXT:    mov x0, x8
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: val_compare_and_swap_seqcst:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
; OUTLINE-NEXT:    .cfi_offset w30, -16
; OUTLINE-NEXT:    mov x1, x3
; OUTLINE-NEXT:    mov x8, x0
; OUTLINE-NEXT:    mov x0, x2
; OUTLINE-NEXT:    mov x2, x4
; OUTLINE-NEXT:    mov x3, x5
; OUTLINE-NEXT:    mov x4, x8
; OUTLINE-NEXT:    bl __aarch64_cas16_acq_rel
; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: val_compare_and_swap_seqcst:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT:    caspal x2, x3, x4, x5, [x0]
; LSE-NEXT:    mov x0, x2
; LSE-NEXT:    mov x1, x3
; LSE-NEXT:    ret
  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval seq_cst seq_cst
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}

; cmpxchg with release success ordering and monotonic failure ordering.
; Expected: baseline ldxp + stlxp loop, +outline-atomics __aarch64_cas16_rel,
; +lse caspl.
define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) {
; NOOUTLINE-LABEL: val_compare_and_swap_release:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldxp x8, x1, [x0]
; NOOUTLINE-NEXT:    cmp x8, x2
; NOOUTLINE-NEXT:    cset w9, ne
; NOOUTLINE-NEXT:    cmp x1, x3
; NOOUTLINE-NEXT:    cinc w9, w9, ne
; NOOUTLINE-NEXT:    cbz w9, .LBB2_3
; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1
; NOOUTLINE-NEXT:    stlxp w9, x8, x1, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB2_1
; NOOUTLINE-NEXT:    b .LBB2_4
; NOOUTLINE-NEXT:  .LBB2_3: // in Loop: Header=BB2_1 Depth=1
; NOOUTLINE-NEXT:    stlxp w9, x4, x5, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB2_1
; NOOUTLINE-NEXT:  .LBB2_4:
; NOOUTLINE-NEXT:    mov x0, x8
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: val_compare_and_swap_release:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
; OUTLINE-NEXT:    .cfi_offset w30, -16
; OUTLINE-NEXT:    mov x1, x3
; OUTLINE-NEXT:    mov x8, x0
; OUTLINE-NEXT:    mov x0, x2
; OUTLINE-NEXT:    mov x2, x4
; OUTLINE-NEXT:    mov x3, x5
; OUTLINE-NEXT:    mov x4, x8
; OUTLINE-NEXT:    bl __aarch64_cas16_rel
; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: val_compare_and_swap_release:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT:    caspl x2, x3, x4, x5, [x0]
; LSE-NEXT:    mov x0, x2
; LSE-NEXT:    mov x1, x3
; LSE-NEXT:    ret
  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval release monotonic
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}

; cmpxchg with monotonic/monotonic ordering.
; Expected: baseline ldxp + stxp loop, +outline-atomics
; __aarch64_cas16_relax, +lse casp.
define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) {
; NOOUTLINE-LABEL: val_compare_and_swap_monotonic:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB3_1: // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldxp x8, x1, [x0]
; NOOUTLINE-NEXT:    cmp x8, x2
; NOOUTLINE-NEXT:    cset w9, ne
; NOOUTLINE-NEXT:    cmp x1, x3
; NOOUTLINE-NEXT:    cinc w9, w9, ne
; NOOUTLINE-NEXT:    cbz w9, .LBB3_3
; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB3_1 Depth=1
; NOOUTLINE-NEXT:    stxp w9, x8, x1, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB3_1
; NOOUTLINE-NEXT:    b .LBB3_4
; NOOUTLINE-NEXT:  .LBB3_3: // in Loop: Header=BB3_1 Depth=1
; NOOUTLINE-NEXT:    stxp w9, x4, x5, [x0]
; NOOUTLINE-NEXT:    cbnz w9, .LBB3_1
; NOOUTLINE-NEXT:  .LBB3_4:
; NOOUTLINE-NEXT:    mov x0, x8
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: val_compare_and_swap_monotonic:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
; OUTLINE-NEXT:    .cfi_offset w30, -16
; OUTLINE-NEXT:    mov x1, x3
; OUTLINE-NEXT:    mov x8, x0
; OUTLINE-NEXT:    mov x0, x2
; OUTLINE-NEXT:    mov x2, x4
; OUTLINE-NEXT:    mov x3, x5
; OUTLINE-NEXT:    mov x4, x8
; OUTLINE-NEXT:    bl __aarch64_cas16_relax
; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: val_compare_and_swap_monotonic:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
; LSE-NEXT:    mov x0, x2
; LSE-NEXT:    mov x1, x3
; LSE-NEXT:    ret
  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval monotonic monotonic
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}

; atomicrmw nand with release ordering; the old value is stored to @var.
; Expected: the baseline and +outline-atomics runs share the same
; ldxp + stlxp loop; +lse uses a caspl retry loop seeded by a plain ldp.
define void @fetch_and_nand(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_nand:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB4_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldxp x9, x8, [x0]
; NOOUTLINE-NEXT:    and x10, x9, x2
; NOOUTLINE-NEXT:    and x11, x8, x3
; NOOUTLINE-NEXT:    mvn x11, x11
; NOOUTLINE-NEXT:    mvn x10, x10
; NOOUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB4_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_nand:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB4_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldxp x9, x8, [x0]
; OUTLINE-NEXT:    and x10, x9, x2
; OUTLINE-NEXT:    and x11, x8, x3
; OUTLINE-NEXT:    mvn x11, x11
; OUTLINE-NEXT:    mvn x10, x10
; OUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB4_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_nand:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    and x8, x7, x3
; LSE-NEXT:    and x9, x4, x2
; LSE-NEXT:    mvn x10, x9
; LSE-NEXT:    mvn x11, x8
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspl x4, x5, x10, x11, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB4_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret

  %val = atomicrmw nand ptr %p, i128 %bits release
  store i128 %val, ptr @var, align 16
  ret void
}

; atomicrmw or with seq_cst ordering; the old value is stored to @var.
; Expected: ldaxp + stlxp loop without LSE, caspal retry loop with +lse.
define void @fetch_and_or(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_or:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB5_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
; NOOUTLINE-NEXT:    orr x10, x8, x3
; NOOUTLINE-NEXT:    orr x11, x9, x2
; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB5_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_or:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB5_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
; OUTLINE-NEXT:    orr x10, x8, x3
; OUTLINE-NEXT:    orr x11, x9, x2
; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB5_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_or:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    orr x8, x4, x2
; LSE-NEXT:    orr x9, x7, x3
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB5_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret

  %val = atomicrmw or ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

; atomicrmw add with seq_cst ordering; the old value is stored to @var.
; The 128-bit addition is checked as an adds/adc pair inside the loop.
define void @fetch_and_add(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_add:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB6_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
; NOOUTLINE-NEXT:    adds x10, x9, x2
; NOOUTLINE-NEXT:    adc x11, x8, x3
; NOOUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB6_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_add:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB6_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
; OUTLINE-NEXT:    adds x10, x9, x2
; OUTLINE-NEXT:    adc x11, x8, x3
; OUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB6_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_add:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB6_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    adds x8, x4, x2
; LSE-NEXT:    adc x9, x7, x3
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB6_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret
  %val = atomicrmw add ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

; atomicrmw sub with seq_cst ordering; the old value is stored to @var.
; The 128-bit subtraction is checked as a subs/sbc pair inside the loop.
define void @fetch_and_sub(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_sub:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB7_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
; NOOUTLINE-NEXT:    subs x10, x9, x2
; NOOUTLINE-NEXT:    sbc x11, x8, x3
; NOOUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB7_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_sub:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB7_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
; OUTLINE-NEXT:    subs x10, x9, x2
; OUTLINE-NEXT:    sbc x11, x8, x3
; OUTLINE-NEXT:    stlxp w12, x10, x11, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB7_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_sub:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB7_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    subs x8, x4, x2
; LSE-NEXT:    sbc x9, x7, x3
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB7_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret
  %val = atomicrmw sub ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

; atomicrmw min (signed) with seq_cst ordering; the old value is stored to
; @var. The selection is checked as cmp/sbcs followed by csel on "ge".
define void @fetch_and_min(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_min:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB8_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
; NOOUTLINE-NEXT:    cmp x2, x9
; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
; NOOUTLINE-NEXT:    csel x10, x8, x3, ge
; NOOUTLINE-NEXT:    csel x11, x9, x2, ge
; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB8_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_min:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB8_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
; OUTLINE-NEXT:    cmp x2, x9
; OUTLINE-NEXT:    sbcs xzr, x3, x8
; OUTLINE-NEXT:    csel x10, x8, x3, ge
; OUTLINE-NEXT:    csel x11, x9, x2, ge
; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB8_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_min:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB8_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    cmp x2, x4
; LSE-NEXT:    sbcs xzr, x3, x7
; LSE-NEXT:    csel x9, x7, x3, ge
; LSE-NEXT:    csel x8, x4, x2, ge
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB8_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret
  %val = atomicrmw min ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

; atomicrmw max (signed) with seq_cst ordering; the old value is stored to
; @var. Same shape as fetch_and_min, with csel on "lt".
define void @fetch_and_max(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_max:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB9_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
; NOOUTLINE-NEXT:    cmp x2, x9
; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
; NOOUTLINE-NEXT:    csel x10, x8, x3, lt
; NOOUTLINE-NEXT:    csel x11, x9, x2, lt
; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB9_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_max:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB9_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
; OUTLINE-NEXT:    cmp x2, x9
; OUTLINE-NEXT:    sbcs xzr, x3, x8
; OUTLINE-NEXT:    csel x10, x8, x3, lt
; OUTLINE-NEXT:    csel x11, x9, x2, lt
; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB9_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_max:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB9_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    cmp x2, x4
; LSE-NEXT:    sbcs xzr, x3, x7
; LSE-NEXT:    csel x9, x7, x3, lt
; LSE-NEXT:    csel x8, x4, x2, lt
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB9_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret
  %val = atomicrmw max ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

; atomicrmw umin (unsigned) with seq_cst ordering; the old value is stored to
; @var. Same shape as fetch_and_min, with csel on "hs".
define void @fetch_and_umin(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_umin:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB10_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
; NOOUTLINE-NEXT:    cmp x2, x9
; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
; NOOUTLINE-NEXT:    csel x10, x8, x3, hs
; NOOUTLINE-NEXT:    csel x11, x9, x2, hs
; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB10_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_umin:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB10_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
; OUTLINE-NEXT:    cmp x2, x9
; OUTLINE-NEXT:    sbcs xzr, x3, x8
; OUTLINE-NEXT:    csel x10, x8, x3, hs
; OUTLINE-NEXT:    csel x11, x9, x2, hs
; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB10_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_umin:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB10_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    cmp x2, x4
; LSE-NEXT:    sbcs xzr, x3, x7
; LSE-NEXT:    csel x9, x7, x3, hs
; LSE-NEXT:    csel x8, x4, x2, hs
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB10_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret
  %val = atomicrmw umin ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

; atomicrmw umax (unsigned) with seq_cst ordering; the old value is stored to
; @var. Same shape as fetch_and_min, with csel on "lo".
define void @fetch_and_umax(ptr %p, i128 %bits) {
; NOOUTLINE-LABEL: fetch_and_umax:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB11_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x9, x8, [x0]
; NOOUTLINE-NEXT:    cmp x2, x9
; NOOUTLINE-NEXT:    sbcs xzr, x3, x8
; NOOUTLINE-NEXT:    csel x10, x8, x3, lo
; NOOUTLINE-NEXT:    csel x11, x9, x2, lo
; NOOUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; NOOUTLINE-NEXT:    cbnz w12, .LBB11_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    adrp x10, :got:var
; NOOUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; NOOUTLINE-NEXT:    stp x9, x8, [x10]
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: fetch_and_umax:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB11_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x9, x8, [x0]
; OUTLINE-NEXT:    cmp x2, x9
; OUTLINE-NEXT:    sbcs xzr, x3, x8
; OUTLINE-NEXT:    csel x10, x8, x3, lo
; OUTLINE-NEXT:    csel x11, x9, x2, lo
; OUTLINE-NEXT:    stlxp w12, x11, x10, [x0]
; OUTLINE-NEXT:    cbnz w12, .LBB11_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    adrp x10, :got:var
; OUTLINE-NEXT:    ldr x10, [x10, :got_lo12:var]
; OUTLINE-NEXT:    stp x9, x8, [x10]
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: fetch_and_umax:
; LSE:       // %bb.0:
; LSE-NEXT:    ldp x4, x5, [x0]
; LSE-NEXT:  .LBB11_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    cmp x2, x4
; LSE-NEXT:    sbcs xzr, x3, x7
; LSE-NEXT:    csel x9, x7, x3, lo
; LSE-NEXT:    csel x8, x4, x2, lo
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
; LSE-NEXT:    cmp x5, x7
; LSE-NEXT:    ccmp x4, x6, #0, eq
; LSE-NEXT:    b.ne .LBB11_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    adrp x8, :got:var
; LSE-NEXT:    ldr x8, [x8, :got_lo12:var]
; LSE-NEXT:    stp x4, x5, [x8]
; LSE-NEXT:    ret
  %val = atomicrmw umax ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

; seq_cst atomic load of an i128.
; Expected: without LSE, an ldaxp + stlxp loop that writes the loaded pair
; back; with +lse, a single caspal whose compare and new values are both zero.
define i128 @atomic_load_seq_cst(ptr %p) {
; NOOUTLINE-LABEL: atomic_load_seq_cst:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:    mov x8, x0
; NOOUTLINE-NEXT:  .LBB12_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp x0, x1, [x8]
; NOOUTLINE-NEXT:    stlxp w9, x0, x1, [x8]
; NOOUTLINE-NEXT:    cbnz w9, .LBB12_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: atomic_load_seq_cst:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:    mov x8, x0
; OUTLINE-NEXT:  .LBB12_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp x0, x1, [x8]
; OUTLINE-NEXT:    stlxp w9, x0, x1, [x8]
; OUTLINE-NEXT:    cbnz w9, .LBB12_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: atomic_load_seq_cst:
; LSE:       // %bb.0:
; LSE-NEXT:    mov x2, #0
; LSE-NEXT:    mov x3, #0
; LSE-NEXT:    caspal x2, x3, x2, x3, [x0]
; LSE-NEXT:    mov x0, x2
; LSE-NEXT:    mov x1, x3
; LSE-NEXT:    ret
  %r = load atomic i128, ptr %p seq_cst, align 16
  ret i128 %r
}

; monotonic atomic load of an i128. Two unnamed i64 parameters precede %p;
; the checks address memory through x2 and produce the result directly in
; x0/x1, with no register copies around the loop or the casp.
define i128 @atomic_load_relaxed(i64, i64, ptr %p) {
; NOOUTLINE-LABEL: atomic_load_relaxed:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB13_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldxp x0, x1, [x2]
; NOOUTLINE-NEXT:    stxp w8, x0, x1, [x2]
; NOOUTLINE-NEXT:    cbnz w8, .LBB13_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: atomic_load_relaxed:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB13_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldxp x0, x1, [x2]
; OUTLINE-NEXT:    stxp w8, x0, x1, [x2]
; OUTLINE-NEXT:    cbnz w8, .LBB13_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: atomic_load_relaxed:
; LSE:       // %bb.0:
; LSE-NEXT:    mov x0, #0
; LSE-NEXT:    mov x1, #0
; LSE-NEXT:    casp x0, x1, x0, x1, [x2]
; LSE-NEXT:    ret
  %r = load atomic i128, ptr %p monotonic, align 16
  ret i128 %r
}


; seq_cst atomic store of an i128.
; Expected: without LSE, an ldaxp (result unused) + stlxp loop; with +lse, a
; caspal retry loop seeded by a plain ldp.
define void @atomic_store_seq_cst(i128 %in, ptr %p) {
; NOOUTLINE-LABEL: atomic_store_seq_cst:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB14_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldaxp xzr, x8, [x2]
; NOOUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
; NOOUTLINE-NEXT:    cbnz w8, .LBB14_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: atomic_store_seq_cst:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB14_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldaxp xzr, x8, [x2]
; OUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
; OUTLINE-NEXT:    cbnz w8, .LBB14_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: atomic_store_seq_cst:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
; LSE-NEXT:    ldp x4, x5, [x2]
; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
; LSE-NEXT:  .LBB14_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    caspal x6, x7, x0, x1, [x2]
; LSE-NEXT:    cmp x7, x5
; LSE-NEXT:    ccmp x6, x4, #0, eq
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    b.ne .LBB14_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    ret
  store atomic i128 %in, ptr %p seq_cst, align 16
  ret void
}

; release atomic store of an i128.
; Expected: without LSE, an ldxp (result unused) + stlxp loop; with +lse, a
; caspl retry loop.
define void @atomic_store_release(i128 %in, ptr %p) {
; NOOUTLINE-LABEL: atomic_store_release:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB15_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldxp xzr, x8, [x2]
; NOOUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
; NOOUTLINE-NEXT:    cbnz w8, .LBB15_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: atomic_store_release:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB15_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldxp xzr, x8, [x2]
; OUTLINE-NEXT:    stlxp w8, x0, x1, [x2]
; OUTLINE-NEXT:    cbnz w8, .LBB15_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: atomic_store_release:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
; LSE-NEXT:    ldp x4, x5, [x2]
; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
; LSE-NEXT:  .LBB15_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    caspl x6, x7, x0, x1, [x2]
; LSE-NEXT:    cmp x7, x5
; LSE-NEXT:    ccmp x6, x4, #0, eq
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    b.ne .LBB15_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    ret
  store atomic i128 %in, ptr %p release, align 16
  ret void
}

; Weakly ordered atomic store of an i128. Note that, despite the function
; name, the ordering used in the IR is "unordered", not "monotonic".
; Expected: without LSE, an ldxp (result unused) + stxp loop; with +lse, a
; casp retry loop.
define void @atomic_store_relaxed(i128 %in, ptr %p) {
; NOOUTLINE-LABEL: atomic_store_relaxed:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB16_1: // %atomicrmw.start
; NOOUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldxp xzr, x8, [x2]
; NOOUTLINE-NEXT:    stxp w8, x0, x1, [x2]
; NOOUTLINE-NEXT:    cbnz w8, .LBB16_1
; NOOUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: atomic_store_relaxed:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:  .LBB16_1: // %atomicrmw.start
; OUTLINE-NEXT:    // =>This Inner Loop Header: Depth=1
; OUTLINE-NEXT:    ldxp xzr, x8, [x2]
; OUTLINE-NEXT:    stxp w8, x0, x1, [x2]
; OUTLINE-NEXT:    cbnz w8, .LBB16_1
; OUTLINE-NEXT:  // %bb.2: // %atomicrmw.end
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: atomic_store_relaxed:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
; LSE-NEXT:    ldp x4, x5, [x2]
; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
; LSE-NEXT:  .LBB16_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    mov x6, x4
; LSE-NEXT:    mov x7, x5
; LSE-NEXT:    casp x6, x7, x0, x1, [x2]
; LSE-NEXT:    cmp x7, x5
; LSE-NEXT:    ccmp x6, x4, #0, eq
; LSE-NEXT:    mov x4, x6
; LSE-NEXT:    mov x5, x7
; LSE-NEXT:    b.ne .LBB16_1
; LSE-NEXT:  // %bb.2: // %atomicrmw.end
; LSE-NEXT:    ret
  store atomic i128 %in, ptr %p unordered, align 16
  ret void
}

; Since we store the original value to ensure no tearing for the unsuccessful
; case, the register used must not be xzr.
; The cmpxchg result is unused here; the baseline checks still expect the
; loaded pair in real registers (x8/x9), which the mismatch path stores back.
define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) {
; NOOUTLINE-LABEL: cmpxchg_dead:
; NOOUTLINE:       // %bb.0:
; NOOUTLINE-NEXT:  .LBB17_1: // =>This Inner Loop Header: Depth=1
; NOOUTLINE-NEXT:    ldxp x8, x9, [x0]
; NOOUTLINE-NEXT:    cmp x8, x2
; NOOUTLINE-NEXT:    cset w10, ne
; NOOUTLINE-NEXT:    cmp x9, x3
; NOOUTLINE-NEXT:    cinc w10, w10, ne
; NOOUTLINE-NEXT:    cbz w10, .LBB17_3
; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB17_1 Depth=1
; NOOUTLINE-NEXT:    stxp w10, x8, x9, [x0]
; NOOUTLINE-NEXT:    cbnz w10, .LBB17_1
; NOOUTLINE-NEXT:    b .LBB17_4
; NOOUTLINE-NEXT:  .LBB17_3: // in Loop: Header=BB17_1 Depth=1
; NOOUTLINE-NEXT:    stxp w10, x4, x5, [x0]
; NOOUTLINE-NEXT:    cbnz w10, .LBB17_1
; NOOUTLINE-NEXT:  .LBB17_4:
; NOOUTLINE-NEXT:    ret
;
; OUTLINE-LABEL: cmpxchg_dead:
; OUTLINE:       // %bb.0:
; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
; OUTLINE-NEXT:    .cfi_offset w30, -16
; OUTLINE-NEXT:    mov x1, x3
; OUTLINE-NEXT:    mov x8, x0
; OUTLINE-NEXT:    mov x0, x2
; OUTLINE-NEXT:    mov x2, x4
; OUTLINE-NEXT:    mov x3, x5
; OUTLINE-NEXT:    mov x4, x8
; OUTLINE-NEXT:    bl __aarch64_cas16_relax
; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-NEXT:    ret
;
; LSE-LABEL: cmpxchg_dead:
; LSE:       // %bb.0:
; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
; LSE-NEXT:    ret
  cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic
  ret void
}