; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE

; Ensure there's no stack spill in between ldxr/stxr pairs.
;
; Every function below performs one seq_cst atomicrmw (add or nand, operand 1)
; at a width of i8/i16/i32/i64/i128 and returns the value the instruction
; yields. The file is compiled at -O0 twice: once for plain AArch64 (checked
; under the prefix NOLSE) and once with -mattr=+lse (checked under the prefix
; LSE). In the expected output every "Folded Spill" / "Folded Reload" stack
; access sits before the exclusive load or after the exclusive store, never
; between the two.
; (review) Presumably a stack access inside the exclusive pair could clear the
; exclusive monitor and stop the store-exclusive from ever succeeding; confirm
; against the Arm architecture documentation.

; i8 add. Without +lse: an ldaxrb/stlxrb loop nested in an outer
; compare-and-retry loop. With +lse: a single ldaddalb.
define i8 @test_rmw_add_8(ptr %dst) {
; NOLSE-LABEL: test_rmw_add_8:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldrb w8, [x0]
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b .LBB0_1
; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB0_2 Depth 2
; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    add w12, w9, #1
; NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB0_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxrb w8, [x11]
; NOLSE-NEXT:    cmp w8, w9, uxtb
; NOLSE-NEXT:    b.ne .LBB0_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=2
; NOLSE-NEXT:    stlxrb w10, w12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB0_2
; NOLSE-NEXT:  .LBB0_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB0_1 Depth=1
; NOLSE-NEXT:    subs w9, w8, w9, uxtb
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB0_1
; NOLSE-NEXT:    b .LBB0_5
; NOLSE-NEXT:  .LBB0_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_add_8:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    mov w8, #1
; LSE-NEXT:    ldaddalb w8, w0, [x0]
; LSE-NEXT:    ret
entry:
  %res = atomicrmw add ptr %dst, i8 1 seq_cst
  ret i8 %res
}

; i16 add. Without +lse: an ldaxrh/stlxrh loop nested in an outer
; compare-and-retry loop. With +lse: a single ldaddalh.
define i16 @test_rmw_add_16(ptr %dst) {
; NOLSE-LABEL: test_rmw_add_16:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldrh w8, [x0]
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b .LBB1_1
; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB1_2 Depth 2
; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    add w12, w9, #1
; NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB1_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxrh w8, [x11]
; NOLSE-NEXT:    cmp w8, w9, uxth
; NOLSE-NEXT:    b.ne .LBB1_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=2
; NOLSE-NEXT:    stlxrh w10, w12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB1_2
; NOLSE-NEXT:  .LBB1_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB1_1 Depth=1
; NOLSE-NEXT:    subs w9, w8, w9, uxth
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB1_1
; NOLSE-NEXT:    b .LBB1_5
; NOLSE-NEXT:  .LBB1_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_add_16:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    mov w8, #1
; LSE-NEXT:    ldaddalh w8, w0, [x0]
; LSE-NEXT:    ret
entry:
  %res = atomicrmw add ptr %dst, i16 1 seq_cst
  ret i16 %res
}

; i32 add. Without +lse: an ldaxr/stlxr loop (w registers) nested in an outer
; compare-and-retry loop. With +lse: a single ldaddal.
define i32 @test_rmw_add_32(ptr %dst) {
; NOLSE-LABEL: test_rmw_add_32:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldr w8, [x0]
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b .LBB2_1
; NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB2_2 Depth 2
; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    add w12, w9, #1
; NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB2_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxr w8, [x11]
; NOLSE-NEXT:    cmp w8, w9
; NOLSE-NEXT:    b.ne .LBB2_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=2
; NOLSE-NEXT:    stlxr w10, w12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB2_2
; NOLSE-NEXT:  .LBB2_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB2_1 Depth=1
; NOLSE-NEXT:    subs w9, w8, w9
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB2_1
; NOLSE-NEXT:    b .LBB2_5
; NOLSE-NEXT:  .LBB2_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_add_32:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    mov w8, #1
; LSE-NEXT:    ldaddal w8, w0, [x0]
; LSE-NEXT:    ret
entry:
  %res = atomicrmw add ptr %dst, i32 1 seq_cst
  ret i32 %res
}

; i64 add. Without +lse: an ldaxr/stlxr loop (x registers) nested in an outer
; compare-and-retry loop. With +lse: a single ldaddal on x registers.
define i64 @test_rmw_add_64(ptr %dst) {
; NOLSE-LABEL: test_rmw_add_64:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldr x8, [x0]
; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
; NOLSE-NEXT:    b .LBB3_1
; NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB3_2 Depth 2
; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    add x12, x9, #1
; NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB3_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxr x8, [x11]
; NOLSE-NEXT:    cmp x8, x9
; NOLSE-NEXT:    b.ne .LBB3_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=2
; NOLSE-NEXT:    stlxr w10, x12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB3_2
; NOLSE-NEXT:  .LBB3_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB3_1 Depth=1
; NOLSE-NEXT:    subs x9, x8, x9
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB3_1
; NOLSE-NEXT:    b .LBB3_5
; NOLSE-NEXT:  .LBB3_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_add_64:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    mov w8, #1
; LSE-NEXT:    // kill: def $x8 killed $w8
; LSE-NEXT:    ldaddal x8, x0, [x0]
; LSE-NEXT:    ret
entry:
  %res = atomicrmw add ptr %dst, i64 1 seq_cst
  ret i64 %res
}

; i128 add. Without +lse: an ldaxp/stlxp loop; when the loaded pair differs
; from the expected pair, the loaded values are stored back before the outer
; loop retries. With +lse: a caspal retry loop; the carry into the high half is
; computed with adds/cinc.
define i128 @test_rmw_add_128(ptr %dst) {
; NOLSE-LABEL: test_rmw_add_128:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #48
; NOLSE-NEXT:    .cfi_def_cfa_offset 48
; NOLSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
; NOLSE-NEXT:    ldr x8, [x0, #8]
; NOLSE-NEXT:    ldr x9, [x0]
; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT:    b .LBB4_1
; NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB4_2 Depth 2
; NOLSE-NEXT:    ldr x13, [sp, #40] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #32] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
; NOLSE-NEXT:    adds x14, x11, #1
; NOLSE-NEXT:    cinc x15, x13, hs
; NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB4_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxp x10, x12, [x9]
; NOLSE-NEXT:    cmp x10, x11
; NOLSE-NEXT:    cset w8, ne
; NOLSE-NEXT:    cmp x12, x13
; NOLSE-NEXT:    cinc w8, w8, ne
; NOLSE-NEXT:    cbnz w8, .LBB4_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
; NOLSE-NEXT:    stlxp w8, x14, x15, [x9]
; NOLSE-NEXT:    cbnz w8, .LBB4_2
; NOLSE-NEXT:    b .LBB4_5
; NOLSE-NEXT:  .LBB4_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
; NOLSE-NEXT:    stlxp w8, x10, x12, [x9]
; NOLSE-NEXT:    cbnz w8, .LBB4_2
; NOLSE-NEXT:  .LBB4_5: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB4_1 Depth=1
; NOLSE-NEXT:    mov x8, x12
; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; NOLSE-NEXT:    mov x9, x10
; NOLSE-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    subs x12, x12, x13
; NOLSE-NEXT:    ccmp x10, x11, #0, eq
; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB4_1
; NOLSE-NEXT:    b .LBB4_6
; NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
; NOLSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x0, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #48
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_add_128:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    sub sp, sp, #48
; LSE-NEXT:    .cfi_def_cfa_offset 48
; LSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
; LSE-NEXT:    ldr x8, [x0, #8]
; LSE-NEXT:    ldr x9, [x0]
; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT:    b .LBB4_1
; LSE-NEXT:  .LBB4_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    ldr x11, [sp, #40] // 8-byte Folded Reload
; LSE-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
; LSE-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
; LSE-NEXT:    mov x0, x10
; LSE-NEXT:    mov x1, x11
; LSE-NEXT:    adds x2, x10, #1
; LSE-NEXT:    cinc x9, x11, hs
; LSE-NEXT:    // kill: def $x2 killed $x2 def $x2_x3
; LSE-NEXT:    mov x3, x9
; LSE-NEXT:    caspal x0, x1, x2, x3, [x8]
; LSE-NEXT:    mov x9, x0
; LSE-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
; LSE-NEXT:    mov x8, x1
; LSE-NEXT:    str x8, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT:    subs x11, x8, x11
; LSE-NEXT:    ccmp x9, x10, #0, eq
; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT:    b.ne .LBB4_1
; LSE-NEXT:    b .LBB4_2
; LSE-NEXT:  .LBB4_2: // %atomicrmw.end
; LSE-NEXT:    ldr x1, [sp, #16] // 8-byte Folded Reload
; LSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
; LSE-NEXT:    add sp, sp, #48
; LSE-NEXT:    ret
entry:
  %res = atomicrmw add ptr %dst, i128 1 seq_cst
  ret i128 %res
}

; i8 nand. The new value is formed with mvn + orr #0xfffffffe, i.e.
; (~old | ~1) == ~(old & 1). Without +lse: an ldaxrb/stlxrb loop nested in an
; outer compare-and-retry loop. With +lse: a casalb retry loop.
define i8 @test_rmw_nand_8(ptr %dst) {
; NOLSE-LABEL: test_rmw_nand_8:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldrb w8, [x0]
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b .LBB5_1
; NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB5_2 Depth 2
; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    mvn w8, w9
; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
; NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB5_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxrb w8, [x11]
; NOLSE-NEXT:    cmp w8, w9, uxtb
; NOLSE-NEXT:    b.ne .LBB5_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=2
; NOLSE-NEXT:    stlxrb w10, w12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB5_2
; NOLSE-NEXT:  .LBB5_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB5_1 Depth=1
; NOLSE-NEXT:    subs w9, w8, w9, uxtb
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB5_1
; NOLSE-NEXT:    b .LBB5_5
; NOLSE-NEXT:  .LBB5_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_nand_8:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    sub sp, sp, #32
; LSE-NEXT:    .cfi_def_cfa_offset 32
; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT:    ldrb w8, [x0]
; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; LSE-NEXT:    b .LBB5_1
; LSE-NEXT:  .LBB5_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; LSE-NEXT:    mvn w8, w9
; LSE-NEXT:    orr w10, w8, #0xfffffffe
; LSE-NEXT:    mov w8, w9
; LSE-NEXT:    casalb w8, w10, [x11]
; LSE-NEXT:    subs w9, w8, w9, uxtb
; LSE-NEXT:    cset w9, eq
; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; LSE-NEXT:    subs w9, w9, #1
; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; LSE-NEXT:    b.ne .LBB5_1
; LSE-NEXT:    b .LBB5_2
; LSE-NEXT:  .LBB5_2: // %atomicrmw.end
; LSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; LSE-NEXT:    add sp, sp, #32
; LSE-NEXT:    ret
entry:
  %res = atomicrmw nand ptr %dst, i8 1 seq_cst
  ret i8 %res
}

; i16 nand. Without +lse: an ldaxrh/stlxrh loop nested in an outer
; compare-and-retry loop. With +lse: a casalh retry loop.
define i16 @test_rmw_nand_16(ptr %dst) {
; NOLSE-LABEL: test_rmw_nand_16:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldrh w8, [x0]
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b .LBB6_1
; NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB6_2 Depth 2
; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    mvn w8, w9
; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
; NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB6_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxrh w8, [x11]
; NOLSE-NEXT:    cmp w8, w9, uxth
; NOLSE-NEXT:    b.ne .LBB6_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=2
; NOLSE-NEXT:    stlxrh w10, w12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB6_2
; NOLSE-NEXT:  .LBB6_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB6_1 Depth=1
; NOLSE-NEXT:    subs w9, w8, w9, uxth
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB6_1
; NOLSE-NEXT:    b .LBB6_5
; NOLSE-NEXT:  .LBB6_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_nand_16:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    sub sp, sp, #32
; LSE-NEXT:    .cfi_def_cfa_offset 32
; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT:    ldrh w8, [x0]
; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; LSE-NEXT:    b .LBB6_1
; LSE-NEXT:  .LBB6_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; LSE-NEXT:    mvn w8, w9
; LSE-NEXT:    orr w10, w8, #0xfffffffe
; LSE-NEXT:    mov w8, w9
; LSE-NEXT:    casalh w8, w10, [x11]
; LSE-NEXT:    subs w9, w8, w9, uxth
; LSE-NEXT:    cset w9, eq
; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; LSE-NEXT:    subs w9, w9, #1
; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; LSE-NEXT:    b.ne .LBB6_1
; LSE-NEXT:    b .LBB6_2
; LSE-NEXT:  .LBB6_2: // %atomicrmw.end
; LSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; LSE-NEXT:    add sp, sp, #32
; LSE-NEXT:    ret
entry:
  %res = atomicrmw nand ptr %dst, i16 1 seq_cst
  ret i16 %res
}

; i32 nand. Without +lse: an ldaxr/stlxr loop (w registers) nested in an outer
; compare-and-retry loop. With +lse: a casal retry loop.
define i32 @test_rmw_nand_32(ptr %dst) {
; NOLSE-LABEL: test_rmw_nand_32:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldr w8, [x0]
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b .LBB7_1
; NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB7_2 Depth 2
; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    mvn w8, w9
; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
; NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB7_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxr w8, [x11]
; NOLSE-NEXT:    cmp w8, w9
; NOLSE-NEXT:    b.ne .LBB7_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=2
; NOLSE-NEXT:    stlxr w10, w12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB7_2
; NOLSE-NEXT:  .LBB7_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB7_1 Depth=1
; NOLSE-NEXT:    subs w9, w8, w9
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB7_1
; NOLSE-NEXT:    b .LBB7_5
; NOLSE-NEXT:  .LBB7_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_nand_32:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    sub sp, sp, #32
; LSE-NEXT:    .cfi_def_cfa_offset 32
; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT:    ldr w8, [x0]
; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; LSE-NEXT:    b .LBB7_1
; LSE-NEXT:  .LBB7_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; LSE-NEXT:    mvn w8, w9
; LSE-NEXT:    orr w10, w8, #0xfffffffe
; LSE-NEXT:    mov w8, w9
; LSE-NEXT:    casal w8, w10, [x11]
; LSE-NEXT:    subs w9, w8, w9
; LSE-NEXT:    cset w9, eq
; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
; LSE-NEXT:    subs w9, w9, #1
; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
; LSE-NEXT:    b.ne .LBB7_1
; LSE-NEXT:    b .LBB7_2
; LSE-NEXT:  .LBB7_2: // %atomicrmw.end
; LSE-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
; LSE-NEXT:    add sp, sp, #32
; LSE-NEXT:    ret
entry:
  %res = atomicrmw nand ptr %dst, i32 1 seq_cst
  ret i32 %res
}

; i64 nand. The inversion is done on the low 32 bits (mvn on w registers) and
; the 64-bit orr with #0xfffffffffffffffe then sets every bit above bit 0.
; Without +lse: an ldaxr/stlxr loop (x registers) nested in an outer
; compare-and-retry loop. With +lse: a casal retry loop on x registers.
define i64 @test_rmw_nand_64(ptr %dst) {
; NOLSE-LABEL: test_rmw_nand_64:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #32
; NOLSE-NEXT:    .cfi_def_cfa_offset 32
; NOLSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    ldr x8, [x0]
; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
; NOLSE-NEXT:    b .LBB8_1
; NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB8_2 Depth 2
; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    mov w8, w9
; NOLSE-NEXT:    mvn w10, w8
; NOLSE-NEXT:    // implicit-def: $x8
; NOLSE-NEXT:    mov w8, w10
; NOLSE-NEXT:    orr x12, x8, #0xfffffffffffffffe
; NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB8_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxr x8, [x11]
; NOLSE-NEXT:    cmp x8, x9
; NOLSE-NEXT:    b.ne .LBB8_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=2
; NOLSE-NEXT:    stlxr w10, x12, [x11]
; NOLSE-NEXT:    cbnz w10, .LBB8_2
; NOLSE-NEXT:  .LBB8_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB8_1 Depth=1
; NOLSE-NEXT:    subs x9, x8, x9
; NOLSE-NEXT:    cset w9, eq
; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; NOLSE-NEXT:    subs w9, w9, #1
; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB8_1
; NOLSE-NEXT:    b .LBB8_5
; NOLSE-NEXT:  .LBB8_5: // %atomicrmw.end
; NOLSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #32
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_nand_64:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    sub sp, sp, #32
; LSE-NEXT:    .cfi_def_cfa_offset 32
; LSE-NEXT:    str x0, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT:    ldr x8, [x0]
; LSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
; LSE-NEXT:    b .LBB8_1
; LSE-NEXT:  .LBB8_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
; LSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
; LSE-NEXT:    mov w8, w9
; LSE-NEXT:    mvn w10, w8
; LSE-NEXT:    // implicit-def: $x8
; LSE-NEXT:    mov w8, w10
; LSE-NEXT:    orr x10, x8, #0xfffffffffffffffe
; LSE-NEXT:    mov x8, x9
; LSE-NEXT:    casal x8, x10, [x11]
; LSE-NEXT:    subs x9, x8, x9
; LSE-NEXT:    cset w9, eq
; LSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; LSE-NEXT:    subs w9, w9, #1
; LSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
; LSE-NEXT:    b.ne .LBB8_1
; LSE-NEXT:    b .LBB8_2
; LSE-NEXT:  .LBB8_2: // %atomicrmw.end
; LSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
; LSE-NEXT:    add sp, sp, #32
; LSE-NEXT:    ret
entry:
  %res = atomicrmw nand ptr %dst, i64 1 seq_cst
  ret i64 %res
}

; i128 nand. The low half of the new value is built as in the i64 case and the
; high half is the constant #-1. Without +lse: an ldaxp/stlxp loop; when the
; loaded pair differs from the expected pair, the loaded values are stored back
; before the outer loop retries. With +lse: a caspal retry loop.
define i128 @test_rmw_nand_128(ptr %dst) {
; NOLSE-LABEL: test_rmw_nand_128:
; NOLSE:       // %bb.0: // %entry
; NOLSE-NEXT:    sub sp, sp, #48
; NOLSE-NEXT:    .cfi_def_cfa_offset 48
; NOLSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
; NOLSE-NEXT:    ldr x8, [x0, #8]
; NOLSE-NEXT:    ldr x9, [x0]
; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT:    b .LBB9_1
; NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
; NOLSE-NEXT:    // =>This Loop Header: Depth=1
; NOLSE-NEXT:    // Child Loop BB9_2 Depth 2
; NOLSE-NEXT:    ldr x13, [sp, #40] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x11, [sp, #32] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
; NOLSE-NEXT:    mov w8, w11
; NOLSE-NEXT:    mvn w10, w8
; NOLSE-NEXT:    // implicit-def: $x8
; NOLSE-NEXT:    mov w8, w10
; NOLSE-NEXT:    orr x14, x8, #0xfffffffffffffffe
; NOLSE-NEXT:    mov x15, #-1
; NOLSE-NEXT:  .LBB9_2: // %atomicrmw.start
; NOLSE-NEXT:    // Parent Loop BB9_1 Depth=1
; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
; NOLSE-NEXT:    ldaxp x10, x12, [x9]
; NOLSE-NEXT:    cmp x10, x11
; NOLSE-NEXT:    cset w8, ne
; NOLSE-NEXT:    cmp x12, x13
; NOLSE-NEXT:    cinc w8, w8, ne
; NOLSE-NEXT:    cbnz w8, .LBB9_4
; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=2
; NOLSE-NEXT:    stlxp w8, x14, x15, [x9]
; NOLSE-NEXT:    cbnz w8, .LBB9_2
; NOLSE-NEXT:    b .LBB9_5
; NOLSE-NEXT:  .LBB9_4: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=2
; NOLSE-NEXT:    stlxp w8, x10, x12, [x9]
; NOLSE-NEXT:    cbnz w8, .LBB9_2
; NOLSE-NEXT:  .LBB9_5: // %atomicrmw.start
; NOLSE-NEXT:    // in Loop: Header=BB9_1 Depth=1
; NOLSE-NEXT:    mov x8, x12
; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; NOLSE-NEXT:    mov x9, x10
; NOLSE-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT:    subs x12, x12, x13
; NOLSE-NEXT:    ccmp x10, x11, #0, eq
; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT:    b.ne .LBB9_1
; NOLSE-NEXT:    b .LBB9_6
; NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
; NOLSE-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
; NOLSE-NEXT:    ldr x0, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT:    add sp, sp, #48
; NOLSE-NEXT:    ret
;
; LSE-LABEL: test_rmw_nand_128:
; LSE:       // %bb.0: // %entry
; LSE-NEXT:    sub sp, sp, #48
; LSE-NEXT:    .cfi_def_cfa_offset 48
; LSE-NEXT:    str x0, [sp, #24] // 8-byte Folded Spill
; LSE-NEXT:    ldr x8, [x0, #8]
; LSE-NEXT:    ldr x9, [x0]
; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT:    b .LBB9_1
; LSE-NEXT:  .LBB9_1: // %atomicrmw.start
; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
; LSE-NEXT:    ldr x11, [sp, #40] // 8-byte Folded Reload
; LSE-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
; LSE-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
; LSE-NEXT:    mov x0, x10
; LSE-NEXT:    mov x1, x11
; LSE-NEXT:    mov w9, w10
; LSE-NEXT:    mvn w12, w9
; LSE-NEXT:    // implicit-def: $x9
; LSE-NEXT:    mov w9, w12
; LSE-NEXT:    orr x2, x9, #0xfffffffffffffffe
; LSE-NEXT:    mov x9, #-1
; LSE-NEXT:    // kill: def $x2 killed $x2 def $x2_x3
; LSE-NEXT:    mov x3, x9
; LSE-NEXT:    caspal x0, x1, x2, x3, [x8]
; LSE-NEXT:    mov x9, x0
; LSE-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
; LSE-NEXT:    mov x8, x1
; LSE-NEXT:    str x8, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT:    subs x11, x8, x11
; LSE-NEXT:    ccmp x9, x10, #0, eq
; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT:    b.ne .LBB9_1
; LSE-NEXT:    b .LBB9_2
; LSE-NEXT:  .LBB9_2: // %atomicrmw.end
; LSE-NEXT:    ldr x1, [sp, #16] // 8-byte Folded Reload
; LSE-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
; LSE-NEXT:    add sp, sp, #48
; LSE-NEXT:    ret
entry:
  %res = atomicrmw nand ptr %dst, i128 1 seq_cst
  ret i128 %res
}