; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; This is already checked for in Atomics-64.ll
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64

; FIXME: we don't currently check for the operations themselves with CHECK-NEXT,
; because they are implemented in a very messy way with lwarx/stwcx.
; It should be fixed soon in another patch.

; We first check loads, for all sizes from i8 to i64. We also vary orderings
; to check for barriers.
define i8 @load_i8_unordered(ptr %mem) {
; CHECK-LABEL: load_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lbz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i8, ptr %mem unordered, align 1
  ret i8 %val
}
define i16 @load_i16_monotonic(ptr %mem) {
; CHECK-LABEL: load_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lhz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i16, ptr %mem monotonic, align 2
  ret i16 %val
}
define i32 @load_i32_acquire(ptr %mem) {
; PPC32-LABEL: load_i32_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    lwz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i32_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i32, ptr %mem acquire, align 4
  ret i32 %val
}
define i64 @load_i64_seq_cst(ptr %mem) {
; PPC32-LABEL: load_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 5
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    ld r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i64, ptr %mem seq_cst, align 8
  ret i64 %val
}

; Stores
define void @store_i8_unordered(ptr %mem) {
; CHECK-LABEL: store_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    stb r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i8 42, ptr %mem unordered, align 1
  ret void
}
define void @store_i16_monotonic(ptr %mem) {
; CHECK-LABEL: store_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    sth r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i16 42, ptr %mem monotonic, align 2
  ret void
}
define void @store_i32_release(ptr %mem) {
; CHECK-LABEL: store_i32_release:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    stw r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i32 42, ptr %mem release, align 4
  ret void
}
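; The acquire loads above use the usual PPC idiom of a bogus compare of the
; loaded value against itself plus bne-/isync: the control dependency combined
; with isync orders later accesses after the load without needing a full
; lwsync. For the i64 seq_cst store below, PPC32 has no 64-bit atomic
; instructions and falls back to the __atomic_store_8 libcall; li r7, 5
; materializes the ordering argument (5 == __ATOMIC_SEQ_CST).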
define void @store_i64_seq_cst(ptr %mem) {
; PPC32-LABEL: store_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    li r6, 42
; PPC32-NEXT:    li r7, 5
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r4, 42
; PPC64-NEXT:    sync
; PPC64-NEXT:    std r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic i64 42, ptr %mem seq_cst, align 8
  ret void
}

; Atomic CmpXchg
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    li r7, 255
; PPC32-NEXT:    rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r8, 24
; PPC32-NEXT:    slw r8, r5, r3
; PPC32-NEXT:    slw r9, r6, r3
; PPC32-NEXT:    slw r5, r7, r3
; PPC32-NEXT:    and r6, r8, r5
; PPC32-NEXT:    and r7, r9, r5
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB8_1:
; PPC32-NEXT:    lwarx r9, 0, r4
; PPC32-NEXT:    and r8, r9, r5
; PPC32-NEXT:    cmpw r8, r7
; PPC32-NEXT:    bne cr0, .LBB8_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r5
; PPC32-NEXT:    or r9, r9, r6
; PPC32-NEXT:    stwcx. r9, 0, r4
; PPC32-NEXT:    bne cr0, .LBB8_1
; PPC32-NEXT:  .LBB8_3:
; PPC32-NEXT:    srw r3, r8, r3
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    li r7, 255
; PPC64-NEXT:    rldicr r4, r3, 0, 61
; PPC64-NEXT:    xori r3, r8, 24
; PPC64-NEXT:    slw r8, r5, r3
; PPC64-NEXT:    slw r9, r6, r3
; PPC64-NEXT:    slw r5, r7, r3
; PPC64-NEXT:    and r6, r8, r5
; PPC64-NEXT:    and r7, r9, r5
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB8_1:
; PPC64-NEXT:    lwarx r9, 0, r4
; PPC64-NEXT:    and r8, r9, r5
; PPC64-NEXT:    cmpw r8, r7
; PPC64-NEXT:    bne cr0, .LBB8_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r5
; PPC64-NEXT:    or r9, r9, r6
; PPC64-NEXT:    stwcx. r9, 0, r4
; PPC64-NEXT:    bne cr0, .LBB8_1
; PPC64-NEXT:  .LBB8_3:
; PPC64-NEXT:    srw r3, r8, r3
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg ptr %mem, i8 0, i8 1 seq_cst seq_cst
  %loaded = extractvalue { i8, i1} %val, 0
  ret i8 %loaded
}
define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    ori r7, r6, 65535
; PPC32-NEXT:    xori r4, r4, 16
; PPC32-NEXT:    slw r8, r5, r4
; PPC32-NEXT:    slw r9, r6, r4
; PPC32-NEXT:    slw r5, r7, r4
; PPC32-NEXT:    rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT:    and r6, r8, r5
; PPC32-NEXT:    and r7, r9, r5
; PPC32-NEXT:  .LBB9_1:
; PPC32-NEXT:    lwarx r9, 0, r3
; PPC32-NEXT:    and r8, r9, r5
; PPC32-NEXT:    cmpw r8, r7
; PPC32-NEXT:    bne cr0, .LBB9_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r5
; PPC32-NEXT:    or r9, r9, r6
; PPC32-NEXT:    stwcx. r9, 0, r3
; PPC32-NEXT:    bne cr0, .LBB9_1
; PPC32-NEXT:  .LBB9_3:
; PPC32-NEXT:    srw r3, r8, r4
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    ori r7, r6, 65535
; PPC64-NEXT:    xori r4, r4, 16
; PPC64-NEXT:    slw r8, r5, r4
; PPC64-NEXT:    slw r9, r6, r4
; PPC64-NEXT:    slw r5, r7, r4
; PPC64-NEXT:    rldicr r3, r3, 0, 61
; PPC64-NEXT:    and r6, r8, r5
; PPC64-NEXT:    and r7, r9, r5
; PPC64-NEXT:  .LBB9_1:
; PPC64-NEXT:    lwarx r9, 0, r3
; PPC64-NEXT:    and r8, r9, r5
; PPC64-NEXT:    cmpw r8, r7
; PPC64-NEXT:    bne cr0, .LBB9_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r5
; PPC64-NEXT:    or r9, r9, r6
; PPC64-NEXT:    stwcx. r9, 0, r3
; PPC64-NEXT:    bne cr0, .LBB9_1
; PPC64-NEXT:  .LBB9_3:
; PPC64-NEXT:    srw r3, r8, r4
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
  %loaded = extractvalue { i16, i1} %val, 0
  ret i16 %loaded
}
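; There is no sub-word lwarx/stwcx., so the i8/i16 cmpxchg loops above operate
; on the containing aligned word: the address is rounded down (rlwinm/rldicr),
; the expected and new values are shifted into place, and only the selected
; byte/halfword is replaced under the computed mask. The word-sized cmpxchg
; below needs no masking and compares the loaded word directly.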
define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r5, 1
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB10_1:
; CHECK-NEXT:    lwarx r4, 0, r3
; CHECK-NEXT:    cmpwi r4, 0
; CHECK-NEXT:    bne cr0, .LBB10_3
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    stwcx. r5, 0, r3
; CHECK-NEXT:    bne cr0, .LBB10_1
; CHECK-NEXT:  .LBB10_3:
; CHECK-NEXT:    mr r3, r4
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
  %loaded = extractvalue { i32, i1} %val, 0
  ret i32 %loaded
}
define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
; PPC32-LABEL: cas_weak_i64_release_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 0
; PPC32-NEXT:    stw r4, 12(r1)
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    stw r4, 8(r1)
; PPC32-NEXT:    addi r4, r1, 8
; PPC32-NEXT:    li r6, 1
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    li r8, 0
; PPC32-NEXT:    bl __atomic_compare_exchange_8
; PPC32-NEXT:    lwz r4, 12(r1)
; PPC32-NEXT:    lwz r3, 8(r1)
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB11_1:
; PPC64-NEXT:    ldarx r4, 0, r3
; PPC64-NEXT:    cmpdi r4, 0
; PPC64-NEXT:    bne cr0, .LBB11_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    stdcx. r5, 0, r3
; PPC64-NEXT:    bne cr0, .LBB11_1
; PPC64-NEXT:  .LBB11_3:
; PPC64-NEXT:    mr r3, r4
; PPC64-NEXT:    blr
  %val = cmpxchg weak ptr %mem, i64 0, i64 1 release monotonic
  %loaded = extractvalue { i64, i1} %val, 0
  ret i64 %loaded
}
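
; Barrier placement in the cmpxchg tests tracks the requested ordering:
; seq_cst brackets the loop with sync ... lwsync, acquire adds only a trailing
; lwsync, acq_rel uses lwsync on both sides, and release/monotonic emits a
; leading lwsync with no trailing barrier (note its absence in the PPC64
; cas_weak_i64_release_monotonic above).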

; AtomicRMW
define i8 @add_i8_monotonic(ptr %mem, i8 %operand) {
; PPC32-LABEL: add_i8_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC32-NEXT:    li r6, 255
; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r7, 24
; PPC32-NEXT:    slw r4, r4, r3
; PPC32-NEXT:    slw r6, r6, r3
; PPC32-NEXT:  .LBB12_1:
; PPC32-NEXT:    lwarx r7, 0, r5
; PPC32-NEXT:    add r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r5
; PPC32-NEXT:    bne cr0, .LBB12_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r3
; PPC32-NEXT:    clrlwi r3, r3, 24
; PPC32-NEXT:    blr
;
; PPC64-LABEL: add_i8_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC64-NEXT:    li r6, 255
; PPC64-NEXT:    rldicr r5, r3, 0, 61
; PPC64-NEXT:    xori r3, r7, 24
; PPC64-NEXT:    slw r4, r4, r3
; PPC64-NEXT:    slw r6, r6, r3
; PPC64-NEXT:  .LBB12_1:
; PPC64-NEXT:    lwarx r7, 0, r5
; PPC64-NEXT:    add r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r5
; PPC64-NEXT:    bne cr0, .LBB12_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r3
; PPC64-NEXT:    clrlwi r3, r3, 24
; PPC64-NEXT:    blr
  %val = atomicrmw add ptr %mem, i8 %operand monotonic
  ret i8 %val
}
define i16 @xor_i16_seq_cst(ptr %mem, i16 %operand) {
; PPC32-LABEL: xor_i16_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    rlwinm r6, r3, 3, 27, 27
; PPC32-NEXT:    ori r7, r5, 65535
; PPC32-NEXT:    xori r5, r6, 16
; PPC32-NEXT:    rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT:    slw r4, r4, r5
; PPC32-NEXT:    slw r6, r7, r5
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB13_1:
; PPC32-NEXT:    lwarx r7, 0, r3
; PPC32-NEXT:    xor r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r3
; PPC32-NEXT:    bne cr0, .LBB13_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r5
; PPC32-NEXT:    clrlwi r3, r3, 16
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: xor_i16_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r5, 0
; PPC64-NEXT:    rlwinm r6, r3, 3, 27, 27
; PPC64-NEXT:    ori r7, r5, 65535
; PPC64-NEXT:    xori r5, r6, 16
; PPC64-NEXT:    rldicr r3, r3, 0, 61
; PPC64-NEXT:    slw r4, r4, r5
; PPC64-NEXT:    slw r6, r7, r5
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB13_1:
; PPC64-NEXT:    lwarx r7, 0, r3
; PPC64-NEXT:    xor r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r3
; PPC64-NEXT:    bne cr0, .LBB13_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r5
; PPC64-NEXT:    clrlwi r3, r3, 16
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = atomicrmw xor ptr %mem, i16 %operand seq_cst
  ret i16 %val
}
define i32 @xchg_i32_acq_rel(ptr %mem, i32 %operand) {
; CHECK-LABEL: xchg_i32_acq_rel:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB14_1:
; CHECK-NEXT:    lwarx r5, 0, r3
; CHECK-NEXT:    stwcx. r4, 0, r3
; CHECK-NEXT:    bne cr0, .LBB14_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    mr r3, r5
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = atomicrmw xchg ptr %mem, i32 %operand acq_rel
  ret i32 %val
}
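; The i64 atomicrmw below again becomes a libcall on PPC32
; (__atomic_fetch_and_8): the operand already sits in r5:r6 per the 32-bit
; SVR4 ABI's aligned register pairs, so only the ordering argument li r7, 3
; (3 == __ATOMIC_RELEASE) needs to be materialized before the call.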
define i64 @and_i64_release(ptr %mem, i64 %operand) {
; PPC32-LABEL: and_i64_release:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    bl __atomic_fetch_and_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: and_i64_release:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB15_1:
; PPC64-NEXT:    ldarx r5, 0, r3
; PPC64-NEXT:    and r6, r4, r5
; PPC64-NEXT:    stdcx. r6, 0, r3
; PPC64-NEXT:    bne cr0, .LBB15_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    mr r3, r5
; PPC64-NEXT:    blr
  %val = atomicrmw and ptr %mem, i64 %operand release
  ret i64 %val
}

define half @load_atomic_f16__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f16__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    sync
; PPC32-NEXT:    lhz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    bl __gnu_h2f_ieee
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f16__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    mflr r0
; PPC64-NEXT:    stdu r1, -112(r1)
; PPC64-NEXT:    std r0, 128(r1)
; PPC64-NEXT:    .cfi_def_cfa_offset 112
; PPC64-NEXT:    .cfi_offset lr, 16
; PPC64-NEXT:    sync
; PPC64-NEXT:    lhz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    bl __gnu_h2f_ieee
; PPC64-NEXT:    nop
; PPC64-NEXT:    addi r1, r1, 112
; PPC64-NEXT:    ld r0, 16(r1)
; PPC64-NEXT:    mtlr r0
; PPC64-NEXT:    blr
  %val = load atomic half, ptr %ptr seq_cst, align 2
  ret half %val
}

; FIXME: bf16_to_fp fails to select
; define bfloat @load_atomic_bf16__seq_cst(ptr %ptr) {
;   %val = load atomic bfloat, ptr %ptr seq_cst, align 2
;   ret bfloat %val
; }

define float @load_atomic_f32__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f32__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    sync
; PPC32-NEXT:    lwz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    stw r3, 12(r1)
; PPC32-NEXT:    lfs f1, 12(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f32__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    lwz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    stw r3, -4(r1)
; PPC64-NEXT:    lfs f1, -4(r1)
; PPC64-NEXT:    blr
  %val = load atomic float, ptr %ptr seq_cst, align 4
  ret float %val
}

define double @load_atomic_f64__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f64__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 5
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    stw r4, 12(r1)
; PPC32-NEXT:    stw r3, 8(r1)
; PPC32-NEXT:    lfd f1, 8(r1)
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f64__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    ld r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    std r3, -8(r1)
; PPC64-NEXT:    lfd f1, -8(r1)
; PPC64-NEXT:    blr
  %val = load atomic double, ptr %ptr seq_cst, align 8
  ret double %val
}
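
; The FP atomic loads above are performed in GPRs, so the integer barrier
; idioms still apply; the value is then moved to an FPR through a stack slot
; because these subtargets have no direct GPR<->FPR moves. half additionally
; extends to float via __gnu_h2f_ieee, and the 8-byte double load on PPC32
; takes the __atomic_load_8 libcall path. The stores below mirror this in the
; opposite direction, narrowing half via __gnu_f2h_ieee first.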

define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
; PPC32-LABEL: store_atomic_f16__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    .cfi_offset r30, -8
; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
; PPC32-NEXT:    mr r30, r3
; PPC32-NEXT:    bl __gnu_f2h_ieee
; PPC32-NEXT:    sync
; PPC32-NEXT:    sth r3, 0(r30)
; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f16__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    mflr r0
; PPC64-NEXT:    stdu r1, -128(r1)
; PPC64-NEXT:    std r0, 144(r1)
; PPC64-NEXT:    .cfi_def_cfa_offset 128
; PPC64-NEXT:    .cfi_offset lr, 16
; PPC64-NEXT:    .cfi_offset r30, -16
; PPC64-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
; PPC64-NEXT:    mr r30, r3
; PPC64-NEXT:    bl __gnu_f2h_ieee
; PPC64-NEXT:    nop
; PPC64-NEXT:    sync
; PPC64-NEXT:    sth r3, 0(r30)
; PPC64-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
; PPC64-NEXT:    addi r1, r1, 128
; PPC64-NEXT:    ld r0, 16(r1)
; PPC64-NEXT:    mtlr r0
; PPC64-NEXT:    blr
  store atomic half %val1, ptr %ptr seq_cst, align 2
  ret void
}

; FIXME: bf16_to_fp fails to select
; define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
;   store atomic bfloat %val1, ptr %ptr seq_cst, align 2
;   ret void
; }

define void @store_atomic_f32__seq_cst(ptr %ptr, float %val1) {
; PPC32-LABEL: store_atomic_f32__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    stfs f1, 12(r1)
; PPC32-NEXT:    lwz r4, 12(r1)
; PPC32-NEXT:    sync
; PPC32-NEXT:    stw r4, 0(r3)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f32__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    stfs f1, -4(r1)
; PPC64-NEXT:    lwz r4, -4(r1)
; PPC64-NEXT:    sync
; PPC64-NEXT:    stw r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic float %val1, ptr %ptr seq_cst, align 4
  ret void
}
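
; For the f64 store below, PPC64 moves the value through a stack slot into a
; GPR and emits sync + std, while PPC32 splits it across r5/r6 and calls
; __atomic_store_8 with seq_cst ordering (li r7, 5).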

define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
; PPC32-LABEL: store_atomic_f64__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    stfd f1, 8(r1)
; PPC32-NEXT:    li r7, 5
; PPC32-NEXT:    lwz r5, 8(r1)
; PPC32-NEXT:    lwz r6, 12(r1)
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f64__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    stfd f1, -8(r1)
; PPC64-NEXT:    ld r4, -8(r1)
; PPC64-NEXT:    sync
; PPC64-NEXT:    std r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic double %val1, ptr %ptr seq_cst, align 8
  ret void
}