; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; This is already checked in Atomics-64.ll
; RUN: llc -verify-machineinstrs < %s -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64

; FIXME: we do not currently check the operations themselves with CHECK-NEXT,
;   because they are implemented in a very messy way with lwarx/stwcx.
;   This should be fixed in a follow-up patch.

; We first check loads, for all sizes from i8 to i64.
; We also vary orderings to check for barriers.
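; A rough C11 analogue of these load tests (illustrative only; the helper
; names below are not part of the test):
;
;   #include <stdatomic.h>
;   unsigned char load_u8_relaxed(_Atomic unsigned char *mem) {
;     // memory_order_relaxed is the closest C mapping for the
;     // unordered/monotonic cases below
;     return atomic_load_explicit(mem, memory_order_relaxed);
;   }
;   int load_i32_acq(_Atomic int *mem) {
;     return atomic_load_explicit(mem, memory_order_acquire);
;   }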
define i8 @load_i8_unordered(ptr %mem) {
; CHECK-LABEL: load_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lbz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i8, ptr %mem unordered, align 1
  ret i8 %val
}
define i16 @load_i16_monotonic(ptr %mem) {
; CHECK-LABEL: load_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lhz r3, 0(r3)
; CHECK-NEXT:    blr
  %val = load atomic i16, ptr %mem monotonic, align 2
  ret i16 %val
}
define i32 @load_i32_acquire(ptr %mem) {
; PPC32-LABEL: load_i32_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    lwz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i32_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i32, ptr %mem acquire, align 4
  ret i32 %val
}
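; The lwz/cmp/bne-/isync sequence above is the usual PowerPC acquire idiom:
; comparing the loaded value with itself produces a branch that depends on
; the load, and isync after that (never-taken) branch keeps later memory
; accesses from being reordered before the load.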
define i64 @load_i64_seq_cst(ptr %mem) {
; PPC32-LABEL: load_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 5
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    ld r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    blr
  %val = load atomic i64, ptr %mem seq_cst, align 8
  ret i64 %val
}
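; seq_cst loads add a full sync before the load on top of the acquire idiom.
; PPC32 has no 64-bit larx/stcx., so the i64 load is not lock-free and lowers
; to a __atomic_load_8 libcall (5 == __ATOMIC_SEQ_CST in r4).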

; Stores
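; A rough C11 analogue of the store tests (illustrative only; the helper name
; is not part of the test):
;
;   #include <stdatomic.h>
;   void store_i32_rel(_Atomic int *mem) {
;     atomic_store_explicit(mem, 42, memory_order_release);
;   }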
define void @store_i8_unordered(ptr %mem) {
; CHECK-LABEL: store_i8_unordered:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    stb r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i8 42, ptr %mem unordered, align 1
  ret void
}
define void @store_i16_monotonic(ptr %mem) {
; CHECK-LABEL: store_i16_monotonic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    sth r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i16 42, ptr %mem monotonic, align 2
  ret void
}
define void @store_i32_release(ptr %mem) {
; CHECK-LABEL: store_i32_release:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r4, 42
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    stw r4, 0(r3)
; CHECK-NEXT:    blr
  store atomic i32 42, ptr %mem release, align 4
  ret void
}
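; A release store needs only a lightweight barrier (lwsync) before the store
; and nothing after it.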
define void @store_i64_seq_cst(ptr %mem) {
; PPC32-LABEL: store_i64_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    li r6, 42
; PPC32-NEXT:    li r7, 5
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_i64_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r4, 42
; PPC64-NEXT:    sync
; PPC64-NEXT:    std r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic i64 42, ptr %mem seq_cst, align 8
  ret void
}
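; A seq_cst store is preceded by a full sync instead. As with loads, the i64
; case is not lock-free on PPC32 and becomes a __atomic_store_8 libcall, with
; the ordering (5 == __ATOMIC_SEQ_CST) in r7.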

; Atomic CmpXchg
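; A rough C11 analogue of the cmpxchg tests (illustrative only; the helper
; name is not part of the test):
;
;   #include <stdatomic.h>
;   unsigned char cas_u8_sc_sc(_Atomic unsigned char *mem) {
;     unsigned char expected = 0;
;     atomic_compare_exchange_strong_explicit(mem, &expected, 1,
;         memory_order_seq_cst, memory_order_seq_cst);
;     return expected;  // old value: unchanged on success, updated on failure
;   }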
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    li r7, 255
; PPC32-NEXT:    rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r8, 24
; PPC32-NEXT:    slw r8, r5, r3
; PPC32-NEXT:    slw r9, r6, r3
; PPC32-NEXT:    slw r5, r7, r3
; PPC32-NEXT:    and r6, r8, r5
; PPC32-NEXT:    and r7, r9, r5
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB8_1:
; PPC32-NEXT:    lwarx r9, 0, r4
; PPC32-NEXT:    and r8, r9, r5
; PPC32-NEXT:    cmpw r8, r7
; PPC32-NEXT:    bne cr0, .LBB8_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r5
; PPC32-NEXT:    or r9, r9, r6
; PPC32-NEXT:    stwcx. r9, 0, r4
; PPC32-NEXT:    bne cr0, .LBB8_1
; PPC32-NEXT:  .LBB8_3:
; PPC32-NEXT:    srw r3, r8, r3
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r8, r3, 3, 27, 28
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    li r7, 255
; PPC64-NEXT:    rldicr r4, r3, 0, 61
; PPC64-NEXT:    xori r3, r8, 24
; PPC64-NEXT:    slw r8, r5, r3
; PPC64-NEXT:    slw r9, r6, r3
; PPC64-NEXT:    slw r5, r7, r3
; PPC64-NEXT:    and r6, r8, r5
; PPC64-NEXT:    and r7, r9, r5
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB8_1:
; PPC64-NEXT:    lwarx r9, 0, r4
; PPC64-NEXT:    and r8, r9, r5
; PPC64-NEXT:    cmpw r8, r7
; PPC64-NEXT:    bne cr0, .LBB8_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r5
; PPC64-NEXT:    or r9, r9, r6
; PPC64-NEXT:    stwcx. r9, 0, r4
; PPC64-NEXT:    bne cr0, .LBB8_1
; PPC64-NEXT:  .LBB8_3:
; PPC64-NEXT:    srw r3, r8, r3
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg ptr %mem, i8 0, i8 1 seq_cst seq_cst
  %loaded = extractvalue { i8, i1} %val, 0
  ret i8 %loaded
}
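; These subtargets lack partword load-and-reserve, so the i8 cmpxchg operates
; on the containing aligned word: the address is rounded down (rlwinm on
; PPC32, rldicr on PPC64), the comparand, new value, and a 0xff mask are
; shifted into lane position, and the lwarx/stwcx. loop compares and updates
; only the masked byte. The final srw shifts the old byte back into the low
; bits of the result. The seq_cst orderings show up as the sync before the
; loop and the lwsync after it.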
define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r6, 0
; PPC32-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC32-NEXT:    li r5, 1
; PPC32-NEXT:    ori r7, r6, 65535
; PPC32-NEXT:    xori r4, r4, 16
; PPC32-NEXT:    slw r8, r5, r4
; PPC32-NEXT:    slw r9, r6, r4
; PPC32-NEXT:    slw r5, r7, r4
; PPC32-NEXT:    rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT:    and r6, r8, r5
; PPC32-NEXT:    and r7, r9, r5
; PPC32-NEXT:  .LBB9_1:
; PPC32-NEXT:    lwarx r9, 0, r3
; PPC32-NEXT:    and r8, r9, r5
; PPC32-NEXT:    cmpw r8, r7
; PPC32-NEXT:    bne cr0, .LBB9_3
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    andc r9, r9, r5
; PPC32-NEXT:    or r9, r9, r6
; PPC32-NEXT:    stwcx. r9, 0, r3
; PPC32-NEXT:    bne cr0, .LBB9_1
; PPC32-NEXT:  .LBB9_3:
; PPC32-NEXT:    srw r3, r8, r4
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r6, 0
; PPC64-NEXT:    rlwinm r4, r3, 3, 27, 27
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    ori r7, r6, 65535
; PPC64-NEXT:    xori r4, r4, 16
; PPC64-NEXT:    slw r8, r5, r4
; PPC64-NEXT:    slw r9, r6, r4
; PPC64-NEXT:    slw r5, r7, r4
; PPC64-NEXT:    rldicr r3, r3, 0, 61
; PPC64-NEXT:    and r6, r8, r5
; PPC64-NEXT:    and r7, r9, r5
; PPC64-NEXT:  .LBB9_1:
; PPC64-NEXT:    lwarx r9, 0, r3
; PPC64-NEXT:    and r8, r9, r5
; PPC64-NEXT:    cmpw r8, r7
; PPC64-NEXT:    bne cr0, .LBB9_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    andc r9, r9, r5
; PPC64-NEXT:    or r9, r9, r6
; PPC64-NEXT:    stwcx. r9, 0, r3
; PPC64-NEXT:    bne cr0, .LBB9_1
; PPC64-NEXT:  .LBB9_3:
; PPC64-NEXT:    srw r3, r8, r4
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
  %loaded = extractvalue { i16, i1} %val, 0
  ret i16 %loaded
}
define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li r5, 1
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB10_1:
; CHECK-NEXT:    lwarx r4, 0, r3
; CHECK-NEXT:    cmpwi r4, 0
; CHECK-NEXT:    bne cr0, .LBB10_3
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    stwcx. r5, 0, r3
; CHECK-NEXT:    bne cr0, .LBB10_1
; CHECK-NEXT:  .LBB10_3:
; CHECK-NEXT:    mr r3, r4
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
  %loaded = extractvalue { i32, i1} %val, 0
  ret i32 %loaded
}
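; For acq_rel the CAS is bracketed by lwsync on both sides: one before the
; loop for the release half and one after it for the acquire half.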
define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
; PPC32-LABEL: cas_weak_i64_release_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 0
; PPC32-NEXT:    stw r4, 12(r1)
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    stw r4, 8(r1)
; PPC32-NEXT:    addi r4, r1, 8
; PPC32-NEXT:    li r6, 1
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    li r8, 0
; PPC32-NEXT:    bl __atomic_compare_exchange_8
; PPC32-NEXT:    lwz r4, 12(r1)
; PPC32-NEXT:    lwz r3, 8(r1)
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r5, 1
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB11_1:
; PPC64-NEXT:    ldarx r4, 0, r3
; PPC64-NEXT:    cmpdi r4, 0
; PPC64-NEXT:    bne cr0, .LBB11_3
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    stdcx. r5, 0, r3
; PPC64-NEXT:    bne cr0, .LBB11_1
; PPC64-NEXT:  .LBB11_3:
; PPC64-NEXT:    mr r3, r4
; PPC64-NEXT:    blr
  %val = cmpxchg weak ptr %mem, i64 0, i64 1 release monotonic
  %loaded = extractvalue { i64, i1} %val, 0
  ret i64 %loaded
}
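; On PPC32 the i64 cmpxchg is a __atomic_compare_exchange_8 libcall, with the
; success and failure orderings in r7 and r8 (3 == __ATOMIC_RELEASE,
; 0 == __ATOMIC_RELAXED). On PPC64 it inlines as an ldarx/stdcx. loop with
; only a leading lwsync; release needs no trailing barrier and the failure
; ordering is monotonic.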

; AtomicRMW
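; A rough C11 analogue of the atomicrmw tests (illustrative only; the helper
; names are not part of the test):
;
;   #include <stdatomic.h>
;   unsigned char fetch_add_u8_relaxed(_Atomic unsigned char *mem,
;                                      unsigned char operand) {
;     return atomic_fetch_add_explicit(mem, operand, memory_order_relaxed);
;   }
;   int xchg_i32(_Atomic int *mem, int operand) {
;     return atomic_exchange_explicit(mem, operand, memory_order_acq_rel);
;   }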
define i8 @add_i8_monotonic(ptr %mem, i8 %operand) {
; PPC32-LABEL: add_i8_monotonic:
; PPC32:       # %bb.0:
; PPC32-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC32-NEXT:    li r6, 255
; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT:    xori r3, r7, 24
; PPC32-NEXT:    slw r4, r4, r3
; PPC32-NEXT:    slw r6, r6, r3
; PPC32-NEXT:  .LBB12_1:
; PPC32-NEXT:    lwarx r7, 0, r5
; PPC32-NEXT:    add r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r5
; PPC32-NEXT:    bne cr0, .LBB12_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r3
; PPC32-NEXT:    clrlwi r3, r3, 24
; PPC32-NEXT:    blr
;
; PPC64-LABEL: add_i8_monotonic:
; PPC64:       # %bb.0:
; PPC64-NEXT:    rlwinm r7, r3, 3, 27, 28
; PPC64-NEXT:    li r6, 255
; PPC64-NEXT:    rldicr r5, r3, 0, 61
; PPC64-NEXT:    xori r3, r7, 24
; PPC64-NEXT:    slw r4, r4, r3
; PPC64-NEXT:    slw r6, r6, r3
; PPC64-NEXT:  .LBB12_1:
; PPC64-NEXT:    lwarx r7, 0, r5
; PPC64-NEXT:    add r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r5
; PPC64-NEXT:    bne cr0, .LBB12_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r3
; PPC64-NEXT:    clrlwi r3, r3, 24
; PPC64-NEXT:    blr
  %val = atomicrmw add ptr %mem, i8 %operand monotonic
  ret i8 %val
}
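; A monotonic RMW needs no barriers at all; the subword add reuses the same
; word-sized mask-and-shift scheme as the subword cmpxchg above, with clrlwi
; zero-extending the extracted byte.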
define i16 @xor_i16_seq_cst(ptr %mem, i16 %operand) {
; PPC32-LABEL: xor_i16_seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    li r5, 0
; PPC32-NEXT:    rlwinm r6, r3, 3, 27, 27
; PPC32-NEXT:    ori r7, r5, 65535
; PPC32-NEXT:    xori r5, r6, 16
; PPC32-NEXT:    rlwinm r3, r3, 0, 0, 29
; PPC32-NEXT:    slw r4, r4, r5
; PPC32-NEXT:    slw r6, r7, r5
; PPC32-NEXT:    sync
; PPC32-NEXT:  .LBB13_1:
; PPC32-NEXT:    lwarx r7, 0, r3
; PPC32-NEXT:    xor r8, r4, r7
; PPC32-NEXT:    andc r9, r7, r6
; PPC32-NEXT:    and r8, r8, r6
; PPC32-NEXT:    or r8, r8, r9
; PPC32-NEXT:    stwcx. r8, 0, r3
; PPC32-NEXT:    bne cr0, .LBB13_1
; PPC32-NEXT:  # %bb.2:
; PPC32-NEXT:    srw r3, r7, r5
; PPC32-NEXT:    clrlwi r3, r3, 16
; PPC32-NEXT:    lwsync
; PPC32-NEXT:    blr
;
; PPC64-LABEL: xor_i16_seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    li r5, 0
; PPC64-NEXT:    rlwinm r6, r3, 3, 27, 27
; PPC64-NEXT:    ori r7, r5, 65535
; PPC64-NEXT:    xori r5, r6, 16
; PPC64-NEXT:    rldicr r3, r3, 0, 61
; PPC64-NEXT:    slw r4, r4, r5
; PPC64-NEXT:    slw r6, r7, r5
; PPC64-NEXT:    sync
; PPC64-NEXT:  .LBB13_1:
; PPC64-NEXT:    lwarx r7, 0, r3
; PPC64-NEXT:    xor r8, r4, r7
; PPC64-NEXT:    andc r9, r7, r6
; PPC64-NEXT:    and r8, r8, r6
; PPC64-NEXT:    or r8, r8, r9
; PPC64-NEXT:    stwcx. r8, 0, r3
; PPC64-NEXT:    bne cr0, .LBB13_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    srw r3, r7, r5
; PPC64-NEXT:    clrlwi r3, r3, 16
; PPC64-NEXT:    lwsync
; PPC64-NEXT:    blr
  %val = atomicrmw xor ptr %mem, i16 %operand seq_cst
  ret i16 %val
}
define i32 @xchg_i32_acq_rel(ptr %mem, i32 %operand) {
; CHECK-LABEL: xchg_i32_acq_rel:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lwsync
; CHECK-NEXT:  .LBB14_1:
; CHECK-NEXT:    lwarx r5, 0, r3
; CHECK-NEXT:    stwcx. r4, 0, r3
; CHECK-NEXT:    bne cr0, .LBB14_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    mr r3, r5
; CHECK-NEXT:    lwsync
; CHECK-NEXT:    blr
  %val = atomicrmw xchg ptr %mem, i32 %operand acq_rel
  ret i32 %val
}
define i64 @and_i64_release(ptr %mem, i64 %operand) {
; PPC32-LABEL: and_i64_release:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r7, 3
; PPC32-NEXT:    bl __atomic_fetch_and_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: and_i64_release:
; PPC64:       # %bb.0:
; PPC64-NEXT:    lwsync
; PPC64-NEXT:  .LBB15_1:
; PPC64-NEXT:    ldarx r5, 0, r3
; PPC64-NEXT:    and r6, r4, r5
; PPC64-NEXT:    stdcx. r6, 0, r3
; PPC64-NEXT:    bne cr0, .LBB15_1
; PPC64-NEXT:  # %bb.2:
; PPC64-NEXT:    mr r3, r5
; PPC64-NEXT:    blr
  %val = atomicrmw and ptr %mem, i64 %operand release
  ret i64 %val
}
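; As with cmpxchg, a 64-bit RMW is a libcall on PPC32 (__atomic_fetch_and_8,
; 3 == __ATOMIC_RELEASE in r7) and an ldarx/stdcx. loop with only a leading
; lwsync on PPC64.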
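; Floating-point atomic loads and stores are performed as integer atomics of
; the same width. A rough C11 analogue (illustrative only; the helper name is
; not part of the test):
;
;   #include <stdatomic.h>
;   float load_f32_sc(_Atomic float *ptr) {
;     return atomic_load_explicit(ptr, memory_order_seq_cst);
;   }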
define half @load_atomic_f16__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f16__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    sync
; PPC32-NEXT:    lhz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    bl __gnu_h2f_ieee
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f16__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    mflr r0
; PPC64-NEXT:    stdu r1, -112(r1)
; PPC64-NEXT:    std r0, 128(r1)
; PPC64-NEXT:    .cfi_def_cfa_offset 112
; PPC64-NEXT:    .cfi_offset lr, 16
; PPC64-NEXT:    sync
; PPC64-NEXT:    lhz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    bl __gnu_h2f_ieee
; PPC64-NEXT:    nop
; PPC64-NEXT:    addi r1, r1, 112
; PPC64-NEXT:    ld r0, 16(r1)
; PPC64-NEXT:    mtlr r0
; PPC64-NEXT:    blr
  %val = load atomic half, ptr %ptr seq_cst, align 2
  ret half %val
}
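; half has no native load here, so the value is loaded as an i16 with the
; usual seq_cst barriers and then widened to float by the __gnu_h2f_ieee
; libcall.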

; FIXME: bf16_to_fp fails to select
; define bfloat @load_atomic_bf16__seq_cst(ptr %ptr) {
;   %val = load atomic bfloat, ptr %ptr seq_cst, align 2
;   ret bfloat %val
; }

define float @load_atomic_f32__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f32__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    sync
; PPC32-NEXT:    lwz r3, 0(r3)
; PPC32-NEXT:    cmpw cr7, r3, r3
; PPC32-NEXT:    bne- cr7, .+4
; PPC32-NEXT:    isync
; PPC32-NEXT:    stw r3, 12(r1)
; PPC32-NEXT:    lfs f1, 12(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f32__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    lwz r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    stw r3, -4(r1)
; PPC64-NEXT:    lfs f1, -4(r1)
; PPC64-NEXT:    blr
  %val = load atomic float, ptr %ptr seq_cst, align 4
  ret float %val
}
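; The loaded word reaches the FPR through a stack slot (stw + lfs): these
; subtargets have no direct GPR-to-FPR moves.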

define double @load_atomic_f64__seq_cst(ptr %ptr) {
; PPC32-LABEL: load_atomic_f64__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    li r4, 5
; PPC32-NEXT:    bl __atomic_load_8
; PPC32-NEXT:    stw r4, 12(r1)
; PPC32-NEXT:    stw r3, 8(r1)
; PPC32-NEXT:    lfd f1, 8(r1)
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: load_atomic_f64__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    sync
; PPC64-NEXT:    ld r3, 0(r3)
; PPC64-NEXT:    cmpd cr7, r3, r3
; PPC64-NEXT:    bne- cr7, .+4
; PPC64-NEXT:    isync
; PPC64-NEXT:    std r3, -8(r1)
; PPC64-NEXT:    lfd f1, -8(r1)
; PPC64-NEXT:    blr
  %val = load atomic double, ptr %ptr seq_cst, align 8
  ret double %val
}
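; On PPC32 the f64 load is again a __atomic_load_8 libcall; the i64 result in
; r3:r4 is stored to a stack slot and reloaded with lfd to produce the double.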

define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
; PPC32-LABEL: store_atomic_f16__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    .cfi_offset r30, -8
; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
; PPC32-NEXT:    mr r30, r3
; PPC32-NEXT:    bl __gnu_f2h_ieee
; PPC32-NEXT:    sync
; PPC32-NEXT:    sth r3, 0(r30)
; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f16__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    mflr r0
; PPC64-NEXT:    stdu r1, -128(r1)
; PPC64-NEXT:    std r0, 144(r1)
; PPC64-NEXT:    .cfi_def_cfa_offset 128
; PPC64-NEXT:    .cfi_offset lr, 16
; PPC64-NEXT:    .cfi_offset r30, -16
; PPC64-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
; PPC64-NEXT:    mr r30, r3
; PPC64-NEXT:    bl __gnu_f2h_ieee
; PPC64-NEXT:    nop
; PPC64-NEXT:    sync
; PPC64-NEXT:    sth r3, 0(r30)
; PPC64-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
; PPC64-NEXT:    addi r1, r1, 128
; PPC64-NEXT:    ld r0, 16(r1)
; PPC64-NEXT:    mtlr r0
; PPC64-NEXT:    blr
  store atomic half %val1, ptr %ptr seq_cst, align 2
  ret void
}
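; Stores mirror the loads: the half is narrowed by the __gnu_f2h_ieee libcall
; and then stored as an i16 behind a full sync.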

; FIXME: bf16_to_fp fails to select
; define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
;   store atomic bfloat %val1, ptr %ptr seq_cst, align 2
;   ret void
; }

define void @store_atomic_f32__seq_cst(ptr %ptr, float %val1) {
; PPC32-LABEL: store_atomic_f32__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    stfs f1, 12(r1)
; PPC32-NEXT:    lwz r4, 12(r1)
; PPC32-NEXT:    sync
; PPC32-NEXT:    stw r4, 0(r3)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f32__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    stfs f1, -4(r1)
; PPC64-NEXT:    lwz r4, -4(r1)
; PPC64-NEXT:    sync
; PPC64-NEXT:    stw r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic float %val1, ptr %ptr seq_cst, align 4
  ret void
}
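; The float takes the reverse path through a stack slot (stfs + lwz) into a
; GPR before the barriered integer store.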

define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
; PPC32-LABEL: store_atomic_f64__seq_cst:
; PPC32:       # %bb.0:
; PPC32-NEXT:    mflr r0
; PPC32-NEXT:    stwu r1, -16(r1)
; PPC32-NEXT:    stw r0, 20(r1)
; PPC32-NEXT:    .cfi_def_cfa_offset 16
; PPC32-NEXT:    .cfi_offset lr, 4
; PPC32-NEXT:    stfd f1, 8(r1)
; PPC32-NEXT:    li r7, 5
; PPC32-NEXT:    lwz r5, 8(r1)
; PPC32-NEXT:    lwz r6, 12(r1)
; PPC32-NEXT:    bl __atomic_store_8
; PPC32-NEXT:    lwz r0, 20(r1)
; PPC32-NEXT:    addi r1, r1, 16
; PPC32-NEXT:    mtlr r0
; PPC32-NEXT:    blr
;
; PPC64-LABEL: store_atomic_f64__seq_cst:
; PPC64:       # %bb.0:
; PPC64-NEXT:    stfd f1, -8(r1)
; PPC64-NEXT:    ld r4, -8(r1)
; PPC64-NEXT:    sync
; PPC64-NEXT:    std r4, 0(r3)
; PPC64-NEXT:    blr
  store atomic double %val1, ptr %ptr seq_cst, align 8
  ret void
}