xref: /llvm-project/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll (revision 706e1975400b3f30bd406b694bb711a7c7dbe1c4)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
3; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -enable-subreg-liveness \
4; RUN:   < %s | FileCheck --check-prefix=P8 %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
6; RUN:   -ppc-asm-full-reg-names \
7; RUN:   -enable-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
9; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
10; RUN:   --check-prefix=LE-PWR8 %s
11; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \
12; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
13; RUN:   --check-prefix=LE-PWR8 %s
14; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \
15; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
16; RUN:   --check-prefix=AIX64-PWR8 %s
17; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \
18; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s \
19; RUN: | FileCheck --check-prefix=PPC-PWR8 %s
20
; lq_unordered - atomic i128 load from %src with `unordered` ordering and
; 16-byte alignment; the loaded value is returned.
; Expected lowering, as pinned by the assertions in this function:
;   * The 64-bit pwr8 runs (P8, LE-PWR8 and AIX64-PWR8 prefixes) issue a single
;     `lq r4, 0(r3)` followed by register moves into r3/r4.
;   * The pwr7 run loads 0 into r4 and calls __atomic_load_16
;     (presumably r4 is the memory-order argument - TODO confirm).
;   * The 32-bit run (PPC-PWR8 prefix) calls __atomic_load with 16 in r3, the
;     source pointer in r4, a stack buffer at 16(r1) in r5 and 0 in r6, then
;     reloads the four result words from 16(r1)..28(r1).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
21define dso_local i128 @lq_unordered(ptr %src) {
22; P8-LABEL: lq_unordered:
23; P8:       # %bb.0: # %entry
24; P8-NEXT:    lq r4, 0(r3)
25; P8-NEXT:    mr r3, r4
26; P8-NEXT:    mr r4, r5
27; P8-NEXT:    blr
28;
29; PWR7-LABEL: lq_unordered:
30; PWR7:       # %bb.0: # %entry
31; PWR7-NEXT:    mflr r0
32; PWR7-NEXT:    stdu r1, -112(r1)
33; PWR7-NEXT:    std r0, 128(r1)
34; PWR7-NEXT:    .cfi_def_cfa_offset 112
35; PWR7-NEXT:    .cfi_offset lr, 16
36; PWR7-NEXT:    li r4, 0
37; PWR7-NEXT:    bl __atomic_load_16
38; PWR7-NEXT:    nop
39; PWR7-NEXT:    addi r1, r1, 112
40; PWR7-NEXT:    ld r0, 16(r1)
41; PWR7-NEXT:    mtlr r0
42; PWR7-NEXT:    blr
43;
44; LE-PWR8-LABEL: lq_unordered:
45; LE-PWR8:       # %bb.0: # %entry
46; LE-PWR8-NEXT:    lq r4, 0(r3)
47; LE-PWR8-NEXT:    mr r3, r5
48; LE-PWR8-NEXT:    blr
49;
50; AIX64-PWR8-LABEL: lq_unordered:
51; AIX64-PWR8:       # %bb.0: # %entry
52; AIX64-PWR8-NEXT:    lq r4, 0(r3)
53; AIX64-PWR8-NEXT:    mr r3, r4
54; AIX64-PWR8-NEXT:    mr r4, r5
55; AIX64-PWR8-NEXT:    blr
56;
57; PPC-PWR8-LABEL: lq_unordered:
58; PPC-PWR8:       # %bb.0: # %entry
59; PPC-PWR8-NEXT:    mflr r0
60; PPC-PWR8-NEXT:    stwu r1, -32(r1)
61; PPC-PWR8-NEXT:    stw r0, 36(r1)
62; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
63; PPC-PWR8-NEXT:    .cfi_offset lr, 4
64; PPC-PWR8-NEXT:    mr r4, r3
65; PPC-PWR8-NEXT:    addi r5, r1, 16
66; PPC-PWR8-NEXT:    li r3, 16
67; PPC-PWR8-NEXT:    li r6, 0
68; PPC-PWR8-NEXT:    bl __atomic_load
69; PPC-PWR8-NEXT:    lwz r6, 28(r1)
70; PPC-PWR8-NEXT:    lwz r5, 24(r1)
71; PPC-PWR8-NEXT:    lwz r4, 20(r1)
72; PPC-PWR8-NEXT:    lwz r3, 16(r1)
73; PPC-PWR8-NEXT:    lwz r0, 36(r1)
74; PPC-PWR8-NEXT:    addi r1, r1, 32
75; PPC-PWR8-NEXT:    mtlr r0
76; PPC-PWR8-NEXT:    blr
77entry:
78  %0 = load atomic i128, ptr %src unordered, align 16
79  ret i128 %0
80}
81
; lqx_unordered - atomic `unordered` i128 load from the element of %src selected
; by the runtime index %idx (getelementptr i128), returning the loaded value.
; Expected lowering, as pinned by the assertions in this function:
;   * Every run scales the index by 16 with a shift-left by 4 (sldi, or slwi in
;     the 32-bit run) and adds it to the base pointer; the pwr8 runs then use
;     `lq` with a zero displacement on the computed address.
;   * The little-endian pwr8 run loads into the r2/r3 pair, so r2 is spilled to
;     -8(r1) before the `lq` and reloaded afterwards.
;   * The pwr7 run calls __atomic_load_16 and the 32-bit run calls
;     __atomic_load, both on the computed address, with 0 in r4 / r6
;     (presumably the memory-order argument - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
82define dso_local i128 @lqx_unordered(ptr %src, i64 %idx) {
83; P8-LABEL: lqx_unordered:
84; P8:       # %bb.0: # %entry
85; P8-NEXT:    sldi r4, r4, 4
86; P8-NEXT:    add r3, r3, r4
87; P8-NEXT:    lq r4, 0(r3)
88; P8-NEXT:    mr r3, r4
89; P8-NEXT:    mr r4, r5
90; P8-NEXT:    blr
91;
92; PWR7-LABEL: lqx_unordered:
93; PWR7:       # %bb.0: # %entry
94; PWR7-NEXT:    mflr r0
95; PWR7-NEXT:    stdu r1, -112(r1)
96; PWR7-NEXT:    std r0, 128(r1)
97; PWR7-NEXT:    .cfi_def_cfa_offset 112
98; PWR7-NEXT:    .cfi_offset lr, 16
99; PWR7-NEXT:    sldi r4, r4, 4
100; PWR7-NEXT:    add r3, r3, r4
101; PWR7-NEXT:    li r4, 0
102; PWR7-NEXT:    bl __atomic_load_16
103; PWR7-NEXT:    nop
104; PWR7-NEXT:    addi r1, r1, 112
105; PWR7-NEXT:    ld r0, 16(r1)
106; PWR7-NEXT:    mtlr r0
107; PWR7-NEXT:    blr
108;
109; LE-PWR8-LABEL: lqx_unordered:
110; LE-PWR8:       # %bb.0: # %entry
111; LE-PWR8-NEXT:    sldi r4, r4, 4
112; LE-PWR8-NEXT:    std r2, -8(r1) # 8-byte Folded Spill
113; LE-PWR8-NEXT:    add r4, r3, r4
114; LE-PWR8-NEXT:    lq r2, 0(r4)
115; LE-PWR8-NEXT:    mr r4, r2
116; LE-PWR8-NEXT:    ld r2, -8(r1) # 8-byte Folded Reload
117; LE-PWR8-NEXT:    blr
118;
119; AIX64-PWR8-LABEL: lqx_unordered:
120; AIX64-PWR8:       # %bb.0: # %entry
121; AIX64-PWR8-NEXT:    sldi r4, r4, 4
122; AIX64-PWR8-NEXT:    add r3, r3, r4
123; AIX64-PWR8-NEXT:    lq r4, 0(r3)
124; AIX64-PWR8-NEXT:    mr r3, r4
125; AIX64-PWR8-NEXT:    mr r4, r5
126; AIX64-PWR8-NEXT:    blr
127;
128; PPC-PWR8-LABEL: lqx_unordered:
129; PPC-PWR8:       # %bb.0: # %entry
130; PPC-PWR8-NEXT:    mflr r0
131; PPC-PWR8-NEXT:    stwu r1, -32(r1)
132; PPC-PWR8-NEXT:    stw r0, 36(r1)
133; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
134; PPC-PWR8-NEXT:    .cfi_offset lr, 4
135; PPC-PWR8-NEXT:    slwi r4, r6, 4
136; PPC-PWR8-NEXT:    addi r5, r1, 16
137; PPC-PWR8-NEXT:    li r6, 0
138; PPC-PWR8-NEXT:    add r4, r3, r4
139; PPC-PWR8-NEXT:    li r3, 16
140; PPC-PWR8-NEXT:    bl __atomic_load
141; PPC-PWR8-NEXT:    lwz r6, 28(r1)
142; PPC-PWR8-NEXT:    lwz r5, 24(r1)
143; PPC-PWR8-NEXT:    lwz r4, 20(r1)
144; PPC-PWR8-NEXT:    lwz r3, 16(r1)
145; PPC-PWR8-NEXT:    lwz r0, 36(r1)
146; PPC-PWR8-NEXT:    addi r1, r1, 32
147; PPC-PWR8-NEXT:    mtlr r0
148; PPC-PWR8-NEXT:    blr
149entry:
150  %0 = getelementptr i128, ptr %src, i64 %idx
151  %1 = load atomic i128, ptr %0 unordered, align 16
152  ret i128 %1
153}
154
; lq_big_offset_unordered - atomic `unordered` i128 load from element 131072 of
; %src, i.e. a constant byte offset of 131072 * 16 = 2097152 = 32 << 16.
; Expected lowering, as pinned by the assertions in this function:
;   * The pwr8 runs materialize the offset with `lis 32`, add it to the base
;     and use `lq` with a zero displacement (presumably because the offset is
;     too large for the lq displacement field - TODO confirm).
;   * The little-endian pwr8 run loads into the r2/r3 pair and therefore
;     spills and reloads r2 around the `lq`.
;   * The pwr7 and 32-bit runs fold the offset with `addis ..., 32` and call
;     __atomic_load_16 / __atomic_load with 0 in r4 / r6.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
155define dso_local i128 @lq_big_offset_unordered(ptr %src) {
156; P8-LABEL: lq_big_offset_unordered:
157; P8:       # %bb.0: # %entry
158; P8-NEXT:    lis r4, 32
159; P8-NEXT:    add r3, r3, r4
160; P8-NEXT:    lq r4, 0(r3)
161; P8-NEXT:    mr r3, r4
162; P8-NEXT:    mr r4, r5
163; P8-NEXT:    blr
164;
165; PWR7-LABEL: lq_big_offset_unordered:
166; PWR7:       # %bb.0: # %entry
167; PWR7-NEXT:    mflr r0
168; PWR7-NEXT:    stdu r1, -112(r1)
169; PWR7-NEXT:    std r0, 128(r1)
170; PWR7-NEXT:    .cfi_def_cfa_offset 112
171; PWR7-NEXT:    .cfi_offset lr, 16
172; PWR7-NEXT:    addis r3, r3, 32
173; PWR7-NEXT:    li r4, 0
174; PWR7-NEXT:    bl __atomic_load_16
175; PWR7-NEXT:    nop
176; PWR7-NEXT:    addi r1, r1, 112
177; PWR7-NEXT:    ld r0, 16(r1)
178; PWR7-NEXT:    mtlr r0
179; PWR7-NEXT:    blr
180;
181; LE-PWR8-LABEL: lq_big_offset_unordered:
182; LE-PWR8:       # %bb.0: # %entry
183; LE-PWR8-NEXT:    lis r4, 32
184; LE-PWR8-NEXT:    std r2, -8(r1) # 8-byte Folded Spill
185; LE-PWR8-NEXT:    add r4, r3, r4
186; LE-PWR8-NEXT:    lq r2, 0(r4)
187; LE-PWR8-NEXT:    mr r4, r2
188; LE-PWR8-NEXT:    ld r2, -8(r1) # 8-byte Folded Reload
189; LE-PWR8-NEXT:    blr
190;
191; AIX64-PWR8-LABEL: lq_big_offset_unordered:
192; AIX64-PWR8:       # %bb.0: # %entry
193; AIX64-PWR8-NEXT:    lis r4, 32
194; AIX64-PWR8-NEXT:    add r3, r3, r4
195; AIX64-PWR8-NEXT:    lq r4, 0(r3)
196; AIX64-PWR8-NEXT:    mr r3, r4
197; AIX64-PWR8-NEXT:    mr r4, r5
198; AIX64-PWR8-NEXT:    blr
199;
200; PPC-PWR8-LABEL: lq_big_offset_unordered:
201; PPC-PWR8:       # %bb.0: # %entry
202; PPC-PWR8-NEXT:    mflr r0
203; PPC-PWR8-NEXT:    stwu r1, -32(r1)
204; PPC-PWR8-NEXT:    stw r0, 36(r1)
205; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
206; PPC-PWR8-NEXT:    .cfi_offset lr, 4
207; PPC-PWR8-NEXT:    addis r4, r3, 32
208; PPC-PWR8-NEXT:    addi r5, r1, 16
209; PPC-PWR8-NEXT:    li r3, 16
210; PPC-PWR8-NEXT:    li r6, 0
211; PPC-PWR8-NEXT:    bl __atomic_load
212; PPC-PWR8-NEXT:    lwz r6, 28(r1)
213; PPC-PWR8-NEXT:    lwz r5, 24(r1)
214; PPC-PWR8-NEXT:    lwz r4, 20(r1)
215; PPC-PWR8-NEXT:    lwz r3, 16(r1)
216; PPC-PWR8-NEXT:    lwz r0, 36(r1)
217; PPC-PWR8-NEXT:    addi r1, r1, 32
218; PPC-PWR8-NEXT:    mtlr r0
219; PPC-PWR8-NEXT:    blr
220entry:
221  %0 = getelementptr i128, ptr %src, i64 131072
222  %1 = load atomic i128, ptr %0 unordered, align 16
223  ret i128 %1
224}
225
; lq_monotonic - atomic i128 load from %src with `monotonic` ordering and
; 16-byte alignment; the loaded value is returned.
; The assertions in this function expect the same code as for the `unordered`
; load in this file: a bare `lq` with no barrier instructions in the 64-bit
; pwr8 runs, and calls to __atomic_load_16 (pwr7 run) / __atomic_load (32-bit
; run) with 0 in r4 / r6 (presumably the memory-order argument - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
226define dso_local i128 @lq_monotonic(ptr %src) {
227; P8-LABEL: lq_monotonic:
228; P8:       # %bb.0: # %entry
229; P8-NEXT:    lq r4, 0(r3)
230; P8-NEXT:    mr r3, r4
231; P8-NEXT:    mr r4, r5
232; P8-NEXT:    blr
233;
234; PWR7-LABEL: lq_monotonic:
235; PWR7:       # %bb.0: # %entry
236; PWR7-NEXT:    mflr r0
237; PWR7-NEXT:    stdu r1, -112(r1)
238; PWR7-NEXT:    std r0, 128(r1)
239; PWR7-NEXT:    .cfi_def_cfa_offset 112
240; PWR7-NEXT:    .cfi_offset lr, 16
241; PWR7-NEXT:    li r4, 0
242; PWR7-NEXT:    bl __atomic_load_16
243; PWR7-NEXT:    nop
244; PWR7-NEXT:    addi r1, r1, 112
245; PWR7-NEXT:    ld r0, 16(r1)
246; PWR7-NEXT:    mtlr r0
247; PWR7-NEXT:    blr
248;
249; LE-PWR8-LABEL: lq_monotonic:
250; LE-PWR8:       # %bb.0: # %entry
251; LE-PWR8-NEXT:    lq r4, 0(r3)
252; LE-PWR8-NEXT:    mr r3, r5
253; LE-PWR8-NEXT:    blr
254;
255; AIX64-PWR8-LABEL: lq_monotonic:
256; AIX64-PWR8:       # %bb.0: # %entry
257; AIX64-PWR8-NEXT:    lq r4, 0(r3)
258; AIX64-PWR8-NEXT:    mr r3, r4
259; AIX64-PWR8-NEXT:    mr r4, r5
260; AIX64-PWR8-NEXT:    blr
261;
262; PPC-PWR8-LABEL: lq_monotonic:
263; PPC-PWR8:       # %bb.0: # %entry
264; PPC-PWR8-NEXT:    mflr r0
265; PPC-PWR8-NEXT:    stwu r1, -32(r1)
266; PPC-PWR8-NEXT:    stw r0, 36(r1)
267; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
268; PPC-PWR8-NEXT:    .cfi_offset lr, 4
269; PPC-PWR8-NEXT:    mr r4, r3
270; PPC-PWR8-NEXT:    addi r5, r1, 16
271; PPC-PWR8-NEXT:    li r3, 16
272; PPC-PWR8-NEXT:    li r6, 0
273; PPC-PWR8-NEXT:    bl __atomic_load
274; PPC-PWR8-NEXT:    lwz r6, 28(r1)
275; PPC-PWR8-NEXT:    lwz r5, 24(r1)
276; PPC-PWR8-NEXT:    lwz r4, 20(r1)
277; PPC-PWR8-NEXT:    lwz r3, 16(r1)
278; PPC-PWR8-NEXT:    lwz r0, 36(r1)
279; PPC-PWR8-NEXT:    addi r1, r1, 32
280; PPC-PWR8-NEXT:    mtlr r0
281; PPC-PWR8-NEXT:    blr
282entry:
283  %0 = load atomic i128, ptr %src monotonic, align 16
284  ret i128 %0
285}
286
; lq_acquire - atomic i128 load from %src with `acquire` ordering and 16-byte
; alignment; the loaded value is returned.
; Expected lowering, as pinned by the assertions in this function:
;   * The 64-bit pwr8 runs issue `lq` and then the sequence
;     `cmpd cr7, r5, r5` / `bne- cr7, .+4` / `isync` on a loaded register
;     (the AIX run spells the branch target `$+4`).
;   * The pwr7 run calls __atomic_load_16 with 2 in r4 and the 32-bit run calls
;     __atomic_load with 2 in r6 (presumably the memory-order argument, where
;     the unordered/monotonic tests in this file pass 0 - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
287define dso_local i128 @lq_acquire(ptr %src) {
288; P8-LABEL: lq_acquire:
289; P8:       # %bb.0: # %entry
290; P8-NEXT:    lq r4, 0(r3)
291; P8-NEXT:    cmpd cr7, r5, r5
292; P8-NEXT:    mr r3, r4
293; P8-NEXT:    mr r4, r5
294; P8-NEXT:    bne- cr7, .+4
295; P8-NEXT:    isync
296; P8-NEXT:    blr
297;
298; PWR7-LABEL: lq_acquire:
299; PWR7:       # %bb.0: # %entry
300; PWR7-NEXT:    mflr r0
301; PWR7-NEXT:    stdu r1, -112(r1)
302; PWR7-NEXT:    std r0, 128(r1)
303; PWR7-NEXT:    .cfi_def_cfa_offset 112
304; PWR7-NEXT:    .cfi_offset lr, 16
305; PWR7-NEXT:    li r4, 2
306; PWR7-NEXT:    bl __atomic_load_16
307; PWR7-NEXT:    nop
308; PWR7-NEXT:    addi r1, r1, 112
309; PWR7-NEXT:    ld r0, 16(r1)
310; PWR7-NEXT:    mtlr r0
311; PWR7-NEXT:    blr
312;
313; LE-PWR8-LABEL: lq_acquire:
314; LE-PWR8:       # %bb.0: # %entry
315; LE-PWR8-NEXT:    lq r4, 0(r3)
316; LE-PWR8-NEXT:    cmpd cr7, r5, r5
317; LE-PWR8-NEXT:    mr r3, r5
318; LE-PWR8-NEXT:    bne- cr7, .+4
319; LE-PWR8-NEXT:    isync
320; LE-PWR8-NEXT:    blr
321;
322; AIX64-PWR8-LABEL: lq_acquire:
323; AIX64-PWR8:       # %bb.0: # %entry
324; AIX64-PWR8-NEXT:    lq r4, 0(r3)
325; AIX64-PWR8-NEXT:    cmpd cr7, r5, r5
326; AIX64-PWR8-NEXT:    mr r3, r4
327; AIX64-PWR8-NEXT:    mr r4, r5
328; AIX64-PWR8-NEXT:    bne- cr7, $+4
329; AIX64-PWR8-NEXT:    isync
330; AIX64-PWR8-NEXT:    blr
331;
332; PPC-PWR8-LABEL: lq_acquire:
333; PPC-PWR8:       # %bb.0: # %entry
334; PPC-PWR8-NEXT:    mflr r0
335; PPC-PWR8-NEXT:    stwu r1, -32(r1)
336; PPC-PWR8-NEXT:    stw r0, 36(r1)
337; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
338; PPC-PWR8-NEXT:    .cfi_offset lr, 4
339; PPC-PWR8-NEXT:    mr r4, r3
340; PPC-PWR8-NEXT:    addi r5, r1, 16
341; PPC-PWR8-NEXT:    li r3, 16
342; PPC-PWR8-NEXT:    li r6, 2
343; PPC-PWR8-NEXT:    bl __atomic_load
344; PPC-PWR8-NEXT:    lwz r6, 28(r1)
345; PPC-PWR8-NEXT:    lwz r5, 24(r1)
346; PPC-PWR8-NEXT:    lwz r4, 20(r1)
347; PPC-PWR8-NEXT:    lwz r3, 16(r1)
348; PPC-PWR8-NEXT:    lwz r0, 36(r1)
349; PPC-PWR8-NEXT:    addi r1, r1, 32
350; PPC-PWR8-NEXT:    mtlr r0
351; PPC-PWR8-NEXT:    blr
352entry:
353  %0 = load atomic i128, ptr %src acquire, align 16
354  ret i128 %0
355}
356
; lq_seqcst - atomic i128 load from %src with `seq_cst` ordering and 16-byte
; alignment; the loaded value is returned.
; Expected lowering, as pinned by the assertions in this function:
;   * The 64-bit pwr8 runs issue a leading `sync`, then `lq`, then the
;     `cmpd cr7, r5, r5` / `bne- cr7, .+4` / `isync` sequence
;     (the AIX run spells the branch target `$+4`).
;   * The pwr7 run calls __atomic_load_16 with 5 in r4 and the 32-bit run calls
;     __atomic_load with 5 in r6 (presumably the memory-order argument
;     - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
357define dso_local i128 @lq_seqcst(ptr %src) {
358; P8-LABEL: lq_seqcst:
359; P8:       # %bb.0: # %entry
360; P8-NEXT:    sync
361; P8-NEXT:    lq r4, 0(r3)
362; P8-NEXT:    cmpd cr7, r5, r5
363; P8-NEXT:    mr r3, r4
364; P8-NEXT:    bne- cr7, .+4
365; P8-NEXT:    isync
366; P8-NEXT:    mr r4, r5
367; P8-NEXT:    blr
368;
369; PWR7-LABEL: lq_seqcst:
370; PWR7:       # %bb.0: # %entry
371; PWR7-NEXT:    mflr r0
372; PWR7-NEXT:    stdu r1, -112(r1)
373; PWR7-NEXT:    std r0, 128(r1)
374; PWR7-NEXT:    .cfi_def_cfa_offset 112
375; PWR7-NEXT:    .cfi_offset lr, 16
376; PWR7-NEXT:    li r4, 5
377; PWR7-NEXT:    bl __atomic_load_16
378; PWR7-NEXT:    nop
379; PWR7-NEXT:    addi r1, r1, 112
380; PWR7-NEXT:    ld r0, 16(r1)
381; PWR7-NEXT:    mtlr r0
382; PWR7-NEXT:    blr
383;
384; LE-PWR8-LABEL: lq_seqcst:
385; LE-PWR8:       # %bb.0: # %entry
386; LE-PWR8-NEXT:    sync
387; LE-PWR8-NEXT:    lq r4, 0(r3)
388; LE-PWR8-NEXT:    cmpd cr7, r5, r5
389; LE-PWR8-NEXT:    mr r3, r5
390; LE-PWR8-NEXT:    bne- cr7, .+4
391; LE-PWR8-NEXT:    isync
392; LE-PWR8-NEXT:    blr
393;
394; AIX64-PWR8-LABEL: lq_seqcst:
395; AIX64-PWR8:       # %bb.0: # %entry
396; AIX64-PWR8-NEXT:    sync
397; AIX64-PWR8-NEXT:    lq r4, 0(r3)
398; AIX64-PWR8-NEXT:    cmpd cr7, r5, r5
399; AIX64-PWR8-NEXT:    mr r3, r4
400; AIX64-PWR8-NEXT:    bne- cr7, $+4
401; AIX64-PWR8-NEXT:    isync
402; AIX64-PWR8-NEXT:    mr r4, r5
403; AIX64-PWR8-NEXT:    blr
404;
405; PPC-PWR8-LABEL: lq_seqcst:
406; PPC-PWR8:       # %bb.0: # %entry
407; PPC-PWR8-NEXT:    mflr r0
408; PPC-PWR8-NEXT:    stwu r1, -32(r1)
409; PPC-PWR8-NEXT:    stw r0, 36(r1)
410; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
411; PPC-PWR8-NEXT:    .cfi_offset lr, 4
412; PPC-PWR8-NEXT:    mr r4, r3
413; PPC-PWR8-NEXT:    addi r5, r1, 16
414; PPC-PWR8-NEXT:    li r3, 16
415; PPC-PWR8-NEXT:    li r6, 5
416; PPC-PWR8-NEXT:    bl __atomic_load
417; PPC-PWR8-NEXT:    lwz r6, 28(r1)
418; PPC-PWR8-NEXT:    lwz r5, 24(r1)
419; PPC-PWR8-NEXT:    lwz r4, 20(r1)
420; PPC-PWR8-NEXT:    lwz r3, 16(r1)
421; PPC-PWR8-NEXT:    lwz r0, 36(r1)
422; PPC-PWR8-NEXT:    addi r1, r1, 32
423; PPC-PWR8-NEXT:    mtlr r0
424; PPC-PWR8-NEXT:    blr
425entry:
426  %0 = load atomic i128, ptr %src seq_cst, align 16
427  ret i128 %0
428}
429
; stq_unordered - atomic i128 store of %val to %dst with `unordered` ordering
; and 16-byte alignment; returns void.
; Expected lowering, as pinned by the assertions in this function:
;   * The 64-bit pwr8 runs copy the two incoming halves (r3/r4) into the r6/r7
;     pair and issue a single `stq r6, 0(r5)`; the little-endian run swaps
;     which half goes to r6 and which to r7.
;   * The pwr7 run rearranges the arguments and calls __atomic_store_16 with 0
;     in r6 (presumably the memory-order argument - TODO confirm).
;   * The 32-bit run (PPC-PWR8 prefix) writes the four value words to
;     16(r1)..28(r1) and calls __atomic_store with 16 in r3, the destination in
;     r4, the stack buffer address in r5 and 0 in r6.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
430define dso_local void @stq_unordered(i128 %val, ptr %dst) {
431; P8-LABEL: stq_unordered:
432; P8:       # %bb.0: # %entry
433; P8-NEXT:    mr r7, r4
434; P8-NEXT:    mr r6, r3
435; P8-NEXT:    stq r6, 0(r5)
436; P8-NEXT:    blr
437;
438; PWR7-LABEL: stq_unordered:
439; PWR7:       # %bb.0: # %entry
440; PWR7-NEXT:    mflr r0
441; PWR7-NEXT:    stdu r1, -112(r1)
442; PWR7-NEXT:    std r0, 128(r1)
443; PWR7-NEXT:    .cfi_def_cfa_offset 112
444; PWR7-NEXT:    .cfi_offset lr, 16
445; PWR7-NEXT:    mr r6, r4
446; PWR7-NEXT:    mr r4, r3
447; PWR7-NEXT:    mr r3, r5
448; PWR7-NEXT:    mr r5, r6
449; PWR7-NEXT:    li r6, 0
450; PWR7-NEXT:    bl __atomic_store_16
451; PWR7-NEXT:    nop
452; PWR7-NEXT:    addi r1, r1, 112
453; PWR7-NEXT:    ld r0, 16(r1)
454; PWR7-NEXT:    mtlr r0
455; PWR7-NEXT:    blr
456;
457; LE-PWR8-LABEL: stq_unordered:
458; LE-PWR8:       # %bb.0: # %entry
459; LE-PWR8-NEXT:    mr r7, r3
460; LE-PWR8-NEXT:    mr r6, r4
461; LE-PWR8-NEXT:    stq r6, 0(r5)
462; LE-PWR8-NEXT:    blr
463;
464; AIX64-PWR8-LABEL: stq_unordered:
465; AIX64-PWR8:       # %bb.0: # %entry
466; AIX64-PWR8-NEXT:    mr r7, r4
467; AIX64-PWR8-NEXT:    mr r6, r3
468; AIX64-PWR8-NEXT:    stq r6, 0(r5)
469; AIX64-PWR8-NEXT:    blr
470;
471; PPC-PWR8-LABEL: stq_unordered:
472; PPC-PWR8:       # %bb.0: # %entry
473; PPC-PWR8-NEXT:    mflr r0
474; PPC-PWR8-NEXT:    stwu r1, -32(r1)
475; PPC-PWR8-NEXT:    stw r0, 36(r1)
476; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
477; PPC-PWR8-NEXT:    .cfi_offset lr, 4
478; PPC-PWR8-NEXT:    stw r6, 28(r1)
479; PPC-PWR8-NEXT:    stw r5, 24(r1)
480; PPC-PWR8-NEXT:    addi r5, r1, 16
481; PPC-PWR8-NEXT:    li r6, 0
482; PPC-PWR8-NEXT:    stw r4, 20(r1)
483; PPC-PWR8-NEXT:    stw r3, 16(r1)
484; PPC-PWR8-NEXT:    li r3, 16
485; PPC-PWR8-NEXT:    mr r4, r7
486; PPC-PWR8-NEXT:    bl __atomic_store
487; PPC-PWR8-NEXT:    lwz r0, 36(r1)
488; PPC-PWR8-NEXT:    addi r1, r1, 32
489; PPC-PWR8-NEXT:    mtlr r0
490; PPC-PWR8-NEXT:    blr
491entry:
492  store atomic i128 %val, ptr %dst unordered, align 16
493  ret void
494}
495
; stqx_unordered - atomic `unordered` i128 store of %val to the element of %dst
; selected by the runtime index %idx (getelementptr i128); returns void.
; Expected lowering, as pinned by the assertions in this function:
;   * Every run scales the index by 16 with a shift-left by 4 (sldi, or slwi in
;     the 32-bit run) and adds it to the destination pointer.
;   * The 64-bit pwr8 runs copy the value halves into the r8/r9 pair and issue
;     `stq r8, 0(r3)` on the computed address; the little-endian run swaps
;     which half goes to r8 and which to r9.
;   * The pwr7 run calls __atomic_store_16 and the 32-bit run calls
;     __atomic_store, both on the computed address, with 0 in r6
;     (presumably the memory-order argument - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
496define dso_local void @stqx_unordered(i128 %val, ptr %dst, i64 %idx) {
497; P8-LABEL: stqx_unordered:
498; P8:       # %bb.0: # %entry
499; P8-NEXT:    sldi r6, r6, 4
500; P8-NEXT:    mr r9, r4
501; P8-NEXT:    mr r8, r3
502; P8-NEXT:    add r3, r5, r6
503; P8-NEXT:    stq r8, 0(r3)
504; P8-NEXT:    blr
505;
506; PWR7-LABEL: stqx_unordered:
507; PWR7:       # %bb.0: # %entry
508; PWR7-NEXT:    mflr r0
509; PWR7-NEXT:    stdu r1, -112(r1)
510; PWR7-NEXT:    std r0, 128(r1)
511; PWR7-NEXT:    .cfi_def_cfa_offset 112
512; PWR7-NEXT:    .cfi_offset lr, 16
513; PWR7-NEXT:    mr r7, r4
514; PWR7-NEXT:    mr r4, r3
515; PWR7-NEXT:    sldi r3, r6, 4
516; PWR7-NEXT:    li r6, 0
517; PWR7-NEXT:    add r3, r5, r3
518; PWR7-NEXT:    mr r5, r7
519; PWR7-NEXT:    bl __atomic_store_16
520; PWR7-NEXT:    nop
521; PWR7-NEXT:    addi r1, r1, 112
522; PWR7-NEXT:    ld r0, 16(r1)
523; PWR7-NEXT:    mtlr r0
524; PWR7-NEXT:    blr
525;
526; LE-PWR8-LABEL: stqx_unordered:
527; LE-PWR8:       # %bb.0: # %entry
528; LE-PWR8-NEXT:    sldi r6, r6, 4
529; LE-PWR8-NEXT:    mr r9, r3
530; LE-PWR8-NEXT:    mr r8, r4
531; LE-PWR8-NEXT:    add r3, r5, r6
532; LE-PWR8-NEXT:    stq r8, 0(r3)
533; LE-PWR8-NEXT:    blr
534;
535; AIX64-PWR8-LABEL: stqx_unordered:
536; AIX64-PWR8:       # %bb.0: # %entry
537; AIX64-PWR8-NEXT:    sldi r6, r6, 4
538; AIX64-PWR8-NEXT:    mr r9, r4
539; AIX64-PWR8-NEXT:    mr r8, r3
540; AIX64-PWR8-NEXT:    add r3, r5, r6
541; AIX64-PWR8-NEXT:    stq r8, 0(r3)
542; AIX64-PWR8-NEXT:    blr
543;
544; PPC-PWR8-LABEL: stqx_unordered:
545; PPC-PWR8:       # %bb.0: # %entry
546; PPC-PWR8-NEXT:    mflr r0
547; PPC-PWR8-NEXT:    stwu r1, -32(r1)
548; PPC-PWR8-NEXT:    stw r0, 36(r1)
549; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
550; PPC-PWR8-NEXT:    .cfi_offset lr, 4
551; PPC-PWR8-NEXT:    slwi r8, r10, 4
552; PPC-PWR8-NEXT:    stw r6, 28(r1)
553; PPC-PWR8-NEXT:    stw r5, 24(r1)
554; PPC-PWR8-NEXT:    addi r5, r1, 16
555; PPC-PWR8-NEXT:    stw r4, 20(r1)
556; PPC-PWR8-NEXT:    stw r3, 16(r1)
557; PPC-PWR8-NEXT:    li r3, 16
558; PPC-PWR8-NEXT:    add r6, r7, r8
559; PPC-PWR8-NEXT:    mr r4, r6
560; PPC-PWR8-NEXT:    li r6, 0
561; PPC-PWR8-NEXT:    bl __atomic_store
562; PPC-PWR8-NEXT:    lwz r0, 36(r1)
563; PPC-PWR8-NEXT:    addi r1, r1, 32
564; PPC-PWR8-NEXT:    mtlr r0
565; PPC-PWR8-NEXT:    blr
566entry:
567  %0 = getelementptr i128, ptr %dst, i64 %idx
568  store atomic i128 %val, ptr %0 unordered, align 16
569  ret void
570}
571
; stq_big_offset_unordered - atomic `unordered` i128 store of %val to element
; 131072 of %dst, i.e. a constant byte offset of 131072 * 16 = 32 << 16;
; returns void.
; Expected lowering, as pinned by the assertions in this function:
;   * The 64-bit pwr8 runs materialize the offset with `lis r3, 32`, add it to
;     the destination and issue `stq r6, 0(r3)` with a zero displacement
;     (presumably because the offset is too large for the stq displacement
;     field - TODO confirm).
;   * The pwr7 and 32-bit runs fold the offset with `addis ..., 32` and call
;     __atomic_store_16 / __atomic_store with 0 in r6.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
571define dso_local void @stq_big_offset_unordered(i128 %val, ptr %dst) {
572; P8-LABEL: stq_big_offset_unordered:
573; P8:       # %bb.0: # %entry
574; P8-NEXT:    mr r7, r4
575; P8-NEXT:    mr r6, r3
576; P8-NEXT:    lis r3, 32
577; P8-NEXT:    add r3, r5, r3
578; P8-NEXT:    stq r6, 0(r3)
579; P8-NEXT:    blr
580;
581; PWR7-LABEL: stq_big_offset_unordered:
582; PWR7:       # %bb.0: # %entry
583; PWR7-NEXT:    mflr r0
584; PWR7-NEXT:    stdu r1, -112(r1)
585; PWR7-NEXT:    std r0, 128(r1)
586; PWR7-NEXT:    .cfi_def_cfa_offset 112
587; PWR7-NEXT:    .cfi_offset lr, 16
588; PWR7-NEXT:    mr r6, r4
589; PWR7-NEXT:    mr r4, r3
590; PWR7-NEXT:    addis r3, r5, 32
591; PWR7-NEXT:    mr r5, r6
592; PWR7-NEXT:    li r6, 0
593; PWR7-NEXT:    bl __atomic_store_16
594; PWR7-NEXT:    nop
595; PWR7-NEXT:    addi r1, r1, 112
596; PWR7-NEXT:    ld r0, 16(r1)
597; PWR7-NEXT:    mtlr r0
598; PWR7-NEXT:    blr
599;
600; LE-PWR8-LABEL: stq_big_offset_unordered:
601; LE-PWR8:       # %bb.0: # %entry
602; LE-PWR8-NEXT:    mr r7, r3
603; LE-PWR8-NEXT:    mr r6, r4
604; LE-PWR8-NEXT:    lis r3, 32
605; LE-PWR8-NEXT:    add r3, r5, r3
606; LE-PWR8-NEXT:    stq r6, 0(r3)
607; LE-PWR8-NEXT:    blr
608;
609; AIX64-PWR8-LABEL: stq_big_offset_unordered:
610; AIX64-PWR8:       # %bb.0: # %entry
611; AIX64-PWR8-NEXT:    mr r7, r4
612; AIX64-PWR8-NEXT:    mr r6, r3
613; AIX64-PWR8-NEXT:    lis r3, 32
614; AIX64-PWR8-NEXT:    add r3, r5, r3
615; AIX64-PWR8-NEXT:    stq r6, 0(r3)
616; AIX64-PWR8-NEXT:    blr
617;
618; PPC-PWR8-LABEL: stq_big_offset_unordered:
619; PPC-PWR8:       # %bb.0: # %entry
620; PPC-PWR8-NEXT:    mflr r0
621; PPC-PWR8-NEXT:    stwu r1, -32(r1)
622; PPC-PWR8-NEXT:    stw r0, 36(r1)
623; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
624; PPC-PWR8-NEXT:    .cfi_offset lr, 4
625; PPC-PWR8-NEXT:    stw r6, 28(r1)
626; PPC-PWR8-NEXT:    addis r6, r7, 32
627; PPC-PWR8-NEXT:    stw r5, 24(r1)
628; PPC-PWR8-NEXT:    addi r5, r1, 16
629; PPC-PWR8-NEXT:    stw r4, 20(r1)
630; PPC-PWR8-NEXT:    stw r3, 16(r1)
631; PPC-PWR8-NEXT:    li r3, 16
632; PPC-PWR8-NEXT:    mr r4, r6
633; PPC-PWR8-NEXT:    li r6, 0
634; PPC-PWR8-NEXT:    bl __atomic_store
635; PPC-PWR8-NEXT:    lwz r0, 36(r1)
636; PPC-PWR8-NEXT:    addi r1, r1, 32
637; PPC-PWR8-NEXT:    mtlr r0
638; PPC-PWR8-NEXT:    blr
639entry:
640  %0 = getelementptr i128, ptr %dst, i64 131072
641  store atomic i128 %val, ptr %0 unordered, align 16
642  ret void
643}
645
; stq_monotonic - atomic i128 store of %val to %dst with `monotonic` ordering
; and 16-byte alignment; returns void.
; The assertions in this function expect the same code as for the `unordered`
; store in this file: the 64-bit pwr8 runs issue a bare `stq r6, 0(r5)` with no
; barrier instructions, and the pwr7 / 32-bit runs call __atomic_store_16 /
; __atomic_store with 0 in r6 (presumably the memory-order argument
; - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
646define dso_local void @stq_monotonic(i128 %val, ptr %dst) {
647; P8-LABEL: stq_monotonic:
648; P8:       # %bb.0: # %entry
649; P8-NEXT:    mr r7, r4
650; P8-NEXT:    mr r6, r3
651; P8-NEXT:    stq r6, 0(r5)
652; P8-NEXT:    blr
653;
654; PWR7-LABEL: stq_monotonic:
655; PWR7:       # %bb.0: # %entry
656; PWR7-NEXT:    mflr r0
657; PWR7-NEXT:    stdu r1, -112(r1)
658; PWR7-NEXT:    std r0, 128(r1)
659; PWR7-NEXT:    .cfi_def_cfa_offset 112
660; PWR7-NEXT:    .cfi_offset lr, 16
661; PWR7-NEXT:    mr r6, r4
662; PWR7-NEXT:    mr r4, r3
663; PWR7-NEXT:    mr r3, r5
664; PWR7-NEXT:    mr r5, r6
665; PWR7-NEXT:    li r6, 0
666; PWR7-NEXT:    bl __atomic_store_16
667; PWR7-NEXT:    nop
668; PWR7-NEXT:    addi r1, r1, 112
669; PWR7-NEXT:    ld r0, 16(r1)
670; PWR7-NEXT:    mtlr r0
671; PWR7-NEXT:    blr
672;
673; LE-PWR8-LABEL: stq_monotonic:
674; LE-PWR8:       # %bb.0: # %entry
675; LE-PWR8-NEXT:    mr r7, r3
676; LE-PWR8-NEXT:    mr r6, r4
677; LE-PWR8-NEXT:    stq r6, 0(r5)
678; LE-PWR8-NEXT:    blr
679;
680; AIX64-PWR8-LABEL: stq_monotonic:
681; AIX64-PWR8:       # %bb.0: # %entry
682; AIX64-PWR8-NEXT:    mr r7, r4
683; AIX64-PWR8-NEXT:    mr r6, r3
684; AIX64-PWR8-NEXT:    stq r6, 0(r5)
685; AIX64-PWR8-NEXT:    blr
686;
687; PPC-PWR8-LABEL: stq_monotonic:
688; PPC-PWR8:       # %bb.0: # %entry
689; PPC-PWR8-NEXT:    mflr r0
690; PPC-PWR8-NEXT:    stwu r1, -32(r1)
691; PPC-PWR8-NEXT:    stw r0, 36(r1)
692; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
693; PPC-PWR8-NEXT:    .cfi_offset lr, 4
694; PPC-PWR8-NEXT:    stw r6, 28(r1)
695; PPC-PWR8-NEXT:    stw r5, 24(r1)
696; PPC-PWR8-NEXT:    addi r5, r1, 16
697; PPC-PWR8-NEXT:    li r6, 0
698; PPC-PWR8-NEXT:    stw r4, 20(r1)
699; PPC-PWR8-NEXT:    stw r3, 16(r1)
700; PPC-PWR8-NEXT:    li r3, 16
701; PPC-PWR8-NEXT:    mr r4, r7
702; PPC-PWR8-NEXT:    bl __atomic_store
703; PPC-PWR8-NEXT:    lwz r0, 36(r1)
704; PPC-PWR8-NEXT:    addi r1, r1, 32
705; PPC-PWR8-NEXT:    mtlr r0
706; PPC-PWR8-NEXT:    blr
707entry:
708  store atomic i128 %val, ptr %dst monotonic, align 16
709  ret void
710}
711
; stq_release - atomic i128 store of %val to %dst with `release` ordering and
; 16-byte alignment; returns void.
; Expected lowering, as pinned by the assertions in this function:
;   * The 64-bit pwr8 runs issue a leading `lwsync`, then copy the value halves
;     into the r6/r7 pair and issue `stq r6, 0(r5)`.
;   * The pwr7 run calls __atomic_store_16 and the 32-bit run calls
;     __atomic_store, both with 3 in r6 (presumably the memory-order argument,
;     where the unordered/monotonic tests in this file pass 0 - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
712define dso_local void @stq_release(i128 %val, ptr %dst) {
713; P8-LABEL: stq_release:
714; P8:       # %bb.0: # %entry
715; P8-NEXT:    lwsync
716; P8-NEXT:    mr r7, r4
717; P8-NEXT:    mr r6, r3
718; P8-NEXT:    stq r6, 0(r5)
719; P8-NEXT:    blr
720;
721; PWR7-LABEL: stq_release:
722; PWR7:       # %bb.0: # %entry
723; PWR7-NEXT:    mflr r0
724; PWR7-NEXT:    stdu r1, -112(r1)
725; PWR7-NEXT:    std r0, 128(r1)
726; PWR7-NEXT:    .cfi_def_cfa_offset 112
727; PWR7-NEXT:    .cfi_offset lr, 16
728; PWR7-NEXT:    mr r6, r4
729; PWR7-NEXT:    mr r4, r3
730; PWR7-NEXT:    mr r3, r5
731; PWR7-NEXT:    mr r5, r6
732; PWR7-NEXT:    li r6, 3
733; PWR7-NEXT:    bl __atomic_store_16
734; PWR7-NEXT:    nop
735; PWR7-NEXT:    addi r1, r1, 112
736; PWR7-NEXT:    ld r0, 16(r1)
737; PWR7-NEXT:    mtlr r0
738; PWR7-NEXT:    blr
739;
740; LE-PWR8-LABEL: stq_release:
741; LE-PWR8:       # %bb.0: # %entry
742; LE-PWR8-NEXT:    lwsync
743; LE-PWR8-NEXT:    mr r7, r3
744; LE-PWR8-NEXT:    mr r6, r4
745; LE-PWR8-NEXT:    stq r6, 0(r5)
746; LE-PWR8-NEXT:    blr
747;
748; AIX64-PWR8-LABEL: stq_release:
749; AIX64-PWR8:       # %bb.0: # %entry
750; AIX64-PWR8-NEXT:    lwsync
751; AIX64-PWR8-NEXT:    mr r7, r4
752; AIX64-PWR8-NEXT:    mr r6, r3
753; AIX64-PWR8-NEXT:    stq r6, 0(r5)
754; AIX64-PWR8-NEXT:    blr
755;
756; PPC-PWR8-LABEL: stq_release:
757; PPC-PWR8:       # %bb.0: # %entry
758; PPC-PWR8-NEXT:    mflr r0
759; PPC-PWR8-NEXT:    stwu r1, -32(r1)
760; PPC-PWR8-NEXT:    stw r0, 36(r1)
761; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
762; PPC-PWR8-NEXT:    .cfi_offset lr, 4
763; PPC-PWR8-NEXT:    stw r6, 28(r1)
764; PPC-PWR8-NEXT:    stw r5, 24(r1)
765; PPC-PWR8-NEXT:    addi r5, r1, 16
766; PPC-PWR8-NEXT:    li r6, 3
767; PPC-PWR8-NEXT:    stw r4, 20(r1)
768; PPC-PWR8-NEXT:    stw r3, 16(r1)
769; PPC-PWR8-NEXT:    li r3, 16
770; PPC-PWR8-NEXT:    mr r4, r7
771; PPC-PWR8-NEXT:    bl __atomic_store
772; PPC-PWR8-NEXT:    lwz r0, 36(r1)
773; PPC-PWR8-NEXT:    addi r1, r1, 32
774; PPC-PWR8-NEXT:    mtlr r0
775; PPC-PWR8-NEXT:    blr
776entry:
777  store atomic i128 %val, ptr %dst release, align 16
778  ret void
779}
780
; stq_seqcst - atomic i128 store of %val to %dst with `seq_cst` ordering and
; 16-byte alignment; returns void.
; Expected lowering, as pinned by the assertions in this function:
;   * The 64-bit pwr8 runs issue a leading `sync`, then copy the value halves
;     into the r6/r7 pair and issue `stq r6, 0(r5)`; no barrier instruction is
;     expected after the store.
;   * The pwr7 run calls __atomic_store_16 and the 32-bit run calls
;     __atomic_store, both with 5 in r6 (presumably the memory-order argument
;     - TODO confirm).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
781define dso_local void @stq_seqcst(i128 %val, ptr %dst) {
782; P8-LABEL: stq_seqcst:
783; P8:       # %bb.0: # %entry
784; P8-NEXT:    sync
785; P8-NEXT:    mr r7, r4
786; P8-NEXT:    mr r6, r3
787; P8-NEXT:    stq r6, 0(r5)
788; P8-NEXT:    blr
789;
790; PWR7-LABEL: stq_seqcst:
791; PWR7:       # %bb.0: # %entry
792; PWR7-NEXT:    mflr r0
793; PWR7-NEXT:    stdu r1, -112(r1)
794; PWR7-NEXT:    std r0, 128(r1)
795; PWR7-NEXT:    .cfi_def_cfa_offset 112
796; PWR7-NEXT:    .cfi_offset lr, 16
797; PWR7-NEXT:    mr r6, r4
798; PWR7-NEXT:    mr r4, r3
799; PWR7-NEXT:    mr r3, r5
800; PWR7-NEXT:    mr r5, r6
801; PWR7-NEXT:    li r6, 5
802; PWR7-NEXT:    bl __atomic_store_16
803; PWR7-NEXT:    nop
804; PWR7-NEXT:    addi r1, r1, 112
805; PWR7-NEXT:    ld r0, 16(r1)
806; PWR7-NEXT:    mtlr r0
807; PWR7-NEXT:    blr
808;
809; LE-PWR8-LABEL: stq_seqcst:
810; LE-PWR8:       # %bb.0: # %entry
811; LE-PWR8-NEXT:    sync
812; LE-PWR8-NEXT:    mr r7, r3
813; LE-PWR8-NEXT:    mr r6, r4
814; LE-PWR8-NEXT:    stq r6, 0(r5)
815; LE-PWR8-NEXT:    blr
816;
817; AIX64-PWR8-LABEL: stq_seqcst:
818; AIX64-PWR8:       # %bb.0: # %entry
819; AIX64-PWR8-NEXT:    sync
820; AIX64-PWR8-NEXT:    mr r7, r4
821; AIX64-PWR8-NEXT:    mr r6, r3
822; AIX64-PWR8-NEXT:    stq r6, 0(r5)
823; AIX64-PWR8-NEXT:    blr
824;
825; PPC-PWR8-LABEL: stq_seqcst:
826; PPC-PWR8:       # %bb.0: # %entry
827; PPC-PWR8-NEXT:    mflr r0
828; PPC-PWR8-NEXT:    stwu r1, -32(r1)
829; PPC-PWR8-NEXT:    stw r0, 36(r1)
830; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
831; PPC-PWR8-NEXT:    .cfi_offset lr, 4
832; PPC-PWR8-NEXT:    stw r6, 28(r1)
833; PPC-PWR8-NEXT:    stw r5, 24(r1)
834; PPC-PWR8-NEXT:    addi r5, r1, 16
835; PPC-PWR8-NEXT:    li r6, 5
836; PPC-PWR8-NEXT:    stw r4, 20(r1)
837; PPC-PWR8-NEXT:    stw r3, 16(r1)
838; PPC-PWR8-NEXT:    li r3, 16
839; PPC-PWR8-NEXT:    mr r4, r7
840; PPC-PWR8-NEXT:    bl __atomic_store
841; PPC-PWR8-NEXT:    lwz r0, 36(r1)
842; PPC-PWR8-NEXT:    addi r1, r1, 32
843; PPC-PWR8-NEXT:    mtlr r0
844; PPC-PWR8-NEXT:    blr
845entry:
846  store atomic i128 %val, ptr %dst seq_cst, align 16
847  ret void
848}
849