xref: /llvm-project/llvm/test/CodeGen/PowerPC/atomics-i128.ll (revision 706e1975400b3f30bd406b694bb711a7c7dbe1c4)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr8 \
3; RUN:   -ppc-asm-full-reg-names \
4; RUN:   -enable-subreg-liveness < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
6; RUN:   -ppc-asm-full-reg-names \
7; RUN:   -enable-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
9; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
10; RUN:   --check-prefix=LE-PWR8 %s
11; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \
12; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
13; RUN:   --check-prefix=LE-PWR8 %s
14; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \
15; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
16; RUN:   --check-prefix=AIX64-PWR8 %s
17
18; On 32-bit PPC platforms, 16-byte lock-free atomic instructions are not available,
19; so inlined lock-free code is not expected to be generated on such platforms, even
20; when the arch level is pwr8+ and `-ppc-quadword-atomics` is on.
21; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \
22; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s \
23; RUN: | FileCheck --check-prefix=PPC-PWR8 %s
24
25
; swap: seq_cst, 16-byte-aligned `atomicrmw xchg` of the i128 at %a with %x;
; the function returns the result of the atomicrmw.
; Expected lowering, as recorded by the assertions below:
;   - 64-bit pwr8 runs (big-endian, little-endian, AIX): `sync`, an inline
;     lqarx/stqcx. retry loop, then `lwsync`.
;   - pwr7 run: a call to __atomic_exchange_16 with 5 in r6 (presumably the
;     seq_cst memory-order constant; confirm against the libatomic ABI).
;   - 32-bit pwr8 run: a call to the generic __atomic_exchange with 16 in r3,
;     passing the operands through stack slots.
26define i128 @swap(ptr %a, i128 %x) {
27; CHECK-LABEL: swap:
28; CHECK:       # %bb.0: # %entry
29; CHECK-NEXT:    sync
30; CHECK-NEXT:  .LBB0_1: # %entry
31; CHECK-NEXT:    #
32; CHECK-NEXT:    lqarx r6, 0, r3
33; CHECK-NEXT:    mr r9, r5
34; CHECK-NEXT:    mr r8, r4
35; CHECK-NEXT:    stqcx. r8, 0, r3
36; CHECK-NEXT:    bne cr0, .LBB0_1
37; CHECK-NEXT:  # %bb.2: # %entry
38; CHECK-NEXT:    lwsync
39; CHECK-NEXT:    mr r3, r6
40; CHECK-NEXT:    mr r4, r7
41; CHECK-NEXT:    blr
42;
43; PWR7-LABEL: swap:
44; PWR7:       # %bb.0: # %entry
45; PWR7-NEXT:    mflr r0
46; PWR7-NEXT:    stdu r1, -112(r1)
47; PWR7-NEXT:    std r0, 128(r1)
48; PWR7-NEXT:    .cfi_def_cfa_offset 112
49; PWR7-NEXT:    .cfi_offset lr, 16
50; PWR7-NEXT:    li r6, 5
51; PWR7-NEXT:    bl __atomic_exchange_16
52; PWR7-NEXT:    nop
53; PWR7-NEXT:    addi r1, r1, 112
54; PWR7-NEXT:    ld r0, 16(r1)
55; PWR7-NEXT:    mtlr r0
56; PWR7-NEXT:    blr
57;
58; LE-PWR8-LABEL: swap:
59; LE-PWR8:       # %bb.0: # %entry
60; LE-PWR8-NEXT:    sync
61; LE-PWR8-NEXT:  .LBB0_1: # %entry
62; LE-PWR8-NEXT:    #
63; LE-PWR8-NEXT:    lqarx r6, 0, r3
64; LE-PWR8-NEXT:    mr r9, r4
65; LE-PWR8-NEXT:    mr r8, r5
66; LE-PWR8-NEXT:    stqcx. r8, 0, r3
67; LE-PWR8-NEXT:    bne cr0, .LBB0_1
68; LE-PWR8-NEXT:  # %bb.2: # %entry
69; LE-PWR8-NEXT:    lwsync
70; LE-PWR8-NEXT:    mr r3, r7
71; LE-PWR8-NEXT:    mr r4, r6
72; LE-PWR8-NEXT:    blr
73;
74; AIX64-PWR8-LABEL: swap:
75; AIX64-PWR8:       # %bb.0: # %entry
76; AIX64-PWR8-NEXT:    sync
77; AIX64-PWR8-NEXT:  L..BB0_1: # %entry
78; AIX64-PWR8-NEXT:    #
79; AIX64-PWR8-NEXT:    lqarx r6, 0, r3
80; AIX64-PWR8-NEXT:    mr r9, r5
81; AIX64-PWR8-NEXT:    mr r8, r4
82; AIX64-PWR8-NEXT:    stqcx. r8, 0, r3
83; AIX64-PWR8-NEXT:    bne cr0, L..BB0_1
84; AIX64-PWR8-NEXT:  # %bb.2: # %entry
85; AIX64-PWR8-NEXT:    lwsync
86; AIX64-PWR8-NEXT:    mr r3, r6
87; AIX64-PWR8-NEXT:    mr r4, r7
88; AIX64-PWR8-NEXT:    blr
89;
90; PPC-PWR8-LABEL: swap:
91; PPC-PWR8:       # %bb.0: # %entry
92; PPC-PWR8-NEXT:    mflr r0
93; PPC-PWR8-NEXT:    stwu r1, -48(r1)
94; PPC-PWR8-NEXT:    stw r0, 52(r1)
95; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
96; PPC-PWR8-NEXT:    .cfi_offset lr, 4
97; PPC-PWR8-NEXT:    mr r4, r3
98; PPC-PWR8-NEXT:    stw r7, 40(r1)
99; PPC-PWR8-NEXT:    stw r6, 36(r1)
100; PPC-PWR8-NEXT:    addi r6, r1, 16
101; PPC-PWR8-NEXT:    stw r5, 32(r1)
102; PPC-PWR8-NEXT:    addi r5, r1, 32
103; PPC-PWR8-NEXT:    li r3, 16
104; PPC-PWR8-NEXT:    li r7, 5
105; PPC-PWR8-NEXT:    stw r8, 44(r1)
106; PPC-PWR8-NEXT:    bl __atomic_exchange
107; PPC-PWR8-NEXT:    lwz r6, 28(r1)
108; PPC-PWR8-NEXT:    lwz r5, 24(r1)
109; PPC-PWR8-NEXT:    lwz r4, 20(r1)
110; PPC-PWR8-NEXT:    lwz r3, 16(r1)
111; PPC-PWR8-NEXT:    lwz r0, 52(r1)
112; PPC-PWR8-NEXT:    addi r1, r1, 48
113; PPC-PWR8-NEXT:    mtlr r0
114; PPC-PWR8-NEXT:    blr
115entry:
; The single operation under test; everything above is its expected codegen.
116  %0 = atomicrmw xchg ptr %a, i128 %x seq_cst, align 16
117  ret i128 %0
118}
119
; add: seq_cst, 16-byte-aligned `atomicrmw add` of %x to the i128 at %a;
; the function returns the result of the atomicrmw.
; Expected lowering, as recorded by the assertions below:
;   - 64-bit pwr8 runs: `sync`, an inline lqarx/stqcx. retry loop that forms
;     the sum with addc/adde, then `lwsync`.
;   - pwr7 run: a call to __atomic_fetch_add_16 with 5 in r6 (presumably the
;     seq_cst memory-order constant; confirm against the libatomic ABI).
;   - 32-bit pwr8 run: loads the four words at %a, then loops computing the
;     sum with an addc/adde chain and calling the generic
;     __atomic_compare_exchange (size 16 in r3) until it returns nonzero.
120define i128 @add(ptr %a, i128 %x) {
121; CHECK-LABEL: add:
122; CHECK:       # %bb.0: # %entry
123; CHECK-NEXT:    sync
124; CHECK-NEXT:  .LBB1_1: # %entry
125; CHECK-NEXT:    #
126; CHECK-NEXT:    lqarx r6, 0, r3
127; CHECK-NEXT:    addc r9, r5, r7
128; CHECK-NEXT:    adde r8, r4, r6
129; CHECK-NEXT:    stqcx. r8, 0, r3
130; CHECK-NEXT:    bne cr0, .LBB1_1
131; CHECK-NEXT:  # %bb.2: # %entry
132; CHECK-NEXT:    lwsync
133; CHECK-NEXT:    mr r3, r6
134; CHECK-NEXT:    mr r4, r7
135; CHECK-NEXT:    blr
136;
137; PWR7-LABEL: add:
138; PWR7:       # %bb.0: # %entry
139; PWR7-NEXT:    mflr r0
140; PWR7-NEXT:    stdu r1, -112(r1)
141; PWR7-NEXT:    std r0, 128(r1)
142; PWR7-NEXT:    .cfi_def_cfa_offset 112
143; PWR7-NEXT:    .cfi_offset lr, 16
144; PWR7-NEXT:    li r6, 5
145; PWR7-NEXT:    bl __atomic_fetch_add_16
146; PWR7-NEXT:    nop
147; PWR7-NEXT:    addi r1, r1, 112
148; PWR7-NEXT:    ld r0, 16(r1)
149; PWR7-NEXT:    mtlr r0
150; PWR7-NEXT:    blr
151;
152; LE-PWR8-LABEL: add:
153; LE-PWR8:       # %bb.0: # %entry
154; LE-PWR8-NEXT:    sync
155; LE-PWR8-NEXT:  .LBB1_1: # %entry
156; LE-PWR8-NEXT:    #
157; LE-PWR8-NEXT:    lqarx r6, 0, r3
158; LE-PWR8-NEXT:    addc r9, r4, r7
159; LE-PWR8-NEXT:    adde r8, r5, r6
160; LE-PWR8-NEXT:    stqcx. r8, 0, r3
161; LE-PWR8-NEXT:    bne cr0, .LBB1_1
162; LE-PWR8-NEXT:  # %bb.2: # %entry
163; LE-PWR8-NEXT:    lwsync
164; LE-PWR8-NEXT:    mr r3, r7
165; LE-PWR8-NEXT:    mr r4, r6
166; LE-PWR8-NEXT:    blr
167;
168; AIX64-PWR8-LABEL: add:
169; AIX64-PWR8:       # %bb.0: # %entry
170; AIX64-PWR8-NEXT:    sync
171; AIX64-PWR8-NEXT:  L..BB1_1: # %entry
172; AIX64-PWR8-NEXT:    #
173; AIX64-PWR8-NEXT:    lqarx r6, 0, r3
174; AIX64-PWR8-NEXT:    addc r9, r5, r7
175; AIX64-PWR8-NEXT:    adde r8, r4, r6
176; AIX64-PWR8-NEXT:    stqcx. r8, 0, r3
177; AIX64-PWR8-NEXT:    bne cr0, L..BB1_1
178; AIX64-PWR8-NEXT:  # %bb.2: # %entry
179; AIX64-PWR8-NEXT:    lwsync
180; AIX64-PWR8-NEXT:    mr r3, r6
181; AIX64-PWR8-NEXT:    mr r4, r7
182; AIX64-PWR8-NEXT:    blr
183;
184; PPC-PWR8-LABEL: add:
185; PPC-PWR8:       # %bb.0: # %entry
186; PPC-PWR8-NEXT:    mflr r0
187; PPC-PWR8-NEXT:    stwu r1, -80(r1)
188; PPC-PWR8-NEXT:    stw r0, 84(r1)
189; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
190; PPC-PWR8-NEXT:    .cfi_offset lr, 4
191; PPC-PWR8-NEXT:    .cfi_offset r24, -32
192; PPC-PWR8-NEXT:    .cfi_offset r25, -28
193; PPC-PWR8-NEXT:    .cfi_offset r26, -24
194; PPC-PWR8-NEXT:    .cfi_offset r27, -20
195; PPC-PWR8-NEXT:    .cfi_offset r28, -16
196; PPC-PWR8-NEXT:    .cfi_offset r29, -12
197; PPC-PWR8-NEXT:    .cfi_offset r30, -8
198; PPC-PWR8-NEXT:    stw r26, 56(r1) # 4-byte Folded Spill
199; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
200; PPC-PWR8-NEXT:    mr r27, r5
201; PPC-PWR8-NEXT:    mr r26, r3
202; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
203; PPC-PWR8-NEXT:    mr r28, r6
204; PPC-PWR8-NEXT:    lwz r6, 12(r3)
205; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
206; PPC-PWR8-NEXT:    lwz r5, 8(r3)
207; PPC-PWR8-NEXT:    lwz r4, 4(r3)
208; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
209; PPC-PWR8-NEXT:    addi r25, r1, 32
210; PPC-PWR8-NEXT:    lwz r3, 0(r3)
211; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
212; PPC-PWR8-NEXT:    mr r29, r7
213; PPC-PWR8-NEXT:    addi r24, r1, 16
214; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
215; PPC-PWR8-NEXT:    mr r30, r8
216; PPC-PWR8-NEXT:    .p2align 4
217; PPC-PWR8-NEXT:  .LBB1_1: # %atomicrmw.start
218; PPC-PWR8-NEXT:    #
219; PPC-PWR8-NEXT:    addc r7, r6, r30
220; PPC-PWR8-NEXT:    stw r4, 36(r1)
221; PPC-PWR8-NEXT:    stw r3, 32(r1)
222; PPC-PWR8-NEXT:    adde r8, r5, r29
223; PPC-PWR8-NEXT:    stw r5, 40(r1)
224; PPC-PWR8-NEXT:    stw r6, 44(r1)
225; PPC-PWR8-NEXT:    mr r5, r25
226; PPC-PWR8-NEXT:    mr r6, r24
227; PPC-PWR8-NEXT:    adde r4, r4, r28
228; PPC-PWR8-NEXT:    stw r7, 28(r1)
229; PPC-PWR8-NEXT:    li r7, 5
230; PPC-PWR8-NEXT:    adde r3, r3, r27
231; PPC-PWR8-NEXT:    stw r8, 24(r1)
232; PPC-PWR8-NEXT:    li r8, 5
233; PPC-PWR8-NEXT:    stw r4, 20(r1)
234; PPC-PWR8-NEXT:    mr r4, r26
235; PPC-PWR8-NEXT:    stw r3, 16(r1)
236; PPC-PWR8-NEXT:    li r3, 16
237; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
238; PPC-PWR8-NEXT:    mr r7, r3
239; PPC-PWR8-NEXT:    lwz r6, 44(r1)
240; PPC-PWR8-NEXT:    lwz r5, 40(r1)
241; PPC-PWR8-NEXT:    lwz r4, 36(r1)
242; PPC-PWR8-NEXT:    lwz r3, 32(r1)
243; PPC-PWR8-NEXT:    cmplwi r7, 0
244; PPC-PWR8-NEXT:    beq cr0, .LBB1_1
245; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
246; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
247; PPC-PWR8-NEXT:    lwz r29, 68(r1) # 4-byte Folded Reload
248; PPC-PWR8-NEXT:    lwz r28, 64(r1) # 4-byte Folded Reload
249; PPC-PWR8-NEXT:    lwz r27, 60(r1) # 4-byte Folded Reload
250; PPC-PWR8-NEXT:    lwz r26, 56(r1) # 4-byte Folded Reload
251; PPC-PWR8-NEXT:    lwz r25, 52(r1) # 4-byte Folded Reload
252; PPC-PWR8-NEXT:    lwz r24, 48(r1) # 4-byte Folded Reload
253; PPC-PWR8-NEXT:    lwz r0, 84(r1)
254; PPC-PWR8-NEXT:    addi r1, r1, 80
255; PPC-PWR8-NEXT:    mtlr r0
256; PPC-PWR8-NEXT:    blr
257entry:
; The single operation under test; everything above is its expected codegen.
258  %0 = atomicrmw add ptr %a, i128 %x seq_cst, align 16
259  ret i128 %0
260}
261
; sub: seq_cst, 16-byte-aligned `atomicrmw sub` of %x from the i128 at %a;
; the function returns the result of the atomicrmw.
; Expected lowering, as recorded by the assertions below:
;   - 64-bit pwr8 runs: `sync`, an inline lqarx/stqcx. retry loop that forms
;     the difference with subc/subfe, then `lwsync`.
;   - pwr7 run: a call to __atomic_fetch_sub_16 with 5 in r6 (presumably the
;     seq_cst memory-order constant; confirm against the libatomic ABI).
;   - 32-bit pwr8 run: loads the four words at %a, then loops computing the
;     difference with a subc/subfe chain and calling the generic
;     __atomic_compare_exchange (size 16 in r3) until it returns nonzero.
262define i128 @sub(ptr %a, i128 %x) {
263; CHECK-LABEL: sub:
264; CHECK:       # %bb.0: # %entry
265; CHECK-NEXT:    sync
266; CHECK-NEXT:  .LBB2_1: # %entry
267; CHECK-NEXT:    #
268; CHECK-NEXT:    lqarx r6, 0, r3
269; CHECK-NEXT:    subc r9, r7, r5
270; CHECK-NEXT:    subfe r8, r4, r6
271; CHECK-NEXT:    stqcx. r8, 0, r3
272; CHECK-NEXT:    bne cr0, .LBB2_1
273; CHECK-NEXT:  # %bb.2: # %entry
274; CHECK-NEXT:    lwsync
275; CHECK-NEXT:    mr r3, r6
276; CHECK-NEXT:    mr r4, r7
277; CHECK-NEXT:    blr
278;
279; PWR7-LABEL: sub:
280; PWR7:       # %bb.0: # %entry
281; PWR7-NEXT:    mflr r0
282; PWR7-NEXT:    stdu r1, -112(r1)
283; PWR7-NEXT:    std r0, 128(r1)
284; PWR7-NEXT:    .cfi_def_cfa_offset 112
285; PWR7-NEXT:    .cfi_offset lr, 16
286; PWR7-NEXT:    li r6, 5
287; PWR7-NEXT:    bl __atomic_fetch_sub_16
288; PWR7-NEXT:    nop
289; PWR7-NEXT:    addi r1, r1, 112
290; PWR7-NEXT:    ld r0, 16(r1)
291; PWR7-NEXT:    mtlr r0
292; PWR7-NEXT:    blr
293;
294; LE-PWR8-LABEL: sub:
295; LE-PWR8:       # %bb.0: # %entry
296; LE-PWR8-NEXT:    sync
297; LE-PWR8-NEXT:  .LBB2_1: # %entry
298; LE-PWR8-NEXT:    #
299; LE-PWR8-NEXT:    lqarx r6, 0, r3
300; LE-PWR8-NEXT:    subc r9, r7, r4
301; LE-PWR8-NEXT:    subfe r8, r5, r6
302; LE-PWR8-NEXT:    stqcx. r8, 0, r3
303; LE-PWR8-NEXT:    bne cr0, .LBB2_1
304; LE-PWR8-NEXT:  # %bb.2: # %entry
305; LE-PWR8-NEXT:    lwsync
306; LE-PWR8-NEXT:    mr r3, r7
307; LE-PWR8-NEXT:    mr r4, r6
308; LE-PWR8-NEXT:    blr
309;
310; AIX64-PWR8-LABEL: sub:
311; AIX64-PWR8:       # %bb.0: # %entry
312; AIX64-PWR8-NEXT:    sync
313; AIX64-PWR8-NEXT:  L..BB2_1: # %entry
314; AIX64-PWR8-NEXT:    #
315; AIX64-PWR8-NEXT:    lqarx r6, 0, r3
316; AIX64-PWR8-NEXT:    subc r9, r7, r5
317; AIX64-PWR8-NEXT:    subfe r8, r4, r6
318; AIX64-PWR8-NEXT:    stqcx. r8, 0, r3
319; AIX64-PWR8-NEXT:    bne cr0, L..BB2_1
320; AIX64-PWR8-NEXT:  # %bb.2: # %entry
321; AIX64-PWR8-NEXT:    lwsync
322; AIX64-PWR8-NEXT:    mr r3, r6
323; AIX64-PWR8-NEXT:    mr r4, r7
324; AIX64-PWR8-NEXT:    blr
325;
326; PPC-PWR8-LABEL: sub:
327; PPC-PWR8:       # %bb.0: # %entry
328; PPC-PWR8-NEXT:    mflr r0
329; PPC-PWR8-NEXT:    stwu r1, -80(r1)
330; PPC-PWR8-NEXT:    stw r0, 84(r1)
331; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
332; PPC-PWR8-NEXT:    .cfi_offset lr, 4
333; PPC-PWR8-NEXT:    .cfi_offset r24, -32
334; PPC-PWR8-NEXT:    .cfi_offset r25, -28
335; PPC-PWR8-NEXT:    .cfi_offset r26, -24
336; PPC-PWR8-NEXT:    .cfi_offset r27, -20
337; PPC-PWR8-NEXT:    .cfi_offset r28, -16
338; PPC-PWR8-NEXT:    .cfi_offset r29, -12
339; PPC-PWR8-NEXT:    .cfi_offset r30, -8
340; PPC-PWR8-NEXT:    stw r26, 56(r1) # 4-byte Folded Spill
341; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
342; PPC-PWR8-NEXT:    mr r27, r5
343; PPC-PWR8-NEXT:    mr r26, r3
344; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
345; PPC-PWR8-NEXT:    mr r28, r6
346; PPC-PWR8-NEXT:    lwz r6, 12(r3)
347; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
348; PPC-PWR8-NEXT:    lwz r5, 8(r3)
349; PPC-PWR8-NEXT:    lwz r4, 4(r3)
350; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
351; PPC-PWR8-NEXT:    addi r25, r1, 32
352; PPC-PWR8-NEXT:    lwz r3, 0(r3)
353; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
354; PPC-PWR8-NEXT:    mr r29, r7
355; PPC-PWR8-NEXT:    addi r24, r1, 16
356; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
357; PPC-PWR8-NEXT:    mr r30, r8
358; PPC-PWR8-NEXT:    .p2align 4
359; PPC-PWR8-NEXT:  .LBB2_1: # %atomicrmw.start
360; PPC-PWR8-NEXT:    #
361; PPC-PWR8-NEXT:    subc r7, r6, r30
362; PPC-PWR8-NEXT:    stw r4, 36(r1)
363; PPC-PWR8-NEXT:    stw r3, 32(r1)
364; PPC-PWR8-NEXT:    subfe r8, r29, r5
365; PPC-PWR8-NEXT:    stw r5, 40(r1)
366; PPC-PWR8-NEXT:    stw r6, 44(r1)
367; PPC-PWR8-NEXT:    mr r5, r25
368; PPC-PWR8-NEXT:    mr r6, r24
369; PPC-PWR8-NEXT:    subfe r4, r28, r4
370; PPC-PWR8-NEXT:    stw r7, 28(r1)
371; PPC-PWR8-NEXT:    li r7, 5
372; PPC-PWR8-NEXT:    subfe r3, r27, r3
373; PPC-PWR8-NEXT:    stw r8, 24(r1)
374; PPC-PWR8-NEXT:    li r8, 5
375; PPC-PWR8-NEXT:    stw r4, 20(r1)
376; PPC-PWR8-NEXT:    mr r4, r26
377; PPC-PWR8-NEXT:    stw r3, 16(r1)
378; PPC-PWR8-NEXT:    li r3, 16
379; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
380; PPC-PWR8-NEXT:    mr r7, r3
381; PPC-PWR8-NEXT:    lwz r6, 44(r1)
382; PPC-PWR8-NEXT:    lwz r5, 40(r1)
383; PPC-PWR8-NEXT:    lwz r4, 36(r1)
384; PPC-PWR8-NEXT:    lwz r3, 32(r1)
385; PPC-PWR8-NEXT:    cmplwi r7, 0
386; PPC-PWR8-NEXT:    beq cr0, .LBB2_1
387; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
388; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
389; PPC-PWR8-NEXT:    lwz r29, 68(r1) # 4-byte Folded Reload
390; PPC-PWR8-NEXT:    lwz r28, 64(r1) # 4-byte Folded Reload
391; PPC-PWR8-NEXT:    lwz r27, 60(r1) # 4-byte Folded Reload
392; PPC-PWR8-NEXT:    lwz r26, 56(r1) # 4-byte Folded Reload
393; PPC-PWR8-NEXT:    lwz r25, 52(r1) # 4-byte Folded Reload
394; PPC-PWR8-NEXT:    lwz r24, 48(r1) # 4-byte Folded Reload
395; PPC-PWR8-NEXT:    lwz r0, 84(r1)
396; PPC-PWR8-NEXT:    addi r1, r1, 80
397; PPC-PWR8-NEXT:    mtlr r0
398; PPC-PWR8-NEXT:    blr
399entry:
; The single operation under test; everything above is its expected codegen.
400  %0 = atomicrmw sub ptr %a, i128 %x seq_cst, align 16
401  ret i128 %0
402}
403
; and: seq_cst, 16-byte-aligned `atomicrmw and` of the i128 at %a with %x;
; the function returns the result of the atomicrmw.
; Expected lowering, as recorded by the assertions below:
;   - 64-bit pwr8 runs: `sync`, an inline lqarx/stqcx. retry loop that
;     combines the two doublewords with a pair of `and`s, then `lwsync`.
;   - pwr7 run: a call to __atomic_fetch_and_16 with 5 in r6 (presumably the
;     seq_cst memory-order constant; confirm against the libatomic ABI).
;   - 32-bit pwr8 run: loads the four words at %a, then loops computing the
;     new value with four `and`s and calling the generic
;     __atomic_compare_exchange (size 16 in r3) until it returns nonzero.
404define i128 @and(ptr %a, i128 %x) {
405; CHECK-LABEL: and:
406; CHECK:       # %bb.0: # %entry
407; CHECK-NEXT:    sync
408; CHECK-NEXT:  .LBB3_1: # %entry
409; CHECK-NEXT:    #
410; CHECK-NEXT:    lqarx r6, 0, r3
411; CHECK-NEXT:    and r9, r5, r7
412; CHECK-NEXT:    and r8, r4, r6
413; CHECK-NEXT:    stqcx. r8, 0, r3
414; CHECK-NEXT:    bne cr0, .LBB3_1
415; CHECK-NEXT:  # %bb.2: # %entry
416; CHECK-NEXT:    lwsync
417; CHECK-NEXT:    mr r3, r6
418; CHECK-NEXT:    mr r4, r7
419; CHECK-NEXT:    blr
420;
421; PWR7-LABEL: and:
422; PWR7:       # %bb.0: # %entry
423; PWR7-NEXT:    mflr r0
424; PWR7-NEXT:    stdu r1, -112(r1)
425; PWR7-NEXT:    std r0, 128(r1)
426; PWR7-NEXT:    .cfi_def_cfa_offset 112
427; PWR7-NEXT:    .cfi_offset lr, 16
428; PWR7-NEXT:    li r6, 5
429; PWR7-NEXT:    bl __atomic_fetch_and_16
430; PWR7-NEXT:    nop
431; PWR7-NEXT:    addi r1, r1, 112
432; PWR7-NEXT:    ld r0, 16(r1)
433; PWR7-NEXT:    mtlr r0
434; PWR7-NEXT:    blr
435;
436; LE-PWR8-LABEL: and:
437; LE-PWR8:       # %bb.0: # %entry
438; LE-PWR8-NEXT:    sync
439; LE-PWR8-NEXT:  .LBB3_1: # %entry
440; LE-PWR8-NEXT:    #
441; LE-PWR8-NEXT:    lqarx r6, 0, r3
442; LE-PWR8-NEXT:    and r9, r4, r7
443; LE-PWR8-NEXT:    and r8, r5, r6
444; LE-PWR8-NEXT:    stqcx. r8, 0, r3
445; LE-PWR8-NEXT:    bne cr0, .LBB3_1
446; LE-PWR8-NEXT:  # %bb.2: # %entry
447; LE-PWR8-NEXT:    lwsync
448; LE-PWR8-NEXT:    mr r3, r7
449; LE-PWR8-NEXT:    mr r4, r6
450; LE-PWR8-NEXT:    blr
451;
452; AIX64-PWR8-LABEL: and:
453; AIX64-PWR8:       # %bb.0: # %entry
454; AIX64-PWR8-NEXT:    sync
455; AIX64-PWR8-NEXT:  L..BB3_1: # %entry
456; AIX64-PWR8-NEXT:    #
457; AIX64-PWR8-NEXT:    lqarx r6, 0, r3
458; AIX64-PWR8-NEXT:    and r9, r5, r7
459; AIX64-PWR8-NEXT:    and r8, r4, r6
460; AIX64-PWR8-NEXT:    stqcx. r8, 0, r3
461; AIX64-PWR8-NEXT:    bne cr0, L..BB3_1
462; AIX64-PWR8-NEXT:  # %bb.2: # %entry
463; AIX64-PWR8-NEXT:    lwsync
464; AIX64-PWR8-NEXT:    mr r3, r6
465; AIX64-PWR8-NEXT:    mr r4, r7
466; AIX64-PWR8-NEXT:    blr
467;
468; PPC-PWR8-LABEL: and:
469; PPC-PWR8:       # %bb.0: # %entry
470; PPC-PWR8-NEXT:    mflr r0
471; PPC-PWR8-NEXT:    stwu r1, -80(r1)
472; PPC-PWR8-NEXT:    stw r0, 84(r1)
473; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
474; PPC-PWR8-NEXT:    .cfi_offset lr, 4
475; PPC-PWR8-NEXT:    .cfi_offset r24, -32
476; PPC-PWR8-NEXT:    .cfi_offset r25, -28
477; PPC-PWR8-NEXT:    .cfi_offset r26, -24
478; PPC-PWR8-NEXT:    .cfi_offset r27, -20
479; PPC-PWR8-NEXT:    .cfi_offset r28, -16
480; PPC-PWR8-NEXT:    .cfi_offset r29, -12
481; PPC-PWR8-NEXT:    .cfi_offset r30, -8
482; PPC-PWR8-NEXT:    stw r26, 56(r1) # 4-byte Folded Spill
483; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
484; PPC-PWR8-NEXT:    mr r27, r5
485; PPC-PWR8-NEXT:    mr r26, r3
486; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
487; PPC-PWR8-NEXT:    mr r28, r6
488; PPC-PWR8-NEXT:    lwz r6, 12(r3)
489; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
490; PPC-PWR8-NEXT:    lwz r5, 8(r3)
491; PPC-PWR8-NEXT:    lwz r4, 4(r3)
492; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
493; PPC-PWR8-NEXT:    addi r25, r1, 32
494; PPC-PWR8-NEXT:    lwz r3, 0(r3)
495; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
496; PPC-PWR8-NEXT:    mr r29, r7
497; PPC-PWR8-NEXT:    addi r24, r1, 16
498; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
499; PPC-PWR8-NEXT:    mr r30, r8
500; PPC-PWR8-NEXT:    .p2align 4
501; PPC-PWR8-NEXT:  .LBB3_1: # %atomicrmw.start
502; PPC-PWR8-NEXT:    #
503; PPC-PWR8-NEXT:    stw r3, 32(r1)
504; PPC-PWR8-NEXT:    and r3, r3, r27
505; PPC-PWR8-NEXT:    stw r4, 36(r1)
506; PPC-PWR8-NEXT:    and r4, r4, r28
507; PPC-PWR8-NEXT:    and r7, r5, r29
508; PPC-PWR8-NEXT:    stw r5, 40(r1)
509; PPC-PWR8-NEXT:    and r5, r6, r30
510; PPC-PWR8-NEXT:    stw r6, 44(r1)
511; PPC-PWR8-NEXT:    stw r5, 28(r1)
512; PPC-PWR8-NEXT:    stw r7, 24(r1)
513; PPC-PWR8-NEXT:    mr r5, r25
514; PPC-PWR8-NEXT:    li r7, 5
515; PPC-PWR8-NEXT:    stw r4, 20(r1)
516; PPC-PWR8-NEXT:    stw r3, 16(r1)
517; PPC-PWR8-NEXT:    li r3, 16
518; PPC-PWR8-NEXT:    mr r4, r26
519; PPC-PWR8-NEXT:    mr r6, r24
520; PPC-PWR8-NEXT:    li r8, 5
521; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
522; PPC-PWR8-NEXT:    mr r7, r3
523; PPC-PWR8-NEXT:    lwz r6, 44(r1)
524; PPC-PWR8-NEXT:    lwz r5, 40(r1)
525; PPC-PWR8-NEXT:    lwz r4, 36(r1)
526; PPC-PWR8-NEXT:    lwz r3, 32(r1)
527; PPC-PWR8-NEXT:    cmplwi r7, 0
528; PPC-PWR8-NEXT:    beq cr0, .LBB3_1
529; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
530; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
531; PPC-PWR8-NEXT:    lwz r29, 68(r1) # 4-byte Folded Reload
532; PPC-PWR8-NEXT:    lwz r28, 64(r1) # 4-byte Folded Reload
533; PPC-PWR8-NEXT:    lwz r27, 60(r1) # 4-byte Folded Reload
534; PPC-PWR8-NEXT:    lwz r26, 56(r1) # 4-byte Folded Reload
535; PPC-PWR8-NEXT:    lwz r25, 52(r1) # 4-byte Folded Reload
536; PPC-PWR8-NEXT:    lwz r24, 48(r1) # 4-byte Folded Reload
537; PPC-PWR8-NEXT:    lwz r0, 84(r1)
538; PPC-PWR8-NEXT:    addi r1, r1, 80
539; PPC-PWR8-NEXT:    mtlr r0
540; PPC-PWR8-NEXT:    blr
541entry:
; The single operation under test; everything above is its expected codegen.
542  %0 = atomicrmw and ptr %a, i128 %x seq_cst, align 16
543  ret i128 %0
544}
545
; or: seq_cst, 16-byte-aligned `atomicrmw or` of the i128 at %a with %x;
; the function returns the result of the atomicrmw.
; Expected lowering, as recorded by the assertions below:
;   - 64-bit pwr8 runs: `sync`, an inline lqarx/stqcx. retry loop that
;     combines the two doublewords with a pair of `or`s, then `lwsync`.
;   - pwr7 run: a call to __atomic_fetch_or_16 with 5 in r6 (presumably the
;     seq_cst memory-order constant; confirm against the libatomic ABI).
;   - 32-bit pwr8 run: loads the four words at %a, then loops computing the
;     new value with four `or`s and calling the generic
;     __atomic_compare_exchange (size 16 in r3) until it returns nonzero.
546define i128 @or(ptr %a, i128 %x) {
547; CHECK-LABEL: or:
548; CHECK:       # %bb.0: # %entry
549; CHECK-NEXT:    sync
550; CHECK-NEXT:  .LBB4_1: # %entry
551; CHECK-NEXT:    #
552; CHECK-NEXT:    lqarx r6, 0, r3
553; CHECK-NEXT:    or r9, r5, r7
554; CHECK-NEXT:    or r8, r4, r6
555; CHECK-NEXT:    stqcx. r8, 0, r3
556; CHECK-NEXT:    bne cr0, .LBB4_1
557; CHECK-NEXT:  # %bb.2: # %entry
558; CHECK-NEXT:    lwsync
559; CHECK-NEXT:    mr r3, r6
560; CHECK-NEXT:    mr r4, r7
561; CHECK-NEXT:    blr
562;
563; PWR7-LABEL: or:
564; PWR7:       # %bb.0: # %entry
565; PWR7-NEXT:    mflr r0
566; PWR7-NEXT:    stdu r1, -112(r1)
567; PWR7-NEXT:    std r0, 128(r1)
568; PWR7-NEXT:    .cfi_def_cfa_offset 112
569; PWR7-NEXT:    .cfi_offset lr, 16
570; PWR7-NEXT:    li r6, 5
571; PWR7-NEXT:    bl __atomic_fetch_or_16
572; PWR7-NEXT:    nop
573; PWR7-NEXT:    addi r1, r1, 112
574; PWR7-NEXT:    ld r0, 16(r1)
575; PWR7-NEXT:    mtlr r0
576; PWR7-NEXT:    blr
577;
578; LE-PWR8-LABEL: or:
579; LE-PWR8:       # %bb.0: # %entry
580; LE-PWR8-NEXT:    sync
581; LE-PWR8-NEXT:  .LBB4_1: # %entry
582; LE-PWR8-NEXT:    #
583; LE-PWR8-NEXT:    lqarx r6, 0, r3
584; LE-PWR8-NEXT:    or r9, r4, r7
585; LE-PWR8-NEXT:    or r8, r5, r6
586; LE-PWR8-NEXT:    stqcx. r8, 0, r3
587; LE-PWR8-NEXT:    bne cr0, .LBB4_1
588; LE-PWR8-NEXT:  # %bb.2: # %entry
589; LE-PWR8-NEXT:    lwsync
590; LE-PWR8-NEXT:    mr r3, r7
591; LE-PWR8-NEXT:    mr r4, r6
592; LE-PWR8-NEXT:    blr
593;
594; AIX64-PWR8-LABEL: or:
595; AIX64-PWR8:       # %bb.0: # %entry
596; AIX64-PWR8-NEXT:    sync
597; AIX64-PWR8-NEXT:  L..BB4_1: # %entry
598; AIX64-PWR8-NEXT:    #
599; AIX64-PWR8-NEXT:    lqarx r6, 0, r3
600; AIX64-PWR8-NEXT:    or r9, r5, r7
601; AIX64-PWR8-NEXT:    or r8, r4, r6
602; AIX64-PWR8-NEXT:    stqcx. r8, 0, r3
603; AIX64-PWR8-NEXT:    bne cr0, L..BB4_1
604; AIX64-PWR8-NEXT:  # %bb.2: # %entry
605; AIX64-PWR8-NEXT:    lwsync
606; AIX64-PWR8-NEXT:    mr r3, r6
607; AIX64-PWR8-NEXT:    mr r4, r7
608; AIX64-PWR8-NEXT:    blr
609;
610; PPC-PWR8-LABEL: or:
611; PPC-PWR8:       # %bb.0: # %entry
612; PPC-PWR8-NEXT:    mflr r0
613; PPC-PWR8-NEXT:    stwu r1, -80(r1)
614; PPC-PWR8-NEXT:    stw r0, 84(r1)
615; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
616; PPC-PWR8-NEXT:    .cfi_offset lr, 4
617; PPC-PWR8-NEXT:    .cfi_offset r24, -32
618; PPC-PWR8-NEXT:    .cfi_offset r25, -28
619; PPC-PWR8-NEXT:    .cfi_offset r26, -24
620; PPC-PWR8-NEXT:    .cfi_offset r27, -20
621; PPC-PWR8-NEXT:    .cfi_offset r28, -16
622; PPC-PWR8-NEXT:    .cfi_offset r29, -12
623; PPC-PWR8-NEXT:    .cfi_offset r30, -8
624; PPC-PWR8-NEXT:    stw r26, 56(r1) # 4-byte Folded Spill
625; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
626; PPC-PWR8-NEXT:    mr r27, r5
627; PPC-PWR8-NEXT:    mr r26, r3
628; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
629; PPC-PWR8-NEXT:    mr r28, r6
630; PPC-PWR8-NEXT:    lwz r6, 12(r3)
631; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
632; PPC-PWR8-NEXT:    lwz r5, 8(r3)
633; PPC-PWR8-NEXT:    lwz r4, 4(r3)
634; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
635; PPC-PWR8-NEXT:    addi r25, r1, 32
636; PPC-PWR8-NEXT:    lwz r3, 0(r3)
637; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
638; PPC-PWR8-NEXT:    mr r29, r7
639; PPC-PWR8-NEXT:    addi r24, r1, 16
640; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
641; PPC-PWR8-NEXT:    mr r30, r8
642; PPC-PWR8-NEXT:    .p2align 4
643; PPC-PWR8-NEXT:  .LBB4_1: # %atomicrmw.start
644; PPC-PWR8-NEXT:    #
645; PPC-PWR8-NEXT:    stw r3, 32(r1)
646; PPC-PWR8-NEXT:    or r3, r3, r27
647; PPC-PWR8-NEXT:    stw r4, 36(r1)
648; PPC-PWR8-NEXT:    or r4, r4, r28
649; PPC-PWR8-NEXT:    or r7, r5, r29
650; PPC-PWR8-NEXT:    stw r5, 40(r1)
651; PPC-PWR8-NEXT:    or r5, r6, r30
652; PPC-PWR8-NEXT:    stw r6, 44(r1)
653; PPC-PWR8-NEXT:    stw r5, 28(r1)
654; PPC-PWR8-NEXT:    stw r7, 24(r1)
655; PPC-PWR8-NEXT:    mr r5, r25
656; PPC-PWR8-NEXT:    li r7, 5
657; PPC-PWR8-NEXT:    stw r4, 20(r1)
658; PPC-PWR8-NEXT:    stw r3, 16(r1)
659; PPC-PWR8-NEXT:    li r3, 16
660; PPC-PWR8-NEXT:    mr r4, r26
661; PPC-PWR8-NEXT:    mr r6, r24
662; PPC-PWR8-NEXT:    li r8, 5
663; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
664; PPC-PWR8-NEXT:    mr r7, r3
665; PPC-PWR8-NEXT:    lwz r6, 44(r1)
666; PPC-PWR8-NEXT:    lwz r5, 40(r1)
667; PPC-PWR8-NEXT:    lwz r4, 36(r1)
668; PPC-PWR8-NEXT:    lwz r3, 32(r1)
669; PPC-PWR8-NEXT:    cmplwi r7, 0
670; PPC-PWR8-NEXT:    beq cr0, .LBB4_1
671; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
672; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
673; PPC-PWR8-NEXT:    lwz r29, 68(r1) # 4-byte Folded Reload
674; PPC-PWR8-NEXT:    lwz r28, 64(r1) # 4-byte Folded Reload
675; PPC-PWR8-NEXT:    lwz r27, 60(r1) # 4-byte Folded Reload
676; PPC-PWR8-NEXT:    lwz r26, 56(r1) # 4-byte Folded Reload
677; PPC-PWR8-NEXT:    lwz r25, 52(r1) # 4-byte Folded Reload
678; PPC-PWR8-NEXT:    lwz r24, 48(r1) # 4-byte Folded Reload
679; PPC-PWR8-NEXT:    lwz r0, 84(r1)
680; PPC-PWR8-NEXT:    addi r1, r1, 80
681; PPC-PWR8-NEXT:    mtlr r0
682; PPC-PWR8-NEXT:    blr
683entry:
; The single operation under test; everything above is its expected codegen.
684  %0 = atomicrmw or ptr %a, i128 %x seq_cst, align 16
685  ret i128 %0
686}
687
; xor: seq_cst, 16-byte-aligned `atomicrmw xor` of the i128 at %a with %x;
; the function returns the result of the atomicrmw.
; Expected lowering, as recorded by the assertions below:
;   - 64-bit pwr8 runs: `sync`, an inline lqarx/stqcx. retry loop that
;     combines the two doublewords with a pair of `xor`s, then `lwsync`.
;   - pwr7 run: a call to __atomic_fetch_xor_16 with 5 in r6 (presumably the
;     seq_cst memory-order constant; confirm against the libatomic ABI).
;   - 32-bit pwr8 run: loads the four words at %a, then loops computing the
;     new value with four `xor`s and calling the generic
;     __atomic_compare_exchange (size 16 in r3) until it returns nonzero.
688define i128 @xor(ptr %a, i128 %x) {
689; CHECK-LABEL: xor:
690; CHECK:       # %bb.0: # %entry
691; CHECK-NEXT:    sync
692; CHECK-NEXT:  .LBB5_1: # %entry
693; CHECK-NEXT:    #
694; CHECK-NEXT:    lqarx r6, 0, r3
695; CHECK-NEXT:    xor r9, r5, r7
696; CHECK-NEXT:    xor r8, r4, r6
697; CHECK-NEXT:    stqcx. r8, 0, r3
698; CHECK-NEXT:    bne cr0, .LBB5_1
699; CHECK-NEXT:  # %bb.2: # %entry
700; CHECK-NEXT:    lwsync
701; CHECK-NEXT:    mr r3, r6
702; CHECK-NEXT:    mr r4, r7
703; CHECK-NEXT:    blr
704;
705; PWR7-LABEL: xor:
706; PWR7:       # %bb.0: # %entry
707; PWR7-NEXT:    mflr r0
708; PWR7-NEXT:    stdu r1, -112(r1)
709; PWR7-NEXT:    std r0, 128(r1)
710; PWR7-NEXT:    .cfi_def_cfa_offset 112
711; PWR7-NEXT:    .cfi_offset lr, 16
712; PWR7-NEXT:    li r6, 5
713; PWR7-NEXT:    bl __atomic_fetch_xor_16
714; PWR7-NEXT:    nop
715; PWR7-NEXT:    addi r1, r1, 112
716; PWR7-NEXT:    ld r0, 16(r1)
717; PWR7-NEXT:    mtlr r0
718; PWR7-NEXT:    blr
719;
720; LE-PWR8-LABEL: xor:
721; LE-PWR8:       # %bb.0: # %entry
722; LE-PWR8-NEXT:    sync
723; LE-PWR8-NEXT:  .LBB5_1: # %entry
724; LE-PWR8-NEXT:    #
725; LE-PWR8-NEXT:    lqarx r6, 0, r3
726; LE-PWR8-NEXT:    xor r9, r4, r7
727; LE-PWR8-NEXT:    xor r8, r5, r6
728; LE-PWR8-NEXT:    stqcx. r8, 0, r3
729; LE-PWR8-NEXT:    bne cr0, .LBB5_1
730; LE-PWR8-NEXT:  # %bb.2: # %entry
731; LE-PWR8-NEXT:    lwsync
732; LE-PWR8-NEXT:    mr r3, r7
733; LE-PWR8-NEXT:    mr r4, r6
734; LE-PWR8-NEXT:    blr
735;
736; AIX64-PWR8-LABEL: xor:
737; AIX64-PWR8:       # %bb.0: # %entry
738; AIX64-PWR8-NEXT:    sync
739; AIX64-PWR8-NEXT:  L..BB5_1: # %entry
740; AIX64-PWR8-NEXT:    #
741; AIX64-PWR8-NEXT:    lqarx r6, 0, r3
742; AIX64-PWR8-NEXT:    xor r9, r5, r7
743; AIX64-PWR8-NEXT:    xor r8, r4, r6
744; AIX64-PWR8-NEXT:    stqcx. r8, 0, r3
745; AIX64-PWR8-NEXT:    bne cr0, L..BB5_1
746; AIX64-PWR8-NEXT:  # %bb.2: # %entry
747; AIX64-PWR8-NEXT:    lwsync
748; AIX64-PWR8-NEXT:    mr r3, r6
749; AIX64-PWR8-NEXT:    mr r4, r7
750; AIX64-PWR8-NEXT:    blr
751;
752; PPC-PWR8-LABEL: xor:
753; PPC-PWR8:       # %bb.0: # %entry
754; PPC-PWR8-NEXT:    mflr r0
755; PPC-PWR8-NEXT:    stwu r1, -80(r1)
756; PPC-PWR8-NEXT:    stw r0, 84(r1)
757; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
758; PPC-PWR8-NEXT:    .cfi_offset lr, 4
759; PPC-PWR8-NEXT:    .cfi_offset r24, -32
760; PPC-PWR8-NEXT:    .cfi_offset r25, -28
761; PPC-PWR8-NEXT:    .cfi_offset r26, -24
762; PPC-PWR8-NEXT:    .cfi_offset r27, -20
763; PPC-PWR8-NEXT:    .cfi_offset r28, -16
764; PPC-PWR8-NEXT:    .cfi_offset r29, -12
765; PPC-PWR8-NEXT:    .cfi_offset r30, -8
766; PPC-PWR8-NEXT:    stw r26, 56(r1) # 4-byte Folded Spill
767; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
768; PPC-PWR8-NEXT:    mr r27, r5
769; PPC-PWR8-NEXT:    mr r26, r3
770; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
771; PPC-PWR8-NEXT:    mr r28, r6
772; PPC-PWR8-NEXT:    lwz r6, 12(r3)
773; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
774; PPC-PWR8-NEXT:    lwz r5, 8(r3)
775; PPC-PWR8-NEXT:    lwz r4, 4(r3)
776; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
777; PPC-PWR8-NEXT:    addi r25, r1, 32
778; PPC-PWR8-NEXT:    lwz r3, 0(r3)
779; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
780; PPC-PWR8-NEXT:    mr r29, r7
781; PPC-PWR8-NEXT:    addi r24, r1, 16
782; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
783; PPC-PWR8-NEXT:    mr r30, r8
784; PPC-PWR8-NEXT:    .p2align 4
785; PPC-PWR8-NEXT:  .LBB5_1: # %atomicrmw.start
786; PPC-PWR8-NEXT:    #
787; PPC-PWR8-NEXT:    stw r3, 32(r1)
788; PPC-PWR8-NEXT:    xor r3, r3, r27
789; PPC-PWR8-NEXT:    stw r4, 36(r1)
790; PPC-PWR8-NEXT:    xor r4, r4, r28
791; PPC-PWR8-NEXT:    xor r7, r5, r29
792; PPC-PWR8-NEXT:    stw r5, 40(r1)
793; PPC-PWR8-NEXT:    xor r5, r6, r30
794; PPC-PWR8-NEXT:    stw r6, 44(r1)
795; PPC-PWR8-NEXT:    stw r5, 28(r1)
796; PPC-PWR8-NEXT:    stw r7, 24(r1)
797; PPC-PWR8-NEXT:    mr r5, r25
798; PPC-PWR8-NEXT:    li r7, 5
799; PPC-PWR8-NEXT:    stw r4, 20(r1)
800; PPC-PWR8-NEXT:    stw r3, 16(r1)
801; PPC-PWR8-NEXT:    li r3, 16
802; PPC-PWR8-NEXT:    mr r4, r26
803; PPC-PWR8-NEXT:    mr r6, r24
804; PPC-PWR8-NEXT:    li r8, 5
805; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
806; PPC-PWR8-NEXT:    mr r7, r3
807; PPC-PWR8-NEXT:    lwz r6, 44(r1)
808; PPC-PWR8-NEXT:    lwz r5, 40(r1)
809; PPC-PWR8-NEXT:    lwz r4, 36(r1)
810; PPC-PWR8-NEXT:    lwz r3, 32(r1)
811; PPC-PWR8-NEXT:    cmplwi r7, 0
812; PPC-PWR8-NEXT:    beq cr0, .LBB5_1
813; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
814; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
815; PPC-PWR8-NEXT:    lwz r29, 68(r1) # 4-byte Folded Reload
816; PPC-PWR8-NEXT:    lwz r28, 64(r1) # 4-byte Folded Reload
817; PPC-PWR8-NEXT:    lwz r27, 60(r1) # 4-byte Folded Reload
818; PPC-PWR8-NEXT:    lwz r26, 56(r1) # 4-byte Folded Reload
819; PPC-PWR8-NEXT:    lwz r25, 52(r1) # 4-byte Folded Reload
820; PPC-PWR8-NEXT:    lwz r24, 48(r1) # 4-byte Folded Reload
821; PPC-PWR8-NEXT:    lwz r0, 84(r1)
822; PPC-PWR8-NEXT:    addi r1, r1, 80
823; PPC-PWR8-NEXT:    mtlr r0
824; PPC-PWR8-NEXT:    blr
825entry:
; The single operation under test; everything above is its expected codegen.
826  %0 = atomicrmw xor ptr %a, i128 %x seq_cst, align 16
827  ret i128 %0
828}
829
; nand -- seq_cst atomicrmw nand of the i128 %x into the 16-byte-aligned
; location %a; the function returns the value produced by the atomicrmw.
; Lowerings pinned by the assertions below, by check prefix:
;  * CHECK, LE-PWR8 and AIX64-PWR8 expect an inline lqarx / stqcx. retry loop
;    with a sync before it and an lwsync after it. LE-PWR8 pairs the operand
;    registers (r4/r5) and the result registers (r3/r4) in the opposite order
;    from the other two prefixes.
;  * PWR7 expects one call to __atomic_fetch_nand_16 with 5 loaded into r6
;    (presumably the seq_cst ordering argument).
;  * PPC-PWR8 (32-bit) expects no inline quadword sequence; four 32-bit nand
;    instructions build the new value on the stack and a loop retries
;    __atomic_compare_exchange (16 in r3) until it returns non-zero.
830define i128 @nand(ptr %a, i128 %x) {
831; CHECK-LABEL: nand:
832; CHECK:       # %bb.0: # %entry
833; CHECK-NEXT:    sync
834; CHECK-NEXT:  .LBB6_1: # %entry
835; CHECK-NEXT:    #
836; CHECK-NEXT:    lqarx r6, 0, r3
837; CHECK-NEXT:    nand r9, r5, r7
838; CHECK-NEXT:    nand r8, r4, r6
839; CHECK-NEXT:    stqcx. r8, 0, r3
840; CHECK-NEXT:    bne cr0, .LBB6_1
841; CHECK-NEXT:  # %bb.2: # %entry
842; CHECK-NEXT:    lwsync
843; CHECK-NEXT:    mr r3, r6
844; CHECK-NEXT:    mr r4, r7
845; CHECK-NEXT:    blr
846;
847; PWR7-LABEL: nand:
848; PWR7:       # %bb.0: # %entry
849; PWR7-NEXT:    mflr r0
850; PWR7-NEXT:    stdu r1, -112(r1)
851; PWR7-NEXT:    std r0, 128(r1)
852; PWR7-NEXT:    .cfi_def_cfa_offset 112
853; PWR7-NEXT:    .cfi_offset lr, 16
854; PWR7-NEXT:    li r6, 5
855; PWR7-NEXT:    bl __atomic_fetch_nand_16
856; PWR7-NEXT:    nop
857; PWR7-NEXT:    addi r1, r1, 112
858; PWR7-NEXT:    ld r0, 16(r1)
859; PWR7-NEXT:    mtlr r0
860; PWR7-NEXT:    blr
861;
862; LE-PWR8-LABEL: nand:
863; LE-PWR8:       # %bb.0: # %entry
864; LE-PWR8-NEXT:    sync
865; LE-PWR8-NEXT:  .LBB6_1: # %entry
866; LE-PWR8-NEXT:    #
867; LE-PWR8-NEXT:    lqarx r6, 0, r3
868; LE-PWR8-NEXT:    nand r9, r4, r7
869; LE-PWR8-NEXT:    nand r8, r5, r6
870; LE-PWR8-NEXT:    stqcx. r8, 0, r3
871; LE-PWR8-NEXT:    bne cr0, .LBB6_1
872; LE-PWR8-NEXT:  # %bb.2: # %entry
873; LE-PWR8-NEXT:    lwsync
874; LE-PWR8-NEXT:    mr r3, r7
875; LE-PWR8-NEXT:    mr r4, r6
876; LE-PWR8-NEXT:    blr
877;
878; AIX64-PWR8-LABEL: nand:
879; AIX64-PWR8:       # %bb.0: # %entry
880; AIX64-PWR8-NEXT:    sync
881; AIX64-PWR8-NEXT:  L..BB6_1: # %entry
882; AIX64-PWR8-NEXT:    #
883; AIX64-PWR8-NEXT:    lqarx r6, 0, r3
884; AIX64-PWR8-NEXT:    nand r9, r5, r7
885; AIX64-PWR8-NEXT:    nand r8, r4, r6
886; AIX64-PWR8-NEXT:    stqcx. r8, 0, r3
887; AIX64-PWR8-NEXT:    bne cr0, L..BB6_1
888; AIX64-PWR8-NEXT:  # %bb.2: # %entry
889; AIX64-PWR8-NEXT:    lwsync
890; AIX64-PWR8-NEXT:    mr r3, r6
891; AIX64-PWR8-NEXT:    mr r4, r7
892; AIX64-PWR8-NEXT:    blr
893;
894; PPC-PWR8-LABEL: nand:
895; PPC-PWR8:       # %bb.0: # %entry
896; PPC-PWR8-NEXT:    mflr r0
897; PPC-PWR8-NEXT:    stwu r1, -80(r1)
898; PPC-PWR8-NEXT:    stw r0, 84(r1)
899; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
900; PPC-PWR8-NEXT:    .cfi_offset lr, 4
901; PPC-PWR8-NEXT:    .cfi_offset r24, -32
902; PPC-PWR8-NEXT:    .cfi_offset r25, -28
903; PPC-PWR8-NEXT:    .cfi_offset r26, -24
904; PPC-PWR8-NEXT:    .cfi_offset r27, -20
905; PPC-PWR8-NEXT:    .cfi_offset r28, -16
906; PPC-PWR8-NEXT:    .cfi_offset r29, -12
907; PPC-PWR8-NEXT:    .cfi_offset r30, -8
908; PPC-PWR8-NEXT:    stw r26, 56(r1) # 4-byte Folded Spill
909; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
910; PPC-PWR8-NEXT:    mr r27, r5
911; PPC-PWR8-NEXT:    mr r26, r3
912; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
913; PPC-PWR8-NEXT:    mr r28, r6
914; PPC-PWR8-NEXT:    lwz r6, 12(r3)
915; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
916; PPC-PWR8-NEXT:    lwz r5, 8(r3)
917; PPC-PWR8-NEXT:    lwz r4, 4(r3)
918; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
919; PPC-PWR8-NEXT:    addi r25, r1, 32
920; PPC-PWR8-NEXT:    lwz r3, 0(r3)
921; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
922; PPC-PWR8-NEXT:    mr r29, r7
923; PPC-PWR8-NEXT:    addi r24, r1, 16
924; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
925; PPC-PWR8-NEXT:    mr r30, r8
926; PPC-PWR8-NEXT:    .p2align 4
927; PPC-PWR8-NEXT:  .LBB6_1: # %atomicrmw.start
928; PPC-PWR8-NEXT:    #
929; PPC-PWR8-NEXT:    stw r3, 32(r1)
930; PPC-PWR8-NEXT:    nand r3, r3, r27
931; PPC-PWR8-NEXT:    stw r4, 36(r1)
932; PPC-PWR8-NEXT:    nand r4, r4, r28
933; PPC-PWR8-NEXT:    nand r7, r5, r29
934; PPC-PWR8-NEXT:    stw r5, 40(r1)
935; PPC-PWR8-NEXT:    nand r5, r6, r30
936; PPC-PWR8-NEXT:    stw r6, 44(r1)
937; PPC-PWR8-NEXT:    stw r5, 28(r1)
938; PPC-PWR8-NEXT:    stw r7, 24(r1)
939; PPC-PWR8-NEXT:    mr r5, r25
940; PPC-PWR8-NEXT:    li r7, 5
941; PPC-PWR8-NEXT:    stw r4, 20(r1)
942; PPC-PWR8-NEXT:    stw r3, 16(r1)
943; PPC-PWR8-NEXT:    li r3, 16
944; PPC-PWR8-NEXT:    mr r4, r26
945; PPC-PWR8-NEXT:    mr r6, r24
946; PPC-PWR8-NEXT:    li r8, 5
947; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
948; PPC-PWR8-NEXT:    mr r7, r3
949; PPC-PWR8-NEXT:    lwz r6, 44(r1)
950; PPC-PWR8-NEXT:    lwz r5, 40(r1)
951; PPC-PWR8-NEXT:    lwz r4, 36(r1)
952; PPC-PWR8-NEXT:    lwz r3, 32(r1)
953; PPC-PWR8-NEXT:    cmplwi r7, 0
954; PPC-PWR8-NEXT:    beq cr0, .LBB6_1
955; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
956; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
957; PPC-PWR8-NEXT:    lwz r29, 68(r1) # 4-byte Folded Reload
958; PPC-PWR8-NEXT:    lwz r28, 64(r1) # 4-byte Folded Reload
959; PPC-PWR8-NEXT:    lwz r27, 60(r1) # 4-byte Folded Reload
960; PPC-PWR8-NEXT:    lwz r26, 56(r1) # 4-byte Folded Reload
961; PPC-PWR8-NEXT:    lwz r25, 52(r1) # 4-byte Folded Reload
962; PPC-PWR8-NEXT:    lwz r24, 48(r1) # 4-byte Folded Reload
963; PPC-PWR8-NEXT:    lwz r0, 84(r1)
964; PPC-PWR8-NEXT:    addi r1, r1, 80
965; PPC-PWR8-NEXT:    mtlr r0
966; PPC-PWR8-NEXT:    blr
967entry:
968  %0 = atomicrmw nand ptr %a, i128 %x seq_cst, align 16
969  ret i128 %0
970}
971
972;; CmpXchg
; cas_weak_acquire_acquire -- weak i128 cmpxchg on %a with acquire ordering for
; both the success and the failure case; returns field 0 of the result pair.
; Lowerings pinned by the assertions below, by check prefix:
;  * CHECK, LE-PWR8 and AIX64-PWR8 expect an inline lqarx / stqcx. loop with no
;    barrier in front of it and an lwsync at the shared exit label. LE-PWR8
;    swaps which register of each pair is compared, stored and returned.
;  * PWR7 expects a call to __atomic_compare_exchange_16, then reloads the
;    result from the 112(r1)/120(r1) slot whose address was passed in r4.
;  * PPC-PWR8 (32-bit) expects a call to the generic __atomic_compare_exchange
;    with 16 in r3, then reloads the result from 32(r1)..44(r1).
; Both libcall forms load 2 into r7 and r8 (presumably the two acquire
; ordering arguments).
973define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
974; CHECK-LABEL: cas_weak_acquire_acquire:
975; CHECK:       # %bb.0: # %entry
976; CHECK-NEXT:  .LBB7_1: # %entry
977; CHECK-NEXT:    #
978; CHECK-NEXT:    lqarx r8, 0, r3
979; CHECK-NEXT:    xor r11, r9, r5
980; CHECK-NEXT:    xor r10, r8, r4
981; CHECK-NEXT:    or. r11, r11, r10
982; CHECK-NEXT:    bne cr0, .LBB7_3
983; CHECK-NEXT:  # %bb.2: # %entry
984; CHECK-NEXT:    #
985; CHECK-NEXT:    mr r11, r7
986; CHECK-NEXT:    mr r10, r6
987; CHECK-NEXT:    stqcx. r10, 0, r3
988; CHECK-NEXT:    bne cr0, .LBB7_1
989; CHECK-NEXT:  .LBB7_3: # %entry
990; CHECK-NEXT:    lwsync
991; CHECK-NEXT:    mr r3, r8
992; CHECK-NEXT:    mr r4, r9
993; CHECK-NEXT:    blr
994;
995; PWR7-LABEL: cas_weak_acquire_acquire:
996; PWR7:       # %bb.0: # %entry
997; PWR7-NEXT:    mflr r0
998; PWR7-NEXT:    stdu r1, -128(r1)
999; PWR7-NEXT:    std r0, 144(r1)
1000; PWR7-NEXT:    .cfi_def_cfa_offset 128
1001; PWR7-NEXT:    .cfi_offset lr, 16
1002; PWR7-NEXT:    std r5, 120(r1)
1003; PWR7-NEXT:    std r4, 112(r1)
1004; PWR7-NEXT:    addi r4, r1, 112
1005; PWR7-NEXT:    mr r5, r6
1006; PWR7-NEXT:    mr r6, r7
1007; PWR7-NEXT:    li r7, 2
1008; PWR7-NEXT:    li r8, 2
1009; PWR7-NEXT:    bl __atomic_compare_exchange_16
1010; PWR7-NEXT:    nop
1011; PWR7-NEXT:    ld r4, 120(r1)
1012; PWR7-NEXT:    ld r3, 112(r1)
1013; PWR7-NEXT:    addi r1, r1, 128
1014; PWR7-NEXT:    ld r0, 16(r1)
1015; PWR7-NEXT:    mtlr r0
1016; PWR7-NEXT:    blr
1017;
1018; LE-PWR8-LABEL: cas_weak_acquire_acquire:
1019; LE-PWR8:       # %bb.0: # %entry
1020; LE-PWR8-NEXT:  .LBB7_1: # %entry
1021; LE-PWR8-NEXT:    #
1022; LE-PWR8-NEXT:    lqarx r8, 0, r3
1023; LE-PWR8-NEXT:    xor r11, r9, r4
1024; LE-PWR8-NEXT:    xor r10, r8, r5
1025; LE-PWR8-NEXT:    or. r11, r11, r10
1026; LE-PWR8-NEXT:    bne cr0, .LBB7_3
1027; LE-PWR8-NEXT:  # %bb.2: # %entry
1028; LE-PWR8-NEXT:    #
1029; LE-PWR8-NEXT:    mr r11, r6
1030; LE-PWR8-NEXT:    mr r10, r7
1031; LE-PWR8-NEXT:    stqcx. r10, 0, r3
1032; LE-PWR8-NEXT:    bne cr0, .LBB7_1
1033; LE-PWR8-NEXT:  .LBB7_3: # %entry
1034; LE-PWR8-NEXT:    lwsync
1035; LE-PWR8-NEXT:    mr r3, r9
1036; LE-PWR8-NEXT:    mr r4, r8
1037; LE-PWR8-NEXT:    blr
1038;
1039; AIX64-PWR8-LABEL: cas_weak_acquire_acquire:
1040; AIX64-PWR8:       # %bb.0: # %entry
1041; AIX64-PWR8-NEXT:  L..BB7_1: # %entry
1042; AIX64-PWR8-NEXT:    #
1043; AIX64-PWR8-NEXT:    lqarx r8, 0, r3
1044; AIX64-PWR8-NEXT:    xor r11, r9, r5
1045; AIX64-PWR8-NEXT:    xor r10, r8, r4
1046; AIX64-PWR8-NEXT:    or. r11, r11, r10
1047; AIX64-PWR8-NEXT:    bne cr0, L..BB7_3
1048; AIX64-PWR8-NEXT:  # %bb.2: # %entry
1049; AIX64-PWR8-NEXT:    #
1050; AIX64-PWR8-NEXT:    mr r11, r7
1051; AIX64-PWR8-NEXT:    mr r10, r6
1052; AIX64-PWR8-NEXT:    stqcx. r10, 0, r3
1053; AIX64-PWR8-NEXT:    bne cr0, L..BB7_1
1054; AIX64-PWR8-NEXT:  L..BB7_3: # %entry
1055; AIX64-PWR8-NEXT:    lwsync
1056; AIX64-PWR8-NEXT:    mr r3, r8
1057; AIX64-PWR8-NEXT:    mr r4, r9
1058; AIX64-PWR8-NEXT:    blr
1059;
1060; PPC-PWR8-LABEL: cas_weak_acquire_acquire:
1061; PPC-PWR8:       # %bb.0: # %entry
1062; PPC-PWR8-NEXT:    mflr r0
1063; PPC-PWR8-NEXT:    stwu r1, -48(r1)
1064; PPC-PWR8-NEXT:    stw r0, 52(r1)
1065; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
1066; PPC-PWR8-NEXT:    .cfi_offset lr, 4
1067; PPC-PWR8-NEXT:    mr r4, r3
1068; PPC-PWR8-NEXT:    lwz r3, 60(r1)
1069; PPC-PWR8-NEXT:    stw r8, 44(r1)
1070; PPC-PWR8-NEXT:    stw r7, 40(r1)
1071; PPC-PWR8-NEXT:    stw r6, 36(r1)
1072; PPC-PWR8-NEXT:    stw r5, 32(r1)
1073; PPC-PWR8-NEXT:    addi r5, r1, 32
1074; PPC-PWR8-NEXT:    addi r6, r1, 16
1075; PPC-PWR8-NEXT:    li r7, 2
1076; PPC-PWR8-NEXT:    li r8, 2
1077; PPC-PWR8-NEXT:    stw r10, 20(r1)
1078; PPC-PWR8-NEXT:    stw r9, 16(r1)
1079; PPC-PWR8-NEXT:    stw r3, 28(r1)
1080; PPC-PWR8-NEXT:    lwz r3, 56(r1)
1081; PPC-PWR8-NEXT:    stw r3, 24(r1)
1082; PPC-PWR8-NEXT:    li r3, 16
1083; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
1084; PPC-PWR8-NEXT:    lwz r6, 44(r1)
1085; PPC-PWR8-NEXT:    lwz r5, 40(r1)
1086; PPC-PWR8-NEXT:    lwz r4, 36(r1)
1087; PPC-PWR8-NEXT:    lwz r3, 32(r1)
1088; PPC-PWR8-NEXT:    lwz r0, 52(r1)
1089; PPC-PWR8-NEXT:    addi r1, r1, 48
1090; PPC-PWR8-NEXT:    mtlr r0
1091; PPC-PWR8-NEXT:    blr
1092entry:
1093  %0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new acquire acquire
1094  %1 = extractvalue { i128, i1 } %0, 0
1095  ret i128 %1
1096}
1097
; cas_weak_release_monotonic -- weak i128 cmpxchg on %a with release ordering on
; success and monotonic ordering on failure; returns field 0 of the result pair.
; Lowerings pinned by the assertions below, by check prefix:
;  * CHECK, LE-PWR8 and AIX64-PWR8 expect an lwsync in front of the inline
;    lqarx / stqcx. loop and no barrier at the exit label. LE-PWR8 swaps which
;    register of each pair is compared, stored and returned.
;  * PWR7 expects a call to __atomic_compare_exchange_16.
;  * PPC-PWR8 (32-bit) expects a call to the generic __atomic_compare_exchange
;    with 16 in r3.
; Both libcall forms load 3 into r7 and 0 into r8 (presumably the release and
; monotonic ordering arguments).
1098define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
1099; CHECK-LABEL: cas_weak_release_monotonic:
1100; CHECK:       # %bb.0: # %entry
1101; CHECK-NEXT:    lwsync
1102; CHECK-NEXT:  .LBB8_1: # %entry
1103; CHECK-NEXT:    #
1104; CHECK-NEXT:    lqarx r8, 0, r3
1105; CHECK-NEXT:    xor r11, r9, r5
1106; CHECK-NEXT:    xor r10, r8, r4
1107; CHECK-NEXT:    or. r11, r11, r10
1108; CHECK-NEXT:    bne cr0, .LBB8_3
1109; CHECK-NEXT:  # %bb.2: # %entry
1110; CHECK-NEXT:    #
1111; CHECK-NEXT:    mr r11, r7
1112; CHECK-NEXT:    mr r10, r6
1113; CHECK-NEXT:    stqcx. r10, 0, r3
1114; CHECK-NEXT:    bne cr0, .LBB8_1
1115; CHECK-NEXT:  .LBB8_3: # %entry
1116; CHECK-NEXT:    mr r3, r8
1117; CHECK-NEXT:    mr r4, r9
1118; CHECK-NEXT:    blr
1119;
1120; PWR7-LABEL: cas_weak_release_monotonic:
1121; PWR7:       # %bb.0: # %entry
1122; PWR7-NEXT:    mflr r0
1123; PWR7-NEXT:    stdu r1, -128(r1)
1124; PWR7-NEXT:    std r0, 144(r1)
1125; PWR7-NEXT:    .cfi_def_cfa_offset 128
1126; PWR7-NEXT:    .cfi_offset lr, 16
1127; PWR7-NEXT:    std r5, 120(r1)
1128; PWR7-NEXT:    std r4, 112(r1)
1129; PWR7-NEXT:    addi r4, r1, 112
1130; PWR7-NEXT:    mr r5, r6
1131; PWR7-NEXT:    mr r6, r7
1132; PWR7-NEXT:    li r7, 3
1133; PWR7-NEXT:    li r8, 0
1134; PWR7-NEXT:    bl __atomic_compare_exchange_16
1135; PWR7-NEXT:    nop
1136; PWR7-NEXT:    ld r4, 120(r1)
1137; PWR7-NEXT:    ld r3, 112(r1)
1138; PWR7-NEXT:    addi r1, r1, 128
1139; PWR7-NEXT:    ld r0, 16(r1)
1140; PWR7-NEXT:    mtlr r0
1141; PWR7-NEXT:    blr
1142;
1143; LE-PWR8-LABEL: cas_weak_release_monotonic:
1144; LE-PWR8:       # %bb.0: # %entry
1145; LE-PWR8-NEXT:    lwsync
1146; LE-PWR8-NEXT:  .LBB8_1: # %entry
1147; LE-PWR8-NEXT:    #
1148; LE-PWR8-NEXT:    lqarx r8, 0, r3
1149; LE-PWR8-NEXT:    xor r11, r9, r4
1150; LE-PWR8-NEXT:    xor r10, r8, r5
1151; LE-PWR8-NEXT:    or. r11, r11, r10
1152; LE-PWR8-NEXT:    bne cr0, .LBB8_3
1153; LE-PWR8-NEXT:  # %bb.2: # %entry
1154; LE-PWR8-NEXT:    #
1155; LE-PWR8-NEXT:    mr r11, r6
1156; LE-PWR8-NEXT:    mr r10, r7
1157; LE-PWR8-NEXT:    stqcx. r10, 0, r3
1158; LE-PWR8-NEXT:    bne cr0, .LBB8_1
1159; LE-PWR8-NEXT:  .LBB8_3: # %entry
1160; LE-PWR8-NEXT:    mr r3, r9
1161; LE-PWR8-NEXT:    mr r4, r8
1162; LE-PWR8-NEXT:    blr
1163;
1164; AIX64-PWR8-LABEL: cas_weak_release_monotonic:
1165; AIX64-PWR8:       # %bb.0: # %entry
1166; AIX64-PWR8-NEXT:    lwsync
1167; AIX64-PWR8-NEXT:  L..BB8_1: # %entry
1168; AIX64-PWR8-NEXT:    #
1169; AIX64-PWR8-NEXT:    lqarx r8, 0, r3
1170; AIX64-PWR8-NEXT:    xor r11, r9, r5
1171; AIX64-PWR8-NEXT:    xor r10, r8, r4
1172; AIX64-PWR8-NEXT:    or. r11, r11, r10
1173; AIX64-PWR8-NEXT:    bne cr0, L..BB8_3
1174; AIX64-PWR8-NEXT:  # %bb.2: # %entry
1175; AIX64-PWR8-NEXT:    #
1176; AIX64-PWR8-NEXT:    mr r11, r7
1177; AIX64-PWR8-NEXT:    mr r10, r6
1178; AIX64-PWR8-NEXT:    stqcx. r10, 0, r3
1179; AIX64-PWR8-NEXT:    bne cr0, L..BB8_1
1180; AIX64-PWR8-NEXT:  L..BB8_3: # %entry
1181; AIX64-PWR8-NEXT:    mr r3, r8
1182; AIX64-PWR8-NEXT:    mr r4, r9
1183; AIX64-PWR8-NEXT:    blr
1184;
1185; PPC-PWR8-LABEL: cas_weak_release_monotonic:
1186; PPC-PWR8:       # %bb.0: # %entry
1187; PPC-PWR8-NEXT:    mflr r0
1188; PPC-PWR8-NEXT:    stwu r1, -48(r1)
1189; PPC-PWR8-NEXT:    stw r0, 52(r1)
1190; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
1191; PPC-PWR8-NEXT:    .cfi_offset lr, 4
1192; PPC-PWR8-NEXT:    mr r4, r3
1193; PPC-PWR8-NEXT:    lwz r3, 60(r1)
1194; PPC-PWR8-NEXT:    stw r8, 44(r1)
1195; PPC-PWR8-NEXT:    stw r7, 40(r1)
1196; PPC-PWR8-NEXT:    stw r6, 36(r1)
1197; PPC-PWR8-NEXT:    stw r5, 32(r1)
1198; PPC-PWR8-NEXT:    addi r5, r1, 32
1199; PPC-PWR8-NEXT:    addi r6, r1, 16
1200; PPC-PWR8-NEXT:    li r7, 3
1201; PPC-PWR8-NEXT:    li r8, 0
1202; PPC-PWR8-NEXT:    stw r10, 20(r1)
1203; PPC-PWR8-NEXT:    stw r9, 16(r1)
1204; PPC-PWR8-NEXT:    stw r3, 28(r1)
1205; PPC-PWR8-NEXT:    lwz r3, 56(r1)
1206; PPC-PWR8-NEXT:    stw r3, 24(r1)
1207; PPC-PWR8-NEXT:    li r3, 16
1208; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
1209; PPC-PWR8-NEXT:    lwz r6, 44(r1)
1210; PPC-PWR8-NEXT:    lwz r5, 40(r1)
1211; PPC-PWR8-NEXT:    lwz r4, 36(r1)
1212; PPC-PWR8-NEXT:    lwz r3, 32(r1)
1213; PPC-PWR8-NEXT:    lwz r0, 52(r1)
1214; PPC-PWR8-NEXT:    addi r1, r1, 48
1215; PPC-PWR8-NEXT:    mtlr r0
1216; PPC-PWR8-NEXT:    blr
1217entry:
1218  %0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new release monotonic
1219  %1 = extractvalue { i128, i1 } %0, 0
1220  ret i128 %1
1221}
1222
; cas_sc_sc -- i128 cmpxchg (no weak keyword) on %a with seq_cst ordering for
; both the success and the failure case; returns field 0 of the result pair.
; Lowerings pinned by the assertions below, by check prefix:
;  * CHECK, LE-PWR8 and AIX64-PWR8 expect a sync in front of the inline
;    lqarx / stqcx. loop and an lwsync at the shared exit label. LE-PWR8 swaps
;    which register of each pair is compared, stored and returned.
;  * PWR7 expects a call to __atomic_compare_exchange_16.
;  * PPC-PWR8 (32-bit) expects a call to the generic __atomic_compare_exchange
;    with 16 in r3.
; Both libcall forms load 5 into r7 and r8 (presumably the two seq_cst
; ordering arguments).
1223define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
1224; CHECK-LABEL: cas_sc_sc:
1225; CHECK:       # %bb.0: # %entry
1226; CHECK-NEXT:    sync
1227; CHECK-NEXT:  .LBB9_1: # %entry
1228; CHECK-NEXT:    #
1229; CHECK-NEXT:    lqarx r8, 0, r3
1230; CHECK-NEXT:    xor r11, r9, r5
1231; CHECK-NEXT:    xor r10, r8, r4
1232; CHECK-NEXT:    or. r11, r11, r10
1233; CHECK-NEXT:    bne cr0, .LBB9_3
1234; CHECK-NEXT:  # %bb.2: # %entry
1235; CHECK-NEXT:    #
1236; CHECK-NEXT:    mr r11, r7
1237; CHECK-NEXT:    mr r10, r6
1238; CHECK-NEXT:    stqcx. r10, 0, r3
1239; CHECK-NEXT:    bne cr0, .LBB9_1
1240; CHECK-NEXT:  .LBB9_3: # %entry
1241; CHECK-NEXT:    lwsync
1242; CHECK-NEXT:    mr r3, r8
1243; CHECK-NEXT:    mr r4, r9
1244; CHECK-NEXT:    blr
1245;
1246; PWR7-LABEL: cas_sc_sc:
1247; PWR7:       # %bb.0: # %entry
1248; PWR7-NEXT:    mflr r0
1249; PWR7-NEXT:    stdu r1, -128(r1)
1250; PWR7-NEXT:    std r0, 144(r1)
1251; PWR7-NEXT:    .cfi_def_cfa_offset 128
1252; PWR7-NEXT:    .cfi_offset lr, 16
1253; PWR7-NEXT:    std r5, 120(r1)
1254; PWR7-NEXT:    std r4, 112(r1)
1255; PWR7-NEXT:    addi r4, r1, 112
1256; PWR7-NEXT:    mr r5, r6
1257; PWR7-NEXT:    mr r6, r7
1258; PWR7-NEXT:    li r7, 5
1259; PWR7-NEXT:    li r8, 5
1260; PWR7-NEXT:    bl __atomic_compare_exchange_16
1261; PWR7-NEXT:    nop
1262; PWR7-NEXT:    ld r4, 120(r1)
1263; PWR7-NEXT:    ld r3, 112(r1)
1264; PWR7-NEXT:    addi r1, r1, 128
1265; PWR7-NEXT:    ld r0, 16(r1)
1266; PWR7-NEXT:    mtlr r0
1267; PWR7-NEXT:    blr
1268;
1269; LE-PWR8-LABEL: cas_sc_sc:
1270; LE-PWR8:       # %bb.0: # %entry
1271; LE-PWR8-NEXT:    sync
1272; LE-PWR8-NEXT:  .LBB9_1: # %entry
1273; LE-PWR8-NEXT:    #
1274; LE-PWR8-NEXT:    lqarx r8, 0, r3
1275; LE-PWR8-NEXT:    xor r11, r9, r4
1276; LE-PWR8-NEXT:    xor r10, r8, r5
1277; LE-PWR8-NEXT:    or. r11, r11, r10
1278; LE-PWR8-NEXT:    bne cr0, .LBB9_3
1279; LE-PWR8-NEXT:  # %bb.2: # %entry
1280; LE-PWR8-NEXT:    #
1281; LE-PWR8-NEXT:    mr r11, r6
1282; LE-PWR8-NEXT:    mr r10, r7
1283; LE-PWR8-NEXT:    stqcx. r10, 0, r3
1284; LE-PWR8-NEXT:    bne cr0, .LBB9_1
1285; LE-PWR8-NEXT:  .LBB9_3: # %entry
1286; LE-PWR8-NEXT:    lwsync
1287; LE-PWR8-NEXT:    mr r3, r9
1288; LE-PWR8-NEXT:    mr r4, r8
1289; LE-PWR8-NEXT:    blr
1290;
1291; AIX64-PWR8-LABEL: cas_sc_sc:
1292; AIX64-PWR8:       # %bb.0: # %entry
1293; AIX64-PWR8-NEXT:    sync
1294; AIX64-PWR8-NEXT:  L..BB9_1: # %entry
1295; AIX64-PWR8-NEXT:    #
1296; AIX64-PWR8-NEXT:    lqarx r8, 0, r3
1297; AIX64-PWR8-NEXT:    xor r11, r9, r5
1298; AIX64-PWR8-NEXT:    xor r10, r8, r4
1299; AIX64-PWR8-NEXT:    or. r11, r11, r10
1300; AIX64-PWR8-NEXT:    bne cr0, L..BB9_3
1301; AIX64-PWR8-NEXT:  # %bb.2: # %entry
1302; AIX64-PWR8-NEXT:    #
1303; AIX64-PWR8-NEXT:    mr r11, r7
1304; AIX64-PWR8-NEXT:    mr r10, r6
1305; AIX64-PWR8-NEXT:    stqcx. r10, 0, r3
1306; AIX64-PWR8-NEXT:    bne cr0, L..BB9_1
1307; AIX64-PWR8-NEXT:  L..BB9_3: # %entry
1308; AIX64-PWR8-NEXT:    lwsync
1309; AIX64-PWR8-NEXT:    mr r3, r8
1310; AIX64-PWR8-NEXT:    mr r4, r9
1311; AIX64-PWR8-NEXT:    blr
1312;
1313; PPC-PWR8-LABEL: cas_sc_sc:
1314; PPC-PWR8:       # %bb.0: # %entry
1315; PPC-PWR8-NEXT:    mflr r0
1316; PPC-PWR8-NEXT:    stwu r1, -48(r1)
1317; PPC-PWR8-NEXT:    stw r0, 52(r1)
1318; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
1319; PPC-PWR8-NEXT:    .cfi_offset lr, 4
1320; PPC-PWR8-NEXT:    mr r4, r3
1321; PPC-PWR8-NEXT:    lwz r3, 60(r1)
1322; PPC-PWR8-NEXT:    stw r8, 44(r1)
1323; PPC-PWR8-NEXT:    stw r7, 40(r1)
1324; PPC-PWR8-NEXT:    stw r6, 36(r1)
1325; PPC-PWR8-NEXT:    stw r5, 32(r1)
1326; PPC-PWR8-NEXT:    addi r5, r1, 32
1327; PPC-PWR8-NEXT:    addi r6, r1, 16
1328; PPC-PWR8-NEXT:    li r7, 5
1329; PPC-PWR8-NEXT:    li r8, 5
1330; PPC-PWR8-NEXT:    stw r10, 20(r1)
1331; PPC-PWR8-NEXT:    stw r9, 16(r1)
1332; PPC-PWR8-NEXT:    stw r3, 28(r1)
1333; PPC-PWR8-NEXT:    lwz r3, 56(r1)
1334; PPC-PWR8-NEXT:    stw r3, 24(r1)
1335; PPC-PWR8-NEXT:    li r3, 16
1336; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
1337; PPC-PWR8-NEXT:    lwz r6, 44(r1)
1338; PPC-PWR8-NEXT:    lwz r5, 40(r1)
1339; PPC-PWR8-NEXT:    lwz r4, 36(r1)
1340; PPC-PWR8-NEXT:    lwz r3, 32(r1)
1341; PPC-PWR8-NEXT:    lwz r0, 52(r1)
1342; PPC-PWR8-NEXT:    addi r1, r1, 48
1343; PPC-PWR8-NEXT:    mtlr r0
1344; PPC-PWR8-NEXT:    blr
1345entry:
1346  %0 = cmpxchg ptr %a, i128 %cmp, i128 %new seq_cst seq_cst
1347  %1 = extractvalue { i128, i1 } %0, 0
1348  ret i128 %1
1349}
1350
; cas_acqrel_acquire -- i128 cmpxchg (no weak keyword) on %a with acq_rel
; ordering on success and acquire ordering on failure; returns field 0 of the
; result pair.
; Lowerings pinned by the assertions below, by check prefix:
;  * CHECK, LE-PWR8 and AIX64-PWR8 expect an lwsync both in front of the
;    inline lqarx / stqcx. loop and at the shared exit label. LE-PWR8 swaps
;    which register of each pair is compared, stored and returned.
;  * PWR7 expects a call to __atomic_compare_exchange_16.
;  * PPC-PWR8 (32-bit) expects a call to the generic __atomic_compare_exchange
;    with 16 in r3.
; Both libcall forms load 4 into r7 and 2 into r8 (presumably the acq_rel and
; acquire ordering arguments).
1351define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
1352; CHECK-LABEL: cas_acqrel_acquire:
1353; CHECK:       # %bb.0: # %entry
1354; CHECK-NEXT:    lwsync
1355; CHECK-NEXT:  .LBB10_1: # %entry
1356; CHECK-NEXT:    #
1357; CHECK-NEXT:    lqarx r8, 0, r3
1358; CHECK-NEXT:    xor r11, r9, r5
1359; CHECK-NEXT:    xor r10, r8, r4
1360; CHECK-NEXT:    or. r11, r11, r10
1361; CHECK-NEXT:    bne cr0, .LBB10_3
1362; CHECK-NEXT:  # %bb.2: # %entry
1363; CHECK-NEXT:    #
1364; CHECK-NEXT:    mr r11, r7
1365; CHECK-NEXT:    mr r10, r6
1366; CHECK-NEXT:    stqcx. r10, 0, r3
1367; CHECK-NEXT:    bne cr0, .LBB10_1
1368; CHECK-NEXT:  .LBB10_3: # %entry
1369; CHECK-NEXT:    lwsync
1370; CHECK-NEXT:    mr r3, r8
1371; CHECK-NEXT:    mr r4, r9
1372; CHECK-NEXT:    blr
1373;
1374; PWR7-LABEL: cas_acqrel_acquire:
1375; PWR7:       # %bb.0: # %entry
1376; PWR7-NEXT:    mflr r0
1377; PWR7-NEXT:    stdu r1, -128(r1)
1378; PWR7-NEXT:    std r0, 144(r1)
1379; PWR7-NEXT:    .cfi_def_cfa_offset 128
1380; PWR7-NEXT:    .cfi_offset lr, 16
1381; PWR7-NEXT:    std r5, 120(r1)
1382; PWR7-NEXT:    std r4, 112(r1)
1383; PWR7-NEXT:    addi r4, r1, 112
1384; PWR7-NEXT:    mr r5, r6
1385; PWR7-NEXT:    mr r6, r7
1386; PWR7-NEXT:    li r7, 4
1387; PWR7-NEXT:    li r8, 2
1388; PWR7-NEXT:    bl __atomic_compare_exchange_16
1389; PWR7-NEXT:    nop
1390; PWR7-NEXT:    ld r4, 120(r1)
1391; PWR7-NEXT:    ld r3, 112(r1)
1392; PWR7-NEXT:    addi r1, r1, 128
1393; PWR7-NEXT:    ld r0, 16(r1)
1394; PWR7-NEXT:    mtlr r0
1395; PWR7-NEXT:    blr
1396;
1397; LE-PWR8-LABEL: cas_acqrel_acquire:
1398; LE-PWR8:       # %bb.0: # %entry
1399; LE-PWR8-NEXT:    lwsync
1400; LE-PWR8-NEXT:  .LBB10_1: # %entry
1401; LE-PWR8-NEXT:    #
1402; LE-PWR8-NEXT:    lqarx r8, 0, r3
1403; LE-PWR8-NEXT:    xor r11, r9, r4
1404; LE-PWR8-NEXT:    xor r10, r8, r5
1405; LE-PWR8-NEXT:    or. r11, r11, r10
1406; LE-PWR8-NEXT:    bne cr0, .LBB10_3
1407; LE-PWR8-NEXT:  # %bb.2: # %entry
1408; LE-PWR8-NEXT:    #
1409; LE-PWR8-NEXT:    mr r11, r6
1410; LE-PWR8-NEXT:    mr r10, r7
1411; LE-PWR8-NEXT:    stqcx. r10, 0, r3
1412; LE-PWR8-NEXT:    bne cr0, .LBB10_1
1413; LE-PWR8-NEXT:  .LBB10_3: # %entry
1414; LE-PWR8-NEXT:    lwsync
1415; LE-PWR8-NEXT:    mr r3, r9
1416; LE-PWR8-NEXT:    mr r4, r8
1417; LE-PWR8-NEXT:    blr
1418;
1419; AIX64-PWR8-LABEL: cas_acqrel_acquire:
1420; AIX64-PWR8:       # %bb.0: # %entry
1421; AIX64-PWR8-NEXT:    lwsync
1422; AIX64-PWR8-NEXT:  L..BB10_1: # %entry
1423; AIX64-PWR8-NEXT:    #
1424; AIX64-PWR8-NEXT:    lqarx r8, 0, r3
1425; AIX64-PWR8-NEXT:    xor r11, r9, r5
1426; AIX64-PWR8-NEXT:    xor r10, r8, r4
1427; AIX64-PWR8-NEXT:    or. r11, r11, r10
1428; AIX64-PWR8-NEXT:    bne cr0, L..BB10_3
1429; AIX64-PWR8-NEXT:  # %bb.2: # %entry
1430; AIX64-PWR8-NEXT:    #
1431; AIX64-PWR8-NEXT:    mr r11, r7
1432; AIX64-PWR8-NEXT:    mr r10, r6
1433; AIX64-PWR8-NEXT:    stqcx. r10, 0, r3
1434; AIX64-PWR8-NEXT:    bne cr0, L..BB10_1
1435; AIX64-PWR8-NEXT:  L..BB10_3: # %entry
1436; AIX64-PWR8-NEXT:    lwsync
1437; AIX64-PWR8-NEXT:    mr r3, r8
1438; AIX64-PWR8-NEXT:    mr r4, r9
1439; AIX64-PWR8-NEXT:    blr
1440;
1441; PPC-PWR8-LABEL: cas_acqrel_acquire:
1442; PPC-PWR8:       # %bb.0: # %entry
1443; PPC-PWR8-NEXT:    mflr r0
1444; PPC-PWR8-NEXT:    stwu r1, -48(r1)
1445; PPC-PWR8-NEXT:    stw r0, 52(r1)
1446; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
1447; PPC-PWR8-NEXT:    .cfi_offset lr, 4
1448; PPC-PWR8-NEXT:    mr r4, r3
1449; PPC-PWR8-NEXT:    lwz r3, 60(r1)
1450; PPC-PWR8-NEXT:    stw r8, 44(r1)
1451; PPC-PWR8-NEXT:    stw r7, 40(r1)
1452; PPC-PWR8-NEXT:    stw r6, 36(r1)
1453; PPC-PWR8-NEXT:    stw r5, 32(r1)
1454; PPC-PWR8-NEXT:    addi r5, r1, 32
1455; PPC-PWR8-NEXT:    addi r6, r1, 16
1456; PPC-PWR8-NEXT:    li r7, 4
1457; PPC-PWR8-NEXT:    li r8, 2
1458; PPC-PWR8-NEXT:    stw r10, 20(r1)
1459; PPC-PWR8-NEXT:    stw r9, 16(r1)
1460; PPC-PWR8-NEXT:    stw r3, 28(r1)
1461; PPC-PWR8-NEXT:    lwz r3, 56(r1)
1462; PPC-PWR8-NEXT:    stw r3, 24(r1)
1463; PPC-PWR8-NEXT:    li r3, 16
1464; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
1465; PPC-PWR8-NEXT:    lwz r6, 44(r1)
1466; PPC-PWR8-NEXT:    lwz r5, 40(r1)
1467; PPC-PWR8-NEXT:    lwz r4, 36(r1)
1468; PPC-PWR8-NEXT:    lwz r3, 32(r1)
1469; PPC-PWR8-NEXT:    lwz r0, 52(r1)
1470; PPC-PWR8-NEXT:    addi r1, r1, 48
1471; PPC-PWR8-NEXT:    mtlr r0
1472; PPC-PWR8-NEXT:    blr
1473entry:
1474  %0 = cmpxchg ptr %a, i128 %cmp, i128 %new acq_rel acquire
1475  %1 = extractvalue { i128, i1 } %0, 0
1476  ret i128 %1
1477}
1478
; cas_acqrel_acquire_check_succ -- i128 cmpxchg (no weak keyword) on %a with
; acq_rel / acquire orderings that returns field 1 of the result pair, i.e.
; the i1 flag, instead of the loaded value.
; Lowerings pinned by the assertions below, by check prefix:
;  * CHECK, LE-PWR8 and AIX64-PWR8 expect an lwsync on both sides of the
;    inline lqarx / stqcx. loop, followed by an xor / xor / or / cntlzd /
;    rldicl sequence that rebuilds the flag from the loaded value and the
;    %cmp registers.
;  * PWR7 expects a call to __atomic_compare_exchange_16 and PPC-PWR8 a call
;    to __atomic_compare_exchange (16 in r3); both return straight after the
;    call with no reload of the expected-value stack slot.
1479define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
1480; CHECK-LABEL: cas_acqrel_acquire_check_succ:
1481; CHECK:       # %bb.0: # %entry
1482; CHECK-NEXT:    lwsync
1483; CHECK-NEXT:  .LBB11_1: # %entry
1484; CHECK-NEXT:    #
1485; CHECK-NEXT:    lqarx r8, 0, r3
1486; CHECK-NEXT:    xor r11, r9, r5
1487; CHECK-NEXT:    xor r10, r8, r4
1488; CHECK-NEXT:    or. r11, r11, r10
1489; CHECK-NEXT:    bne cr0, .LBB11_3
1490; CHECK-NEXT:  # %bb.2: # %entry
1491; CHECK-NEXT:    #
1492; CHECK-NEXT:    mr r11, r7
1493; CHECK-NEXT:    mr r10, r6
1494; CHECK-NEXT:    stqcx. r10, 0, r3
1495; CHECK-NEXT:    bne cr0, .LBB11_1
1496; CHECK-NEXT:  .LBB11_3: # %entry
1497; CHECK-NEXT:    lwsync
1498; CHECK-NEXT:    xor r3, r4, r8
1499; CHECK-NEXT:    xor r4, r5, r9
1500; CHECK-NEXT:    or r3, r4, r3
1501; CHECK-NEXT:    cntlzd r3, r3
1502; CHECK-NEXT:    rldicl r3, r3, 58, 63
1503; CHECK-NEXT:    blr
1504;
1505; PWR7-LABEL: cas_acqrel_acquire_check_succ:
1506; PWR7:       # %bb.0: # %entry
1507; PWR7-NEXT:    mflr r0
1508; PWR7-NEXT:    stdu r1, -128(r1)
1509; PWR7-NEXT:    std r0, 144(r1)
1510; PWR7-NEXT:    .cfi_def_cfa_offset 128
1511; PWR7-NEXT:    .cfi_offset lr, 16
1512; PWR7-NEXT:    std r5, 120(r1)
1513; PWR7-NEXT:    std r4, 112(r1)
1514; PWR7-NEXT:    addi r4, r1, 112
1515; PWR7-NEXT:    mr r5, r6
1516; PWR7-NEXT:    mr r6, r7
1517; PWR7-NEXT:    li r7, 4
1518; PWR7-NEXT:    li r8, 2
1519; PWR7-NEXT:    bl __atomic_compare_exchange_16
1520; PWR7-NEXT:    nop
1521; PWR7-NEXT:    addi r1, r1, 128
1522; PWR7-NEXT:    ld r0, 16(r1)
1523; PWR7-NEXT:    mtlr r0
1524; PWR7-NEXT:    blr
1525;
1526; LE-PWR8-LABEL: cas_acqrel_acquire_check_succ:
1527; LE-PWR8:       # %bb.0: # %entry
1528; LE-PWR8-NEXT:    lwsync
1529; LE-PWR8-NEXT:  .LBB11_1: # %entry
1530; LE-PWR8-NEXT:    #
1531; LE-PWR8-NEXT:    lqarx r8, 0, r3
1532; LE-PWR8-NEXT:    xor r11, r9, r4
1533; LE-PWR8-NEXT:    xor r10, r8, r5
1534; LE-PWR8-NEXT:    or. r11, r11, r10
1535; LE-PWR8-NEXT:    bne cr0, .LBB11_3
1536; LE-PWR8-NEXT:  # %bb.2: # %entry
1537; LE-PWR8-NEXT:    #
1538; LE-PWR8-NEXT:    mr r11, r6
1539; LE-PWR8-NEXT:    mr r10, r7
1540; LE-PWR8-NEXT:    stqcx. r10, 0, r3
1541; LE-PWR8-NEXT:    bne cr0, .LBB11_1
1542; LE-PWR8-NEXT:  .LBB11_3: # %entry
1543; LE-PWR8-NEXT:    lwsync
1544; LE-PWR8-NEXT:    xor r3, r5, r8
1545; LE-PWR8-NEXT:    xor r4, r4, r9
1546; LE-PWR8-NEXT:    or r3, r4, r3
1547; LE-PWR8-NEXT:    cntlzd r3, r3
1548; LE-PWR8-NEXT:    rldicl r3, r3, 58, 63
1549; LE-PWR8-NEXT:    blr
1550;
1551; AIX64-PWR8-LABEL: cas_acqrel_acquire_check_succ:
1552; AIX64-PWR8:       # %bb.0: # %entry
1553; AIX64-PWR8-NEXT:    lwsync
1554; AIX64-PWR8-NEXT:  L..BB11_1: # %entry
1555; AIX64-PWR8-NEXT:    #
1556; AIX64-PWR8-NEXT:    lqarx r8, 0, r3
1557; AIX64-PWR8-NEXT:    xor r11, r9, r5
1558; AIX64-PWR8-NEXT:    xor r10, r8, r4
1559; AIX64-PWR8-NEXT:    or. r11, r11, r10
1560; AIX64-PWR8-NEXT:    bne cr0, L..BB11_3
1561; AIX64-PWR8-NEXT:  # %bb.2: # %entry
1562; AIX64-PWR8-NEXT:    #
1563; AIX64-PWR8-NEXT:    mr r11, r7
1564; AIX64-PWR8-NEXT:    mr r10, r6
1565; AIX64-PWR8-NEXT:    stqcx. r10, 0, r3
1566; AIX64-PWR8-NEXT:    bne cr0, L..BB11_1
1567; AIX64-PWR8-NEXT:  L..BB11_3: # %entry
1568; AIX64-PWR8-NEXT:    lwsync
1569; AIX64-PWR8-NEXT:    xor r3, r4, r8
1570; AIX64-PWR8-NEXT:    xor r4, r5, r9
1571; AIX64-PWR8-NEXT:    or r3, r4, r3
1572; AIX64-PWR8-NEXT:    cntlzd r3, r3
1573; AIX64-PWR8-NEXT:    rldicl r3, r3, 58, 63
1574; AIX64-PWR8-NEXT:    blr
1575;
1576; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ:
1577; PPC-PWR8:       # %bb.0: # %entry
1578; PPC-PWR8-NEXT:    mflr r0
1579; PPC-PWR8-NEXT:    stwu r1, -48(r1)
1580; PPC-PWR8-NEXT:    stw r0, 52(r1)
1581; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
1582; PPC-PWR8-NEXT:    .cfi_offset lr, 4
1583; PPC-PWR8-NEXT:    mr r4, r3
1584; PPC-PWR8-NEXT:    lwz r3, 60(r1)
1585; PPC-PWR8-NEXT:    stw r8, 44(r1)
1586; PPC-PWR8-NEXT:    stw r7, 40(r1)
1587; PPC-PWR8-NEXT:    stw r6, 36(r1)
1588; PPC-PWR8-NEXT:    stw r5, 32(r1)
1589; PPC-PWR8-NEXT:    addi r5, r1, 32
1590; PPC-PWR8-NEXT:    addi r6, r1, 16
1591; PPC-PWR8-NEXT:    li r7, 4
1592; PPC-PWR8-NEXT:    li r8, 2
1593; PPC-PWR8-NEXT:    stw r10, 20(r1)
1594; PPC-PWR8-NEXT:    stw r9, 16(r1)
1595; PPC-PWR8-NEXT:    stw r3, 28(r1)
1596; PPC-PWR8-NEXT:    lwz r3, 56(r1)
1597; PPC-PWR8-NEXT:    stw r3, 24(r1)
1598; PPC-PWR8-NEXT:    li r3, 16
1599; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
1600; PPC-PWR8-NEXT:    lwz r0, 52(r1)
1601; PPC-PWR8-NEXT:    addi r1, r1, 48
1602; PPC-PWR8-NEXT:    mtlr r0
1603; PPC-PWR8-NEXT:    blr
1604entry:
1605  %0 = cmpxchg ptr %a, i128 %cmp, i128 %new acq_rel acquire
1606  %1 = extractvalue { i128, i1 } %0, 1
1607  ret i1 %1
1608}
1609
1610;; TODO: Optimize CAS at exit block when a bool value is returned.
; bool_cas_weak_acquire_acquire -- weak i128 cmpxchg on %a with acquire ordering
; for both the success and the failure case that returns field 1 of the result
; pair, i.e. the i1 flag, instead of the loaded value.
; Lowerings pinned by the assertions below, by check prefix:
;  * CHECK, LE-PWR8 and AIX64-PWR8 expect an inline lqarx / stqcx. loop with no
;    barrier in front of it and an lwsync at the shared exit label, followed
;    by an xor / xor / or / cntlzd / rldicl sequence that rebuilds the flag
;    from the loaded value and the %cmp registers.
;  * PWR7 expects a call to __atomic_compare_exchange_16 and PPC-PWR8 a call
;    to __atomic_compare_exchange (16 in r3); both load 2 into r7 and r8 and
;    return straight after the call.
1611define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
1612; CHECK-LABEL: bool_cas_weak_acquire_acquire:
1613; CHECK:       # %bb.0: # %entry
1614; CHECK-NEXT:  .LBB12_1: # %entry
1615; CHECK-NEXT:    #
1616; CHECK-NEXT:    lqarx r8, 0, r3
1617; CHECK-NEXT:    xor r11, r9, r5
1618; CHECK-NEXT:    xor r10, r8, r4
1619; CHECK-NEXT:    or. r11, r11, r10
1620; CHECK-NEXT:    bne cr0, .LBB12_3
1621; CHECK-NEXT:  # %bb.2: # %entry
1622; CHECK-NEXT:    #
1623; CHECK-NEXT:    mr r11, r7
1624; CHECK-NEXT:    mr r10, r6
1625; CHECK-NEXT:    stqcx. r10, 0, r3
1626; CHECK-NEXT:    bne cr0, .LBB12_1
1627; CHECK-NEXT:  .LBB12_3: # %entry
1628; CHECK-NEXT:    lwsync
1629; CHECK-NEXT:    xor r3, r4, r8
1630; CHECK-NEXT:    xor r4, r5, r9
1631; CHECK-NEXT:    or r3, r4, r3
1632; CHECK-NEXT:    cntlzd r3, r3
1633; CHECK-NEXT:    rldicl r3, r3, 58, 63
1634; CHECK-NEXT:    blr
1635;
1636; PWR7-LABEL: bool_cas_weak_acquire_acquire:
1637; PWR7:       # %bb.0: # %entry
1638; PWR7-NEXT:    mflr r0
1639; PWR7-NEXT:    stdu r1, -128(r1)
1640; PWR7-NEXT:    std r0, 144(r1)
1641; PWR7-NEXT:    .cfi_def_cfa_offset 128
1642; PWR7-NEXT:    .cfi_offset lr, 16
1643; PWR7-NEXT:    std r5, 120(r1)
1644; PWR7-NEXT:    std r4, 112(r1)
1645; PWR7-NEXT:    addi r4, r1, 112
1646; PWR7-NEXT:    mr r5, r6
1647; PWR7-NEXT:    mr r6, r7
1648; PWR7-NEXT:    li r7, 2
1649; PWR7-NEXT:    li r8, 2
1650; PWR7-NEXT:    bl __atomic_compare_exchange_16
1651; PWR7-NEXT:    nop
1652; PWR7-NEXT:    addi r1, r1, 128
1653; PWR7-NEXT:    ld r0, 16(r1)
1654; PWR7-NEXT:    mtlr r0
1655; PWR7-NEXT:    blr
1656;
1657; LE-PWR8-LABEL: bool_cas_weak_acquire_acquire:
1658; LE-PWR8:       # %bb.0: # %entry
1659; LE-PWR8-NEXT:  .LBB12_1: # %entry
1660; LE-PWR8-NEXT:    #
1661; LE-PWR8-NEXT:    lqarx r8, 0, r3
1662; LE-PWR8-NEXT:    xor r11, r9, r4
1663; LE-PWR8-NEXT:    xor r10, r8, r5
1664; LE-PWR8-NEXT:    or. r11, r11, r10
1665; LE-PWR8-NEXT:    bne cr0, .LBB12_3
1666; LE-PWR8-NEXT:  # %bb.2: # %entry
1667; LE-PWR8-NEXT:    #
1668; LE-PWR8-NEXT:    mr r11, r6
1669; LE-PWR8-NEXT:    mr r10, r7
1670; LE-PWR8-NEXT:    stqcx. r10, 0, r3
1671; LE-PWR8-NEXT:    bne cr0, .LBB12_1
1672; LE-PWR8-NEXT:  .LBB12_3: # %entry
1673; LE-PWR8-NEXT:    lwsync
1674; LE-PWR8-NEXT:    xor r3, r5, r8
1675; LE-PWR8-NEXT:    xor r4, r4, r9
1676; LE-PWR8-NEXT:    or r3, r4, r3
1677; LE-PWR8-NEXT:    cntlzd r3, r3
1678; LE-PWR8-NEXT:    rldicl r3, r3, 58, 63
1679; LE-PWR8-NEXT:    blr
1680;
1681; AIX64-PWR8-LABEL: bool_cas_weak_acquire_acquire:
1682; AIX64-PWR8:       # %bb.0: # %entry
1683; AIX64-PWR8-NEXT:  L..BB12_1: # %entry
1684; AIX64-PWR8-NEXT:    #
1685; AIX64-PWR8-NEXT:    lqarx r8, 0, r3
1686; AIX64-PWR8-NEXT:    xor r11, r9, r5
1687; AIX64-PWR8-NEXT:    xor r10, r8, r4
1688; AIX64-PWR8-NEXT:    or. r11, r11, r10
1689; AIX64-PWR8-NEXT:    bne cr0, L..BB12_3
1690; AIX64-PWR8-NEXT:  # %bb.2: # %entry
1691; AIX64-PWR8-NEXT:    #
1692; AIX64-PWR8-NEXT:    mr r11, r7
1693; AIX64-PWR8-NEXT:    mr r10, r6
1694; AIX64-PWR8-NEXT:    stqcx. r10, 0, r3
1695; AIX64-PWR8-NEXT:    bne cr0, L..BB12_1
1696; AIX64-PWR8-NEXT:  L..BB12_3: # %entry
1697; AIX64-PWR8-NEXT:    lwsync
1698; AIX64-PWR8-NEXT:    xor r3, r4, r8
1699; AIX64-PWR8-NEXT:    xor r4, r5, r9
1700; AIX64-PWR8-NEXT:    or r3, r4, r3
1701; AIX64-PWR8-NEXT:    cntlzd r3, r3
1702; AIX64-PWR8-NEXT:    rldicl r3, r3, 58, 63
1703; AIX64-PWR8-NEXT:    blr
1704;
1705; PPC-PWR8-LABEL: bool_cas_weak_acquire_acquire:
1706; PPC-PWR8:       # %bb.0: # %entry
1707; PPC-PWR8-NEXT:    mflr r0
1708; PPC-PWR8-NEXT:    stwu r1, -48(r1)
1709; PPC-PWR8-NEXT:    stw r0, 52(r1)
1710; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
1711; PPC-PWR8-NEXT:    .cfi_offset lr, 4
1712; PPC-PWR8-NEXT:    mr r4, r3
1713; PPC-PWR8-NEXT:    lwz r3, 60(r1)
1714; PPC-PWR8-NEXT:    stw r8, 44(r1)
1715; PPC-PWR8-NEXT:    stw r7, 40(r1)
1716; PPC-PWR8-NEXT:    stw r6, 36(r1)
1717; PPC-PWR8-NEXT:    stw r5, 32(r1)
1718; PPC-PWR8-NEXT:    addi r5, r1, 32
1719; PPC-PWR8-NEXT:    addi r6, r1, 16
1720; PPC-PWR8-NEXT:    li r7, 2
1721; PPC-PWR8-NEXT:    li r8, 2
1722; PPC-PWR8-NEXT:    stw r10, 20(r1)
1723; PPC-PWR8-NEXT:    stw r9, 16(r1)
1724; PPC-PWR8-NEXT:    stw r3, 28(r1)
1725; PPC-PWR8-NEXT:    lwz r3, 56(r1)
1726; PPC-PWR8-NEXT:    stw r3, 24(r1)
1727; PPC-PWR8-NEXT:    li r3, 16
1728; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
1729; PPC-PWR8-NEXT:    lwz r0, 52(r1)
1730; PPC-PWR8-NEXT:    addi r1, r1, 48
1731; PPC-PWR8-NEXT:    mtlr r0
1732; PPC-PWR8-NEXT:    blr
1733entry:
1734  %0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new acquire acquire
1735  %1 = extractvalue { i128, i1 } %0, 1
1736  ret i1 %1
1737}
1738