; xref: /llvm-project/llvm/test/CodeGen/X86/atomic-idempotent.ll (revision e6bf48d11047e970cb24554a01b65b566d6b5d22)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs                           | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs           -mattr=+sse2      | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2      | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2      | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2     | FileCheck %s --check-prefixes=X86,X86-ATOM

; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf

; Idempotent atomic `add 0` (monotonic), i8: lowered to mfence + plain load on x64.
define i8 @add8(ptr %p) {
; X64-LABEL: add8:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: add8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movzbl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: add8:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    xorl %eax, %eax
; X86-SLM-NEXT:    lock xaddb %al, (%ecx)
; X86-SLM-NEXT:    # kill: def $al killed $al killed $eax
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: add8:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    xorl %eax, %eax
; X86-ATOM-NEXT:    lock xaddb %al, (%ecx)
; X86-ATOM-NEXT:    # kill: def $al killed $al killed $eax
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw add ptr %p, i8 0 monotonic
  ret i8 %1
}

; Idempotent atomic `or 0` (acquire), i16.
define i16 @or16(ptr %p) {
; X64-LABEL: or16:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: or16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movzwl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: or16:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    movzwl (%ecx), %eax
; X86-SLM-NEXT:    .p2align 4
; X86-SLM-NEXT:  .LBB1_1: # %atomicrmw.start
; X86-SLM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT:    lock cmpxchgw %ax, (%ecx)
; X86-SLM-NEXT:    jne .LBB1_1
; X86-SLM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: or16:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movzwl (%ecx), %eax
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB1_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    lock cmpxchgw %ax, (%ecx)
; X86-ATOM-NEXT:    jne .LBB1_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw or ptr %p, i16 0 acquire
  ret i16 %1
}

; Idempotent atomic `xor 0` (release), i32.
define i32 @xor32(ptr %p) {
; X64-LABEL: xor32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: xor32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: xor32:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    movl (%ecx), %eax
; X86-SLM-NEXT:    .p2align 4
; X86-SLM-NEXT:  .LBB2_1: # %atomicrmw.start
; X86-SLM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT:    jne .LBB2_1
; X86-SLM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: xor32:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl (%ecx), %eax
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB2_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT:    jne .LBB2_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw xor ptr %p, i32 0 release
  ret i32 %1
}

; Idempotent atomic `sub 0` (seq_cst), i64: 32-bit targets use a cmpxchg8b loop.
define i64 @sub64(ptr %p) {
; X64-LABEL: sub64:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    retq
;
; X86-LABEL: sub64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB3_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB3_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %1 = atomicrmw sub ptr %p, i64 0 seq_cst
  ret i64 %1
}

; Idempotent atomic `or 0` (monotonic), i128: lowered to libatomic calls.
define i128 @or128(ptr %p) {
; X64-LABEL: or128:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    callq __atomic_fetch_or_16@PLT
; X64-NEXT:    popq %rcx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or128:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    pushl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_offset 8
; X86-GENERIC-NEXT:    .cfi_offset %ebp, -8
; X86-GENERIC-NEXT:    movl %esp, %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_register %ebp
; X86-GENERIC-NEXT:    pushl %ebx
; X86-GENERIC-NEXT:    pushl %edi
; X86-GENERIC-NEXT:    pushl %esi
; X86-GENERIC-NEXT:    andl $-16, %esp
; X86-GENERIC-NEXT:    subl $48, %esp
; X86-GENERIC-NEXT:    .cfi_offset %esi, -20
; X86-GENERIC-NEXT:    .cfi_offset %edi, -16
; X86-GENERIC-NEXT:    .cfi_offset %ebx, -12
; X86-GENERIC-NEXT:    movl 12(%ebp), %edi
; X86-GENERIC-NEXT:    movl 12(%edi), %ecx
; X86-GENERIC-NEXT:    movl 8(%edi), %edx
; X86-GENERIC-NEXT:    movl (%edi), %ebx
; X86-GENERIC-NEXT:    movl 4(%edi), %esi
; X86-GENERIC-NEXT:    .p2align 4
; X86-GENERIC-NEXT:  .LBB4_1: # %atomicrmw.start
; X86-GENERIC-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-GENERIC-NEXT:    movl %ebx, (%esp)
; X86-GENERIC-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    pushl $0
; X86-GENERIC-NEXT:    pushl $0
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    pushl %edi
; X86-GENERIC-NEXT:    pushl $16
; X86-GENERIC-NEXT:    calll __atomic_compare_exchange@PLT
; X86-GENERIC-NEXT:    addl $24, %esp
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-GENERIC-NEXT:    movl (%esp), %ebx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-GENERIC-NEXT:    testb %al, %al
; X86-GENERIC-NEXT:    je .LBB4_1
; X86-GENERIC-NEXT:  # %bb.2: # %atomicrmw.end
; X86-GENERIC-NEXT:    movl 8(%ebp), %eax
; X86-GENERIC-NEXT:    movl %ebx, (%eax)
; X86-GENERIC-NEXT:    movl %esi, 4(%eax)
; X86-GENERIC-NEXT:    movl %edx, 8(%eax)
; X86-GENERIC-NEXT:    movl %ecx, 12(%eax)
; X86-GENERIC-NEXT:    leal -12(%ebp), %esp
; X86-GENERIC-NEXT:    popl %esi
; X86-GENERIC-NEXT:    popl %edi
; X86-GENERIC-NEXT:    popl %ebx
; X86-GENERIC-NEXT:    popl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa %esp, 4
; X86-GENERIC-NEXT:    retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    pushl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_offset 8
; X86-ATOM-NEXT:    .cfi_offset %ebp, -8
; X86-ATOM-NEXT:    movl %esp, %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT:    pushl %ebx
; X86-ATOM-NEXT:    pushl %edi
; X86-ATOM-NEXT:    pushl %esi
; X86-ATOM-NEXT:    andl $-16, %esp
; X86-ATOM-NEXT:    leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    .cfi_offset %esi, -20
; X86-ATOM-NEXT:    .cfi_offset %edi, -16
; X86-ATOM-NEXT:    .cfi_offset %ebx, -12
; X86-ATOM-NEXT:    movl 12(%ebp), %edi
; X86-ATOM-NEXT:    movl 12(%edi), %ecx
; X86-ATOM-NEXT:    movl 8(%edi), %edx
; X86-ATOM-NEXT:    movl (%edi), %esi
; X86-ATOM-NEXT:    movl 4(%edi), %ebx
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB4_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    movl %esi, (%esp)
; X86-ATOM-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    pushl $0
; X86-ATOM-NEXT:    pushl $0
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT:    pushl %eax
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT:    pushl %eax
; X86-ATOM-NEXT:    pushl %edi
; X86-ATOM-NEXT:    pushl $16
; X86-ATOM-NEXT:    calll __atomic_compare_exchange@PLT
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-ATOM-NEXT:    testb %al, %al
; X86-ATOM-NEXT:    movl (%esp), %esi
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-ATOM-NEXT:    je .LBB4_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    movl 8(%ebp), %eax
; X86-ATOM-NEXT:    movl %esi, (%eax)
; X86-ATOM-NEXT:    movl %ebx, 4(%eax)
; X86-ATOM-NEXT:    movl %edx, 8(%eax)
; X86-ATOM-NEXT:    movl %ecx, 12(%eax)
; X86-ATOM-NEXT:    leal -12(%ebp), %esp
; X86-ATOM-NEXT:    popl %esi
; X86-ATOM-NEXT:    popl %edi
; X86-ATOM-NEXT:    popl %ebx
; X86-ATOM-NEXT:    popl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT:    retl $4
  %1 = atomicrmw or ptr %p, i128 0 monotonic
  ret i128 %1
}

; For 'and', the idempotent value is (-1)
define i32 @and32 (ptr %p) {
; X64-LABEL: and32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X86-SSE2-LABEL: and32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    mfence
; X86-SSE2-NEXT:    movl (%eax), %eax
; X86-SSE2-NEXT:    retl
;
; X86-SLM-LABEL: and32:
; X86-SLM:       # %bb.0:
; X86-SLM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT:    movl (%ecx), %eax
; X86-SLM-NEXT:    .p2align 4
; X86-SLM-NEXT:  .LBB5_1: # %atomicrmw.start
; X86-SLM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT:    jne .LBB5_1
; X86-SLM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT:    retl
;
; X86-ATOM-LABEL: and32:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl (%ecx), %eax
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB5_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT:    jne .LBB5_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    retl
  %1 = atomicrmw and ptr %p, i32 -1 acq_rel
  ret i32 %1
}

; Unused idempotent atomic or, monotonic: collapses to a compiler barrier.
define void @or32_nouse_monotonic(ptr %p) {
; X64-LABEL: or32_nouse_monotonic:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_monotonic:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_monotonic:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 monotonic
  ret void
}


; Unused idempotent atomic or, acquire: collapses to a compiler barrier.
define void @or32_nouse_acquire(ptr %p) {
; X64-LABEL: or32_nouse_acquire:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_acquire:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_acquire:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 acquire
  ret void
}

; Unused idempotent atomic or, release: collapses to a compiler barrier.
define void @or32_nouse_release(ptr %p) {
; X64-LABEL: or32_nouse_release:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_release:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_release:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 release
  ret void
}

; Unused idempotent atomic or, acq_rel: collapses to a compiler barrier.
define void @or32_nouse_acq_rel(ptr %p) {
; X64-LABEL: or32_nouse_acq_rel:
; X64:       # %bb.0:
; X64-NEXT:    #MEMBARRIER
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_acq_rel:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    #MEMBARRIER
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_acq_rel:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    #MEMBARRIER
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 acq_rel
  ret void
}

; Unused idempotent atomic or, seq_cst: needs a real fence (lock or'ed stack slot).
define void @or32_nouse_seq_cst(ptr %p) {
; X64-LABEL: or32_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or32_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    lock orl $0, (%esp)
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or32_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    lock orl $0, (%esp)
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i32 0 seq_cst
  ret void
}

; TODO: The value isn't used on 32 bit, so the cmpxchg8b is unneeded
define void @or64_nouse_seq_cst(ptr %p) {
; X64-LABEL: or64_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-LABEL: or64_nouse_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB11_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB11_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  atomicrmw or ptr %p, i64 0 seq_cst
  ret void
}

; TODO: Don't need to lower as sync_and_fetch call
define void @or128_nouse_seq_cst(ptr %p) {
; X64-LABEL: or128_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl $5, %ecx
; X64-NEXT:    callq __atomic_fetch_or_16@PLT
; X64-NEXT:    popq %rax
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or128_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    pushl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_offset 8
; X86-GENERIC-NEXT:    .cfi_offset %ebp, -8
; X86-GENERIC-NEXT:    movl %esp, %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa_register %ebp
; X86-GENERIC-NEXT:    pushl %ebx
; X86-GENERIC-NEXT:    pushl %edi
; X86-GENERIC-NEXT:    pushl %esi
; X86-GENERIC-NEXT:    andl $-16, %esp
; X86-GENERIC-NEXT:    subl $48, %esp
; X86-GENERIC-NEXT:    .cfi_offset %esi, -20
; X86-GENERIC-NEXT:    .cfi_offset %edi, -16
; X86-GENERIC-NEXT:    .cfi_offset %ebx, -12
; X86-GENERIC-NEXT:    movl 8(%ebp), %esi
; X86-GENERIC-NEXT:    movl 12(%esi), %ecx
; X86-GENERIC-NEXT:    movl 8(%esi), %edi
; X86-GENERIC-NEXT:    movl (%esi), %edx
; X86-GENERIC-NEXT:    movl 4(%esi), %ebx
; X86-GENERIC-NEXT:    .p2align 4
; X86-GENERIC-NEXT:  .LBB12_1: # %atomicrmw.start
; X86-GENERIC-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-GENERIC-NEXT:    movl %edx, (%esp)
; X86-GENERIC-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-GENERIC-NEXT:    pushl $5
; X86-GENERIC-NEXT:    pushl $5
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-GENERIC-NEXT:    pushl %eax
; X86-GENERIC-NEXT:    pushl %esi
; X86-GENERIC-NEXT:    pushl $16
; X86-GENERIC-NEXT:    calll __atomic_compare_exchange@PLT
; X86-GENERIC-NEXT:    addl $24, %esp
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-GENERIC-NEXT:    movl (%esp), %edx
; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-GENERIC-NEXT:    testb %al, %al
; X86-GENERIC-NEXT:    je .LBB12_1
; X86-GENERIC-NEXT:  # %bb.2: # %atomicrmw.end
; X86-GENERIC-NEXT:    leal -12(%ebp), %esp
; X86-GENERIC-NEXT:    popl %esi
; X86-GENERIC-NEXT:    popl %edi
; X86-GENERIC-NEXT:    popl %ebx
; X86-GENERIC-NEXT:    popl %ebp
; X86-GENERIC-NEXT:    .cfi_def_cfa %esp, 4
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    pushl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_offset 8
; X86-ATOM-NEXT:    .cfi_offset %ebp, -8
; X86-ATOM-NEXT:    movl %esp, %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT:    pushl %ebx
; X86-ATOM-NEXT:    pushl %edi
; X86-ATOM-NEXT:    pushl %esi
; X86-ATOM-NEXT:    andl $-16, %esp
; X86-ATOM-NEXT:    leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    .cfi_offset %esi, -20
; X86-ATOM-NEXT:    .cfi_offset %edi, -16
; X86-ATOM-NEXT:    .cfi_offset %ebx, -12
; X86-ATOM-NEXT:    movl 8(%ebp), %esi
; X86-ATOM-NEXT:    movl %esp, %ebx
; X86-ATOM-NEXT:    movl 12(%esi), %ecx
; X86-ATOM-NEXT:    movl 8(%esi), %edx
; X86-ATOM-NEXT:    movl (%esi), %eax
; X86-ATOM-NEXT:    movl 4(%esi), %edi
; X86-ATOM-NEXT:    .p2align 4
; X86-ATOM-NEXT:  .LBB12_1: # %atomicrmw.start
; X86-ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT:    movl %eax, (%esp)
; X86-ATOM-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT:    pushl $5
; X86-ATOM-NEXT:    pushl $5
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT:    pushl %eax
; X86-ATOM-NEXT:    pushl %ebx
; X86-ATOM-NEXT:    pushl %esi
; X86-ATOM-NEXT:    pushl $16
; X86-ATOM-NEXT:    calll __atomic_compare_exchange@PLT
; X86-ATOM-NEXT:    leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT:    testb %al, %al
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-ATOM-NEXT:    movl (%esp), %eax
; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-ATOM-NEXT:    je .LBB12_1
; X86-ATOM-NEXT:  # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT:    leal -12(%ebp), %esp
; X86-ATOM-NEXT:    popl %esi
; X86-ATOM-NEXT:    popl %edi
; X86-ATOM-NEXT:    popl %ebx
; X86-ATOM-NEXT:    popl %ebp
; X86-ATOM-NEXT:    .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i128 0 seq_cst
  ret void
}


; Unused idempotent atomic or, seq_cst, i16: same lock-or fence idiom as i32.
define void @or16_nouse_seq_cst(ptr %p) {
; X64-LABEL: or16_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or16_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    lock orl $0, (%esp)
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or16_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    lock orl $0, (%esp)
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i16 0 seq_cst
  ret void
}

; Unused idempotent atomic or, seq_cst, i8: same lock-or fence idiom as i32.
define void @or8_nouse_seq_cst(ptr %p) {
; X64-LABEL: or8_nouse_seq_cst:
; X64:       # %bb.0:
; X64-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
;
; X86-GENERIC-LABEL: or8_nouse_seq_cst:
; X86-GENERIC:       # %bb.0:
; X86-GENERIC-NEXT:    lock orl $0, (%esp)
; X86-GENERIC-NEXT:    retl
;
; X86-ATOM-LABEL: or8_nouse_seq_cst:
; X86-ATOM:       # %bb.0:
; X86-ATOM-NEXT:    lock orl $0, (%esp)
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    nop
; X86-ATOM-NEXT:    retl
  atomicrmw or ptr %p, i8 0 seq_cst
  ret void
}
