; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse | FileCheck %s --check-prefixes=X86,X86-SSE1
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX

; Note: This test is testing that the lowering for atomics matches what we
; currently emit for non-atomics + the atomic restriction.  The presence of
; particular lowering detail in these tests should not be read as requiring
; that detail for correctness unless it's related to the atomicity itself.
; (Specifically, there were reviewer questions about the lowering for halfs
;  and their calling convention which remain unresolved.)

define void @store_half(ptr %fptr, half %v) {
; X86-SSE1-LABEL: store_half:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movw %ax, (%ecx)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_half:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movw %cx, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_half:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movw %cx, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_half:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movw %ax, (%ecx)
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_half:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pextrw $0, %xmm0, %eax
; X64-SSE-NEXT:    movw %ax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_half:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT:    movw %ax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic half %v, ptr %fptr unordered, align 2
  ret void
}

define void @store_float(ptr %fptr, float %v) {
; X86-LABEL: store_float:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, ptr %fptr unordered, align 4
  ret void
}

define void @store_double(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, ptr %fptr unordered, align 8
  ret void
}

define half @load_half(ptr %fptr) {
; X86-SSE1-LABEL: load_half:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movzwl (%eax), %eax
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_half:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl (%eax), %eax
; X86-SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_half:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl (%eax), %eax
; X86-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_half:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movzwl (%eax), %eax
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_half:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzwl (%rdi), %eax
; X64-SSE-NEXT:    pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_half:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movzwl (%rdi), %eax
; X64-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic half, ptr %fptr unordered, align 2
  ret half %v
}

define float @load_float(ptr %fptr) {
; X86-SSE1-LABEL: load_float:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, ptr %fptr unordered, align 4
  ret float %v
}

define double @load_double(ptr %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, ptr %fptr unordered, align 8
  ret double %v
}

define half @exchange_half(ptr %fptr, half %x) {
; X86-SSE1-LABEL: exchange_half:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xchgw %ax, (%ecx)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: exchange_half:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    xchgw %cx, (%eax)
; X86-SSE2-NEXT:    pinsrw $0, %ecx, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: exchange_half:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    xchgw %cx, (%eax)
; X86-AVX-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: exchange_half:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    xchgw %ax, (%ecx)
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: exchange_half:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pextrw $0, %xmm0, %eax
; X64-SSE-NEXT:    xchgw %ax, (%rdi)
; X64-SSE-NEXT:    pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: exchange_half:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT:    xchgw %ax, (%rdi)
; X64-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %v = atomicrmw xchg ptr %fptr, half %x monotonic, align 2
  ret half %v
}

define float @exchange_float(ptr %fptr, float %x) {
; X86-SSE1-LABEL: exchange_float:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    xchgl %ecx, (%eax)
; X86-SSE1-NEXT:    movl %ecx, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: exchange_float:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    xchgl %ecx, (%eax)
; X86-SSE2-NEXT:    movd %ecx, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: exchange_float:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    xchgl %ecx, (%eax)
; X86-AVX-NEXT:    vmovd %ecx, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: exchange_float:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    xchgl %ecx, (%eax)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: exchange_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %xmm0, %eax
; X64-SSE-NEXT:    xchgl %eax, (%rdi)
; X64-SSE-NEXT:    movd %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: exchange_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xchgl %eax, (%rdi)
; X64-AVX-NEXT:    vmovd %eax, %xmm0
; X64-AVX-NEXT:    retq
  %v = atomicrmw xchg ptr %fptr, float %x monotonic, align 4
  ret float %v
}

define double @exchange_double(ptr %fptr, double %x) {
; X86-SSE1-LABEL: exchange_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebx
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    pushl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 24
; X86-SSE1-NEXT:    .cfi_offset %esi, -12
; X86-SSE1-NEXT:    .cfi_offset %ebx, -8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl (%esi), %eax
; X86-SSE1-NEXT:    movl 4(%esi), %edx
; X86-SSE1-NEXT:    .p2align 4
; X86-SSE1-NEXT:  .LBB8_1: # %atomicrmw.start
; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SSE1-NEXT:    lock cmpxchg8b (%esi)
; X86-SSE1-NEXT:    jne .LBB8_1
; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE1-NEXT:    popl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    popl %ebx
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: exchange_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 24
; X86-SSE2-NEXT:    .cfi_offset %esi, -12
; X86-SSE2-NEXT:    .cfi_offset %ebx, -8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl (%esi), %eax
; X86-SSE2-NEXT:    movl 4(%esi), %edx
; X86-SSE2-NEXT:    .p2align 4
; X86-SSE2-NEXT:  .LBB8_1: # %atomicrmw.start
; X86-SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-SSE2-NEXT:    lock cmpxchg8b (%esi)
; X86-SSE2-NEXT:    jne .LBB8_1
; X86-SSE2-NEXT:  # %bb.2: # %atomicrmw.end
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    movd %edx, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    movq %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: exchange_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebx
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    pushl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 12
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 24
; X86-AVX-NEXT:    .cfi_offset %esi, -12
; X86-AVX-NEXT:    .cfi_offset %ebx, -8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movl (%esi), %eax
; X86-AVX-NEXT:    movl 4(%esi), %edx
; X86-AVX-NEXT:    .p2align 4
; X86-AVX-NEXT:  .LBB8_1: # %atomicrmw.start
; X86-AVX-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-AVX-NEXT:    lock cmpxchg8b (%esi)
; X86-AVX-NEXT:    jne .LBB8_1
; X86-AVX-NEXT:  # %bb.2: # %atomicrmw.end
; X86-AVX-NEXT:    vmovd %eax, %xmm0
; X86-AVX-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovq %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 12
; X86-AVX-NEXT:    popl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    popl %ebx
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: exchange_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebx
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
; X86-NOSSE-NEXT:    .cfi_offset %ebx, -8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl (%esi), %eax
; X86-NOSSE-NEXT:    movl 4(%esi), %edx
; X86-NOSSE-NEXT:    .p2align 4
; X86-NOSSE-NEXT:  .LBB8_1: # %atomicrmw.start
; X86-NOSSE-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT:    lock cmpxchg8b (%esi)
; X86-NOSSE-NEXT:    jne .LBB8_1
; X86-NOSSE-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    popl %ebx
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: exchange_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %rax
; X64-SSE-NEXT:    xchgq %rax, (%rdi)
; X64-SSE-NEXT:    movq %rax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: exchange_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovq %xmm0, %rax
; X64-AVX-NEXT:    xchgq %rax, (%rdi)
; X64-AVX-NEXT:    vmovq %rax, %xmm0
; X64-AVX-NEXT:    retq
  %v = atomicrmw xchg ptr %fptr, double %x monotonic, align 8
  ret double %v
}


; Check the seq_cst lowering since that's the
; interesting one from an ordering perspective on x86.

define void @store_float_seq_cst(ptr %fptr, float %v) {
; X86-LABEL: store_float_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xchgl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %xmm0, %eax
; X64-SSE-NEXT:    xchgl %eax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xchgl %eax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, ptr %fptr seq_cst, align 4
  ret void
}

define void @store_double_seq_cst(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    lock orl $0, (%esp)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    lock orl $0, (%esp)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    lock orl $0, (%esp)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    lock orl $0, (%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %rax
; X64-SSE-NEXT:    xchgq %rax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovq %xmm0, %rax
; X64-AVX-NEXT:    xchgq %rax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, ptr %fptr seq_cst, align 8
  ret void
}

define float @load_float_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_float_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, ptr %fptr seq_cst, align 4
  ret float %v
}

define double @load_double_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, ptr %fptr seq_cst, align 8
  ret double %v
}

define void @store_bfloat(ptr %fptr, bfloat %v) {
; X86-SSE1-LABEL: store_bfloat:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    .cfi_offset %esi, -8
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE1-NEXT:    calll __truncsfbf2
; X86-SSE1-NEXT:    movw %ax, (%esi)
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    popl %esi
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_bfloat:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movw %cx, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_bfloat:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movw %cx, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_bfloat:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    .cfi_offset %esi, -8
; X86-NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstps (%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    calll __truncsfbf2
; X86-NOSSE-NEXT:    movw %ax, (%esi)
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_bfloat:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pextrw $0, %xmm0, %eax
; X64-SSE-NEXT:    movw %ax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_bfloat:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT:    movw %ax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic bfloat %v, ptr %fptr unordered, align 2
  ret void
}

define bfloat @load_bfloat(ptr %fptr) {
; X86-SSE1-LABEL: load_bfloat:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movzwl (%eax), %eax
; X86-SSE1-NEXT:    shll $16, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_bfloat:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl (%eax), %eax
; X86-SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_bfloat:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl (%eax), %eax
; X86-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_bfloat:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movzwl (%eax), %eax
; X86-NOSSE-NEXT:    shll $16, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_bfloat:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzwl (%rdi), %eax
; X64-SSE-NEXT:    pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_bfloat:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movzwl (%rdi), %eax
; X64-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic bfloat, ptr %fptr unordered, align 2
  ret bfloat %v
}
