; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX

; ----- FADD -----

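; Floating-point add of a register value to a 32-bit memory location.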
define dso_local void @fadd_32r(ptr %loc, float %val) nounwind {
; X86-NOSSE-LABEL: fadd_32r:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    fadds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%eax)
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32r:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%eax)
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32r:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss (%eax), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32r:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss (%eax), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32r:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    addss (%rdi), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32r:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %1 = load atomic i32, ptr %loc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %add = fadd float %2, %val
  %3 = bitcast float %add to i32
  store atomic i32 %3, ptr %loc release, align 4
  ret void
}

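; Floating-point add of a register value to a 64-bit memory location.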
define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fadd_64r:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64r:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 8(%ebp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64r:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 8(%ebp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64r:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 8(%ebp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64r:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    addsd (%rdi), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64r:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %1 = load atomic i64, ptr %loc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %add = fadd double %2, %val
  %3 = bitcast double %add to i64
  store atomic i64 %3, ptr %loc release, align 8
  ret void
}

@glob32 = dso_local global float 0.000000e+00, align 4
@glob64 = dso_local global double 0.000000e+00, align 8

; Floating-point add to a global using an immediate.
define dso_local void @fadd_32g() nounwind {
; X86-NOSSE-LABEL: fadd_32g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl glob32, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, glob32
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl glob32, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, glob32
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT:    addss glob32, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, glob32
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT:    vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, glob32
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT:    addss glob32(%rip), %xmm0
; X64-SSE-NEXT:    movss %xmm0, glob32(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr @glob32 monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr @glob32 monotonic, align 4
  ret void
}

define dso_local void @fadd_64g() nounwind {
; X86-NOSSE-LABEL: fadd_64g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll glob64
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll glob64
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, glob64
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, glob64
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, glob64
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT:    addsd glob64(%rip), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr @glob64 monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr @glob64 monotonic, align 8
  ret void
}

; Floating-point add to a hard-coded immediate location using an immediate.
define dso_local void @fadd_32imm() nounwind {
; X86-NOSSE-LABEL: fadd_32imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl -559038737, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, -559038737
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl -559038737, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, -559038737
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT:    addss -559038737, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, -559038737
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT:    vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, -559038737
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT:    addss (%rax), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  ret void
}

define dso_local void @fadd_64imm() nounwind {
; X86-NOSSE-LABEL: fadd_64imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll -559038737
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll -559038737
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, -559038737
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, -559038737
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT:    addsd (%rax), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  ret void
}

; Floating-point add to a stack location.
define dso_local void @fadd_32stack() nounwind {
; X86-NOSSE-LABEL: fadd_32stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT:    addss (%esp), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT:    vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT:    addss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i32, align 4
  %load = load atomic i32, ptr %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fadd = fadd float 1.000000e+00, %bc0
  %bc1 = bitcast float %fadd to i32
  store atomic i32 %bc1, ptr %ptr release, align 4
  ret void
}

define dso_local void @fadd_64stack() nounwind {
; X86-NOSSE-LABEL: fadd_64stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $24, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $16, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT:    addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i64, align 8
  %load = load atomic i64, ptr %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fadd = fadd double 1.000000e+00, %bc0
  %bc1 = bitcast double %fadd to i64
  store atomic i64 %bc1, ptr %ptr release, align 8
  ret void
}

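; Floating-point add of a register value to an indexed array element.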
define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-LABEL: fadd_array:
; X86-NOSSE:       # %bb.0: # %bb
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %edx, (%esp)
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_array:
; X86-SSE1:       # %bb.0: # %bb
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 20(%ebp), %eax
; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_array:
; X86-SSE2:       # %bb.0: # %bb
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 20(%ebp), %eax
; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_array:
; X86-AVX:       # %bb.0: # %bb
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 20(%ebp), %eax
; X86-AVX-NEXT:    movl 8(%ebp), %ecx
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_array:
; X64-SSE:       # %bb.0: # %bb
; X64-SSE-NEXT:    addsd (%rdi,%rsi,8), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi,%rsi,8)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_array:
; X64-AVX:       # %bb.0: # %bb
; X64-AVX-NEXT:    vaddsd (%rdi,%rsi,8), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi,%rsi,8)
; X64-AVX-NEXT:    retq
bb:
  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2
  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
  %tmp7 = bitcast i64 %tmp6 to double
  %tmp8 = fadd double %tmp7, %arg1
  %tmp9 = bitcast double %tmp8 to i64
  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
  ret void
}

; ----- FSUB -----

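; Floating-point sub of a register value from a 32-bit memory location.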
define dso_local void @fsub_32r(ptr %loc, float %val) nounwind {
; X86-NOSSE-LABEL: fsub_32r:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    fsubs {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%eax)
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_32r:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    subss {{[0-9]+}}(%esp), %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%eax)
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_32r:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    subss {{[0-9]+}}(%esp), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_32r:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_32r:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    subss %xmm0, %xmm1
; X64-SSE-NEXT:    movss %xmm1, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_32r:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %1 = load atomic i32, ptr %loc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %sub = fsub float %2, %val
  %3 = bitcast float %sub to i32
  store atomic i32 %3, ptr %loc release, align 4
  ret void
}

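; Floating-point sub of a register value from a 64-bit memory location.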
define dso_local void @fsub_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fsub_64r:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fsubl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_64r:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 8(%ebp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    fsubl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_64r:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 8(%ebp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    subsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_64r:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 8(%ebp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vsubsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_64r:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; X64-SSE-NEXT:    subsd %xmm0, %xmm1
; X64-SSE-NEXT:    movsd %xmm1, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_64r:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X64-AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %1 = load atomic i64, ptr %loc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %sub = fsub double %2, %val
  %3 = bitcast double %sub to i64
  store atomic i64 %3, ptr %loc release, align 8
  ret void
}

; Floating-point sub to a global using an immediate.
define dso_local void @fsub_32g() nounwind {
; X86-NOSSE-LABEL: fsub_32g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl glob32, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fchs
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, glob32
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_32g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl glob32, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, glob32
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_32g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT:    addss glob32, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, glob32
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_32g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT:    vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, glob32
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_32g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT:    addss glob32(%rip), %xmm0
; X64-SSE-NEXT:    movss %xmm0, glob32(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_32g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr @glob32 monotonic, align 4
  %f = bitcast i32 %i to float
  %sub = fsub float %f, 1.000000e+00
  %s = bitcast float %sub to i32
  store atomic i32 %s, ptr @glob32 monotonic, align 4
  ret void
}

define dso_local void @fsub_64g() nounwind {
; X86-NOSSE-LABEL: fsub_64g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll glob64
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fchs
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll glob64
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_64g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    fchs
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, glob64
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_64g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, glob64
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_64g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, glob64
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_64g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-SSE-NEXT:    addsd glob64(%rip), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_64g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr @glob64 monotonic, align 8
  %f = bitcast i64 %i to double
  %sub = fsub double %f, 1.000000e+00
  %s = bitcast double %sub to i64
  store atomic i64 %s, ptr @glob64 monotonic, align 8
  ret void
}

; Floating-point sub to a hard-coded immediate location using an immediate.
define dso_local void @fsub_32imm() nounwind {
; X86-NOSSE-LABEL: fsub_32imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl -559038737, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fchs
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, -559038737
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_32imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl -559038737, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, -559038737
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_32imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT:    addss -559038737, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, -559038737
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_32imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT:    vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, -559038737
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_32imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT:    addss (%rax), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_32imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  %f = bitcast i32 %i to float
  %sub = fsub float %f, 1.000000e+00
  %s = bitcast float %sub to i32
  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  ret void
}

define dso_local void @fsub_64imm() nounwind {
; X86-NOSSE-LABEL: fsub_64imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll -559038737
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fchs
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll -559038737
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_64imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    fchs
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, -559038737
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_64imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, -559038737
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_64imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_64imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-SSE-NEXT:    addsd (%rax), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_64imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [-1.0E+0,0.0E+0]
; X64-AVX-NEXT:    vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  %f = bitcast i64 %i to double
  %sub = fsub double %f, 1.000000e+00
  %s = bitcast double %sub to i64
  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  ret void
}

; Floating-point sub to a stack location.
define dso_local void @fsub_32stack() nounwind {
; X86-NOSSE-LABEL: fsub_32stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fsubs (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_32stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE1-NEXT:    subss (%esp), %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_32stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT:    subss (%esp), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_32stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT:    vsubss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_32stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT:    subss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_32stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT:    vsubss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i32, align 4
  %load = load atomic i32, ptr %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fsub = fsub float 1.000000e+00, %bc0
  %bc1 = bitcast float %fsub to i32
  store atomic i32 %bc1, ptr %ptr release, align 4
  ret void
}

define dso_local void @fsub_64stack() nounwind {
; X86-NOSSE-LABEL: fsub_64stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fsubl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fsub_64stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $24, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    fsubl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fsub_64stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-SSE2-NEXT:    subsd %xmm0, %xmm1
; X86-SSE2-NEXT:    movsd %xmm1, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fsub_64stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $16, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fsub_64stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT:    subsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fsub_64stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT:    vsubsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i64, align 8
  %load = load atomic i64, ptr %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fsub = fsub double 1.000000e+00, %bc0
  %bc1 = bitcast double %fsub to i64
  store atomic i64 %bc1, ptr %ptr release, align 8
  ret void
}

1452define dso_local void @fsub_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
1453; X86-NOSSE-LABEL: fsub_array:
1454; X86-NOSSE:       # %bb.0: # %bb
1455; X86-NOSSE-NEXT:    pushl %ebp
1456; X86-NOSSE-NEXT:    movl %esp, %ebp
1457; X86-NOSSE-NEXT:    pushl %esi
1458; X86-NOSSE-NEXT:    andl $-8, %esp
1459; X86-NOSSE-NEXT:    subl $40, %esp
1460; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
1461; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
1462; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
1463; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
1464; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
1465; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
1466; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
1467; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1468; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
1469; X86-NOSSE-NEXT:    fsubl 12(%ebp)
1470; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
1471; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
1472; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
1473; X86-NOSSE-NEXT:    movl %edx, (%esp)
1474; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
1475; X86-NOSSE-NEXT:    fildll (%esp)
1476; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
1477; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
1478; X86-NOSSE-NEXT:    popl %esi
1479; X86-NOSSE-NEXT:    popl %ebp
1480; X86-NOSSE-NEXT:    retl
1481;
1482; X86-SSE1-LABEL: fsub_array:
1483; X86-SSE1:       # %bb.0: # %bb
1484; X86-SSE1-NEXT:    pushl %ebp
1485; X86-SSE1-NEXT:    movl %esp, %ebp
1486; X86-SSE1-NEXT:    andl $-8, %esp
1487; X86-SSE1-NEXT:    subl $16, %esp
1488; X86-SSE1-NEXT:    movl 20(%ebp), %eax
1489; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
1490; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
1491; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
1492; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
1493; X86-SSE1-NEXT:    movss %xmm1, (%esp)
1494; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
1495; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
1496; X86-SSE1-NEXT:    fldl (%esp)
1497; X86-SSE1-NEXT:    fsubl 12(%ebp)
1498; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
1499; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1500; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
1501; X86-SSE1-NEXT:    movl %ebp, %esp
1502; X86-SSE1-NEXT:    popl %ebp
1503; X86-SSE1-NEXT:    retl
1504;
1505; X86-SSE2-LABEL: fsub_array:
1506; X86-SSE2:       # %bb.0: # %bb
1507; X86-SSE2-NEXT:    pushl %ebp
1508; X86-SSE2-NEXT:    movl %esp, %ebp
1509; X86-SSE2-NEXT:    andl $-8, %esp
1510; X86-SSE2-NEXT:    subl $8, %esp
1511; X86-SSE2-NEXT:    movl 20(%ebp), %eax
1512; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
1513; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1514; X86-SSE2-NEXT:    subsd 12(%ebp), %xmm0
1515; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
1516; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1517; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
1518; X86-SSE2-NEXT:    movl %ebp, %esp
1519; X86-SSE2-NEXT:    popl %ebp
1520; X86-SSE2-NEXT:    retl
1521;
1522; X86-AVX-LABEL: fsub_array:
1523; X86-AVX:       # %bb.0: # %bb
1524; X86-AVX-NEXT:    pushl %ebp
1525; X86-AVX-NEXT:    movl %esp, %ebp
1526; X86-AVX-NEXT:    andl $-8, %esp
1527; X86-AVX-NEXT:    subl $8, %esp
1528; X86-AVX-NEXT:    movl 20(%ebp), %eax
1529; X86-AVX-NEXT:    movl 8(%ebp), %ecx
1530; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1531; X86-AVX-NEXT:    vsubsd 12(%ebp), %xmm0, %xmm0
1532; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
1533; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1534; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
1535; X86-AVX-NEXT:    movl %ebp, %esp
1536; X86-AVX-NEXT:    popl %ebp
1537; X86-AVX-NEXT:    retl
1538;
1539; X64-SSE-LABEL: fsub_array:
1540; X64-SSE:       # %bb.0: # %bb
1541; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
1542; X64-SSE-NEXT:    subsd %xmm0, %xmm1
1543; X64-SSE-NEXT:    movsd %xmm1, (%rdi,%rsi,8)
1544; X64-SSE-NEXT:    retq
1545;
1546; X64-AVX-LABEL: fsub_array:
1547; X64-AVX:       # %bb.0: # %bb
1548; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
1549; X64-AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
1550; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi,%rsi,8)
1551; X64-AVX-NEXT:    retq
1552bb:
1553  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2
1554  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
1555  %tmp7 = bitcast i64 %tmp6 to double
1556  %tmp8 = fsub double %tmp7, %arg1
1557  %tmp9 = bitcast double %tmp8 to i64
1558  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
1559  ret void
1560}
1561
1562; ----- FMUL -----
1563
1564define dso_local void @fmul_32r(ptr %loc, float %val) nounwind {
1565; X86-NOSSE-LABEL: fmul_32r:
1566; X86-NOSSE:       # %bb.0:
1567; X86-NOSSE-NEXT:    subl $8, %esp
1568; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1569; X86-NOSSE-NEXT:    movl (%eax), %ecx
1570; X86-NOSSE-NEXT:    movl %ecx, (%esp)
1571; X86-NOSSE-NEXT:    flds (%esp)
1572; X86-NOSSE-NEXT:    fmuls {{[0-9]+}}(%esp)
1573; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
1574; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1575; X86-NOSSE-NEXT:    movl %ecx, (%eax)
1576; X86-NOSSE-NEXT:    addl $8, %esp
1577; X86-NOSSE-NEXT:    retl
1578;
1579; X86-SSE1-LABEL: fmul_32r:
1580; X86-SSE1:       # %bb.0:
1581; X86-SSE1-NEXT:    subl $8, %esp
1582; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1583; X86-SSE1-NEXT:    movl (%eax), %ecx
1584; X86-SSE1-NEXT:    movl %ecx, (%esp)
1585; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1586; X86-SSE1-NEXT:    mulss {{[0-9]+}}(%esp), %xmm0
1587; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
1588; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1589; X86-SSE1-NEXT:    movl %ecx, (%eax)
1590; X86-SSE1-NEXT:    addl $8, %esp
1591; X86-SSE1-NEXT:    retl
1592;
1593; X86-SSE2-LABEL: fmul_32r:
1594; X86-SSE2:       # %bb.0:
1595; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1596; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1597; X86-SSE2-NEXT:    mulss (%eax), %xmm0
1598; X86-SSE2-NEXT:    movss %xmm0, (%eax)
1599; X86-SSE2-NEXT:    retl
1600;
1601; X86-AVX-LABEL: fmul_32r:
1602; X86-AVX:       # %bb.0:
1603; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
1604; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1605; X86-AVX-NEXT:    vmulss (%eax), %xmm0, %xmm0
1606; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
1607; X86-AVX-NEXT:    retl
1608;
1609; X64-SSE-LABEL: fmul_32r:
1610; X64-SSE:       # %bb.0:
1611; X64-SSE-NEXT:    mulss (%rdi), %xmm0
1612; X64-SSE-NEXT:    movss %xmm0, (%rdi)
1613; X64-SSE-NEXT:    retq
1614;
1615; X64-AVX-LABEL: fmul_32r:
1616; X64-AVX:       # %bb.0:
1617; X64-AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
1618; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
1619; X64-AVX-NEXT:    retq
1620  %1 = load atomic i32, ptr %loc seq_cst, align 4
1621  %2 = bitcast i32 %1 to float
1622  %mul = fmul float %2, %val
1623  %3 = bitcast float %mul to i32
1624  store atomic i32 %3, ptr %loc release, align 4
1625  ret void
1626}
1627
1628define dso_local void @fmul_64r(ptr %loc, double %val) nounwind {
1629; X86-NOSSE-LABEL: fmul_64r:
1630; X86-NOSSE:       # %bb.0:
1631; X86-NOSSE-NEXT:    pushl %ebp
1632; X86-NOSSE-NEXT:    movl %esp, %ebp
1633; X86-NOSSE-NEXT:    andl $-8, %esp
1634; X86-NOSSE-NEXT:    subl $32, %esp
1635; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
1636; X86-NOSSE-NEXT:    fildll (%eax)
1637; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
1638; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1639; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
1640; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1641; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
1642; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
1643; X86-NOSSE-NEXT:    fmull 12(%ebp)
1644; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
1645; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1646; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
1647; X86-NOSSE-NEXT:    movl %ecx, (%esp)
1648; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1649; X86-NOSSE-NEXT:    fildll (%esp)
1650; X86-NOSSE-NEXT:    fistpll (%eax)
1651; X86-NOSSE-NEXT:    movl %ebp, %esp
1652; X86-NOSSE-NEXT:    popl %ebp
1653; X86-NOSSE-NEXT:    retl
1654;
1655; X86-SSE1-LABEL: fmul_64r:
1656; X86-SSE1:       # %bb.0:
1657; X86-SSE1-NEXT:    pushl %ebp
1658; X86-SSE1-NEXT:    movl %esp, %ebp
1659; X86-SSE1-NEXT:    andl $-8, %esp
1660; X86-SSE1-NEXT:    subl $16, %esp
1661; X86-SSE1-NEXT:    movl 8(%ebp), %eax
1662; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
1663; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
1664; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
1665; X86-SSE1-NEXT:    movss %xmm1, (%esp)
1666; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
1667; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
1668; X86-SSE1-NEXT:    fldl (%esp)
1669; X86-SSE1-NEXT:    fmull 12(%ebp)
1670; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
1671; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1672; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
1673; X86-SSE1-NEXT:    movl %ebp, %esp
1674; X86-SSE1-NEXT:    popl %ebp
1675; X86-SSE1-NEXT:    retl
1676;
1677; X86-SSE2-LABEL: fmul_64r:
1678; X86-SSE2:       # %bb.0:
1679; X86-SSE2-NEXT:    pushl %ebp
1680; X86-SSE2-NEXT:    movl %esp, %ebp
1681; X86-SSE2-NEXT:    andl $-8, %esp
1682; X86-SSE2-NEXT:    subl $8, %esp
1683; X86-SSE2-NEXT:    movl 8(%ebp), %eax
1684; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1685; X86-SSE2-NEXT:    mulsd 12(%ebp), %xmm0
1686; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
1687; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1688; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
1689; X86-SSE2-NEXT:    movl %ebp, %esp
1690; X86-SSE2-NEXT:    popl %ebp
1691; X86-SSE2-NEXT:    retl
1692;
1693; X86-AVX-LABEL: fmul_64r:
1694; X86-AVX:       # %bb.0:
1695; X86-AVX-NEXT:    pushl %ebp
1696; X86-AVX-NEXT:    movl %esp, %ebp
1697; X86-AVX-NEXT:    andl $-8, %esp
1698; X86-AVX-NEXT:    subl $8, %esp
1699; X86-AVX-NEXT:    movl 8(%ebp), %eax
1700; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1701; X86-AVX-NEXT:    vmulsd 12(%ebp), %xmm0, %xmm0
1702; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
1703; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1704; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
1705; X86-AVX-NEXT:    movl %ebp, %esp
1706; X86-AVX-NEXT:    popl %ebp
1707; X86-AVX-NEXT:    retl
1708;
1709; X64-SSE-LABEL: fmul_64r:
1710; X64-SSE:       # %bb.0:
1711; X64-SSE-NEXT:    mulsd (%rdi), %xmm0
1712; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
1713; X64-SSE-NEXT:    retq
1714;
1715; X64-AVX-LABEL: fmul_64r:
1716; X64-AVX:       # %bb.0:
1717; X64-AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
1718; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
1719; X64-AVX-NEXT:    retq
1720  %1 = load atomic i64, ptr %loc seq_cst, align 8
1721  %2 = bitcast i64 %1 to double
1722  %mul = fmul double %2, %val
1723  %3 = bitcast double %mul to i64
1724  store atomic i64 %3, ptr %loc release, align 8
1725  ret void
1726}
1727
1728; Floating-point mul to a global using an immediate.
1729define dso_local void @fmul_32g() nounwind {
1730; X86-NOSSE-LABEL: fmul_32g:
1731; X86-NOSSE:       # %bb.0:
1732; X86-NOSSE-NEXT:    subl $8, %esp
1733; X86-NOSSE-NEXT:    movl glob32, %eax
1734; X86-NOSSE-NEXT:    movl %eax, (%esp)
1735; X86-NOSSE-NEXT:    flds (%esp)
1736; X86-NOSSE-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
1737; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
1738; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1739; X86-NOSSE-NEXT:    movl %eax, glob32
1740; X86-NOSSE-NEXT:    addl $8, %esp
1741; X86-NOSSE-NEXT:    retl
1742;
1743; X86-SSE1-LABEL: fmul_32g:
1744; X86-SSE1:       # %bb.0:
1745; X86-SSE1-NEXT:    subl $8, %esp
1746; X86-SSE1-NEXT:    movl glob32, %eax
1747; X86-SSE1-NEXT:    movl %eax, (%esp)
1748; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1749; X86-SSE1-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1750; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
1751; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1752; X86-SSE1-NEXT:    movl %eax, glob32
1753; X86-SSE1-NEXT:    addl $8, %esp
1754; X86-SSE1-NEXT:    retl
1755;
1756; X86-SSE2-LABEL: fmul_32g:
1757; X86-SSE2:       # %bb.0:
1758; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1759; X86-SSE2-NEXT:    mulss glob32, %xmm0
1760; X86-SSE2-NEXT:    movss %xmm0, glob32
1761; X86-SSE2-NEXT:    retl
1762;
1763; X86-AVX-LABEL: fmul_32g:
1764; X86-AVX:       # %bb.0:
1765; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1766; X86-AVX-NEXT:    vmulss glob32, %xmm0, %xmm0
1767; X86-AVX-NEXT:    vmovss %xmm0, glob32
1768; X86-AVX-NEXT:    retl
1769;
1770; X64-SSE-LABEL: fmul_32g:
1771; X64-SSE:       # %bb.0:
1772; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1773; X64-SSE-NEXT:    mulss glob32(%rip), %xmm0
1774; X64-SSE-NEXT:    movss %xmm0, glob32(%rip)
1775; X64-SSE-NEXT:    retq
1776;
1777; X64-AVX-LABEL: fmul_32g:
1778; X64-AVX:       # %bb.0:
1779; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1780; X64-AVX-NEXT:    vmulss glob32(%rip), %xmm0, %xmm0
1781; X64-AVX-NEXT:    vmovss %xmm0, glob32(%rip)
1782; X64-AVX-NEXT:    retq
1783  %i = load atomic i32, ptr @glob32 monotonic, align 4
1784  %f = bitcast i32 %i to float
1785  %mul = fmul float %f, 0x400921FA00000000
1786  %s = bitcast float %mul to i32
1787  store atomic i32 %s, ptr @glob32 monotonic, align 4
1788  ret void
1789}
1790
1791define dso_local void @fmul_64g() nounwind {
1792; X86-NOSSE-LABEL: fmul_64g:
1793; X86-NOSSE:       # %bb.0:
1794; X86-NOSSE-NEXT:    pushl %ebp
1795; X86-NOSSE-NEXT:    movl %esp, %ebp
1796; X86-NOSSE-NEXT:    andl $-8, %esp
1797; X86-NOSSE-NEXT:    subl $32, %esp
1798; X86-NOSSE-NEXT:    fildll glob64
1799; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
1800; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1801; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1802; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
1803; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
1804; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
1805; X86-NOSSE-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
1806; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
1807; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1808; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1809; X86-NOSSE-NEXT:    movl %eax, (%esp)
1810; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
1811; X86-NOSSE-NEXT:    fildll (%esp)
1812; X86-NOSSE-NEXT:    fistpll glob64
1813; X86-NOSSE-NEXT:    movl %ebp, %esp
1814; X86-NOSSE-NEXT:    popl %ebp
1815; X86-NOSSE-NEXT:    retl
1816;
1817; X86-SSE1-LABEL: fmul_64g:
1818; X86-SSE1:       # %bb.0:
1819; X86-SSE1-NEXT:    pushl %ebp
1820; X86-SSE1-NEXT:    movl %esp, %ebp
1821; X86-SSE1-NEXT:    andl $-8, %esp
1822; X86-SSE1-NEXT:    subl $16, %esp
1823; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
1824; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
1825; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
1826; X86-SSE1-NEXT:    movss %xmm1, (%esp)
1827; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
1828; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
1829; X86-SSE1-NEXT:    fldl (%esp)
1830; X86-SSE1-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
1831; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
1832; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1833; X86-SSE1-NEXT:    movlps %xmm0, glob64
1834; X86-SSE1-NEXT:    movl %ebp, %esp
1835; X86-SSE1-NEXT:    popl %ebp
1836; X86-SSE1-NEXT:    retl
1837;
1838; X86-SSE2-LABEL: fmul_64g:
1839; X86-SSE2:       # %bb.0:
1840; X86-SSE2-NEXT:    pushl %ebp
1841; X86-SSE2-NEXT:    movl %esp, %ebp
1842; X86-SSE2-NEXT:    andl $-8, %esp
1843; X86-SSE2-NEXT:    subl $8, %esp
1844; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1845; X86-SSE2-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1846; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
1847; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
1848; X86-SSE2-NEXT:    movlps %xmm0, glob64
1849; X86-SSE2-NEXT:    movl %ebp, %esp
1850; X86-SSE2-NEXT:    popl %ebp
1851; X86-SSE2-NEXT:    retl
1852;
1853; X86-AVX-LABEL: fmul_64g:
1854; X86-AVX:       # %bb.0:
1855; X86-AVX-NEXT:    pushl %ebp
1856; X86-AVX-NEXT:    movl %esp, %ebp
1857; X86-AVX-NEXT:    andl $-8, %esp
1858; X86-AVX-NEXT:    subl $8, %esp
1859; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1860; X86-AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
1861; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
1862; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1863; X86-AVX-NEXT:    vmovlps %xmm0, glob64
1864; X86-AVX-NEXT:    movl %ebp, %esp
1865; X86-AVX-NEXT:    popl %ebp
1866; X86-AVX-NEXT:    retl
1867;
1868; X64-SSE-LABEL: fmul_64g:
1869; X64-SSE:       # %bb.0:
1870; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0]
1871; X64-SSE-NEXT:    mulsd glob64(%rip), %xmm0
1872; X64-SSE-NEXT:    movsd %xmm0, glob64(%rip)
1873; X64-SSE-NEXT:    retq
1874;
1875; X64-AVX-LABEL: fmul_64g:
1876; X64-AVX:       # %bb.0:
1877; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0]
1878; X64-AVX-NEXT:    vmulsd glob64(%rip), %xmm0, %xmm0
1879; X64-AVX-NEXT:    vmovsd %xmm0, glob64(%rip)
1880; X64-AVX-NEXT:    retq
1881  %i = load atomic i64, ptr @glob64 monotonic, align 8
1882  %f = bitcast i64 %i to double
1883  %mul = fmul double %f, 0x400921FA00000000
1884  %s = bitcast double %mul to i64
1885  store atomic i64 %s, ptr @glob64 monotonic, align 8
1886  ret void
1887}
1888
1889; Floating-point mul to a hard-coded immediate address using an immediate operand.
1890define dso_local void @fmul_32imm() nounwind {
1891; X86-NOSSE-LABEL: fmul_32imm:
1892; X86-NOSSE:       # %bb.0:
1893; X86-NOSSE-NEXT:    subl $8, %esp
1894; X86-NOSSE-NEXT:    movl -559038737, %eax
1895; X86-NOSSE-NEXT:    movl %eax, (%esp)
1896; X86-NOSSE-NEXT:    flds (%esp)
1897; X86-NOSSE-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
1898; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
1899; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1900; X86-NOSSE-NEXT:    movl %eax, -559038737
1901; X86-NOSSE-NEXT:    addl $8, %esp
1902; X86-NOSSE-NEXT:    retl
1903;
1904; X86-SSE1-LABEL: fmul_32imm:
1905; X86-SSE1:       # %bb.0:
1906; X86-SSE1-NEXT:    subl $8, %esp
1907; X86-SSE1-NEXT:    movl -559038737, %eax
1908; X86-SSE1-NEXT:    movl %eax, (%esp)
1909; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1910; X86-SSE1-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1911; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
1912; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
1913; X86-SSE1-NEXT:    movl %eax, -559038737
1914; X86-SSE1-NEXT:    addl $8, %esp
1915; X86-SSE1-NEXT:    retl
1916;
1917; X86-SSE2-LABEL: fmul_32imm:
1918; X86-SSE2:       # %bb.0:
1919; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1920; X86-SSE2-NEXT:    mulss -559038737, %xmm0
1921; X86-SSE2-NEXT:    movss %xmm0, -559038737
1922; X86-SSE2-NEXT:    retl
1923;
1924; X86-AVX-LABEL: fmul_32imm:
1925; X86-AVX:       # %bb.0:
1926; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1927; X86-AVX-NEXT:    vmulss -559038737, %xmm0, %xmm0
1928; X86-AVX-NEXT:    vmovss %xmm0, -559038737
1929; X86-AVX-NEXT:    retl
1930;
1931; X64-SSE-LABEL: fmul_32imm:
1932; X64-SSE:       # %bb.0:
1933; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
1934; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1935; X64-SSE-NEXT:    mulss (%rax), %xmm0
1936; X64-SSE-NEXT:    movss %xmm0, (%rax)
1937; X64-SSE-NEXT:    retq
1938;
1939; X64-AVX-LABEL: fmul_32imm:
1940; X64-AVX:       # %bb.0:
1941; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
1942; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
1943; X64-AVX-NEXT:    vmulss (%rax), %xmm0, %xmm0
1944; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
1945; X64-AVX-NEXT:    retq
1946  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
1947  %f = bitcast i32 %i to float
1948  %mul = fmul float %f, 0x400921FA00000000
1949  %s = bitcast float %mul to i32
1950  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
1951  ret void
1952}
1953
1954define dso_local void @fmul_64imm() nounwind {
1955; X86-NOSSE-LABEL: fmul_64imm:
1956; X86-NOSSE:       # %bb.0:
1957; X86-NOSSE-NEXT:    pushl %ebp
1958; X86-NOSSE-NEXT:    movl %esp, %ebp
1959; X86-NOSSE-NEXT:    andl $-8, %esp
1960; X86-NOSSE-NEXT:    subl $32, %esp
1961; X86-NOSSE-NEXT:    fildll -559038737
1962; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
1963; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1964; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1965; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
1966; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
1967; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
1968; X86-NOSSE-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
1969; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
1970; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
1971; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1972; X86-NOSSE-NEXT:    movl %eax, (%esp)
1973; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
1974; X86-NOSSE-NEXT:    fildll (%esp)
1975; X86-NOSSE-NEXT:    fistpll -559038737
1976; X86-NOSSE-NEXT:    movl %ebp, %esp
1977; X86-NOSSE-NEXT:    popl %ebp
1978; X86-NOSSE-NEXT:    retl
1979;
1980; X86-SSE1-LABEL: fmul_64imm:
1981; X86-SSE1:       # %bb.0:
1982; X86-SSE1-NEXT:    pushl %ebp
1983; X86-SSE1-NEXT:    movl %esp, %ebp
1984; X86-SSE1-NEXT:    andl $-8, %esp
1985; X86-SSE1-NEXT:    subl $16, %esp
1986; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
1987; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
1988; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
1989; X86-SSE1-NEXT:    movss %xmm1, (%esp)
1990; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
1991; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
1992; X86-SSE1-NEXT:    fldl (%esp)
1993; X86-SSE1-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
1994; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
1995; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
1996; X86-SSE1-NEXT:    movlps %xmm0, -559038737
1997; X86-SSE1-NEXT:    movl %ebp, %esp
1998; X86-SSE1-NEXT:    popl %ebp
1999; X86-SSE1-NEXT:    retl
2000;
2001; X86-SSE2-LABEL: fmul_64imm:
2002; X86-SSE2:       # %bb.0:
2003; X86-SSE2-NEXT:    pushl %ebp
2004; X86-SSE2-NEXT:    movl %esp, %ebp
2005; X86-SSE2-NEXT:    andl $-8, %esp
2006; X86-SSE2-NEXT:    subl $8, %esp
2007; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2008; X86-SSE2-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2009; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
2010; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2011; X86-SSE2-NEXT:    movlps %xmm0, -559038737
2012; X86-SSE2-NEXT:    movl %ebp, %esp
2013; X86-SSE2-NEXT:    popl %ebp
2014; X86-SSE2-NEXT:    retl
2015;
2016; X86-AVX-LABEL: fmul_64imm:
2017; X86-AVX:       # %bb.0:
2018; X86-AVX-NEXT:    pushl %ebp
2019; X86-AVX-NEXT:    movl %esp, %ebp
2020; X86-AVX-NEXT:    andl $-8, %esp
2021; X86-AVX-NEXT:    subl $8, %esp
2022; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2023; X86-AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
2024; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
2025; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2026; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
2027; X86-AVX-NEXT:    movl %ebp, %esp
2028; X86-AVX-NEXT:    popl %ebp
2029; X86-AVX-NEXT:    retl
2030;
2031; X64-SSE-LABEL: fmul_64imm:
2032; X64-SSE:       # %bb.0:
2033; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
2034; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0]
2035; X64-SSE-NEXT:    mulsd (%rax), %xmm0
2036; X64-SSE-NEXT:    movsd %xmm0, (%rax)
2037; X64-SSE-NEXT:    retq
2038;
2039; X64-AVX-LABEL: fmul_64imm:
2040; X64-AVX:       # %bb.0:
2041; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
2042; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0]
2043; X64-AVX-NEXT:    vmulsd (%rax), %xmm0, %xmm0
2044; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
2045; X64-AVX-NEXT:    retq
2046  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
2047  %f = bitcast i64 %i to double
2048  %mul = fmul double %f, 0x400921FA00000000
2049  %s = bitcast double %mul to i64
2050  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
2051  ret void
2052}
2053
2054; Floating-point mul to a stack location.
2055define dso_local void @fmul_32stack() nounwind {
2056; X86-NOSSE-LABEL: fmul_32stack:
2057; X86-NOSSE:       # %bb.0:
2058; X86-NOSSE-NEXT:    subl $12, %esp
2059; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2060; X86-NOSSE-NEXT:    movl %eax, (%esp)
2061; X86-NOSSE-NEXT:    flds (%esp)
2062; X86-NOSSE-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
2063; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
2064; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2065; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2066; X86-NOSSE-NEXT:    addl $12, %esp
2067; X86-NOSSE-NEXT:    retl
2068;
2069; X86-SSE1-LABEL: fmul_32stack:
2070; X86-SSE1:       # %bb.0:
2071; X86-SSE1-NEXT:    subl $12, %esp
2072; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2073; X86-SSE1-NEXT:    movl %eax, (%esp)
2074; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2075; X86-SSE1-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2076; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2077; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2078; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2079; X86-SSE1-NEXT:    addl $12, %esp
2080; X86-SSE1-NEXT:    retl
2081;
2082; X86-SSE2-LABEL: fmul_32stack:
2083; X86-SSE2:       # %bb.0:
2084; X86-SSE2-NEXT:    pushl %eax
2085; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
2086; X86-SSE2-NEXT:    mulss (%esp), %xmm0
2087; X86-SSE2-NEXT:    movss %xmm0, (%esp)
2088; X86-SSE2-NEXT:    popl %eax
2089; X86-SSE2-NEXT:    retl
2090;
2091; X86-AVX-LABEL: fmul_32stack:
2092; X86-AVX:       # %bb.0:
2093; X86-AVX-NEXT:    pushl %eax
2094; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
2095; X86-AVX-NEXT:    vmulss (%esp), %xmm0, %xmm0
2096; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
2097; X86-AVX-NEXT:    popl %eax
2098; X86-AVX-NEXT:    retl
2099;
2100; X64-SSE-LABEL: fmul_32stack:
2101; X64-SSE:       # %bb.0:
2102; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
2103; X64-SSE-NEXT:    mulss -{{[0-9]+}}(%rsp), %xmm0
2104; X64-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
2105; X64-SSE-NEXT:    retq
2106;
2107; X64-AVX-LABEL: fmul_32stack:
2108; X64-AVX:       # %bb.0:
2109; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.14159012E+0,0.0E+0,0.0E+0,0.0E+0]
2110; X64-AVX-NEXT:    vmulss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
2111; X64-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
2112; X64-AVX-NEXT:    retq
2113  %ptr = alloca i32, align 4
2114  %load = load atomic i32, ptr %ptr acquire, align 4
2115  %bc0 = bitcast i32 %load to float
2116  %fmul = fmul float 0x400921FA00000000, %bc0
2117  %bc1 = bitcast float %fmul to i32
2118  store atomic i32 %bc1, ptr %ptr release, align 4
2119  ret void
2120}
2121
2122define dso_local void @fmul_64stack() nounwind {
2123; X86-NOSSE-LABEL: fmul_64stack:
2124; X86-NOSSE:       # %bb.0:
2125; X86-NOSSE-NEXT:    pushl %ebp
2126; X86-NOSSE-NEXT:    movl %esp, %ebp
2127; X86-NOSSE-NEXT:    andl $-8, %esp
2128; X86-NOSSE-NEXT:    subl $40, %esp
2129; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
2130; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2131; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2132; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2133; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2134; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2135; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
2136; X86-NOSSE-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
2137; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
2138; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2139; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2140; X86-NOSSE-NEXT:    movl %eax, (%esp)
2141; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2142; X86-NOSSE-NEXT:    fildll (%esp)
2143; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2144; X86-NOSSE-NEXT:    movl %ebp, %esp
2145; X86-NOSSE-NEXT:    popl %ebp
2146; X86-NOSSE-NEXT:    retl
2147;
2148; X86-SSE1-LABEL: fmul_64stack:
2149; X86-SSE1:       # %bb.0:
2150; X86-SSE1-NEXT:    pushl %ebp
2151; X86-SSE1-NEXT:    movl %esp, %ebp
2152; X86-SSE1-NEXT:    andl $-8, %esp
2153; X86-SSE1-NEXT:    subl $24, %esp
2154; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
2155; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
2156; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
2157; X86-SSE1-NEXT:    movss %xmm1, (%esp)
2158; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2159; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
2160; X86-SSE1-NEXT:    fldl (%esp)
2161; X86-SSE1-NEXT:    fmuls {{\.?LCPI[0-9]+_[0-9]+}}
2162; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
2163; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
2164; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
2165; X86-SSE1-NEXT:    movl %ebp, %esp
2166; X86-SSE1-NEXT:    popl %ebp
2167; X86-SSE1-NEXT:    retl
2168;
2169; X86-SSE2-LABEL: fmul_64stack:
2170; X86-SSE2:       # %bb.0:
2171; X86-SSE2-NEXT:    pushl %ebp
2172; X86-SSE2-NEXT:    movl %esp, %ebp
2173; X86-SSE2-NEXT:    andl $-8, %esp
2174; X86-SSE2-NEXT:    subl $16, %esp
2175; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2176; X86-SSE2-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2177; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
2178; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2179; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
2180; X86-SSE2-NEXT:    movl %ebp, %esp
2181; X86-SSE2-NEXT:    popl %ebp
2182; X86-SSE2-NEXT:    retl
2183;
2184; X86-AVX-LABEL: fmul_64stack:
2185; X86-AVX:       # %bb.0:
2186; X86-AVX-NEXT:    pushl %ebp
2187; X86-AVX-NEXT:    movl %esp, %ebp
2188; X86-AVX-NEXT:    andl $-8, %esp
2189; X86-AVX-NEXT:    subl $16, %esp
2190; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2191; X86-AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
2192; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
2193; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2194; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
2195; X86-AVX-NEXT:    movl %ebp, %esp
2196; X86-AVX-NEXT:    popl %ebp
2197; X86-AVX-NEXT:    retl
2198;
2199; X64-SSE-LABEL: fmul_64stack:
2200; X64-SSE:       # %bb.0:
2201; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0]
2202; X64-SSE-NEXT:    mulsd -{{[0-9]+}}(%rsp), %xmm0
2203; X64-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
2204; X64-SSE-NEXT:    retq
2205;
2206; X64-AVX-LABEL: fmul_64stack:
2207; X64-AVX:       # %bb.0:
2208; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [3.1415901184082031E+0,0.0E+0]
2209; X64-AVX-NEXT:    vmulsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
2210; X64-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
2211; X64-AVX-NEXT:    retq
2212  %ptr = alloca i64, align 8
2213  %load = load atomic i64, ptr %ptr acquire, align 8
2214  %bc0 = bitcast i64 %load to double
2215  %fmul = fmul double 0x400921FA00000000, %bc0
2216  %bc1 = bitcast double %fmul to i64
2217  store atomic i64 %bc1, ptr %ptr release, align 8
2218  ret void
2219}
2220
2221define dso_local void @fmul_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
2222; X86-NOSSE-LABEL: fmul_array:
2223; X86-NOSSE:       # %bb.0: # %bb
2224; X86-NOSSE-NEXT:    pushl %ebp
2225; X86-NOSSE-NEXT:    movl %esp, %ebp
2226; X86-NOSSE-NEXT:    pushl %esi
2227; X86-NOSSE-NEXT:    andl $-8, %esp
2228; X86-NOSSE-NEXT:    subl $40, %esp
2229; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
2230; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
2231; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
2232; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2233; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
2234; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
2235; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
2236; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
2237; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
2238; X86-NOSSE-NEXT:    fmull 12(%ebp)
2239; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
2240; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
2241; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
2242; X86-NOSSE-NEXT:    movl %edx, (%esp)
2243; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
2244; X86-NOSSE-NEXT:    fildll (%esp)
2245; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
2246; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
2247; X86-NOSSE-NEXT:    popl %esi
2248; X86-NOSSE-NEXT:    popl %ebp
2249; X86-NOSSE-NEXT:    retl
2250;
2251; X86-SSE1-LABEL: fmul_array:
2252; X86-SSE1:       # %bb.0: # %bb
2253; X86-SSE1-NEXT:    pushl %ebp
2254; X86-SSE1-NEXT:    movl %esp, %ebp
2255; X86-SSE1-NEXT:    andl $-8, %esp
2256; X86-SSE1-NEXT:    subl $16, %esp
2257; X86-SSE1-NEXT:    movl 20(%ebp), %eax
2258; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
2259; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
2260; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
2261; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
2262; X86-SSE1-NEXT:    movss %xmm1, (%esp)
2263; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2264; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
2265; X86-SSE1-NEXT:    fldl (%esp)
2266; X86-SSE1-NEXT:    fmull 12(%ebp)
2267; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
2268; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
2269; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
2270; X86-SSE1-NEXT:    movl %ebp, %esp
2271; X86-SSE1-NEXT:    popl %ebp
2272; X86-SSE1-NEXT:    retl
2273;
2274; X86-SSE2-LABEL: fmul_array:
2275; X86-SSE2:       # %bb.0: # %bb
2276; X86-SSE2-NEXT:    pushl %ebp
2277; X86-SSE2-NEXT:    movl %esp, %ebp
2278; X86-SSE2-NEXT:    andl $-8, %esp
2279; X86-SSE2-NEXT:    subl $8, %esp
2280; X86-SSE2-NEXT:    movl 20(%ebp), %eax
2281; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
2282; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2283; X86-SSE2-NEXT:    mulsd 12(%ebp), %xmm0
2284; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
2285; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2286; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
2287; X86-SSE2-NEXT:    movl %ebp, %esp
2288; X86-SSE2-NEXT:    popl %ebp
2289; X86-SSE2-NEXT:    retl
2290;
2291; X86-AVX-LABEL: fmul_array:
2292; X86-AVX:       # %bb.0: # %bb
2293; X86-AVX-NEXT:    pushl %ebp
2294; X86-AVX-NEXT:    movl %esp, %ebp
2295; X86-AVX-NEXT:    andl $-8, %esp
2296; X86-AVX-NEXT:    subl $8, %esp
2297; X86-AVX-NEXT:    movl 20(%ebp), %eax
2298; X86-AVX-NEXT:    movl 8(%ebp), %ecx
2299; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2300; X86-AVX-NEXT:    vmulsd 12(%ebp), %xmm0, %xmm0
2301; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
2302; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2303; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
2304; X86-AVX-NEXT:    movl %ebp, %esp
2305; X86-AVX-NEXT:    popl %ebp
2306; X86-AVX-NEXT:    retl
2307;
2308; X64-SSE-LABEL: fmul_array:
2309; X64-SSE:       # %bb.0: # %bb
2310; X64-SSE-NEXT:    mulsd (%rdi,%rsi,8), %xmm0
2311; X64-SSE-NEXT:    movsd %xmm0, (%rdi,%rsi,8)
2312; X64-SSE-NEXT:    retq
2313;
2314; X64-AVX-LABEL: fmul_array:
2315; X64-AVX:       # %bb.0: # %bb
2316; X64-AVX-NEXT:    vmulsd (%rdi,%rsi,8), %xmm0, %xmm0
2317; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi,%rsi,8)
2318; X64-AVX-NEXT:    retq
2319bb:
2320  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2
2321  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
2322  %tmp7 = bitcast i64 %tmp6 to double
2323  %tmp8 = fmul double %tmp7, %arg1
2324  %tmp9 = bitcast double %tmp8 to i64
2325  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
2326  ret void
2327}
2328
2329; ----- FDIV -----
2330
2331define dso_local void @fdiv_32r(ptr %loc, float %val) nounwind {
2332; X86-NOSSE-LABEL: fdiv_32r:
2333; X86-NOSSE:       # %bb.0:
2334; X86-NOSSE-NEXT:    subl $8, %esp
2335; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2336; X86-NOSSE-NEXT:    movl (%eax), %ecx
2337; X86-NOSSE-NEXT:    movl %ecx, (%esp)
2338; X86-NOSSE-NEXT:    flds (%esp)
2339; X86-NOSSE-NEXT:    fdivs {{[0-9]+}}(%esp)
2340; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
2341; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2342; X86-NOSSE-NEXT:    movl %ecx, (%eax)
2343; X86-NOSSE-NEXT:    addl $8, %esp
2344; X86-NOSSE-NEXT:    retl
2345;
2346; X86-SSE1-LABEL: fdiv_32r:
2347; X86-SSE1:       # %bb.0:
2348; X86-SSE1-NEXT:    subl $8, %esp
2349; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2350; X86-SSE1-NEXT:    movl (%eax), %ecx
2351; X86-SSE1-NEXT:    movl %ecx, (%esp)
2352; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2353; X86-SSE1-NEXT:    divss {{[0-9]+}}(%esp), %xmm0
2354; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2355; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2356; X86-SSE1-NEXT:    movl %ecx, (%eax)
2357; X86-SSE1-NEXT:    addl $8, %esp
2358; X86-SSE1-NEXT:    retl
2359;
2360; X86-SSE2-LABEL: fdiv_32r:
2361; X86-SSE2:       # %bb.0:
2362; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
2363; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2364; X86-SSE2-NEXT:    divss {{[0-9]+}}(%esp), %xmm0
2365; X86-SSE2-NEXT:    movss %xmm0, (%eax)
2366; X86-SSE2-NEXT:    retl
2367;
2368; X86-AVX-LABEL: fdiv_32r:
2369; X86-AVX:       # %bb.0:
2370; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
2371; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2372; X86-AVX-NEXT:    vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0
2373; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
2374; X86-AVX-NEXT:    retl
2375;
2376; X64-SSE-LABEL: fdiv_32r:
2377; X64-SSE:       # %bb.0:
2378; X64-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2379; X64-SSE-NEXT:    divss %xmm0, %xmm1
2380; X64-SSE-NEXT:    movss %xmm1, (%rdi)
2381; X64-SSE-NEXT:    retq
2382;
2383; X64-AVX-LABEL: fdiv_32r:
2384; X64-AVX:       # %bb.0:
2385; X64-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
2386; X64-AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
2387; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
2388; X64-AVX-NEXT:    retq
2389  %1 = load atomic i32, ptr %loc seq_cst, align 4
2390  %2 = bitcast i32 %1 to float
2391  %div = fdiv float %2, %val
2392  %3 = bitcast float %div to i32
2393  store atomic i32 %3, ptr %loc release, align 4
2394  ret void
2395}
2396
2397define dso_local void @fdiv_64r(ptr %loc, double %val) nounwind {
2398; X86-NOSSE-LABEL: fdiv_64r:
2399; X86-NOSSE:       # %bb.0:
2400; X86-NOSSE-NEXT:    pushl %ebp
2401; X86-NOSSE-NEXT:    movl %esp, %ebp
2402; X86-NOSSE-NEXT:    andl $-8, %esp
2403; X86-NOSSE-NEXT:    subl $32, %esp
2404; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
2405; X86-NOSSE-NEXT:    fildll (%eax)
2406; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2407; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2408; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
2409; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
2410; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2411; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
2412; X86-NOSSE-NEXT:    fdivl 12(%ebp)
2413; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
2414; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2415; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
2416; X86-NOSSE-NEXT:    movl %ecx, (%esp)
2417; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
2418; X86-NOSSE-NEXT:    fildll (%esp)
2419; X86-NOSSE-NEXT:    fistpll (%eax)
2420; X86-NOSSE-NEXT:    movl %ebp, %esp
2421; X86-NOSSE-NEXT:    popl %ebp
2422; X86-NOSSE-NEXT:    retl
2423;
2424; X86-SSE1-LABEL: fdiv_64r:
2425; X86-SSE1:       # %bb.0:
2426; X86-SSE1-NEXT:    pushl %ebp
2427; X86-SSE1-NEXT:    movl %esp, %ebp
2428; X86-SSE1-NEXT:    andl $-8, %esp
2429; X86-SSE1-NEXT:    subl $16, %esp
2430; X86-SSE1-NEXT:    movl 8(%ebp), %eax
2431; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
2432; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
2433; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
2434; X86-SSE1-NEXT:    movss %xmm1, (%esp)
2435; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2436; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
2437; X86-SSE1-NEXT:    fldl (%esp)
2438; X86-SSE1-NEXT:    fdivl 12(%ebp)
2439; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
2440; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
2441; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
2442; X86-SSE1-NEXT:    movl %ebp, %esp
2443; X86-SSE1-NEXT:    popl %ebp
2444; X86-SSE1-NEXT:    retl
2445;
2446; X86-SSE2-LABEL: fdiv_64r:
2447; X86-SSE2:       # %bb.0:
2448; X86-SSE2-NEXT:    pushl %ebp
2449; X86-SSE2-NEXT:    movl %esp, %ebp
2450; X86-SSE2-NEXT:    andl $-8, %esp
2451; X86-SSE2-NEXT:    subl $8, %esp
2452; X86-SSE2-NEXT:    movl 8(%ebp), %eax
2453; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2454; X86-SSE2-NEXT:    divsd 12(%ebp), %xmm0
2455; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
2456; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2457; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
2458; X86-SSE2-NEXT:    movl %ebp, %esp
2459; X86-SSE2-NEXT:    popl %ebp
2460; X86-SSE2-NEXT:    retl
2461;
2462; X86-AVX-LABEL: fdiv_64r:
2463; X86-AVX:       # %bb.0:
2464; X86-AVX-NEXT:    pushl %ebp
2465; X86-AVX-NEXT:    movl %esp, %ebp
2466; X86-AVX-NEXT:    andl $-8, %esp
2467; X86-AVX-NEXT:    subl $8, %esp
2468; X86-AVX-NEXT:    movl 8(%ebp), %eax
2469; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2470; X86-AVX-NEXT:    vdivsd 12(%ebp), %xmm0, %xmm0
2471; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
2472; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2473; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
2474; X86-AVX-NEXT:    movl %ebp, %esp
2475; X86-AVX-NEXT:    popl %ebp
2476; X86-AVX-NEXT:    retl
2477;
2478; X64-SSE-LABEL: fdiv_64r:
2479; X64-SSE:       # %bb.0:
2480; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
2481; X64-SSE-NEXT:    divsd %xmm0, %xmm1
2482; X64-SSE-NEXT:    movsd %xmm1, (%rdi)
2483; X64-SSE-NEXT:    retq
2484;
2485; X64-AVX-LABEL: fdiv_64r:
2486; X64-AVX:       # %bb.0:
2487; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
2488; X64-AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
2489; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
2490; X64-AVX-NEXT:    retq
2491  %1 = load atomic i64, ptr %loc seq_cst, align 8
2492  %2 = bitcast i64 %1 to double
2493  %div = fdiv double %2, %val
2494  %3 = bitcast double %div to i64
2495  store atomic i64 %3, ptr %loc release, align 8
2496  ret void
2497}
2498
2499; Floating-point div to a global using an immediate.
2500define dso_local void @fdiv_32g() nounwind {
2501; X86-NOSSE-LABEL: fdiv_32g:
2502; X86-NOSSE:       # %bb.0:
2503; X86-NOSSE-NEXT:    subl $8, %esp
2504; X86-NOSSE-NEXT:    movl glob32, %eax
2505; X86-NOSSE-NEXT:    movl %eax, (%esp)
2506; X86-NOSSE-NEXT:    flds (%esp)
2507; X86-NOSSE-NEXT:    fdivs {{\.?LCPI[0-9]+_[0-9]+}}
2508; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
2509; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2510; X86-NOSSE-NEXT:    movl %eax, glob32
2511; X86-NOSSE-NEXT:    addl $8, %esp
2512; X86-NOSSE-NEXT:    retl
2513;
2514; X86-SSE1-LABEL: fdiv_32g:
2515; X86-SSE1:       # %bb.0:
2516; X86-SSE1-NEXT:    subl $8, %esp
2517; X86-SSE1-NEXT:    movl glob32, %eax
2518; X86-SSE1-NEXT:    movl %eax, (%esp)
2519; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2520; X86-SSE1-NEXT:    divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2521; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2522; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2523; X86-SSE1-NEXT:    movl %eax, glob32
2524; X86-SSE1-NEXT:    addl $8, %esp
2525; X86-SSE1-NEXT:    retl
2526;
2527; X86-SSE2-LABEL: fdiv_32g:
2528; X86-SSE2:       # %bb.0:
2529; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2530; X86-SSE2-NEXT:    divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2531; X86-SSE2-NEXT:    movss %xmm0, glob32
2532; X86-SSE2-NEXT:    retl
2533;
2534; X86-AVX-LABEL: fdiv_32g:
2535; X86-AVX:       # %bb.0:
2536; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2537; X86-AVX-NEXT:    vdivss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
2538; X86-AVX-NEXT:    vmovss %xmm0, glob32
2539; X86-AVX-NEXT:    retl
2540;
2541; X64-SSE-LABEL: fdiv_32g:
2542; X64-SSE:       # %bb.0:
2543; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2544; X64-SSE-NEXT:    divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2545; X64-SSE-NEXT:    movss %xmm0, glob32(%rip)
2546; X64-SSE-NEXT:    retq
2547;
2548; X64-AVX-LABEL: fdiv_32g:
2549; X64-AVX:       # %bb.0:
2550; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2551; X64-AVX-NEXT:    vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2552; X64-AVX-NEXT:    vmovss %xmm0, glob32(%rip)
2553; X64-AVX-NEXT:    retq
2554  %i = load atomic i32, ptr @glob32 monotonic, align 4
2555  %f = bitcast i32 %i to float
2556  %div = fdiv float %f, 0x400921FA00000000
2557  %s = bitcast float %div to i32
2558  store atomic i32 %s, ptr @glob32 monotonic, align 4
2559  ret void
2560}
2561
2562define dso_local void @fdiv_64g() nounwind {
2563; X86-NOSSE-LABEL: fdiv_64g:
2564; X86-NOSSE:       # %bb.0:
2565; X86-NOSSE-NEXT:    pushl %ebp
2566; X86-NOSSE-NEXT:    movl %esp, %ebp
2567; X86-NOSSE-NEXT:    andl $-8, %esp
2568; X86-NOSSE-NEXT:    subl $32, %esp
2569; X86-NOSSE-NEXT:    fildll glob64
2570; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2571; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2572; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2573; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2574; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2575; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
2576; X86-NOSSE-NEXT:    fdivs {{\.?LCPI[0-9]+_[0-9]+}}
2577; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
2578; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2579; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2580; X86-NOSSE-NEXT:    movl %eax, (%esp)
2581; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2582; X86-NOSSE-NEXT:    fildll (%esp)
2583; X86-NOSSE-NEXT:    fistpll glob64
2584; X86-NOSSE-NEXT:    movl %ebp, %esp
2585; X86-NOSSE-NEXT:    popl %ebp
2586; X86-NOSSE-NEXT:    retl
2587;
2588; X86-SSE1-LABEL: fdiv_64g:
2589; X86-SSE1:       # %bb.0:
2590; X86-SSE1-NEXT:    pushl %ebp
2591; X86-SSE1-NEXT:    movl %esp, %ebp
2592; X86-SSE1-NEXT:    andl $-8, %esp
2593; X86-SSE1-NEXT:    subl $16, %esp
2594; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
2595; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
2596; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
2597; X86-SSE1-NEXT:    movss %xmm1, (%esp)
2598; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2599; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
2600; X86-SSE1-NEXT:    fldl (%esp)
2601; X86-SSE1-NEXT:    fdivs {{\.?LCPI[0-9]+_[0-9]+}}
2602; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
2603; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
2604; X86-SSE1-NEXT:    movlps %xmm0, glob64
2605; X86-SSE1-NEXT:    movl %ebp, %esp
2606; X86-SSE1-NEXT:    popl %ebp
2607; X86-SSE1-NEXT:    retl
2608;
2609; X86-SSE2-LABEL: fdiv_64g:
2610; X86-SSE2:       # %bb.0:
2611; X86-SSE2-NEXT:    pushl %ebp
2612; X86-SSE2-NEXT:    movl %esp, %ebp
2613; X86-SSE2-NEXT:    andl $-8, %esp
2614; X86-SSE2-NEXT:    subl $8, %esp
2615; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2616; X86-SSE2-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2617; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
2618; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2619; X86-SSE2-NEXT:    movlps %xmm0, glob64
2620; X86-SSE2-NEXT:    movl %ebp, %esp
2621; X86-SSE2-NEXT:    popl %ebp
2622; X86-SSE2-NEXT:    retl
2623;
2624; X86-AVX-LABEL: fdiv_64g:
2625; X86-AVX:       # %bb.0:
2626; X86-AVX-NEXT:    pushl %ebp
2627; X86-AVX-NEXT:    movl %esp, %ebp
2628; X86-AVX-NEXT:    andl $-8, %esp
2629; X86-AVX-NEXT:    subl $8, %esp
2630; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2631; X86-AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
2632; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
2633; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2634; X86-AVX-NEXT:    vmovlps %xmm0, glob64
2635; X86-AVX-NEXT:    movl %ebp, %esp
2636; X86-AVX-NEXT:    popl %ebp
2637; X86-AVX-NEXT:    retl
2638;
2639; X64-SSE-LABEL: fdiv_64g:
2640; X64-SSE:       # %bb.0:
2641; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2642; X64-SSE-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2643; X64-SSE-NEXT:    movsd %xmm0, glob64(%rip)
2644; X64-SSE-NEXT:    retq
2645;
2646; X64-AVX-LABEL: fdiv_64g:
2647; X64-AVX:       # %bb.0:
2648; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2649; X64-AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2650; X64-AVX-NEXT:    vmovsd %xmm0, glob64(%rip)
2651; X64-AVX-NEXT:    retq
2652  %i = load atomic i64, ptr @glob64 monotonic, align 8
2653  %f = bitcast i64 %i to double
2654  %div = fdiv double %f, 0x400921FA00000000
2655  %s = bitcast double %div to i64
2656  store atomic i64 %s, ptr @glob64 monotonic, align 8
2657  ret void
2658}
2659
2660; Floating-point div to a hard-coded immediate address using an immediate operand.
2661define dso_local void @fdiv_32imm() nounwind {
2662; X86-NOSSE-LABEL: fdiv_32imm:
2663; X86-NOSSE:       # %bb.0:
2664; X86-NOSSE-NEXT:    subl $8, %esp
2665; X86-NOSSE-NEXT:    movl -559038737, %eax
2666; X86-NOSSE-NEXT:    movl %eax, (%esp)
2667; X86-NOSSE-NEXT:    flds (%esp)
2668; X86-NOSSE-NEXT:    fdivs {{\.?LCPI[0-9]+_[0-9]+}}
2669; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
2670; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2671; X86-NOSSE-NEXT:    movl %eax, -559038737
2672; X86-NOSSE-NEXT:    addl $8, %esp
2673; X86-NOSSE-NEXT:    retl
2674;
2675; X86-SSE1-LABEL: fdiv_32imm:
2676; X86-SSE1:       # %bb.0:
2677; X86-SSE1-NEXT:    subl $8, %esp
2678; X86-SSE1-NEXT:    movl -559038737, %eax
2679; X86-SSE1-NEXT:    movl %eax, (%esp)
2680; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2681; X86-SSE1-NEXT:    divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2682; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2683; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2684; X86-SSE1-NEXT:    movl %eax, -559038737
2685; X86-SSE1-NEXT:    addl $8, %esp
2686; X86-SSE1-NEXT:    retl
2687;
2688; X86-SSE2-LABEL: fdiv_32imm:
2689; X86-SSE2:       # %bb.0:
2690; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2691; X86-SSE2-NEXT:    divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2692; X86-SSE2-NEXT:    movss %xmm0, -559038737
2693; X86-SSE2-NEXT:    retl
2694;
2695; X86-AVX-LABEL: fdiv_32imm:
2696; X86-AVX:       # %bb.0:
2697; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2698; X86-AVX-NEXT:    vdivss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
2699; X86-AVX-NEXT:    vmovss %xmm0, -559038737
2700; X86-AVX-NEXT:    retl
2701;
2702; X64-SSE-LABEL: fdiv_32imm:
2703; X64-SSE:       # %bb.0:
2704; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
2705; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2706; X64-SSE-NEXT:    divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2707; X64-SSE-NEXT:    movss %xmm0, (%rax)
2708; X64-SSE-NEXT:    retq
2709;
2710; X64-AVX-LABEL: fdiv_32imm:
2711; X64-AVX:       # %bb.0:
2712; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
2713; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2714; X64-AVX-NEXT:    vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2715; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
2716; X64-AVX-NEXT:    retq
2717  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
2718  %f = bitcast i32 %i to float
2719  %div = fdiv float %f, 0x400921FA00000000
2720  %s = bitcast float %div to i32
2721  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
2722  ret void
2723}
2724
2725define dso_local void @fdiv_64imm() nounwind {
2726; X86-NOSSE-LABEL: fdiv_64imm:
2727; X86-NOSSE:       # %bb.0:
2728; X86-NOSSE-NEXT:    pushl %ebp
2729; X86-NOSSE-NEXT:    movl %esp, %ebp
2730; X86-NOSSE-NEXT:    andl $-8, %esp
2731; X86-NOSSE-NEXT:    subl $32, %esp
2732; X86-NOSSE-NEXT:    fildll -559038737
2733; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2734; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2735; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2736; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2737; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2738; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
2739; X86-NOSSE-NEXT:    fdivs {{\.?LCPI[0-9]+_[0-9]+}}
2740; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
2741; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2742; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2743; X86-NOSSE-NEXT:    movl %eax, (%esp)
2744; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2745; X86-NOSSE-NEXT:    fildll (%esp)
2746; X86-NOSSE-NEXT:    fistpll -559038737
2747; X86-NOSSE-NEXT:    movl %ebp, %esp
2748; X86-NOSSE-NEXT:    popl %ebp
2749; X86-NOSSE-NEXT:    retl
2750;
2751; X86-SSE1-LABEL: fdiv_64imm:
2752; X86-SSE1:       # %bb.0:
2753; X86-SSE1-NEXT:    pushl %ebp
2754; X86-SSE1-NEXT:    movl %esp, %ebp
2755; X86-SSE1-NEXT:    andl $-8, %esp
2756; X86-SSE1-NEXT:    subl $16, %esp
2757; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
2758; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
2759; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
2760; X86-SSE1-NEXT:    movss %xmm1, (%esp)
2761; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2762; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
2763; X86-SSE1-NEXT:    fldl (%esp)
2764; X86-SSE1-NEXT:    fdivs {{\.?LCPI[0-9]+_[0-9]+}}
2765; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
2766; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
2767; X86-SSE1-NEXT:    movlps %xmm0, -559038737
2768; X86-SSE1-NEXT:    movl %ebp, %esp
2769; X86-SSE1-NEXT:    popl %ebp
2770; X86-SSE1-NEXT:    retl
2771;
2772; X86-SSE2-LABEL: fdiv_64imm:
2773; X86-SSE2:       # %bb.0:
2774; X86-SSE2-NEXT:    pushl %ebp
2775; X86-SSE2-NEXT:    movl %esp, %ebp
2776; X86-SSE2-NEXT:    andl $-8, %esp
2777; X86-SSE2-NEXT:    subl $8, %esp
2778; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2779; X86-SSE2-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2780; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
2781; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2782; X86-SSE2-NEXT:    movlps %xmm0, -559038737
2783; X86-SSE2-NEXT:    movl %ebp, %esp
2784; X86-SSE2-NEXT:    popl %ebp
2785; X86-SSE2-NEXT:    retl
2786;
2787; X86-AVX-LABEL: fdiv_64imm:
2788; X86-AVX:       # %bb.0:
2789; X86-AVX-NEXT:    pushl %ebp
2790; X86-AVX-NEXT:    movl %esp, %ebp
2791; X86-AVX-NEXT:    andl $-8, %esp
2792; X86-AVX-NEXT:    subl $8, %esp
2793; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2794; X86-AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
2795; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
2796; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2797; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
2798; X86-AVX-NEXT:    movl %ebp, %esp
2799; X86-AVX-NEXT:    popl %ebp
2800; X86-AVX-NEXT:    retl
2801;
2802; X64-SSE-LABEL: fdiv_64imm:
2803; X64-SSE:       # %bb.0:
2804; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
2805; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2806; X64-SSE-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2807; X64-SSE-NEXT:    movsd %xmm0, (%rax)
2808; X64-SSE-NEXT:    retq
2809;
2810; X64-AVX-LABEL: fdiv_64imm:
2811; X64-AVX:       # %bb.0:
2812; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
2813; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2814; X64-AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2815; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
2816; X64-AVX-NEXT:    retq
2817  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
2818  %f = bitcast i64 %i to double
2819  %div = fdiv double %f, 0x400921FA00000000
2820  %s = bitcast double %div to i64
2821  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
2822  ret void
2823}
2824
2825; Floating-point div to a stack location.
2826define dso_local void @fdiv_32stack() nounwind {
2827; X86-NOSSE-LABEL: fdiv_32stack:
2828; X86-NOSSE:       # %bb.0:
2829; X86-NOSSE-NEXT:    subl $12, %esp
2830; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2831; X86-NOSSE-NEXT:    movl %eax, (%esp)
2832; X86-NOSSE-NEXT:    fld1
2833; X86-NOSSE-NEXT:    fdivs (%esp)
2834; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
2835; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2836; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2837; X86-NOSSE-NEXT:    addl $12, %esp
2838; X86-NOSSE-NEXT:    retl
2839;
2840; X86-SSE1-LABEL: fdiv_32stack:
2841; X86-SSE1:       # %bb.0:
2842; X86-SSE1-NEXT:    subl $12, %esp
2843; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2844; X86-SSE1-NEXT:    movl %eax, (%esp)
2845; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
2846; X86-SSE1-NEXT:    divss (%esp), %xmm0
2847; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
2848; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
2849; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2850; X86-SSE1-NEXT:    addl $12, %esp
2851; X86-SSE1-NEXT:    retl
2852;
2853; X86-SSE2-LABEL: fdiv_32stack:
2854; X86-SSE2:       # %bb.0:
2855; X86-SSE2-NEXT:    pushl %eax
2856; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
2857; X86-SSE2-NEXT:    divss (%esp), %xmm0
2858; X86-SSE2-NEXT:    movss %xmm0, (%esp)
2859; X86-SSE2-NEXT:    popl %eax
2860; X86-SSE2-NEXT:    retl
2861;
2862; X86-AVX-LABEL: fdiv_32stack:
2863; X86-AVX:       # %bb.0:
2864; X86-AVX-NEXT:    pushl %eax
2865; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
2866; X86-AVX-NEXT:    vdivss (%esp), %xmm0, %xmm0
2867; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
2868; X86-AVX-NEXT:    popl %eax
2869; X86-AVX-NEXT:    retl
2870;
2871; X64-SSE-LABEL: fdiv_32stack:
2872; X64-SSE:       # %bb.0:
2873; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
2874; X64-SSE-NEXT:    divss -{{[0-9]+}}(%rsp), %xmm0
2875; X64-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
2876; X64-SSE-NEXT:    retq
2877;
2878; X64-AVX-LABEL: fdiv_32stack:
2879; X64-AVX:       # %bb.0:
2880; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
2881; X64-AVX-NEXT:    vdivss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
2882; X64-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
2883; X64-AVX-NEXT:    retq
2884  %ptr = alloca i32, align 4
2885  %load = load atomic i32, ptr %ptr acquire, align 4
2886  %bc0 = bitcast i32 %load to float
2887  %fdiv = fdiv float 1.000000e+00, %bc0
2888  %bc1 = bitcast float %fdiv to i32
2889  store atomic i32 %bc1, ptr %ptr release, align 4
2890  ret void
2891}
2892
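; Floating-point div to a 64-bit stack location.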
2893define dso_local void @fdiv_64stack() nounwind {
2894; X86-NOSSE-LABEL: fdiv_64stack:
2895; X86-NOSSE:       # %bb.0:
2896; X86-NOSSE-NEXT:    pushl %ebp
2897; X86-NOSSE-NEXT:    movl %esp, %ebp
2898; X86-NOSSE-NEXT:    andl $-8, %esp
2899; X86-NOSSE-NEXT:    subl $40, %esp
2900; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
2901; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2902; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2903; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2904; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2905; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
2906; X86-NOSSE-NEXT:    fld1
2907; X86-NOSSE-NEXT:    fdivl {{[0-9]+}}(%esp)
2908; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
2909; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
2910; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2911; X86-NOSSE-NEXT:    movl %eax, (%esp)
2912; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
2913; X86-NOSSE-NEXT:    fildll (%esp)
2914; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
2915; X86-NOSSE-NEXT:    movl %ebp, %esp
2916; X86-NOSSE-NEXT:    popl %ebp
2917; X86-NOSSE-NEXT:    retl
2918;
2919; X86-SSE1-LABEL: fdiv_64stack:
2920; X86-SSE1:       # %bb.0:
2921; X86-SSE1-NEXT:    pushl %ebp
2922; X86-SSE1-NEXT:    movl %esp, %ebp
2923; X86-SSE1-NEXT:    andl $-8, %esp
2924; X86-SSE1-NEXT:    subl $24, %esp
2925; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
2926; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
2927; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
2928; X86-SSE1-NEXT:    movss %xmm1, (%esp)
2929; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
2930; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
2931; X86-SSE1-NEXT:    fld1
2932; X86-SSE1-NEXT:    fdivl (%esp)
2933; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
2934; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
2935; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
2936; X86-SSE1-NEXT:    movl %ebp, %esp
2937; X86-SSE1-NEXT:    popl %ebp
2938; X86-SSE1-NEXT:    retl
2939;
2940; X86-SSE2-LABEL: fdiv_64stack:
2941; X86-SSE2:       # %bb.0:
2942; X86-SSE2-NEXT:    pushl %ebp
2943; X86-SSE2-NEXT:    movl %esp, %ebp
2944; X86-SSE2-NEXT:    andl $-8, %esp
2945; X86-SSE2-NEXT:    subl $16, %esp
2946; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2947; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
2948; X86-SSE2-NEXT:    divsd %xmm0, %xmm1
2949; X86-SSE2-NEXT:    movsd %xmm1, (%esp)
2950; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
2951; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
2952; X86-SSE2-NEXT:    movl %ebp, %esp
2953; X86-SSE2-NEXT:    popl %ebp
2954; X86-SSE2-NEXT:    retl
2955;
2956; X86-AVX-LABEL: fdiv_64stack:
2957; X86-AVX:       # %bb.0:
2958; X86-AVX-NEXT:    pushl %ebp
2959; X86-AVX-NEXT:    movl %esp, %ebp
2960; X86-AVX-NEXT:    andl $-8, %esp
2961; X86-AVX-NEXT:    subl $16, %esp
2962; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2963; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
2964; X86-AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
2965; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
2966; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2967; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
2968; X86-AVX-NEXT:    movl %ebp, %esp
2969; X86-AVX-NEXT:    popl %ebp
2970; X86-AVX-NEXT:    retl
2971;
2972; X64-SSE-LABEL: fdiv_64stack:
2973; X64-SSE:       # %bb.0:
2974; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
2975; X64-SSE-NEXT:    divsd -{{[0-9]+}}(%rsp), %xmm0
2976; X64-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
2977; X64-SSE-NEXT:    retq
2978;
2979; X64-AVX-LABEL: fdiv_64stack:
2980; X64-AVX:       # %bb.0:
2981; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
2982; X64-AVX-NEXT:    vdivsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
2983; X64-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
2984; X64-AVX-NEXT:    retq
2985  %ptr = alloca i64, align 8
2986  %load = load atomic i64, ptr %ptr acquire, align 8
2987  %bc0 = bitcast i64 %load to double
2988  %fdiv = fdiv double 1.000000e+00, %bc0
2989  %bc1 = bitcast double %fdiv to i64
2990  store atomic i64 %bc1, ptr %ptr release, align 8
2991  ret void
2992}
2993
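; Floating-point div on a value loaded atomically from an i64 array element
; (base pointer plus runtime index) and stored back to the same element.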
2994define dso_local void @fdiv_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
2995; X86-NOSSE-LABEL: fdiv_array:
2996; X86-NOSSE:       # %bb.0: # %bb
2997; X86-NOSSE-NEXT:    pushl %ebp
2998; X86-NOSSE-NEXT:    movl %esp, %ebp
2999; X86-NOSSE-NEXT:    pushl %esi
3000; X86-NOSSE-NEXT:    andl $-8, %esp
3001; X86-NOSSE-NEXT:    subl $40, %esp
3002; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
3003; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
3004; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
3005; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
3006; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
3007; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
3008; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
3009; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
3010; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
3011; X86-NOSSE-NEXT:    fdivl 12(%ebp)
3012; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
3013; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
3014; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
3015; X86-NOSSE-NEXT:    movl %edx, (%esp)
3016; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
3017; X86-NOSSE-NEXT:    fildll (%esp)
3018; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
3019; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
3020; X86-NOSSE-NEXT:    popl %esi
3021; X86-NOSSE-NEXT:    popl %ebp
3022; X86-NOSSE-NEXT:    retl
3023;
3024; X86-SSE1-LABEL: fdiv_array:
3025; X86-SSE1:       # %bb.0: # %bb
3026; X86-SSE1-NEXT:    pushl %ebp
3027; X86-SSE1-NEXT:    movl %esp, %ebp
3028; X86-SSE1-NEXT:    andl $-8, %esp
3029; X86-SSE1-NEXT:    subl $16, %esp
3030; X86-SSE1-NEXT:    movl 20(%ebp), %eax
3031; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
3032; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
3033; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
3034; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
3035; X86-SSE1-NEXT:    movss %xmm1, (%esp)
3036; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
3037; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
3038; X86-SSE1-NEXT:    fldl (%esp)
3039; X86-SSE1-NEXT:    fdivl 12(%ebp)
3040; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
3041; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
3042; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
3043; X86-SSE1-NEXT:    movl %ebp, %esp
3044; X86-SSE1-NEXT:    popl %ebp
3045; X86-SSE1-NEXT:    retl
3046;
3047; X86-SSE2-LABEL: fdiv_array:
3048; X86-SSE2:       # %bb.0: # %bb
3049; X86-SSE2-NEXT:    pushl %ebp
3050; X86-SSE2-NEXT:    movl %esp, %ebp
3051; X86-SSE2-NEXT:    andl $-8, %esp
3052; X86-SSE2-NEXT:    subl $8, %esp
3053; X86-SSE2-NEXT:    movl 20(%ebp), %eax
3054; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
3055; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
3056; X86-SSE2-NEXT:    divsd 12(%ebp), %xmm0
3057; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
3058; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
3059; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
3060; X86-SSE2-NEXT:    movl %ebp, %esp
3061; X86-SSE2-NEXT:    popl %ebp
3062; X86-SSE2-NEXT:    retl
3063;
3064; X86-AVX-LABEL: fdiv_array:
3065; X86-AVX:       # %bb.0: # %bb
3066; X86-AVX-NEXT:    pushl %ebp
3067; X86-AVX-NEXT:    movl %esp, %ebp
3068; X86-AVX-NEXT:    andl $-8, %esp
3069; X86-AVX-NEXT:    subl $8, %esp
3070; X86-AVX-NEXT:    movl 20(%ebp), %eax
3071; X86-AVX-NEXT:    movl 8(%ebp), %ecx
3072; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
3073; X86-AVX-NEXT:    vdivsd 12(%ebp), %xmm0, %xmm0
3074; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
3075; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
3076; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
3077; X86-AVX-NEXT:    movl %ebp, %esp
3078; X86-AVX-NEXT:    popl %ebp
3079; X86-AVX-NEXT:    retl
3080;
3081; X64-SSE-LABEL: fdiv_array:
3082; X64-SSE:       # %bb.0: # %bb
3083; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
3084; X64-SSE-NEXT:    divsd %xmm0, %xmm1
3085; X64-SSE-NEXT:    movsd %xmm1, (%rdi,%rsi,8)
3086; X64-SSE-NEXT:    retq
3087;
3088; X64-AVX-LABEL: fdiv_array:
3089; X64-AVX:       # %bb.0: # %bb
3090; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
3091; X64-AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
3092; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi,%rsi,8)
3093; X64-AVX-NEXT:    retq
3094bb:
3095  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2
3096  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
3097  %tmp7 = bitcast i64 %tmp6 to double
3098  %tmp8 = fdiv double %tmp7, %arg1
3099  %tmp9 = bitcast double %tmp8 to i64
3100  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
3101  ret void
3102}
3103