xref: /llvm-project/llvm/test/CodeGen/X86/pr34080.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2-SCHEDULE
4; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
5; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -mcpu=nocona | FileCheck %s --check-prefix=SSE3
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -mcpu=sandybridge | FileCheck %s --check-prefix=AVX
8
; @_Z1fe (Itanium-mangled f(long double)) takes an x86_fp80 argument %z and
; fills a local [3 x double] array %tx:
;   tx[0] = (double)(i32)z
;   tx[1] = (double)(i32)m    where m  = (z - tx[0]) * 2^24
;   tx[2] = (double)m1        where m1 = (m - tx[1]) * 2^24
; 0xK40178000000000000000 is the x86_fp80 encoding of 2^24 (biased exponent
; 0x4017 = 0x3FFF + 24, significand 0x8000000000000000).
; The function returns void; the CHECK lines below pin the assembly llc emits
; for this conversion chain under each RUN-line prefix. The SSE2 and
; SSE2-SCHEDULE expectations convert to integer with an fnstcw/fldcw/fistl
; sequence, while the SSE3 and AVX expectations use fisttpl.
; NOTE(review): the CHECK lines are autogenerated (see the NOTE at the top of
; the file) - regenerate them with the script rather than editing by hand.
9define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
10; SSE2-LABEL: _Z1fe:
11; SSE2:       ## %bb.0: ## %entry
12; SSE2-NEXT:    pushq %rbp
13; SSE2-NEXT:    .cfi_def_cfa_offset 16
14; SSE2-NEXT:    .cfi_offset %rbp, -16
15; SSE2-NEXT:    movq %rsp, %rbp
16; SSE2-NEXT:    .cfi_def_cfa_register %rbp
17; SSE2-NEXT:    fldt 16(%rbp)
18; SSE2-NEXT:    fnstcw -4(%rbp)
19; SSE2-NEXT:    movzwl -4(%rbp), %eax
20; SSE2-NEXT:    orl $3072, %eax ## imm = 0xC00
21; SSE2-NEXT:    movw %ax, -8(%rbp)
22; SSE2-NEXT:    fldcw -8(%rbp)
23; SSE2-NEXT:    fistl -12(%rbp)
24; SSE2-NEXT:    fldcw -4(%rbp)
25; SSE2-NEXT:    cvtsi2sdl -12(%rbp), %xmm0
26; SSE2-NEXT:    movsd %xmm0, -64(%rbp)
27; SSE2-NEXT:    movsd %xmm0, -32(%rbp)
28; SSE2-NEXT:    fsubl -32(%rbp)
29; SSE2-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
30; SSE2-NEXT:    fmul %st, %st(1)
31; SSE2-NEXT:    fnstcw -2(%rbp)
32; SSE2-NEXT:    movzwl -2(%rbp), %eax
33; SSE2-NEXT:    orl $3072, %eax ## imm = 0xC00
34; SSE2-NEXT:    movw %ax, -6(%rbp)
35; SSE2-NEXT:    fldcw -6(%rbp)
36; SSE2-NEXT:    fxch %st(1)
37; SSE2-NEXT:    fistl -16(%rbp)
38; SSE2-NEXT:    fldcw -2(%rbp)
39; SSE2-NEXT:    xorps %xmm0, %xmm0
40; SSE2-NEXT:    cvtsi2sdl -16(%rbp), %xmm0
41; SSE2-NEXT:    movsd %xmm0, -56(%rbp)
42; SSE2-NEXT:    movsd %xmm0, -24(%rbp)
43; SSE2-NEXT:    fsubl -24(%rbp)
44; SSE2-NEXT:    fmulp %st, %st(1)
45; SSE2-NEXT:    fstpl -48(%rbp)
46; SSE2-NEXT:    popq %rbp
47; SSE2-NEXT:    retq
48;
49; SSE2-SCHEDULE-LABEL: _Z1fe:
50; SSE2-SCHEDULE:       ## %bb.0: ## %entry
51; SSE2-SCHEDULE-NEXT:    pushq %rbp
52; SSE2-SCHEDULE-NEXT:    .cfi_def_cfa_offset 16
53; SSE2-SCHEDULE-NEXT:    .cfi_offset %rbp, -16
54; SSE2-SCHEDULE-NEXT:    movq %rsp, %rbp
55; SSE2-SCHEDULE-NEXT:    .cfi_def_cfa_register %rbp
56; SSE2-SCHEDULE-NEXT:    fldt 16(%rbp)
57; SSE2-SCHEDULE-NEXT:    fnstcw -4(%rbp)
58; SSE2-SCHEDULE-NEXT:    movzwl -4(%rbp), %eax
59; SSE2-SCHEDULE-NEXT:    orl $3072, %eax ## imm = 0xC00
60; SSE2-SCHEDULE-NEXT:    movw %ax, -8(%rbp)
61; SSE2-SCHEDULE-NEXT:    fldcw -8(%rbp)
62; SSE2-SCHEDULE-NEXT:    fistl -12(%rbp)
63; SSE2-SCHEDULE-NEXT:    fldcw -4(%rbp)
64; SSE2-SCHEDULE-NEXT:    cvtsi2sdl -12(%rbp), %xmm0
65; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -64(%rbp)
66; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -32(%rbp)
67; SSE2-SCHEDULE-NEXT:    fsubl -32(%rbp)
68; SSE2-SCHEDULE-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
69; SSE2-SCHEDULE-NEXT:    fmul %st, %st(1)
70; SSE2-SCHEDULE-NEXT:    fnstcw -2(%rbp)
71; SSE2-SCHEDULE-NEXT:    movzwl -2(%rbp), %eax
72; SSE2-SCHEDULE-NEXT:    orl $3072, %eax ## imm = 0xC00
73; SSE2-SCHEDULE-NEXT:    movw %ax, -6(%rbp)
74; SSE2-SCHEDULE-NEXT:    fldcw -6(%rbp)
75; SSE2-SCHEDULE-NEXT:    fxch %st(1)
76; SSE2-SCHEDULE-NEXT:    fistl -16(%rbp)
77; SSE2-SCHEDULE-NEXT:    fldcw -2(%rbp)
78; SSE2-SCHEDULE-NEXT:    xorps %xmm0, %xmm0
79; SSE2-SCHEDULE-NEXT:    cvtsi2sdl -16(%rbp), %xmm0
80; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -56(%rbp)
81; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -24(%rbp)
82; SSE2-SCHEDULE-NEXT:    fsubl -24(%rbp)
83; SSE2-SCHEDULE-NEXT:    fmulp %st, %st(1)
84; SSE2-SCHEDULE-NEXT:    fstpl -48(%rbp)
85; SSE2-SCHEDULE-NEXT:    popq %rbp
86; SSE2-SCHEDULE-NEXT:    retq
87;
88; SSE3-LABEL: _Z1fe:
89; SSE3:       ## %bb.0: ## %entry
90; SSE3-NEXT:    pushq %rbp
91; SSE3-NEXT:    .cfi_def_cfa_offset 16
92; SSE3-NEXT:    .cfi_offset %rbp, -16
93; SSE3-NEXT:    movq %rsp, %rbp
94; SSE3-NEXT:    .cfi_def_cfa_register %rbp
95; SSE3-NEXT:    fldt 16(%rbp)
96; SSE3-NEXT:    fld %st(0)
97; SSE3-NEXT:    fisttpl -4(%rbp)
98; SSE3-NEXT:    cvtsi2sdl -4(%rbp), %xmm0
99; SSE3-NEXT:    movsd %xmm0, -48(%rbp)
100; SSE3-NEXT:    movsd %xmm0, -24(%rbp)
101; SSE3-NEXT:    fsubl -24(%rbp)
102; SSE3-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
103; SSE3-NEXT:    fmul %st, %st(1)
104; SSE3-NEXT:    fld %st(1)
105; SSE3-NEXT:    fisttpl -8(%rbp)
106; SSE3-NEXT:    xorps %xmm0, %xmm0
107; SSE3-NEXT:    cvtsi2sdl -8(%rbp), %xmm0
108; SSE3-NEXT:    movsd %xmm0, -40(%rbp)
109; SSE3-NEXT:    movsd %xmm0, -16(%rbp)
110; SSE3-NEXT:    fxch %st(1)
111; SSE3-NEXT:    fsubl -16(%rbp)
112; SSE3-NEXT:    fmulp %st, %st(1)
113; SSE3-NEXT:    fstpl -32(%rbp)
114; SSE3-NEXT:    popq %rbp
115; SSE3-NEXT:    retq
116;
117; AVX-LABEL: _Z1fe:
118; AVX:       ## %bb.0: ## %entry
119; AVX-NEXT:    pushq %rbp
120; AVX-NEXT:    .cfi_def_cfa_offset 16
121; AVX-NEXT:    .cfi_offset %rbp, -16
122; AVX-NEXT:    movq %rsp, %rbp
123; AVX-NEXT:    .cfi_def_cfa_register %rbp
124; AVX-NEXT:    fldt 16(%rbp)
125; AVX-NEXT:    fld %st(0)
126; AVX-NEXT:    fisttpl -4(%rbp)
127; AVX-NEXT:    vcvtsi2sdl -4(%rbp), %xmm0, %xmm0
128; AVX-NEXT:    vmovsd %xmm0, -48(%rbp)
129; AVX-NEXT:    vmovsd %xmm0, -24(%rbp)
130; AVX-NEXT:    fsubl -24(%rbp)
131; AVX-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
132; AVX-NEXT:    fmul %st, %st(1)
133; AVX-NEXT:    fld %st(1)
134; AVX-NEXT:    fisttpl -8(%rbp)
135; AVX-NEXT:    vcvtsi2sdl -8(%rbp), %xmm1, %xmm0
136; AVX-NEXT:    vmovsd %xmm0, -40(%rbp)
137; AVX-NEXT:    vmovsd %xmm0, -16(%rbp)
138; AVX-NEXT:    fxch %st(1)
139; AVX-NEXT:    fsubl -16(%rbp)
140; AVX-NEXT:    fmulp %st, %st(1)
141; AVX-NEXT:    fstpl -32(%rbp)
142; AVX-NEXT:    popq %rbp
143; AVX-NEXT:    retq
144entry:
  ; Local 3-element double array; this function only ever stores to it.
145  %tx = alloca [3 x double], align 16
  ; tx[0]: convert %z to a signed i32, then back to double.
146  %conv = fptosi x86_fp80 %z to i32
147  %conv1 = sitofp i32 %conv to double
148  store double %conv1, ptr %tx, align 16
  ; m = (z - tx[0]) * 2^24, computed in x86_fp80.
149  %conv4 = fpext double %conv1 to x86_fp80
150  %sub = fsub x86_fp80 %z, %conv4
151  %mul = fmul x86_fp80 %sub, 0xK40178000000000000000
  ; tx[1]: the same i32 round trip applied to m.
152  %conv.1 = fptosi x86_fp80 %mul to i32
153  %conv1.1 = sitofp i32 %conv.1 to double
154  %arrayidx.1 = getelementptr inbounds [3 x double], ptr %tx, i64 0, i64 1
155  store double %conv1.1, ptr %arrayidx.1, align 8
  ; m1 = (m - tx[1]) * 2^24, again in x86_fp80.
156  %conv4.1 = fpext double %conv1.1 to x86_fp80
157  %sub.1 = fsub x86_fp80 %mul, %conv4.1
158  %mul.1 = fmul x86_fp80 %sub.1, 0xK40178000000000000000
  ; tx[2]: m1 is truncated to double directly, with no integer round trip.
159  %conv5 = fptrunc x86_fp80 %mul.1 to double
160  %arrayidx6 = getelementptr inbounds [3 x double], ptr %tx, i64 0, i64 2
161  store double %conv5, ptr %arrayidx6, align 16
162  ret void
163}
164
; Attribute group #0, referenced by the definition of @_Z1fe: noinline, uwtable,
; and a list of string attributes. "frame-pointer"="all" is consistent with the
; pushq %rbp / movq %rsp, %rbp prologue expected under every CHECK prefix.
165attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
166