xref: /llvm-project/llvm/test/CodeGen/X86/powi.ll (revision 1c2727581378677152cfb4909358e6b9e4b9b3ad)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-X87
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Ideally this would compile to 5 multiplies.

; powi(a, 15) in a normal (non-size-optimized) function: the @llvm.powi
; intrinsic is expanded inline into a chain of scalar multiplies on all
; three targets (x87, x86 SSE2, x86-64 SSE2).
define double @pow_wrapper(double %a) nounwind readonly ssp noredzone {
; X86-X87-LABEL: pow_wrapper:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fld %st(0)
; X86-X87-NEXT:    fmul %st(1), %st
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmulp %st, %st(1)
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-8, %esp
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movapd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    movsd %xmm1, (%esp)
; X86-SSE-NEXT:    fldl (%esp)
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper:
; X64:       # %bb.0:
; X64-NEXT:    movapd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    retq
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; powi(a, 15) under optsize: the intrinsic is not expanded into multiplies;
; a call to the compiler-rt routine __powidf2 is emitted instead (a tail
; jump through the PLT on x86-64).
define double @pow_wrapper_optsize(double %a) optsize {
; X86-X87-LABEL: pow_wrapper_optsize:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_optsize:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_optsize:
; X64:       # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; powi(a, 15) under profile-guided size optimization: !prof !14 gives this
; function an entry count of 0 (cold), so codegen is expected to optimize
; for size and emit the __powidf2 libcall, matching the optsize case.
define double @pow_wrapper_pgso(double %a) !prof !14 {
; X86-X87-LABEL: pow_wrapper_pgso:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_pgso:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_pgso:
; X64:       # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; powi(a, 15) under minsize: libcall to __powidf2 again, and on x86-64 the
; immediate 15 is materialized with push $15 / pop %rdi (a smaller encoding
; than movl $15, %edi).
define double @pow_wrapper_minsize(double %a) minsize {
; X86-X87-LABEL: pow_wrapper_minsize:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_minsize:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_minsize:
; X64:       # %bb.0:
; X64-NEXT:    pushq $15
; X64-NEXT:    .cfi_adjust_cfa_offset 8
; X64-NEXT:    popq %rdi
; X64-NEXT:    .cfi_adjust_cfa_offset -8
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; Vector powi under minsize: the <2 x float> intrinsic is scalarized into
; two __powisf2 calls, with the lanes spilled/reloaded around the calls and
; recombined (unpcklps on the SSE targets) afterwards.
define <2 x float> @powi_v2f32(<2 x float> %a) minsize {
; X86-X87-LABEL: powi_v2f32:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    pushl %esi
; X86-X87-NEXT:    .cfi_def_cfa_offset 8
; X86-X87-NEXT:    subl $16, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 24
; X86-X87-NEXT:    .cfi_offset %esi, -8
; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
; X86-X87-NEXT:    pushl $15
; X86-X87-NEXT:    .cfi_adjust_cfa_offset 4
; X86-X87-NEXT:    popl %esi
; X86-X87-NEXT:    .cfi_adjust_cfa_offset -4
; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstps (%esp)
; X86-X87-NEXT:    calll __powisf2
; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT:    fstps (%esp)
; X86-X87-NEXT:    calll __powisf2
; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT:    fxch %st(1)
; X86-X87-NEXT:    addl $16, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 8
; X86-X87-NEXT:    popl %esi
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: powi_v2f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    subl $32, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 40
; X86-SSE-NEXT:    .cfi_offset %esi, -8
; X86-SSE-NEXT:    movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-SSE-NEXT:    pushl $15
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movss %xmm0, (%esp)
; X86-SSE-NEXT:    calll __powisf2
; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE-NEXT:    movss %xmm0, (%esp)
; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powisf2
; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT:    addl $32, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: powi_v2f32:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    subq $32, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 48
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    pushq $15
; X64-NEXT:    .cfi_adjust_cfa_offset 8
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_adjust_cfa_offset -8
; X64-NEXT:    movl %ebx, %edi
; X64-NEXT:    callq __powisf2@PLT
; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    movl %ebx, %edi
; X64-NEXT:    callq __powisf2@PLT
; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT:    movaps %xmm1, %xmm0
; X64-NEXT:    addq $32, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %ret = tail call < 2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 15) nounwind ;
  ret <2 x float> %ret
}

declare double @llvm.powi.f64.i32(double, i32) nounwind readonly
declare < 2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) nounwind readonly

; Profile summary metadata so the !prof attachment on @pow_wrapper_pgso is
; interpreted; !14 gives that function an entry count of 0 (cold).
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
