xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/atom/lshift.asm (revision b83ebeba7f767758d2778bb0f9d7a76534253621)
1dnl  AMD64 mpn_lshift -- mpn left shift, optimised for Atom.
2
3dnl  Contributed to the GNU project by Torbjorn Granlund.
4
5dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of the GNU Lesser General Public License as published
11dnl  by the Free Software Foundation; either version 3 of the License, or (at
12dnl  your option) any later version.
13
14dnl  The GNU MP Library is distributed in the hope that it will be useful, but
15dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17dnl  License for more details.
18
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C	     cycles/limb
25C AMD K8,K9	 ?
26C AMD K10	 ?
27C Intel P4	 ?
28C Intel core2	 ?
29C Intel NHM	 ?
30C Intel SBR	 ?
31C Intel atom	 4.5
32C VIA nano	 ?
33
34C TODO
35C  * Consider using 4-way unrolling.  We reach 4 c/l, but the code is 2.5 times
36C    larger.
37
38C INPUT PARAMETERS
C Register names for the four operands, as they are used by the code below.
39define(`rp',	`%rdi')		C destination: result limbs are stored through this pointer
40define(`up',	`%rsi')		C source: operand limbs are loaded through this pointer
41define(`n',	`%rdx')		C number of 8-byte limbs; scaled by 8 to locate the top limb
42define(`cnt',	`%rcx')		C shift count; the shifts take it from the low byte of this register
43
C NOTE(review): ABI_SUPPORT is defined outside this file.  It presumably
C declares that this code can be built for both the DOS64 and the STD64
C calling conventions -- confirm against the macro definition.
44ABI_SUPPORT(DOS64)
45ABI_SUPPORT(STD64)
46
C mpn_lshift -- shift an n-limb operand left by cnt bits.
C
C Loads n 64-bit limbs starting at up, stores n result limbs starting at rp,
C and leaves in %rax the most significant source limb shifted right by
C 64-cnt, i.e. the bits that were pushed out at the top.
C
C Limbs are handled from the most significant end downwards, two per loop
C iteration.  The count byte in %rcx is flipped between cnt and -cnt with
C neg: shl runs with cnt, shr runs with -cnt, which a 64-bit shift reduces
C mod 64 to 64-cnt.
C
C Register roles:
C   %r10, %r11  unshifted limb copies waiting for their shl by cnt
C   %r8,  %r9   limbs already shifted right by 64-cnt; each is OR-ed with the
C               left-shifted limb below it in significance order to form a
C               finished result limb
C
C NOTE(review): n = 0 is not handled (the top limb is loaded before any size
C test), and with cnt = 0 the negated count is also 0, so the right shifts
C would leave their operands unchanged.  Presumably callers guarantee
C n >= 1 and 1 <= cnt <= 63 -- confirm against the mpn_lshift contract.
C NOTE(review): R32 and R8 are defined elsewhere; presumably they select the
C 32-bit and 8-bit names of a register -- confirm.
47ASM_START()
48	TEXT
49	ALIGN(16)
50PROLOGUE(mpn_lshift)
51	FUNC_ENTRY(4)		C NOTE(review): defined elsewhere; presumably maps 4 DOS64 arguments onto the STD64 registers -- confirm
52	lea	-8(up,n,8), up		C up -> most significant source limb
53	lea	-8(rp,n,8), rp		C rp -> most significant result limb
54	shr	R32(n)		C halve n; CF receives the bit shifted out, so CF is set when n was odd
55	mov	(up), %rax		C top source limb; mov leaves CF untouched
56	jnc	L(evn)		C even n uses the other entry sequence
57
C Odd n: deal with the top limb on its own.
58	mov	%rax, %r11
59	shl	R8(%rcx), %r11		C r11 = top limb << cnt
60	neg	R8(%rcx)		C count byte is now -cnt
61	shr	R8(%rcx), %rax		C rax = top limb >> (64-cnt), the value left for the caller
62	test	n, n
63	jnz	L(gt1)		C limb pairs remain below the top limb
64	mov	%r11, (rp)		C n was 1: this is the only result limb
65	FUNC_EXIT()
66	ret
67
68L(gt1):	mov	-8(up), %r8		C next lower source limb
69	mov	%r8, %r10		C unshifted copy for the later shl
70	shr	R8(%rcx), %r8		C its high bits, destined for the result limb above
71	jmp	L(lo1)		C enter the loop at its midpoint
72
C Even n: prime the pipeline with the top two limbs.
73L(evn):	mov	%rax, %r10		C unshifted copy of the top limb
74	neg	R8(%rcx)		C count byte is now -cnt
75	shr	R8(%rcx), %rax		C rax = top limb >> (64-cnt), the value left for the caller
76	mov	-8(up), %r9		C second limb from the top
77	mov	%r9, %r11		C unshifted copy for the later shl
78	shr	R8(%rcx), %r9		C its high bits, destined for the top result limb
79	neg	R8(%rcx)		C count byte back to cnt
80	dec	n		C ZF set when no further limb pair remains
81	lea	8(rp), rp		C bias rp so the -8 and -16 offsets below hit the right limbs; lea does not touch flags
82	lea	-8(up), up		C up -> the limb just loaded
83	jz	L(end)		C still testing the flags from dec n
84
C Main loop: each pass stores two result limbs and moves both pointers down
C by 16 bytes.  The count byte is cnt on entry at L(top) and -cnt on entry
C at L(lo1).
85	ALIGN(8)
86L(top):	shl	R8(%rcx), %r10
87	or	%r10, %r9		C r9 = finished result limb
88	shl	R8(%rcx), %r11
89	neg	R8(%rcx)		C switch to -cnt for the shr below
90	mov	-8(up), %r8		C next lower source limb
91	mov	%r8, %r10		C unshifted copy for the later shl
92	mov	%r9, -8(rp)
93	shr	R8(%rcx), %r8
94	lea	-16(rp), rp		C step rp down two limbs
95L(lo1):	mov	-16(up), %r9		C next lower source limb
96	or	%r11, %r8		C r8 = finished result limb
97	mov	%r9, %r11		C unshifted copy for the later shl
98	shr	R8(%rcx), %r9
99	lea	-16(up), up		C step up down two limbs
100	neg	R8(%rcx)		C count byte back to cnt
101	mov	%r8, (rp)
102	dec	n
103	jg	L(top)		C repeat while limb pairs remain
104
C Tail: finish the two least significant result limbs.
105L(end):	shl	R8(%rcx), %r10
106	or	%r10, %r9		C second-lowest result limb
107	shl	R8(%rcx), %r11		C lowest result limb; shl fills the vacated low bits with zeros
108	mov	%r9, -8(rp)
109	mov	%r11, -16(rp)
110	FUNC_EXIT()
111	ret
112EPILOGUE()
113