dnl  AMD64 mpn_lshiftc -- mpn left shift with complement, optimised for Atom.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 ?
C AMD K10	 ?
C Intel P4	 ?
C Intel core2	 ?
C Intel NHM	 ?
C Intel SBR	 ?
C Intel atom	 5
C VIA nano	 ?

C TODO
C  * Consider using 4-way unrolling.  We reach 4.5 c/l, but the code is 2.5
C    times larger.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

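C A rough C-level sketch of what this routine computes, modelled on GMP's
C generic mpn_lshiftc (illustrative only; assumes 64-bit limbs, n >= 1 and
C 1 <= cnt <= 63; variable names are for exposition, not part of this file):
C
C	mp_limb_t
C	mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
C	{
C	  unsigned int tnc = 64 - cnt;
C	  mp_limb_t low, high, retval;
C
C	  up += n;			/* work from the most significant limb down */
C	  rp += n;
C	  low = *--up;
C	  retval = low >> tnc;		/* bits shifted out, returned uncomplemented */
C	  high = low << cnt;
C	  while (--n != 0)
C	    {
C	      low = *--up;
C	      *--rp = ~(high | (low >> tnc));	/* store complemented result limb */
C	      high = low << cnt;
C	    }
C	  *--rp = ~high;
C	  return retval;
C	}
C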
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_lshiftc)
	FUNC_ENTRY(4)
	lea	-8(up,n,8), up	C up := &up[n-1], most significant limb
	lea	-8(rp,n,8), rp	C rp := &rp[n-1]
	shr	R32(n)		C n >>= 1, carry = original n mod 2
	mov	(up), %rax
	jnc	L(evn)		C branch if n was even

	mov	%rax, %r11
	shl	R8(%rcx), %r11
	neg	R8(%rcx)	C cl := 64-cnt, for the right shifts
	shr	R8(%rcx), %rax	C function return value
	test	n, n
	jnz	L(gt1)
	not	%r11
	mov	%r11, (rp)	C n = 1: store single complemented limb
	FUNC_EXIT()
	ret

L(gt1):	mov	-8(up), %r8	C odd n >= 3
	mov	%r8, %r10
	shr	R8(%rcx), %r8
	jmp	L(lo1)

L(evn):	mov	%rax, %r10	C even n
	neg	R8(%rcx)
	shr	R8(%rcx), %rax	C function return value
	mov	-8(up), %r9
	mov	%r9, %r11
	shr	R8(%rcx), %r9
	neg	R8(%rcx)
	lea	8(rp), rp
	lea	-8(up), up
	jmp	L(lo0)

C Main loop: two limbs per iteration; cl alternates between cnt and 64-cnt.
C	ALIGN(16)
L(top):	shl	R8(%rcx), %r10
	or	%r10, %r9
	shl	R8(%rcx), %r11
	not	%r9
	neg	R8(%rcx)
	mov	-8(up), %r8
	lea	-16(rp), rp
	mov	%r8, %r10
	shr	R8(%rcx), %r8
	mov	%r9, 8(rp)
L(lo1):	or	%r11, %r8
	mov	-16(up), %r9
	mov	%r9, %r11
	shr	R8(%rcx), %r9
	lea	-16(up), up
	neg	R8(%rcx)
	not	%r8
	mov	%r8, (rp)
L(lo0):	dec	n
	jg	L(top)

L(end):	shl	R8(%rcx), %r10	C finish and store the last two result limbs
	or	%r10, %r9
	not	%r9
	shl	R8(%rcx), %r11
	not	%r11
	mov	%r9, -8(rp)
	mov	%r11, -16(rp)
	FUNC_EXIT()
	ret
EPILOGUE()