xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/lshiftc.asm (revision 37afb7eb6895c833050f8bfb1d1bb2f99f332539)
1dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.
2
3dnl  Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or
8dnl  modify it under the terms of the GNU Lesser General Public License as
9dnl  published by the Free Software Foundation; either version 3 of the
10dnl  License, or (at your option) any later version.
11dnl
12dnl  The GNU MP Library is distributed in the hope that it will be useful,
13dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
14dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15dnl  Lesser General Public License for more details.
16dnl
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C	     cycles/limb
24C AMD K8,K9	 2.75
25C AMD K10	 2.75
26C Intel P4	 ?
27C Intel core2	 ?
28C Intel corei	 ?
29C Intel atom	 ?
30C VIA nano	 3.75
31
32
33C INPUT PARAMETERS
C Symbolic names for the registers that hold the four arguments.
34define(`rp',	`%rdi')	C result pointer, every store goes through it
35define(`up',	`%rsi')	C source pointer, every load goes through it
36define(`n',	`%rdx')	C limb count, used as an index scaled by 8 bytes
37define(`cnt',	`%rcx')	C shift count; the code below names %rcx directly
38
C ABI_SUPPORT is defined outside this file; presumably it declares which
C calling conventions this implementation may be built for.
39ABI_SUPPORT(DOS64)
40ABI_SUPPORT(STD64)
41
42ASM_START()
43	TEXT
44	ALIGN(32)
C mpn_lshiftc: shift the n-limb number at up left by cnt bits and store the
C one-complement of the shifted limbs at rp.
C
C For i = n-1 down to 1:  rp[i] = ~((up[i] << cnt) | (up[i-1] >> (64-cnt)))
C and finally:            rp[0] = ~(up[0] << cnt)
C
C Return value in %rax: up[n-1] >> (64-cnt), that is the bits shifted out of
C the most significant limb.  The return value is not complemented.
C
C The right shift count is obtained by negating %ecx; 64-bit shifts use only
C the low 6 bits of %cl, so -cnt acts as 64-cnt.  With cnt = 0 both shift
C counts would be 0, so cnt is assumed to be in the range 1..63.
C n is assumed to be at least 1, since up[n-1] is read unconditionally.
C
C Limbs are handled from the most significant end downwards.  Zero to three
C leading limbs are peeled off so that n = 3 mod 4 on reaching L(rlx); the
C main loop then stores four limbs per iteration, and L(end) plus L(ast)
C store the last three.  n = 1 and n = 2 finish through L(ast) directly.
C
C Registers written: %rax, %rcx, n, %r8, %r9, %r10, %r11 and the flags.
C FUNC_ENTRY and FUNC_EXIT are macros defined outside this file.
45PROLOGUE(mpn_lshiftc)
46	FUNC_ENTRY(4)
47	neg	R32(%rcx)		C put rsh count in cl
48	mov	-8(up,n,8), %rax
49	shr	R8(%rcx), %rax		C function return value
50
51	neg	R32(%rcx)		C put lsh count in cl
C Dispatch on (n+1) mod 4; only the low bits of n take part.
52	lea	1(n), R32(%r8)
53	and	$3, R32(%r8)
54	je	L(rlx)			C jump for n = 3, 7, 11, ...
55
56	dec	R32(%r8)
57	jne	L(1)
58C	n = 4, 8, 12, ...
C	Peel the top limb, then enter at L(rll) with n = 3 mod 4.
59	mov	-8(up,n,8), %r10
60	shl	R8(%rcx), %r10
61	neg	R32(%rcx)		C put rsh count in cl
62	mov	-16(up,n,8), %r8
63	shr	R8(%rcx), %r8
64	or	%r8, %r10
65	not	%r10
66	mov	%r10, -8(rp,n,8)
67	dec	n
68	jmp	L(rll)
69
70L(1):	dec	R32(%r8)
71	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
72C	n = 2, 6, 10, 14, ...
C	Peel the top limb, then fall into L(1x) with n = 1 mod 4.
73	mov	-8(up,n,8), %r10
74	shl	R8(%rcx), %r10
75	neg	R32(%rcx)		C put rsh count in cl
76	mov	-16(up,n,8), %r8
77	shr	R8(%rcx), %r8
78	or	%r8, %r10
79	not	%r10
80	mov	%r10, -8(rp,n,8)
81	dec	n
82	neg	R32(%rcx)		C put lsh count in cl
C n = 1 mod 4 here and cl holds the lsh count.  A single remaining limb goes
C straight to L(ast); otherwise two more limbs are peeled so n = 3 mod 4.
83L(1x):
84	cmp	$1, n
85	je	L(ast)
86	mov	-8(up,n,8), %r10
87	shl	R8(%rcx), %r10
88	mov	-16(up,n,8), %r11
89	shl	R8(%rcx), %r11
90	neg	R32(%rcx)		C put rsh count in cl
91	mov	-16(up,n,8), %r8
92	mov	-24(up,n,8), %r9
93	shr	R8(%rcx), %r8
94	or	%r8, %r10
95	shr	R8(%rcx), %r9
96	or	%r9, %r11
97	not	%r10
98	not	%r11
99	mov	%r10, -8(rp,n,8)
100	mov	%r11, -16(rp,n,8)
101	sub	$2, n
102
C L(rll) is entered with the rsh count in cl, L(rlx) with the lsh count.
C Preload the left-shifted parts of the two highest pending limbs.
103L(rll):	neg	R32(%rcx)		C put lsh count in cl
104L(rlx):	mov	-8(up,n,8), %r10
105	shl	R8(%rcx), %r10
106	mov	-16(up,n,8), %r11
107	shl	R8(%rcx), %r11
108
C n is reduced by 4 before the loop body runs; a borrow means only the final
C three limbs remain, with r10 and r11 already holding up[2] and up[1] shifted.
109	sub	$4, n			C				      4
110	jb	L(end)			C				      2
111	ALIGN(16)
C Loop invariant on entry: r10 = up[n+3] << cnt, r11 = up[n+2] << cnt, and
C cl holds the lsh count.  Each pass stores rp[n+3], rp[n+2], rp[n+1], rp[n].
112L(top):
113	C finish stuff from lsh block
114	neg	R32(%rcx)		C put rsh count in cl
115	mov	16(up,n,8), %r8
116	mov	8(up,n,8), %r9
117	shr	R8(%rcx), %r8
118	or	%r8, %r10
119	shr	R8(%rcx), %r9
120	or	%r9, %r11
121	not	%r10
122	not	%r11
123	mov	%r10, 24(rp,n,8)
124	mov	%r11, 16(rp,n,8)
125	C start two new rsh
C	r8 and r9 become the low parts of rp[n+1] and rp[n]
126	mov	0(up,n,8), %r8
127	mov	-8(up,n,8), %r9
128	shr	R8(%rcx), %r8
129	shr	R8(%rcx), %r9
130
131	C finish stuff from rsh block
132	neg	R32(%rcx)		C put lsh count in cl
133	mov	8(up,n,8), %r10
134	mov	0(up,n,8), %r11
135	shl	R8(%rcx), %r10
136	or	%r10, %r8
137	shl	R8(%rcx), %r11
138	or	%r11, %r9
139	not	%r8
140	not	%r9
141	mov	%r8, 8(rp,n,8)
142	mov	%r9, 0(rp,n,8)
143	C start two new lsh
C	r10 and r11 become the high parts of the next two result limbs
144	mov	-8(up,n,8), %r10
145	mov	-16(up,n,8), %r11
146	shl	R8(%rcx), %r10
147	shl	R8(%rcx), %r11
148
149	sub	$4, n
150	jae	L(top)			C				      2
C n has gone below zero here, so the remaining limbs are addressed with fixed
C offsets.  r10 = up[2] << cnt and r11 = up[1] << cnt; store rp[2] and rp[1].
151L(end):
152	neg	R32(%rcx)		C put rsh count in cl
153	mov	8(up), %r8
154	shr	R8(%rcx), %r8
155	or	%r8, %r10
156	mov	(up), %r9
157	shr	R8(%rcx), %r9
158	or	%r9, %r11
159	not	%r10
160	not	%r11
161	mov	%r10, 16(rp)
162	mov	%r11, 8(rp)
163
164	neg	R32(%rcx)		C put lsh count in cl
C Lowest limb: there is no lower neighbour, so zeros are shifted in before
C the complement is taken and stored to rp[0].
165L(ast):	mov	(up), %r10
166	shl	R8(%rcx), %r10
167	not	%r10
168	mov	%r10, (rp)
169	FUNC_EXIT()
170	ret
171EPILOGUE()
172