xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/rshift.asm (revision 8450a7c42673d65e3b1f6560d3b6ecd317a6cbe8)
dnl  AMD64 mpn_rshift -- mpn right shift.

dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.375
C AMD K10	 2.375
C Intel P4	 8
C Intel core2	 2.11
C Intel corei	 ?
C Intel atom	 5.75
C VIA nano	 3.5


C INPUT PARAMETERS
C Symbolic names for the four argument registers.  rp is only stored
C through, up is only loaded from, n is the limb count and cnt the bit count.
C The function body names %rcx directly rather than going through cnt.
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

C NOTE(review): ABI_SUPPORT, ASM_START, TEXT and ALIGN come from the included
C m4 definitions, not from this file; presumably the two ABI_SUPPORT lines
C declare that the code is usable under both the DOS64 and the STD64 calling
C conventions -- confirm against the macro definitions.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
C mpn_rshift -- shift an n-limb number right by cnt bits.
C
C In:	rp  (%rdi)  destination, n limbs
C	up  (%rsi)  source, n limbs
C	n   (%rdx)  limb count; up[0] is read unconditionally, so n >= 1
C	cnt (%rcx)  bit count; the code relies on 1 <= cnt <= 63, because the
C		    complementary shift uses -cnt and a count of 0 would make
C		    both shifts zero
C Out:	%rax = up[0] << (64-cnt), the bits shifted out at the low end
C	rp[i] = (up[i] >> cnt) | (up[i+1] << (64-cnt)) for i < n-1
C	rp[n-1] = up[n-1] >> cnt
C Uses:	%rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags.  No stack accesses in
C	the code visible here.
C
C NOTE(review): FUNC_ENTRY and FUNC_EXIT are defined outside this file;
C presumably they map the DOS64 argument registers onto the STD64 ones used
C here -- confirm against the macro definitions.
C
C Method: cl is toggled between cnt and -cnt with neg.  A 64-bit shift only
C uses the low 6 bits of cl, so -cnt behaves as 64-cnt.  up and rp are rebased
C to the last limb and n is negated, so 8(up,n,8) always addresses the lowest
C limb not yet consumed and n counts upwards towards zero.  Limbs are walked
C from low to high.  A short dispatch on n mod 4 peels 0, 1, 2 or 3 limbs so
C that the number still to do is 3 mod 4; the main loop then produces four
C result limbs per iteration, always keeping two partial results (the shr
C halves) pending in %r10 and %r11.
PROLOGUE(mpn_rshift)
	FUNC_ENTRY(4)
	neg	R32(%rcx)		C cl = -cnt, acts as 64-cnt for shl
	mov	(up), %rax
	shl	R8(%rcx), %rax		C function return value
	neg	R32(%rcx)		C cl = cnt, the count for shr

	lea	1(n), R32(%r8)		C r8d = n+1, dispatched on below

	lea	-8(up,n,8), up		C up -> last source limb
	lea	-8(rp,n,8), rp		C rp -> last result limb
	neg	n			C n = -n, so 8(up,n,8) is the lowest limb

	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel one limb so that the count still to do becomes 3 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10		C low part of result limb
	neg	R32(%rcx)		C cl = -cnt for shl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8		C bits coming in from the next limb
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	jmp	L(rll)			C rll restores cl = cnt

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel one limb so that the count still to do becomes 1 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	neg	R32(%rcx)		C cl = -cnt for shl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	neg	R32(%rcx)		C cl = cnt for shr
L(1x):
C	Here cl = cnt and the number of limbs still to do is 1 mod 4.
	cmp	$-1, n
	je	L(ast)			C exactly one limb left: only the top limb
C	Otherwise do two limbs, leaving a count that is 3 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11
	neg	R32(%rcx)		C cl = -cnt for shl
	mov	16(up,n,8), %r8
	mov	24(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 8(rp,n,8)
	mov	%r11, 16(rp,n,8)
	add	$2, n

L(rll):	neg	R32(%rcx)		C cl = cnt for shr
C	Prime the pipeline: start the shr halves of the next two result limbs.
L(rlx):	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11

	add	$4, n			C carry out means only three limbs were left
	jb	L(end)			C in that case skip the loop entirely
	ALIGN(16)
L(top):
	C On entry: cl = cnt, r10 and r11 hold the shr halves of the two oldest
	C pending result limbs, and n has already been advanced by 4.
	C finish the two pending limbs by or-ing in the shl halves
	neg	R32(%rcx)		C cl = -cnt for shl
	mov	-16(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -24(rp,n,8)
	mov	%r11, -16(rp,n,8)
	C start the shl halves of the next two result limbs
	mov	(up,n,8), %r8
	mov	8(up,n,8), %r9
	shl	R8(%rcx), %r8
	shl	R8(%rcx), %r9

	C finish those two limbs by or-ing in the shr halves
	neg	R32(%rcx)		C cl = cnt for shr
	mov	-8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shr	R8(%rcx), %r10
	or	%r10, %r8
	shr	R8(%rcx), %r11
	or	%r11, %r9
	mov	%r8, -8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start the shr halves of two new result limbs for the next pass
	mov	8(up,n,8), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r10
	shr	R8(%rcx), %r11

	add	$4, n
	jae	L(top)			C loop until the add carries out
L(end):
C	Three limbs remain: finish the two pending in r10 and r11, then fall
C	through to the top limb.  up and rp point at the last limb here.
	neg	R32(%rcx)		C cl = -cnt for shl
	mov	-8(up), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -16(rp)
	mov	%r11, -8(rp)

	neg	R32(%rcx)		C cl = cnt for shr
C	Top limb: nothing above it, so it is only shifted right.
L(ast):	mov	(up), %r10
	shr	R8(%rcx), %r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()
166