xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/rshift.asm (revision 9fd8799cb5ceb66c69f2eb1a6d26a1d587ba1f1e)
1dnl  AMD64 mpn_rshift -- mpn right shift.
2
3dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C	     cycles/limb
35C AMD K8,K9	 2.375
36C AMD K10	 2.375
37C Intel P4	 8
38C Intel core2	 2.11
39C Intel corei	 ?
40C Intel atom	 5.75
41C VIA nano	 3.5
42
43
44C INPUT PARAMETERS
C The four arguments, named after the registers the code below reads them
C from (the first four integer argument registers of the System V AMD64 ABI):
C   rp   destination: result limbs are stored through this pointer
C   up   source: limbs are loaded through this pointer
C   n    number of 8-byte limbs (used below as an index scaled by 8)
C   cnt  shift count in bits; only its low byte, cl, feeds the shifts
45define(`rp',	`%rdi')
46define(`up',	`%rsi')
47define(`n',	`%rdx')
48define(`cnt',	`%rcx')
49
C NOTE(review): ABI_SUPPORT is defined outside this file; presumably it
C declares which calling conventions this file can be built for -- confirm
C in the x86_64 m4 definitions.
50ABI_SUPPORT(DOS64)
51ABI_SUPPORT(STD64)
52
C NOTE(review): ASM_START, TEXT and ALIGN are defined outside this file;
C presumably they open the code section and align the entry point that
C follows to 32 bytes -- confirm.
53ASM_START()
54	TEXT
55	ALIGN(32)
C mpn_rshift -- shift the n-limb number at up right by cnt bits and store the
C n-limb result at rp.
C
C In:	rp  (%rdi)  destination limb pointer
C	up  (%rsi)  source limb pointer
C	n   (%rdx)  limb count.  n = 0 is not special-cased: up[0] is always
C		    read and at least one limb is always stored.
C	cnt (%rcx)  shift count, used through cl.  Presumably 1 <= cnt <= 63
C		    is required -- TODO confirm against the mpn_rshift API
C		    documentation (with cnt = 0 both shift counts would be 0).
C Out:	%rax = up[0] << (64-cnt): the cnt bits shifted out at the low end,
C	placed in the most significant bits of the returned limb.
C Written registers: %rax, %r8, %r9, %r10, %r11, flags; rp, up, n and cnt are
C	modified as well.
C
C Method: every result limb except the top one is
C	rp[i] = (up[i] >> cnt) | (up[i+1] << (64-cnt))
C and the top one is just up[n-1] >> cnt.  Limbs are processed from the least
C significant end upwards.  cl is flipped between cnt (for shr) and -cnt (for
C shl) with neg; 64-bit shifts use only the low 6 bits of cl, so -cnt acts as
C 64-cnt.
C
C Indexing: up and rp are moved to the most significant limb and n is negated,
C so n counts upwards and 8(up,n,8) addresses the next unprocessed limb until
C the main loop is entered.
C
C Dispatch: (n+1) mod 4 selects how many limbs (0, 1, 2 or 3) are done ahead
C of the main loop so that 4k+3 limbs remain.  The loop stores 4 limbs per
C pass and the last 3 limbs are finished at L(end) and L(ast).  n = 1 and
C n = 2 never reach the loop.
C
C NOTE(review): FUNC_ENTRY, FUNC_EXIT, R32, R8, L, PROLOGUE and EPILOGUE are
C macros defined outside this file.  R32 and R8 presumably select the 32-bit
C and 8-bit names of a register; FUNC_ENTRY(4)/FUNC_EXIT are presumably ABI
C glue for the 4 arguments -- confirm in the x86_64 m4 definitions.
56PROLOGUE(mpn_rshift)
57	FUNC_ENTRY(4)
58	neg	R32(%rcx)		C put lsh count (-cnt, acts as 64-cnt) in cl
59	mov	(up), %rax
60	shl	R8(%rcx), %rax		C function return value: bits shifted out of up[0]
61	neg	R32(%rcx)		C put rsh count (cnt) back in cl
62
63	lea	1(n), R32(%r8)		C r8d = n+1; only its low 2 bits are used below
64
65	lea	-8(up,n,8), up		C up = address of most significant source limb
66	lea	-8(rp,n,8), rp		C rp = address of most significant result limb
67	neg	n			C n = -n; 8(up,n,8) is now the least significant limb
68
69	and	$3, R32(%r8)		C r8d = (n+1) mod 4, n as passed in
70	je	L(rlx)			C jump for n = 3, 7, 11, ...
71
72	dec	R32(%r8)
73	jne	L(1)
74C	n = 4, 8, 12, ...
C	Do one limb here, leaving 4k+3 limbs.
75	mov	8(up,n,8), %r10
76	shr	R8(%rcx), %r10		C low part: this limb >> cnt
77	neg	R32(%rcx)		C put lsh count (64-cnt) in cl
78	mov	16(up,n,8), %r8
79	shl	R8(%rcx), %r8		C high part: next limb << (64-cnt)
80	or	%r8, %r10
81	mov	%r10, 8(rp,n,8)
82	inc	n
83	jmp	L(rll)			C L(rll) puts the rsh count back in cl
84
85L(1):	dec	R32(%r8)
86	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
87C	n = 2, 6, 10, 14, ...
C	Do one limb here, then fall into L(1x).
88	mov	8(up,n,8), %r10
89	shr	R8(%rcx), %r10		C low part: this limb >> cnt
90	neg	R32(%rcx)		C put lsh count (64-cnt) in cl
91	mov	16(up,n,8), %r8
92	shl	R8(%rcx), %r8		C high part: next limb << (64-cnt)
93	or	%r8, %r10
94	mov	%r10, 8(rp,n,8)
95	inc	n
96	neg	R32(%rcx)		C put rsh count (cnt) back in cl
97L(1x):
C	cl = cnt here.  n = -1 means only the most significant limb is left;
C	it takes no bits from a higher limb, so go straight to L(ast).
98	cmp	$-1, n
99	je	L(ast)
C	Otherwise do two limbs, leaving 4k+3 limbs.
100	mov	8(up,n,8), %r10
101	shr	R8(%rcx), %r10
102	mov	16(up,n,8), %r11
103	shr	R8(%rcx), %r11
104	neg	R32(%rcx)		C put lsh count (64-cnt) in cl
105	mov	16(up,n,8), %r8
106	mov	24(up,n,8), %r9
107	shl	R8(%rcx), %r8
108	or	%r8, %r10
109	shl	R8(%rcx), %r9
110	or	%r9, %r11
111	mov	%r10, 8(rp,n,8)
112	mov	%r11, 16(rp,n,8)
113	add	$2, n
114
C	Loop feed-in: 4k+3 limbs remain.  With cl = cnt, compute the shr halves
C	of the next two result limbs into r10/r11.
115L(rll):	neg	R32(%rcx)		C put rsh count (cnt) in cl
116L(rlx):	mov	8(up,n,8), %r10
117	shr	R8(%rcx), %r10
118	mov	16(up,n,8), %r11
119	shr	R8(%rcx), %r11
120
C	n is negative here; the add carries exactly when at most 4 limbs were
C	left, which for a count of the form 4k+3 means only the last 3 remain.
121	add	$4, n			C				      4
122	jb	L(end)			C				      2
123	ALIGN(16)
124L(top):
C	Each pass stores 4 result limbs.  On entry cl = cnt and r10/r11 hold
C	the shr halves of the next two result limbs; n has already been
C	advanced by 4, so those two limbs are stored at -24 and -16 off rp.
125	C complete the two limbs pending in r10/r11 with their shl halves
126	neg	R32(%rcx)		C put lsh count (64-cnt) in cl
127	mov	-16(up,n,8), %r8
128	mov	-8(up,n,8), %r9
129	shl	R8(%rcx), %r8
130	or	%r8, %r10
131	shl	R8(%rcx), %r9
132	or	%r9, %r11
133	mov	%r10, -24(rp,n,8)
134	mov	%r11, -16(rp,n,8)
135	C start the next two limbs with their shl halves (in r8/r9)
136	mov	(up,n,8), %r8
137	mov	8(up,n,8), %r9
138	shl	R8(%rcx), %r8
139	shl	R8(%rcx), %r9
140
141	C complete those two limbs with their shr halves
142	neg	R32(%rcx)		C put rsh count (cnt) in cl
143	mov	-8(up,n,8), %r10
144	mov	0(up,n,8), %r11
145	shr	R8(%rcx), %r10
146	or	%r10, %r8
147	shr	R8(%rcx), %r11
148	or	%r11, %r9
149	mov	%r8, -8(rp,n,8)
150	mov	%r9, 0(rp,n,8)
151	C start the next two limbs with their shr halves (in r10/r11)
152	mov	8(up,n,8), %r10
153	mov	16(up,n,8), %r11
154	shr	R8(%rcx), %r10
155	shr	R8(%rcx), %r11
156
C	Keep looping while the add does not carry, i.e. while more than the
C	final 3 limbs remain.
157	add	$4, n
158	jae	L(top)			C				      2
159L(end):
C	Tail: finish the two limbs pending in r10/r11.  up and rp still point
C	at the most significant limb, so no index register is needed.
160	neg	R32(%rcx)		C put lsh count (64-cnt) in cl
161	mov	-8(up), %r8
162	shl	R8(%rcx), %r8
163	or	%r8, %r10
164	mov	(up), %r9
165	shl	R8(%rcx), %r9
166	or	%r9, %r11
167	mov	%r10, -16(rp)
168	mov	%r11, -8(rp)
169
170	neg	R32(%rcx)		C put rsh count (cnt) in cl
171L(ast):	mov	(up), %r10		C most significant limb: nothing is shifted in from above
172	shr	R8(%rcx), %r10
173	mov	%r10, (rp)
174	FUNC_EXIT()
175	ret
176EPILOGUE()
177