xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/divrem_1.asm (revision 5dd36a3bc8bf2a9dec29ceb6349550414570c447)
1dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
2
3dnl  Copyright 2004, 2005, 2007-2012, 2014 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C		norm	unorm	frac
35C AMD K8,K9	13	13	12
36C AMD K10	13	13	12
37C Intel P4	43	44	43
38C Intel core2	24.5	24.5	19.5
39C Intel corei	20.5	19.5	18
40C Intel atom	43	46	36
41C VIA nano	25.5	25.5	24
42
43C mp_limb_t
44C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
45C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
46
47C mp_limb_t
48C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
49C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
50C                      mp_limb_t dinv, int cnt)
51
52C INPUT PARAMETERS
C Names for the registers the arguments are read from.  The *_param names
C are only meaningful at function entry: fn_param shares %rsi with up, and
C un_param shares %rcx with cnt (second group below), so both functions copy
C fn and nn into fn/un before those registers are given their new roles.
53define(`qp',		`%rdi')
54define(`fn_param',	`%rsi')
55define(`up_param',	`%rdx')
56define(`un_param',	`%rcx')
57define(`d',		`%r8')
58define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
59C       shift passed on stack		C only for mpn_preinv_divrem_1
60
C Working names used by the division loops.
C   cnt  shift count; the shifts take it through R8(...) of this register
C   up   base of the dividend limbs, indexed with un
C   fn   fraction quotient limbs still to be produced
C   un   dividend limbs still to be consumed
61define(`cnt',		`%rcx')
62define(`up',		`%rsi')
63define(`fn',		`%r12')
64define(`un',		`%rbx')
65
66
67C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
68C     un  cnt     up  qp      d   dinv        fn
69
70ABI_SUPPORT(DOS64)
71ABI_SUPPORT(STD64)
72
C CNTOFF(reg) is the displacement mpn_preinv_divrem_1 uses to load its
C stack-passed shift count after it has pushed four registers.
C   STD64: 40 = 8 bytes of return address + 4 pushes of 8 bytes each
C          (assuming FUNC_ENTRY emits no stack adjustment for STD64).
C   DOS64: 104.  NOTE(review): consistent with the 56/64(%rsp) loads of d and
C          dinv in that function plus the 4 pushes, but the exact layout
C          depends on what FUNC_ENTRY pushes -- confirm against its definition.
73IFSTD(`define(`CNTOFF',		`40($1)')')
74IFDOS(`define(`CNTOFF',		`104($1)')')
75
76ASM_START()
77	TEXT
78	ALIGN(16)
C mpn_preinv_divrem_1 -- same division as mpn_divrem_1, for callers that
C already supply dinv and the shift count, so mpn_invert_limb is not called.
C
C This is only an entry stub.  It repeats the register setup of mpn_divrem_1
C and then jumps into the loops of that function:
C   - top bit of d set:  L(nent), the normalised loop entry
C   - otherwise:         L(uent), after loading the shift count from the
C                        stack and shifting d left by it
C The registers pushed here are popped at L(ret) in mpn_divrem_1.
C Unlike mpn_divrem_1 there is no early exit for fn + nn == 0 in this stub.
79PROLOGUE(mpn_preinv_divrem_1)
80	FUNC_ENTRY(4)
C DOS64 only: fetch d and dinv from the stack into the registers the shared
C code expects.
81IFDOS(`	mov	56(%rsp), %r8	')
82IFDOS(`	mov	64(%rsp), %r9	')
83	xor	R32(%rax), R32(%rax)		C rax = 0, the initial remainder
84	push	%r13
85	push	%r12
86	push	%rbp
87	push	%rbx
88
89	mov	fn_param, fn			C copy fn out of %rsi before up takes it over
90	mov	un_param, un			C copy nn out of %rcx before cnt takes it over
91	add	fn_param, un_param		C rcx = fn + nn, total quotient limbs
92	mov	up_param, up
93
94	lea	-8(qp,un_param,8), qp		C qp = address of quotient limb fn+nn-1
95
96	test	d, d
97	js	L(nent)				C top bit set: d is already normalised
98
C Unnormalised divisor: only the low byte of the count is loaded, which is
C all the later shifts read.
99	mov	CNTOFF(%rsp), R8(cnt)
100	shl	R8(cnt), d			C normalise d
101	jmp	L(uent)
102EPILOGUE()
103
C mpn_divrem_1 -- divide the nn limbs at np by the single limb d.
C
C Quotient limbs are stored downwards from qp[fn+nn-1]: first the limbs that
C come from the nn dividend limbs, then fn further limbs produced with zero
C dividend limbs (the fraction part).  The remainder is returned in %rax.
C Returns 0 at once when fn + nn == 0.
C
C Phases:
C   L(normalized)    top bit of d set; loop L(ntop)
C   L(unnormalized)  d is shifted left until its top bit is set and the
C                    dividend limbs are shifted on the fly; loop L(utop),
C                    last limb at L(uend)
C   L(frac)          fn fraction limbs; loop L(ftop)
C   finally the remainder is shifted right by the same count and returned.
C
C L(nent) and L(uent) are also jumped to from mpn_preinv_divrem_1.
104	ALIGN(16)
105PROLOGUE(mpn_divrem_1)
106	FUNC_ENTRY(4)
107IFDOS(`	mov	56(%rsp), %r8	')
108	xor	R32(%rax), R32(%rax)		C rax = 0: return value of the empty case
109	push	%r13
110	push	%r12
111	push	%rbp
112	push	%rbx
113
114	mov	fn_param, fn			C copy fn out of %rsi before up takes it over
115	mov	un_param, un			C copy nn out of %rcx before cnt takes it over
116	add	fn_param, un_param		C rcx = fn + nn, sets ZF when that is zero
117	mov	up_param, up			C mov leaves the flags of the add intact
118	je	L(ret)				C nothing to do: fn + nn == 0
119
120	lea	-8(qp,un_param,8), qp		C qp = address of quotient limb fn+nn-1
121	xor	R32(%rbp), R32(%rbp)		C rbp = 0, initial partial remainder
122
123	test	d, d
124	jns	L(unnormalized)
125
C Top bit of d is set.  If there is a dividend limb, handle the most
C significant one by a single subtraction: being below 2*d its quotient limb
C is 0 or 1.
126L(normalized):
127	test	un, un
128	je	L(8)			C un == 0
129	mov	-8(up,un,8), %rbp
130	dec	un
131	mov	%rbp, %rax
132	sub	d, %rbp				C rbp = limb - d, CF = borrow
133	cmovc	%rax, %rbp			C borrow: remainder is the limb itself
134	sbb	R32(%rax), R32(%rax)		C eax = -CF (cmovc did not touch CF)
135	inc	R32(%rax)			C rax = 1 - CF, the quotient limb
136	mov	%rax, (qp)
137	lea	-8(qp), qp
C dinv = mpn_invert_limb(d).  %r8 and, for STD64, %rdi/%rsi are saved around
C the call because qp, up and d live there.
C NOTE(review): by push count %rsp is 16-byte aligned at the CALL, yet the
C assert expects nz from the test; presumably ASSERT pushes the flags before
C running its instruction -- confirm against the ASSERT macro definition.
138L(8):
139IFSTD(`	push	%rdi		')
140IFSTD(`	push	%rsi		')
141	push	%r8
142IFSTD(`	mov	d, %rdi		')
143IFDOS(`	sub	$32, %rsp	')
144IFDOS(`	mov	d, %rcx		')
145	ASSERT(nz, `test $15, %rsp')
146	CALL(	mpn_invert_limb)
147IFDOS(`	add	$32, %rsp	')
148	pop	%r8
149IFSTD(`	pop	%rsi		')
150IFSTD(`	pop	%rdi		')
151
152	mov	%rax, dinv
153	mov	%rbp, %rax			C rax = partial remainder r
154	jmp	L(nent)
155
C Normalised loop, one quotient limb per pass, most significant first.
C At L(ntop): rax = remainder r, rbp = r + 1, u = up[un].
C   {rdx,rax} = r * dinv + {r + 1, u}
C   q    = high word (kept in r13), rbp = low word
C   cand = u - q * d                  (mod 2^64, kept in r10)
C   if cand >= low word:  q -= 1, r = cand + d   else r = cand
C   if r >= d:            q += 1, r -= d         (done at L(nfx))
C The cmovc/adc pair performs the first adjustment without a branch.
C The trailing columns on the instructions are the original timing notes.
156	ALIGN(16)
157L(ntop):mov	(up,un,8), %r10		C	    K8-K10  P6-CNR P6-NHM  P4
158	mul	dinv			C	      0,13   0,20   0,18   0,45
159	add	%r10, %rax		C	      4      8      3     12
160	adc	%rbp, %rdx		C	      5      9     10     13
161	mov	%rax, %rbp		C	      5      9      4     13
162	mov	%rdx, %r13		C	      6     11     12     23
163	imul	d, %rdx			C	      6     11     11     23
164	sub	%rdx, %r10		C	     10     16     14     33
165	mov	d, %rax			C
166	add	%r10, %rax		C	     11     17     15     34
167	cmp	%rbp, %r10		C	     11     17     15     34
168	cmovc	%r10, %rax		C	     12     18     16     35
169	adc	$-1, %r13		C
170	cmp	d, %rax			C
171	jae	L(nfx)			C
172L(nok):	mov	%r13, (qp)		C
173	sub	$8, qp			C
174L(nent):lea	1(%rax), %rbp		C
175	dec	un			C
176	jns	L(ntop)			C
177
C Dividend exhausted.  The shift count is zero on this path, so the final
C shr at the end of L(frac) leaves the remainder unchanged.
178	xor	R32(%rcx), R32(%rcx)
179	jmp	L(frac)
180
C Second adjustment of the normalised loop: r >= d.
181L(nfx):	sub	d, %rax
182	inc	%r13
183	jmp	L(nok)
184
C Top bit of d is clear.  If the most significant dividend limb is below d,
C its quotient limb is zero: store rbp (still 0), and take the limb as the
C initial remainder.
185L(unnormalized):
186	test	un, un
187	je	L(44)
188	mov	-8(up,un,8), %rax
189	cmp	d, %rax
190	jae	L(44)
191	mov	%rbp, (qp)
192	mov	%rax, %rbp
193	lea	-8(qp), qp
C NOTE(review): mov and lea leave the flags of the cmp above intact, and
C falling through the jae means rax < d, so ZF is clear here and this je
C looks like it can never be taken -- confirm before relying on it.
194	je	L(ret)
195	dec	un
C Normalise.  bsr yields the index of the highest set bit of d; not turns
C that into 63 - index in the low six bits, which is all a 64-bit shift by
C cl looks at.  d and the initial remainder are shifted left by that count.
C NOTE(review): the bsr result is undefined for d == 0 and nothing here
C checks for it; presumably callers never pass a zero divisor.
196L(44):
197	bsr	d, %rcx
198	not	R32(%rcx)
199	shl	R8(%rcx), d
200	shl	R8(%rcx), %rbp
201
C dinv = mpn_invert_limb(d) for the shifted d.  As at L(8), plus the shift
C count in %rcx has to survive the call.  The extra sub/add of 8 (STD64) or
C 40 (DOS64) is presumably alignment padding, and on DOS64 also the 32 bytes
C of shadow space -- see the alignment note at L(8).
202	push	%rcx
203IFSTD(`	push	%rdi		')
204IFSTD(`	push	%rsi		')
205	push	%r8
206IFSTD(`	sub	$8, %rsp	')
207IFSTD(`	mov	d, %rdi		')
208IFDOS(`	sub	$40, %rsp	')
209IFDOS(`	mov	d, %rcx		')
210	ASSERT(nz, `test $15, %rsp')
211	CALL(	mpn_invert_limb)
212IFSTD(`	add	$8, %rsp	')
213IFDOS(`	add	$40, %rsp	')
214	pop	%r8
215IFSTD(`	pop	%rsi		')
216IFSTD(`	pop	%rdi		')
217	pop	%rcx
218
219	mov	%rax, dinv
220	mov	%rbp, %rax			C rax = shifted initial remainder
221	test	un, un
222	je	L(frac)				C no dividend limbs left
223
C Entry to the unnormalised loop; mpn_preinv_divrem_1 arrives here with
C rax = 0.  The bits of the top remaining limb that the left shift pushes
C out are merged into the remainder.  Negating ecx around the shr gives a
C right shift by 64 - count, since only the low six bits of cl are used.
224L(uent):dec	un
225	mov	(up,un,8), %rbp
226	neg	R32(%rcx)
227	shr	R8(%rcx), %rbp
228	neg	R32(%rcx)
229	or	%rbp, %rax
230	jmp	L(ent)
231
C Unnormalised loop.  At L(utop): rbp = current limb (unshifted),
C rax = remainder r, r11 = r + 1, up[un] = next lower limb.
C   u = (rbp << count) | (up[un] >> (64 - count))
C then the same division step as in L(ntop), with the low product word in
C r11, the quotient limb in r13 and the candidate remainder in rbp.
232	ALIGN(16)
233L(utop):mov	(up,un,8), %r10
234	shl	R8(%rcx), %rbp
235	neg	R32(%rcx)
236	shr	R8(%rcx), %r10
237	neg	R32(%rcx)
238	or	%r10, %rbp
239	mul	dinv
240	add	%rbp, %rax
241	adc	%r11, %rdx
242	mov	%rax, %r11
243	mov	%rdx, %r13
244	imul	d, %rdx
245	sub	%rdx, %rbp
246	mov	d, %rax
247	add	%rbp, %rax
248	cmp	%r11, %rbp
249	cmovc	%rbp, %rax
250	adc	$-1, %r13
251	cmp	d, %rax
252	jae	L(ufx)
253L(uok):	mov	%r13, (qp)
254	sub	$8, qp
C Load the limb for the next pass, then step un; lea does not disturb the
C sign flag that dec leaves for the jns.
255L(ent):	mov	(up,un,8), %rbp
256	dec	un
257	lea	1(%rax), %r11
258	jns	L(utop)
259
C Last dividend limb: there is no lower limb to take bits from, so it is
C only shifted left.  Otherwise the same division step as above.
260L(uend):shl	R8(%rcx), %rbp
261	mul	dinv
262	add	%rbp, %rax
263	adc	%r11, %rdx
264	mov	%rax, %r11
265	mov	%rdx, %r13
266	imul	d, %rdx
267	sub	%rdx, %rbp
268	mov	d, %rax
269	add	%rbp, %rax
270	cmp	%r11, %rbp
271	cmovc	%rbp, %rax
272	adc	$-1, %r13
273	cmp	d, %rax
274	jae	L(efx)
275L(eok):	mov	%r13, (qp)
276	sub	$8, qp
277	jmp	L(frac)
278
C Second adjustment (r >= d) for L(utop) and L(uend) respectively.
279L(ufx):	sub	d, %rax
280	inc	%r13
281	jmp	L(uok)
282L(efx):	sub	d, %rax
283	inc	%r13
284	jmp	L(eok)
285
C Fraction limbs: every further dividend limb is zero, so the candidate
C remainder is just -q * d.  rbp = -d lets a single imul produce it.
C At L(ftop): rax = remainder r, r11 = r + 1.
C   {rdx,rax} = r * dinv, then rdx += r + 1
C   q = rdx (kept in r13), r11 = low word, cand = q * -d
C   if cand >= low word:  q -= 1, r = cand + d   else r = cand
C This loop has no second compare against d.
286L(frac):mov	d, %rbp
287	neg	%rbp
288	jmp	L(fent)
289
290	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
291L(ftop):mul	dinv			C	      0,12   0,17   0,17
292	add	%r11, %rdx		C	      5      8     10
293	mov	%rax, %r11		C	      4      8      3
294	mov	%rdx, %r13		C	      6      9     11
295	imul	%rbp, %rdx		C	      6      9     11
296	mov	d, %rax			C
297	add	%rdx, %rax		C	     10     14     14
298	cmp	%r11, %rdx		C	     10     14     14
299	cmovc	%rdx, %rax		C	     11     15     15
300	adc	$-1, %r13		C
301	mov	%r13, (qp)		C
302	sub	$8, qp			C
303L(fent):lea	1(%rax), %r11		C
304	dec	fn			C
305	jns	L(ftop)			C
306
C Undo the normalisation shift on the remainder (count is 0 on the
C normalised path), then restore the registers pushed at entry.
307	shr	R8(%rcx), %rax
308L(ret):	pop	%rbx
309	pop	%rbp
310	pop	%r12
311	pop	%r13
312	FUNC_EXIT()
313	ret
314EPILOGUE()
315