xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/divrem_2.asm (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
2
3dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C		norm	frac
24C 486
25C P5
26C P6-13		29.2
27C P6-15		*26
28C K6
29C K7		22
30C K8		*19
31C P4-f1
32C P4-f2		*65
33C P4-f3
34C P4-f4		*72
35
36C A star means numbers not updated for the latest version of the code.
37
38
39C TODO
40C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
41C  * The loop has not been carefully tuned.  We should at the very least do
42C    some local insn swapping.
43C  * The code outside the main loop is what gcc generated.  Clean up!
44C  * Clean up stack slot usage.
45
46C INPUT PARAMETERS
47C qp
48C fn
49C up_param
50C un_param
51C dp
52
53
54C eax ebx ecx edx esi edi ebp
55C         cnt         qp
56
C mpn_divrem_2 (qp, fn, up, un, dp)
C
C Stack arguments, as addressed after the four pushes and the 36-byte frame
C set up in the prologue:
C   56(%esp) qp   quotient limbs are stored at qp[un+fn-3] down to qp[0]
C   60(%esp) fn   iterations whose index is below fn use a zero limb in
C                 place of a limb loaded from up
C   64(%esp) up   dividend limbs; the two limbs left in ebx:ebp are written
C                 back to up[1] and up[0] on exit
C   68(%esp) un   limb count of up, the top limb is up[un-1]
C   72(%esp) dp   two-limb divisor, d0 = dp[0] and d1 = dp[1]
C
C Returns in eax the flag kept at 32(%esp) (msl): 1 if the two top limbs of
C up were >= the divisor on entry, in which case the divisor is subtracted
C from them once before the main loop, else 0.
C
C Frame slots: 0 q0 and scratch, 4 -d1, 12 &up[un-3], 16 di, 20 d0, 24 d1,
C 32 msl.
C NOTE(review): di looks like the precomputed reciprocal of the divisor used
C for 3-limb by 2-limb division steps; confirm against the generic C code.
57ASM_START()
58	TEXT
59	ALIGN(16)
60PROLOGUE(mpn_divrem_2)
61	push	%ebp			C ebp, edi, esi, ebx are restored before ret
62	push	%edi
63	push	%esi
64	push	%ebx
65	sub	$36, %esp		C local frame, slots listed in the header
66	mov	68(%esp), %ecx		C un
67	mov	72(%esp), %esi		C dp
68	movl	$0, 32(%esp)		C msl = 0, the eventual return value
69	lea	0(,%ecx,4), %edi
70	add	64(%esp), %edi		C up
71	mov	(%esi), %ebx
72	mov	4(%esi), %eax
73	mov	%ebx, 20(%esp)		C d0 = dp[0]
74	sub	$12, %edi		C edi = &up[un-3]
75	mov	%eax, 24(%esp)		C d1 = dp[1]
76	mov	%edi, 12(%esp)		C saved for the loop, reloaded into esi
77	mov	8(%edi), %ebx		C n2 = up[un-1]
78	mov	4(%edi), %ebp		C n1 = up[un-2]
C If n2:n1 >= d1:d0, subtract the divisor once at L(35) and set msl to 1.
79	cmp	%eax, %ebx
80	jb	L(8)			C n2 < d1, nothing to subtract
81	seta	%dl			C dl = n2 > d1
82	cmp	20(%esp), %ebp
83	setae	%al			C al = n1 >= d0
84	orb	%dl, %al		C "orb" form to placate Sun tools
85	jne	L(35)
86L(8):
87	mov	60(%esp), %esi		C fn
88	lea	-3(%esi,%ecx), %edi	C edi = un + fn - 3, first index into qp
89	test	%edi, %edi
90	js	L(9)			C negative: no quotient limbs to develop
C Compute di, stored at 16(%esp).  Start from the quotient of the 64-bit
C value ~d1:0xffffffff divided by d1, then adjust it to account for d0.
91	mov	24(%esp), %edx
92	mov	$-1, %esi
93	mov	%esi, %eax
94	mov	%esi, %ecx		C ecx = -1, collects the two carries below
95	not	%edx			C edx:eax = ~d1 : 0xffffffff
96	divl	24(%esp)
97	mov	%eax, %esi		C esi = first estimate of di
98	imul	24(%esp), %eax
99	mov	%eax, (%esp)		C low limb of di * d1
100	mov	%esi, %eax
101	mull	20(%esp)		C edx:eax = di * d0
102	mov	(%esp), %eax
103	add	20(%esp), %eax		C eax = low(di * d1) + d0
104	adc	$0, %ecx
105	add	%eax, %edx		C edx = high(di * d0) + eax
106	adc	$0, %ecx
107	mov	%ecx, %eax		C mov leaves the flags of the adc intact
108	js	L(32)			C ecx still negative: no carry, keep di
109L(36):	dec	%esi			C di--
110	sub	24(%esp), %edx
111	sbb	$0, %eax
112	jns	L(36)			C repeat until the borrow makes eax negative
113L(32):
114	mov	%esi, 16(%esp)		C di
115	mov	%edi, %ecx		C loop index, starts at un + fn - 3
116	mov	12(%esp), %esi		C up, pointing at up[un-3]
117	mov	24(%esp), %eax
118	neg	%eax
119	mov	%eax, 4(%esp)		C -d1
120	ALIGN(16)
121	nop
122
123C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
124C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn
125
C One quotient limb per iteration.  ebx:ebp (n2:n1) is the running two-limb
C remainder, esi walks down through up, ecx is the index into qp and counts
C down to 0.
126L(loop):
127	mov	16(%esp), %eax		C di
128	mul	%ebx			C edx:eax = di * n2
129	add	%ebp, %eax
130	mov	%eax, (%esp)		C q0
131	adc	%ebx, %edx		C edx = high(di * n2) + n2 + carry
132	mov	%edx, %edi		C q
133	imul	4(%esp), %edx		C edx = low(q * -d1)
134	mov	20(%esp), %eax		C d0
135	lea	(%edx, %ebp), %ebx	C n1 -= ...
136	mul	%edi			C edx:eax = d0 * q
137	xor	%ebp, %ebp		C next limb defaults to 0
138	cmp	60(%esp), %ecx		C signed compare of the index with fn
139	jl	L(19)			C index < fn: keep the zero limb
140	mov	(%esi), %ebp		C else fetch the next limb of up
141	sub	$4, %esi
142L(19):	sub	20(%esp), %ebp
143	sbb	24(%esp), %ebx		C ebx:ebp -= d1:d0
144	sub	%eax, %ebp
145	sbb	%edx, %ebx		C ebx:ebp -= q * d0
146	mov	20(%esp), %eax		C d0
147	inc	%edi			C q++
148	xor	%edx, %edx
149	cmp	(%esp), %ebx		C carry set iff ebx < q0
150	adc	$-1, %edx		C mask = -1 if ebx >= q0, else 0
151	add	%edx, %edi		C q-- when mask is -1
152	and	%edx, %eax		C d0 or 0
153	and	24(%esp), %edx		C d1 or 0
154	add	%eax, %ebp
155	adc	%edx, %ebx		C add the masked divisor back
156	cmp	24(%esp), %ebx
157	jae	L(fix)			C ebx >= d1: may need one more subtraction
158L(bck):	mov	56(%esp), %edx		C qp
159	mov	%edi, (%edx, %ecx, 4)	C qp[ecx] = q
160	dec	%ecx
161	jns	L(loop)
162
163L(9):	mov	64(%esp), %esi		C up
164	mov	%ebp, (%esi)		C up[0] = n1
165	mov	%ebx, 4(%esi)		C up[1] = n2
166	mov	32(%esp), %eax		C return msl
167	add	$36, %esp
168	pop	%ebx
169	pop	%esi
170	pop	%edi
171	pop	%ebp
172	ret
173
C Reached from the jae in the loop, so ebx >= d1 and the flags still hold
C the result of comparing ebx with d1.
174L(fix):	seta	%dl			C dl = ebx > d1
175	cmp	20(%esp), %ebp
176	setae	%al			C al = ebp >= d0
177	orb	%dl, %al		C "orb" form to placate Sun tools
178	je	L(bck)			C ebx:ebp < d1:d0, q stands
179	inc	%edi			C q++
180	sub	20(%esp), %ebp
181	sbb	24(%esp), %ebx		C ebx:ebp -= d1:d0
182	jmp	L(bck)
183
C Entry adjustment: the top two limbs were >= the divisor.
184L(35):	sub	20(%esp), %ebp
185	sbb	24(%esp), %ebx		C n2:n1 -= d1:d0
186	movl	$1, 32(%esp)		C msl = 1
187	jmp	L(8)
188EPILOGUE()
189