xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/divrem_2.asm (revision 70f7362772ba52b749c976fb5e86e39a8b2c9afc)
1dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
2
3dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C		norm	frac
35C 486
36C P5
37C P6-13		29.2
38C P6-15		*26
39C K6
40C K7		22
41C K8		*19
42C P4-f1
43C P4-f2		*65
44C P4-f3
45C P4-f4		*72
46
47C A star means numbers not updated for the latest version of the code.
48
49
50C TODO
51C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
52C  * The loop has not been carefully tuned.  We should at the very least do
53C    some local insn swapping.
54C  * The code outside the main loop is what gcc generated.  Clean up!
55C  * Clean up stack slot usage.
56
57C INPUT PARAMETERS
58C qp
59C fn
60C up_param
61C un_param
62C dp
63
64
65C eax ebx ecx edx esi edi ebp
66C         cnt         qp
67
68ASM_START()
69	TEXT
70	ALIGN(16)
C mpn_divrem_2 (qp, fn, up_param, un_param, dp) -- result limb returned in eax.
C
C Divides the un-limb number at up, extended below with fn zero limbs, by the
C two-limb divisor dp[1]:dp[0].  One quotient limb is produced per loop
C iteration and stored at qp[fn+un-3] down to qp[0].  The two-limb remainder
C is written back to up[0] (low) and up[1] (high).  The return value is 1 when
C the two top limbs of up were >= the divisor on entry (one divisor is then
C subtracted before the loop), otherwise 0.
C
C The divl used to compute the reciprocal faults unless dp[1] has its top bit
C set, which matches the "normalized" requirement in the file header.
C up[un-1] and up[un-2] are read unconditionally, so presumably un >= 2 is
C required -- NOTE(review): confirm against callers.
C
C Stack frame after the prologue (4 pushes + 36 bytes of locals):
C    0(%esp)  scratch, q0 in loop   20(%esp)  d0 = dp[0]      60(%esp)  fn
C    4(%esp)  -d1                   24(%esp)  d1 = dp[1]      64(%esp)  up
C   12(%esp)  address of up[un-3]   32(%esp)  return value    68(%esp)  un
C   16(%esp)  di (reciprocal)       56(%esp)  qp              72(%esp)  dp
71PROLOGUE(mpn_divrem_2)
72	push	%ebp
73	push	%edi
74	push	%esi
75	push	%ebx
76	sub	$36, %esp
77	mov	68(%esp), %ecx		C un
78	mov	72(%esp), %esi		C dp
79	movl	$0, 32(%esp)		C return value defaults to 0
80	lea	0(,%ecx,4), %edi	C edi = 4*un
81	add	64(%esp), %edi		C up
82	mov	(%esi), %ebx		C ebx = dp[0]
83	mov	4(%esi), %eax		C eax = dp[1]
84	mov	%ebx, 20(%esp)		C save d0
85	sub	$12, %edi		C edi = address of up[un-3]
86	mov	%eax, 24(%esp)		C save d1
87	mov	%edi, 12(%esp)
88	mov	8(%edi), %ebx		C ebx = up[un-1], high remainder limb
89	mov	4(%edi), %ebp		C ebp = up[un-2], low remainder limb
C Is ebx:ebp >= d1:d0?  If so subtract the divisor once at L(35) and record
C a return value of 1.
90	cmp	%eax, %ebx
91	jb	L(8)			C high limb < d1: already smaller
92	seta	%dl			C dl = (high limb > d1)
93	cmp	20(%esp), %ebp
94	setae	%al			C al = (low limb >= d0)
95	orb	%dl, %al		C "orb" form to placate Sun tools
96	jne	L(35)
97L(8):
98	mov	60(%esp), %esi		C fn
99	lea	-3(%esi,%ecx), %edi	C edi = fn+un-3, index of top quotient limb
100	test	%edi, %edi
101	js	L(9)			C no quotient limbs to produce
C Compute the reciprocal di.  The dividend is ~d1:0xffffffff and the divisor
C is d1.  The following multiply/add/adc sequence and the L(36) loop then
C decrement di; presumably this refines the one-limb reciprocal of d1 so that
C it also accounts for d0 -- NOTE(review): confirm against the C reference.
102	mov	24(%esp), %edx
103	mov	$-1, %esi
104	mov	%esi, %eax		C eax = 0xffffffff, low dividend limb
105	mov	%esi, %ecx		C ecx = -1, accumulates carries below
106	not	%edx			C edx = ~d1, high dividend limb
107	divl	24(%esp)		C eax = quotient by d1
108	mov	%eax, %esi		C esi = candidate di
109	imul	24(%esp), %eax		C eax = low limb of di*d1
110	mov	%eax, (%esp)
111	mov	%esi, %eax
112	mull	20(%esp)		C edx:eax = di*d0
113	mov	(%esp), %eax
114	add	20(%esp), %eax		C eax = di*d1 + d0
115	adc	$0, %ecx
116	add	%eax, %edx		C edx = high(di*d0) + di*d1 + d0
117	adc	$0, %ecx
118	mov	%ecx, %eax		C mov keeps the flags from the adc
119	js	L(32)			C ecx still negative: no carry, keep di
120L(36):	dec	%esi		C di--
121	sub	24(%esp), %edx		C eax:edx -= d1
122	sbb	$0, %eax
123	jns	L(36)			C repeat until eax goes negative
124L(32):
125	mov	%esi, 16(%esp)		C di
126	mov	%edi, %ecx		C ecx = fn+un-3, quotient limb index
127	mov	12(%esp), %esi		C up
128	mov	24(%esp), %eax
129	neg	%eax
130	mov	%eax, 4(%esp)		C -d1
131	ALIGN(16)
132	nop
133
134C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
135C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn
136
C Main loop: one quotient limb per iteration.  ebx:ebp is the running
C two-limb remainder, ecx the quotient index, esi the dividend read pointer.
137L(loop):
138	mov	16(%esp), %eax		C di
139	mul	%ebx			C edx:eax = di * n2
140	add	%ebp, %eax
141	mov	%eax, (%esp)		C q0
142	adc	%ebx, %edx		C edx:eax = di*n2 + n2:n1
143	mov	%edx, %edi		C q
144	imul	4(%esp), %edx		C edx = low limb of q * -d1
145	mov	20(%esp), %eax		C d0
146	lea	(%edx, %ebp), %ebx	C n1 -= ...
147	mul	%edi			C edx:eax = q * d0
148	xor	%ebp, %ebp		C next dividend limb defaults to 0
149	cmp	60(%esp), %ecx
150	jl	L(19)			C index < fn: fraction part, keep 0
151	mov	(%esi), %ebp		C else fetch the next dividend limb
152	sub	$4, %esi		C and step the read pointer down
153L(19):	sub	20(%esp), %ebp
154	sbb	24(%esp), %ebx		C ebx:ebp -= d1:d0
155	sub	%eax, %ebp
156	sbb	%edx, %ebx		C ebx:ebp -= q*d0
157	mov	20(%esp), %eax		C d0
158	inc	%edi			C q+1
159	xor	%edx, %edx
160	cmp	(%esp), %ebx		C carry set when ebx < q0
161	adc	$-1, %edx		C mask = -1 if ebx >= q0, else 0
162	add	%edx, %edi		C q-- when mask is set
163	and	%edx, %eax		C d0 or 0
164	and	24(%esp), %edx		C d1 or 0
165	add	%eax, %ebp
166	adc	%edx, %ebx		C add d1:d0 back under the mask
167	cmp	24(%esp), %ebx
168	jae	L(fix)			C high limb >= d1: may need a correction
169L(bck):	mov	56(%esp), %edx	C qp
170	mov	%edi, (%edx, %ecx, 4)	C qp[ecx] = q
171	dec	%ecx
172	jns	L(loop)			C until the index goes negative
173
C Done: write the remainder back to up[0], up[1] and return the saved limb.
174L(9):	mov	64(%esp), %esi		C up
175	mov	%ebp, (%esi)
176	mov	%ebx, 4(%esi)
177	mov	32(%esp), %eax		C return value, 0 or 1
178	add	$36, %esp
179	pop	%ebx
180	pop	%esi
181	pop	%edi
182	pop	%ebp
183	ret
184
C Entered with the flags of the compare of ebx against d1 still live.
C If ebx:ebp >= d1:d0, bump q and subtract the divisor once more.
185L(fix):	seta	%dl		C dl = (ebx > d1)
186	cmp	20(%esp), %ebp
187	setae	%al			C al = (ebp >= d0)
188	orb	%dl, %al		C "orb" form to placate Sun tools
189	je	L(bck)			C remainder < divisor after all
190	inc	%edi			C q++
191	sub	20(%esp), %ebp
192	sbb	24(%esp), %ebx		C ebx:ebp -= d1:d0
193	jmp	L(bck)
194
C Top two dividend limbs were >= divisor: subtract it and return 1.
195L(35):	sub	20(%esp), %ebp
196	sbb	24(%esp), %ebx
197	movl	$1, 32(%esp)
198	jmp	L(8)
199EPILOGUE()
200