xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/bdiv_q_1.asm (revision 230b95665bbd3a9d1a53658a36b1053f8382a519)
1dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.
2
3dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24
25C     cycles/limb
26C P54    30.0
27C P55    29.0
28C P6     13.0 odd divisor, 12.0 even (strangely)
29C K6     14.0
30C K7     12.0
31C P4     42.0
32
33MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
34
35defframe(PARAM_SHIFT,  24)	C shift count, read only by mpn_pi1_bdiv_q_1
36defframe(PARAM_INVERSE,20)	C inverse limb, read only by mpn_pi1_bdiv_q_1
37defframe(PARAM_DIVISOR,16)	C divisor, overwritten by mpn_bdiv_q_1 with its twos stripped
38defframe(PARAM_SIZE,   12)	C limb count
39defframe(PARAM_SRC,    8)	C source pointer
40defframe(PARAM_DST,    4)	C destination pointer
41
42dnl  re-use parameter space: the src slot is loaded into a register before
dnl  the inverse is stored over it, so the inverse can live there in the loop
43define(VAR_INVERSE,`PARAM_SRC')
44
45	TEXT
46
47C mp_limb_t
48C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
49C		    mp_limb_t inverse, int shift)
C
C Writes size quotient limbs to dst.  Each source limb is shifted right by
C shift bits, with bits from the next limb shifted in, then has the running
C carry subtracted and is multiplied by inverse to give a quotient limb.
C The high half of quotient limb times divisor becomes the next carry limb.
C
C The mpn_bdiv_q_1 entry point joins at the common label with the divisor
C slot already holding the divisor with its factors of two removed, so a
C direct caller presumably has to pass divisor in that form -- TODO confirm.
C
C The code loads src[0] and stores dst[size-1] unconditionally, so it
C assumes size >= 1.
C
C NOTE(review): eax is not set up as a return value; at the ret it still
C holds the last quotient limb stored.  Whether callers use it is not
C visible in this file.
50
51	ALIGN(16)
52PROLOGUE(mpn_pi1_bdiv_q_1)
53deflit(`FRAME',0)
54
55	movl	PARAM_SHIFT, %ecx	C ecx = shift count
56	pushl	%ebp	FRAME_pushl()
57
58	movl	PARAM_INVERSE, %eax	C eax = inverse, stored to VAR_INVERSE below
59	movl	PARAM_SIZE, %ebp
60	pushl	%ebx	FRAME_pushl()
C Shared tail, also entered by a jmp from mpn_bdiv_q_1.  Expected here:
C eax = inverse, ecx = shift, ebp = size, ebp and ebx already pushed.
61L(common):
62	pushl	%edi	FRAME_pushl()
63	pushl	%esi	FRAME_pushl()
64
65	movl	PARAM_SRC, %esi
66	movl	PARAM_DST, %edi
67
68	leal	(%esi,%ebp,4), %esi	C src end
69	leal	(%edi,%ebp,4), %edi	C dst end
70	negl	%ebp			C -size
71
72	movl	%eax, VAR_INVERSE	C src slot is free now that esi holds src
73	movl	(%esi,%ebp,4), %eax	C src[0]
74
75	xorl	%ebx, %ebx		C no carry bit yet
76	xorl	%edx, %edx		C no carry limb yet
77
78	incl	%ebp
79	jz	L(one)			C size == 1, only the final limb step is needed
80
81	movl	(%esi,%ebp,4), %edx	C src[1]
82
83	shrdl(	%cl, %edx, %eax)	C low limb shifted right, bits of src[1] shifted in
84
85	movl	VAR_INVERSE, %edx
86	jmp	L(entry)		C first limb has no carry to apply
87
88
89	ALIGN(8)
90	nop	C k6 code alignment
91	nop
92L(top):
93	C eax	q
94	C ebx	carry bit, 0 or -1
95	C ecx	shift
96	C edx	carry limb
97	C esi	src end
98	C edi	dst end
99	C ebp	counter, limbs, negative
100
101	movl	-4(%esi,%ebp,4), %eax	C current src limb
102	subl	%ebx, %edx		C accumulate carry bit
103
104	movl	(%esi,%ebp,4), %ebx	C next src limb, supplies the bits shifted in
105
106	shrdl(	%cl, %ebx, %eax)
107
108	subl	%edx, %eax		C apply carry limb
109	movl	VAR_INVERSE, %edx	C movl leaves the borrow flag intact for sbbl
110
111	sbbl	%ebx, %ebx		C ebx = 0 or -1 from the borrow of the subl above
112
113L(entry):
114	imull	%edx, %eax		C q = limb * inverse, low 32 bits
115
116	movl	%eax, -4(%edi,%ebp,4)	C store quotient limb
117	movl	PARAM_DIVISOR, %edx
118
119	mull	%edx			C edx:eax = q * divisor, edx is the next carry limb
120
121	incl	%ebp
122	jnz	L(top)
123
124
125	movl	-4(%esi), %eax		C src high limb
126L(one):
127	shrl	%cl, %eax		C top limb, no higher limb to shift in
128	popl	%esi	FRAME_popl()
129
130	addl	%ebx, %eax		C apply carry bit
131
132	subl	%edx, %eax		C apply carry limb
133
134	imull	VAR_INVERSE, %eax	C final quotient limb
135
136	movl	%eax, -4(%edi)		C store at dst[size-1]
137
138	popl	%edi
139	popl	%ebx
140	popl	%ebp
141
142	ret
143
144EPILOGUE()
145
146C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
147C                           mp_limb_t divisor);
148C
C Front end that derives the shift count and the inverse from divisor and
C then jumps into the common label of mpn_pi1_bdiv_q_1.
C
C Steps: count and strip the low zero bits of divisor, write the remaining
C odd value back to the divisor parameter slot, look up an 8-bit inverse in
C binvert_limb_table, and refine it with two Newton steps.
C
C divisor must be nonzero: the strip_twos loop exits only when a one bit is
C shifted out, which never happens for zero.
149
150	ALIGN(16)
151PROLOGUE(mpn_bdiv_q_1)
152deflit(`FRAME',0)
153
154	movl	PARAM_DIVISOR, %eax
155	pushl	%ebp	FRAME_pushl()
156
157	movl	$-1, %ecx		C shift count
158	movl	PARAM_SIZE, %ebp	C ebp = size, as the common code expects
159
160	pushl	%ebx	FRAME_pushl()
161
162L(strip_twos):
163	incl	%ecx			C one more low bit examined
164
165	shrl	%eax			C shifted-out bit goes to the carry flag
166	jnc	L(strip_twos)		C loop until a one bit has been shifted out
167
168	leal	1(%eax,%eax), %ebx	C d without twos
169	andl	$127, %eax		C d/2, 7 bits
170
171ifdef(`PIC',`
172	LEA(	binvert_limb_table, %edx)
173	movzbl	(%eax,%edx), %eax		C inv 8 bits
174',`
175	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
176')
177
C First Newton step on the 8-bit table value, result in edx.
178	leal	(%eax,%eax), %edx	C 2*inv
179	movl	%ebx, PARAM_DIVISOR	C d without twos
180	imull	%eax, %eax		C inv*inv
181	imull	%ebx, %eax		C inv*inv*d
182	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
183
C Second Newton step, result in eax where the common code expects it.
184	leal	(%edx,%edx), %eax	C 2*inv
185	imull	%edx, %edx		C inv*inv
186	imull	%ebx, %edx		C inv*inv*d
187	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
188
189	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
190	pushl	%eax	FRAME_pushl()
191	imull	PARAM_DIVISOR, %eax
192	cmpl	$1, %eax
193	popl	%eax	FRAME_popl()')
194
C Two registers are pushed at this point, the same stack depth as at the
C common label in mpn_pi1_bdiv_q_1.  eax = inverse, ecx = shift, ebp = size.
195	jmp	L(common)
196EPILOGUE()
197
198