dnl  x86 mpn_bdiv_dbm1c.

dnl  Copyright 2008, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C			    cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)		 5.1
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)	13.67
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom
C AMD K6
C AMD K7			 3.5
C AMD K8
C AMD K10


C TODO
C  * Optimize for more x86 processors

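C  The loop follows the generic code in mpn/generic/bdiv_dbm1c.c: for each
C  limb a = ap[i], compute the double-limb product a * bd, subtract its low
C  limb from the running value h and store the result at qp[i], then subtract
C  the high product limb together with the borrow from h.  The final h is the
C  return value.
C
C  mp_limb_t mpn_bdiv_dbm1c (mp_ptr qp, mp_srcptr ap, mp_size_t n,
C                            mp_limb_t bd, mp_limb_t h);
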
ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_bdiv_dbm1c)
	mov	16(%esp), %ecx		C d
	push	%esi
	mov	12(%esp), %esi		C ap
	push	%edi
	mov	12(%esp), %edi		C qp
	push	%ebp
	mov	24(%esp), %ebp		C n
	push	%ebx

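C  First limb: subtract ap[0] * bd from the initial h (the fifth argument,
C  now at 36(%esp) after the four pushes above), storing the low limb at qp[0].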
	mov	(%esi), %eax
	mul	%ecx
	mov	36(%esp), %ebx
	sub	%eax, %ebx
	mov	%ebx, (%edi)
	sbb	%edx, %ebx

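C  Dispatch on n mod 4 to the matching entry point of the 4-way unrolled
C  loop below; the entry points adjust ap, qp and the count in %ebp so the
C  loop's fixed displacements line up.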
	mov	%ebp, %eax
	and	$3, %eax
	jz	L(b0)
	cmp	$2, %eax
	jc	L(b1)
	jz	L(b2)

L(b3):	lea	-8(%esi), %esi
	lea	8(%edi), %edi
	add	$-3, %ebp
	jmp	L(3)

L(b0):	mov	4(%esi), %eax
	lea	-4(%esi), %esi
	lea	12(%edi), %edi
	add	$-4, %ebp
	jmp	L(0)

L(b2):	mov	4(%esi), %eax
	lea	4(%esi), %esi
	lea	4(%edi), %edi
	add	$-2, %ebp
	jmp	L(2)

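C  Main loop, handling four limbs per iteration.  mul %ecx leaves the low
C  product limb in %eax and the high limb in %edx; each sub/sbb pair
C  subtracts the product from the running value in %ebx, storing the low
C  result limb to qp.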
	ALIGN(8)
L(top):	mov	4(%esi), %eax
	mul	%ecx
	lea	16(%edi), %edi
	sub	%eax, %ebx
	mov	8(%esi), %eax
	mov	%ebx, -12(%edi)
	sbb	%edx, %ebx
L(0):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -8(%edi)
	sbb	%edx, %ebx
L(3):	mov	12(%esi), %eax
	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -4(%edi)
	mov	16(%esi), %eax
	lea	16(%esi), %esi
	sbb	%edx, %ebx
L(2):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, 0(%edi)
	sbb	%edx, %ebx
L(b1):	add	$-4, %ebp
	jns	L(top)

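C  Return the final h in %eax and restore the callee-saved registers.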
	mov	%ebx, %eax
	pop	%ebx
	pop	%ebp
	pop	%edi
	pop	%esi
	ret
EPILOGUE()
