xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1dnl  Intel Atom  mpn_bdiv_dbm1c.
2
3dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
4dnl
5dnl  Copyright 2011 Free Software Foundation, Inc.
6dnl
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C			    cycles/limb
26C P5				 -
27C P6 model 0-8,10-12		 -
28C P6 model 9  (Banias)		 9.75
29C P6 model 13 (Dothan)
30C P4 model 0  (Willamette)
31C P4 model 1  (?)
32C P4 model 2  (Northwood)	 8.25
33C P4 model 3  (Prescott)
34C P4 model 4  (Nocona)
35C Intel Atom			 8
36C AMD K6			 -
37C AMD K7			 -
38C AMD K8
39C AMD K10
40
41C TODO: This code was optimised for atom-32; consider moving it back to the atom
42C	dir (atom currently grabs this code), and write a 4-way version (7 c/l).
43
C Stack slots of the five arguments, described by how the routine below uses
C them.  defframe is defined outside this file; the numbers are presumably
C byte offsets from the stack pointer at entry, with the return address at
C offset 0 -- TODO confirm.
44defframe(PARAM_CARRY,20)	C initial value loaded into cy
45defframe(PARAM_MUL,  16)	C multiplier, loaded into %mm7
46defframe(PARAM_SIZE, 12)	C limb count, loaded into n
47defframe(PARAM_SRC,  8)	C source pointer, loaded into up
48defframe(PARAM_DST,  4)	C destination pointer, loaded into rp
49
50dnl  re-use parameter space
C PARAM_MUL and PARAM_SIZE are each read once at the top of the routine.
C After that their slots hold the entry values of %edi and %esi, which are
C reloaded from there before the routine returns.
51define(SAVE_RP,`PARAM_MUL')
52define(SAVE_UP,`PARAM_SIZE')
53
C Register roles used throughout the routine.
54define(`rp', `%edi')	C destination pointer, advanced as limbs are stored
55define(`up', `%esi')	C source pointer, advanced as limbs are loaded
56define(`n',  `%ecx')	C size, then size/2 used as the loop counter
57define(`reg', `%edx')	C scratch: low or high half of a 64-bit product
58define(`cy', `%eax')	C running value stored per limb; contains the return value
59
60ASM_START()
61	TEXT
62	ALIGN(16)
63deflit(`FRAME',0)
64
C mpn_bdiv_dbm1c -- arguments are read from PARAM_DST, PARAM_SRC, PARAM_SIZE,
C PARAM_MUL and PARAM_CARRY; the result is returned in cy (%eax).
C
C For each source limb, from the lowest address upwards, the routine:
C   1. forms the 64-bit product limb * PARAM_MUL with pmuludq,
C   2. subtracts the low product half from cy and stores cy to the
C      destination,
C   3. subtracts the high product half and the borrow of step 2 from cy.
C The value left in cy after the last limb is the return value.
C
C The borrow of step 2 lives in the carry flag until the sbb of step 3.  The
C instructions scheduled in between (mov, lea, movd, movq, psrlq, pmuludq,
C dec) do not modify CF, so their order relative to sub/sbb must be kept.
C
C The loop handles two limbs per iteration.  PARAM_SIZE is halved up front:
C a size of 1 goes straight to L(eq1), an odd size enters the loop at L(odd),
C and an even size retires one limb before the loop.  The last limb is always
C finished by the code at L(eq1).  The first limb is loaded unconditionally,
C so a size of 0 behaves like a size of 1; presumably callers never pass 0
C -- TODO confirm.
C
C %esi and %edi are saved in the PARAM_SIZE and PARAM_MUL slots once those
C arguments have been read, and are restored before returning.  %mm0, %mm1
C and %mm7 are used, with emms issued before the return.
65PROLOGUE(mpn_bdiv_dbm1c)
66	mov	PARAM_SIZE, n		C n = size argument
67	mov	up, SAVE_UP		C save %esi in the slot just read
68	mov	PARAM_SRC, up		C up = source pointer
69	movd	PARAM_MUL, %mm7		C mm7 = multiplier, kept for the whole routine
70	mov	rp, SAVE_RP		C save %edi in the slot just read
71	mov	PARAM_DST, rp		C rp = destination pointer
72
73	movd	(up), %mm0		C mm0 = first source limb
74	pmuludq	%mm7, %mm0		C mm0 = 64-bit product of the first limb
75	shr	n			C n = size/2; CF = size was odd; ZF = (size/2 == 0)
76	mov	PARAM_CARRY, cy		C cy = incoming value; mov leaves the flags alone
77	jz	L(eq1)			C single limb: its product is already in mm0
78
79	movd	4(up), %mm1		C mm1 = second source limb
80	jc	L(odd)			C odd size: enter the loop past its leading sbb
81
C Even size: retire the first limb here so that an odd count remains.
82	lea	4(up), up		C up -> second limb
83	pmuludq	%mm7, %mm1		C mm1 = product of the second limb
84	movd	%mm0, reg		C reg = low half of the first product
85	psrlq	$32, %mm0		C bring the high half down
86	sub	reg, cy			C cy -= low half; CF = borrow
87	movd	%mm0, reg		C reg = high half, consumed by the next sbb
88	movq	%mm1, %mm0		C second product becomes the pending one
89	dec	n			C sets ZF for the jz below; CF is untouched
90	mov	cy, (rp)		C store the first result limb
91	lea	4(rp), rp
92	jz	L(end)			C size was 2: only the tail limb remains
93
94C	ALIGN(16)
C Loop invariant at L(top): mm0 holds the pending product, reg and CF hold
C the high half and borrow of the limb retired just before.
95L(top):	movd	4(up), %mm1	C mm1 = limb following the pending one
96	sbb	reg, cy			C cy -= high half + borrow of the previous limb
97L(odd):	movd	%mm0, reg	C reg = low half of the pending product
98	psrlq	$32, %mm0		C bring the high half down
99	pmuludq	%mm7, %mm1		C mm1 = product of the following limb
100	sub	reg, cy			C cy -= low half; CF = borrow
101	lea	8(up), up		C advance two source limbs; lea keeps CF
102	movd	%mm0, reg		C reg = high half
103	movd	(up), %mm0		C mm0 = limb for the next pending product
104	mov	cy, (rp)		C store the first result limb of this iteration
105	sbb	reg, cy			C cy -= high half + borrow
106	movd	%mm1, reg		C reg = low half of the second product
107	psrlq	$32, %mm1		C bring the high half down
108	sub	reg, cy			C cy -= low half; CF = borrow
109	movd	%mm1, reg		C reg = high half, consumed at L(top) or L(end)
110	pmuludq	%mm7, %mm0		C mm0 = next pending product
111	dec	n			C loop counter; CF is untouched
112	mov	cy, 4(rp)		C store the second result limb of this iteration
113	lea	8(rp), rp		C advance two destination limbs
114	jnz	L(top)
115
116L(end):	sbb	reg, cy		C finish the limb retired last: high half + borrow
117
C Tail: the last limb, whose product is in mm0.
118L(eq1):	movd	%mm0, reg	C reg = low half of the last product
119	psrlq	$32, %mm0		C bring the high half down
120	mov	SAVE_UP, up		C restore %esi; mov keeps the flags
121	sub	reg, cy			C cy -= low half; CF = borrow
122	movd	%mm0, reg		C reg = high half
123	emms				C leave MMX state before returning
124	mov	cy, (rp)		C store the last result limb
125	sbb	reg, cy			C cy -= high half + borrow: the return value
126
127	mov	SAVE_RP, rp		C restore %edi
128	ret
129EPILOGUE()
130ASM_END()
131