xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/aorsmul_1.asm (revision d11b170b9000ada93db553723522a63d5deac310)
1dnl  x86 mpn_addmul_1 and mpn_submul_1 (for 386 and 486) -- Multiply a limb
2dnl  vector by a limb and add the result to, or subtract it from, a second limb vector.
3
4dnl  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software
5dnl  Foundation, Inc.
6dnl
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C			    cycles/limb
25C P5				14.75
26C P6 model 0-8,10-12		 7.5
27C P6 model 9  (Banias)		 6.7
28C P6 model 13 (Dothan)		 6.75
29C P4 model 0  (Willamette)	24.0
30C P4 model 1  (?)		24.0
31C P4 model 2  (Northwood)	24.0
32C P4 model 3  (Prescott)
33C P4 model 4  (Nocona)
34C Intel Atom
35C AMD K6			12.5
36C AMD K7			 5.25
37C AMD K8
38C AMD K10
39C Choose the accumulate instruction (M4_inst) and the entry point name
40C (M4_function_1) from the OPERATION_ symbol; addmul_1 is tested first.
41ifdef(`OPERATION_addmul_1',`
42      define(M4_inst,        addl)
43      define(M4_function_1,  mpn_addmul_1)
44
45',`ifdef(`OPERATION_submul_1',`
46      define(M4_inst,        subl)
47      define(M4_function_1,  mpn_submul_1)
48
49',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
50')')')
51C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it lists the entry points this source can provide -- confirm.
52MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
53
54
55C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
56C                          mp_limb_t mult);
57C Stack arguments addressed from %esp; FRAME is set in the function to the bytes pushed since entry (0 then 16).
58define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
59define(PARAM_SIZE,       `FRAME+12(%esp)')
60define(PARAM_SRC,        `FRAME+8(%esp)')
61define(PARAM_DST,        `FRAME+4(%esp)')
62C Multiply each of the size limbs at src by mult and M4_inst the low products into dst (addl or subl); return the final carry limb in %eax.
63	TEXT
64	ALIGN(8)
65C Registers: %esi src pointer, %edi dst pointer, %ecx loop count, %ebx and %ebp carry limbs, %edx:%eax product from mull.
66PROLOGUE(M4_function_1)
67deflit(`FRAME',0)
68C Save %edi, %esi, %ebx and %ebp, which are overwritten below; they are popped in reverse order before ret.
69	pushl	%edi
70	pushl	%esi
71	pushl	%ebx
72	pushl	%ebp
73deflit(`FRAME',16)
74
75	movl	PARAM_DST,%edi
76	movl	PARAM_SRC,%esi
77	movl	PARAM_SIZE,%ecx
78
79	xorl	%ebx,%ebx		C carry limb starts at zero
80	andl	$3,%ecx			C %ecx = size mod 4
81	jz	L(end0)			C no leftover limbs
82C First loop: handle the size mod 4 leftover limbs one at a time.
83L(oop0):
84	movl	(%esi),%eax
85	mull	PARAM_MULTIPLIER	C %edx:%eax = src limb * mult
86	leal	4(%esi),%esi		C advance src by one limb
87	addl	%ebx,%eax		C low product + incoming carry limb
88	movl	$0,%ebx			C zero with movl so CF from the addl is kept
89	adcl	%ebx,%edx		C high product + CF
90	M4_inst	%eax,(%edi)		C update dst limb; CF = carry (addl) or borrow (subl)
91	adcl	%edx,%ebx	C propagate carry into cylimb
92
93	leal	4(%edi),%edi		C advance dst by one limb
94	decl	%ecx
95	jnz	L(oop0)
96
97L(end0):
98	movl	PARAM_SIZE,%ecx
99	shrl	$2,%ecx			C %ecx = number of four-limb groups
100	jz	L(end)			C none left: return the carry limb in %ebx
101C Main loop: four limbs per pass; the carry limb alternates between %ebx and %ebp.
102	ALIGN(8)
103L(oop):	movl	(%esi),%eax
104	mull	PARAM_MULTIPLIER
105	addl	%eax,%ebx		C %ebx = low product 0 + carry limb in
106	movl	$0,%ebp			C movl keeps CF from the addl
107	adcl	%edx,%ebp		C %ebp = high product 0 + CF
108
109	movl	4(%esi),%eax
110	mull	PARAM_MULTIPLIER
111	M4_inst	%ebx,(%edi)		C update dst limb 0; its CF feeds the adcl below
112	adcl	%eax,%ebp	C new lo + cylimb
113	movl	$0,%ebx
114	adcl	%edx,%ebx		C %ebx = high product 1 + CF
115
116	movl	8(%esi),%eax
117	mull	PARAM_MULTIPLIER
118	M4_inst	%ebp,4(%edi)		C update dst limb 1; its CF feeds the adcl below
119	adcl	%eax,%ebx	C new lo + cylimb
120	movl	$0,%ebp
121	adcl	%edx,%ebp		C %ebp = high product 2 + CF
122
123	movl	12(%esi),%eax
124	mull	PARAM_MULTIPLIER
125	M4_inst	%ebx,8(%edi)		C update dst limb 2; its CF feeds the adcl below
126	adcl	%eax,%ebp	C new lo + cylimb
127	movl	$0,%ebx
128	adcl	%edx,%ebx		C %ebx = high product 3 + CF
129
130	M4_inst	%ebp,12(%edi)		C update dst limb 3
131	adcl	$0,%ebx		C propagate carry into cylimb
132
133	leal	16(%esi),%esi		C advance src by four limbs
134	leal	16(%edi),%edi		C advance dst by four limbs
135	decl	%ecx
136	jnz	L(oop)
137
138L(end):	movl	%ebx,%eax		C return value = final carry limb
139
140	popl	%ebp
141	popl	%ebx
142	popl	%esi
143	popl	%edi
144	ret
145
146EPILOGUE()
147