xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/aors_n.asm (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2
3dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
4dnl  Foundation, Inc.
5dnl
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or
9dnl  modify it under the terms of the GNU Lesser General Public License as
10dnl  published by the Free Software Foundation; either version 3 of the
11dnl  License, or (at your option) any later version.
12dnl
13dnl  The GNU MP Library is distributed in the hope that it will be useful,
14dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
15dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16dnl  Lesser General Public License for more details.
17dnl
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23
24C     cycles/limb
25C P5:   3.375
26C P6:   3.125
27C K6:   3.5
28C K7:   2.25
29C P4:   8.75
30
31
C Build-time selection of the operation.  With OPERATION_add_n defined the
C instruction is adcl and the entry points are mpn_add_n and mpn_add_nc;
C with OPERATION_sub_n defined it is sbbl with mpn_sub_n and mpn_sub_nc.
C With neither defined, m4_error is invoked with the message below.
32ifdef(`OPERATION_add_n',`
33	define(M4_inst,        adcl)
34	define(M4_function_n,  mpn_add_n)
35	define(M4_function_nc, mpn_add_nc)
36
37',`ifdef(`OPERATION_sub_n',`
38	define(M4_inst,        sbbl)
39	define(M4_function_n,  mpn_sub_n)
40	define(M4_function_nc, mpn_sub_nc)
41
42',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
43')')')
44
C The four function names this file can provide.  MULFUNC_PROLOGUE is
C defined outside this file; presumably it is what the build uses to map
C those names onto this source -- TODO confirm.
45MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
46
47
48C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
49C                          mp_size_t size);
50C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
51C	                    mp_size_t size, mp_limb_t carry);
C
C Both entry points return, in eax, 1 if the carry flag is set after the
C last limb has been processed and 0 otherwise.  Only bit 0 of the carry
C parameter is used by M4_function_nc.
52
C Stack slots of the parameters, 4 bytes apart in prototype order.
C PARAM_CARRY is read only on the M4_function_nc paths.
C NOTE(review): the offsets presumably count from the stack pointer at
C function entry, with the return address at offset 0, and are corrected
C for later pushes through FRAME -- confirm against the defframe macro.
53defframe(PARAM_CARRY,20)
54defframe(PARAM_SIZE, 16)
55defframe(PARAM_SRC2, 12)
56defframe(PARAM_SRC1, 8)
57defframe(PARAM_DST,  4)
58
59	TEXT
60	ALIGN(8)
61
C M4_function_nc (dst, src1, src2, size, carry)
C
C Entry point with a carry-in.  It loads the pointer and size parameters,
C works out where to enter the 8-way unrolled loop at L(oop), puts bit 0
C of PARAM_CARRY into the carry flag and jumps there.  There is no ret in
C this routine: L(oopgo), L(oop) and the return sequence are the ones
C that follow the M4_function_n prologue further down in this file.
C
C State handed to the loop:
C   edi, esi, edx   dst, src1, src2, each moved back by 4*eax bytes when
C                   eax is non-zero
C   ecx             passes through the loop: size/8, plus one when size
C                   is not a multiple of 8
C   CF              bit 0 of PARAM_CARRY
C The saved edi and esi remain on the stack and are popped by the shared
C return sequence.
62PROLOGUE(M4_function_nc)
63deflit(`FRAME',0)
64
	C NOTE(review): FRAME_pushl() is defined elsewhere; presumably it
	C records the push so the PARAM_ offsets stay correct -- confirm.
65	pushl	%edi		FRAME_pushl()
66	pushl	%esi		FRAME_pushl()
67
68	movl	PARAM_DST,%edi
69	movl	PARAM_SRC1,%esi
70	movl	PARAM_SRC2,%edx
71	movl	PARAM_SIZE,%ecx
72
	C ecx = size/8 and eax = (-size) mod 8.  eax is the index of the
	C unrolled group at which the first pass starts; zero means size is
	C a multiple of 8 and the loop is entered at its top.
73	movl	%ecx,%eax
74	shrl	$3,%ecx			C compute count for unrolled loop
75	negl	%eax
76	andl	$7,%eax			C get index where to start loop
	C Multiple of 8: L(oopgo) loads the carry and falls into L(oop).
77	jz	L(oopgo)		C necessary special case for 0
	C Partial first pass.  The groups in the loop address their limbs
	C at displacements 0, 4, ... 28, so the three pointers are moved
	C back by 4*eax bytes before entering part way through.
78	incl	%ecx			C adjust loop count
79	shll	$2,%eax			C adjustment for pointers...
80	subl	%eax,%edi		C ... since they are offset ...
81	subl	%eax,%esi		C ... by a constant when we ...
82	subl	%eax,%edx		C ... enter the loop
83	shrl	$2,%eax			C restore previous value
84
	C Both variants below leave eax = L(oop) - 3 + 9*eax.
	C NOTE(review): this presumably relies on the first group at L(oop)
	C assembling to 6 bytes and each later group to 9 bytes -- confirm
	C before changing any instruction inside the loop.
85ifdef(`PIC',`
86	C Calculate start address in loop for PIC.  Due to limitations in
87	C old gas, L(oop)-L(0a)-3 cannot be put into the leal
	C The call pushes the address of L(0a).  That address and the
	C distance from L(0a) to L(oop)-3 are added to 9*eax, then the
	C pushed address is dropped from the stack.
88	call	L(0a)
89L(0a):	leal	(%eax,%eax,8),%eax
90	addl	(%esp),%eax
91	addl	$L(oop)-L(0a)-3,%eax
92	addl	$4,%esp
93',`
94	C Calculate start address in loop for non-PIC.
95	leal	L(oop)-3(%eax,%eax,8),%eax
96')
97
98	C These lines initialize carry from the 5th parameter.  Should be
99	C possible to simplify.
	C ebp is only scratch here and is restored by the popl.  This is
	C done after the address arithmetic above because that arithmetic
	C alters the flags; the popl and jmp that follow leave the carry
	C flag as the shrl set it.
100	pushl	%ebp		FRAME_pushl()
101	movl	PARAM_CARRY,%ebp
102	shrl	$1,%ebp			C shift bit 0 into carry
103	popl	%ebp		FRAME_popl()
104
105	jmp	*%eax			C jump into loop
106
107EPILOGUE()
108
109
110	ALIGN(16)
C M4_function_n (dst, src1, src2, size)
C
C Entry point without a carry-in, followed by the unrolled loop and the
C return sequence that M4_function_nc also uses.
C
C Each group in the loop loads a limb from src1, applies M4_inst with the
C matching limb of src2 and the carry flag, and stores the result to dst.
C
C Registers:
C   edi, esi, edx   dst, src1, src2 (edi and esi are saved on entry and
C                   restored before ret)
C   ecx             passes through the loop: size/8, plus one when size
C                   is not a multiple of 8
C   eax             scratch; holds the return value at exit
C Returns eax = 1 if the carry flag is set when the loop ends, else 0.
111PROLOGUE(M4_function_n)
112deflit(`FRAME',0)
113
114	pushl	%edi		FRAME_pushl()
115	pushl	%esi		FRAME_pushl()
116
117	movl	PARAM_DST,%edi
118	movl	PARAM_SRC1,%esi
119	movl	PARAM_SRC2,%edx
120	movl	PARAM_SIZE,%ecx
121
	C ecx = size/8 and eax = (-size) mod 8.  eax is the index of the
	C unrolled group at which the first pass starts; zero means size is
	C a multiple of 8 and the loop is entered at its top.
122	movl	%ecx,%eax
123	shrl	$3,%ecx			C compute count for unrolled loop
124	negl	%eax
125	andl	$7,%eax			C get index where to start loop
126	jz	L(oop)			C necessary special case for 0
	C Partial first pass.  The groups in the loop address their limbs
	C at displacements 0, 4, ... 28, so the three pointers are moved
	C back by 4*eax bytes before entering part way through.
127	incl	%ecx			C adjust loop count
128	shll	$2,%eax			C adjustment for pointers...
129	subl	%eax,%edi		C ... since they are offset ...
130	subl	%eax,%esi		C ... by a constant when we ...
131	subl	%eax,%edx		C ... enter the loop
132	shrl	$2,%eax			C restore previous value
133
	C Both variants below leave eax = L(oop) - 3 + 9*eax.
	C NOTE(review): this presumably relies on the first group at L(oop)
	C assembling to 6 bytes and each later group to 9 bytes -- confirm
	C before changing any instruction inside the loop.
	C NOTE(review): nothing on this entry path sets the carry flag
	C explicitly.  The loop starts with whatever the last flag-writing
	C instruction left: the andl on the jz path, otherwise the shrl by 2
	C (non-PIC) or the addl to esp (PIC).  Each is expected to leave the
	C carry clear -- confirm if this sequence is ever reordered.
134ifdef(`PIC',`
135	C Calculate start address in loop for PIC.  Due to limitations in
136	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
	C The call pushes the address of L(0b).  That address and the
	C distance from L(0b) to L(oop)-3 are added to 9*eax, then the
	C pushed address is dropped from the stack.
137	call	L(0b)
138L(0b):	leal	(%eax,%eax,8),%eax
139	addl	(%esp),%eax
140	addl	$L(oop)-L(0b)-3,%eax
141	addl	$4,%esp
142',`
143	C Calculate start address in loop for non-PIC.
144	leal	L(oop)-3(%eax,%eax,8),%eax
145')
146	jmp	*%eax			C jump into loop
147
	C Reached only by the jz in M4_function_nc, when size is a multiple
	C of 8.  Bit 0 of PARAM_CARRY goes into the carry flag; ebp is only
	C scratch and is restored.  Execution then falls through the
	C alignment padding into the top of the loop.
	C NOTE(review): this presumably relies on the ALIGN padding being
	C executable filler that leaves the carry flag alone -- confirm.
148L(oopgo):
149	pushl	%ebp		FRAME_pushl()
150	movl	PARAM_CARRY,%ebp
151	shrl	$1,%ebp			C shift bit 0 into carry
152	popl	%ebp		FRAME_popl()
153
154	ALIGN(16)
	C Eight groups per pass, one limb each: load from src1, M4_inst
	C with the src2 limb and the carry flag, store to dst.  The carry
	C flag chains from each group to the next.
155L(oop):	movl	(%esi),%eax
156	M4_inst	(%edx),%eax
157	movl	%eax,(%edi)
158	movl	4(%esi),%eax
159	M4_inst	4(%edx),%eax
160	movl	%eax,4(%edi)
161	movl	8(%esi),%eax
162	M4_inst	8(%edx),%eax
163	movl	%eax,8(%edi)
164	movl	12(%esi),%eax
165	M4_inst	12(%edx),%eax
166	movl	%eax,12(%edi)
167	movl	16(%esi),%eax
168	M4_inst	16(%edx),%eax
169	movl	%eax,16(%edi)
170	movl	20(%esi),%eax
171	M4_inst	20(%edx),%eax
172	movl	%eax,20(%edi)
173	movl	24(%esi),%eax
174	M4_inst	24(%edx),%eax
175	movl	%eax,24(%edi)
176	movl	28(%esi),%eax
177	M4_inst	28(%edx),%eax
178	movl	%eax,28(%edi)
	C Advance the pointers by 8 limbs and count the pass.  leal and
	C decl do not write the carry flag, so the carry from the last
	C M4_inst is still live at the top of the next pass and at exit.
179	leal	32(%edi),%edi
180	leal	32(%esi),%esi
181	leal	32(%edx),%edx
182	decl	%ecx
183	jnz	L(oop)
184
	C Turn the final carry flag into the return value: sbbl gives 0 or
	C -1, negl makes that 0 or 1.
185	sbbl	%eax,%eax
186	negl	%eax
187
	C Restore the registers saved at either entry point.
188	popl	%esi
189	popl	%edi
190	ret
191
192EPILOGUE()
193