xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/aors_n.asm (revision 32d1c65c71fbdb65a012e8392a62a757dd6853e9)
1dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2
3dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C     cycles/limb
35C P5	3.375
36C P6	3.125
37C K6	3.5
38C K7	2.25
39C P4	8.75
40
41
C Pick the limb instruction and the function names for this build.  With
C OPERATION_add_n defined the instruction is adcl and the entry points are
C mpn_add_n and mpn_add_nc; with OPERATION_sub_n defined it is sbbl with
C mpn_sub_n and mpn_sub_nc.  With neither defined, m4_error is invoked.
42ifdef(`OPERATION_add_n',`
43	define(M4_inst,        adcl)
44	define(M4_function_n,  mpn_add_n)
45	define(M4_function_nc, mpn_add_nc)
46
47',`ifdef(`OPERATION_sub_n',`
48	define(M4_inst,        sbbl)
49	define(M4_function_n,  mpn_sub_n)
50	define(M4_function_nc, mpn_sub_nc)
51
52',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
53')')')
54
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; it appears to
C list the functions this one source can be assembled into - confirm.
55MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
56
57
58C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
59C                          mp_size_t size);
60C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
61C	                    mp_size_t size, mp_limb_t carry);
C
C Each limb of dst is formed by loading the matching limb of src1 and
C applying M4_inst with the matching limb of src2, so dst = src1 op src2
C with the carry flag chained from limb to limb.  The value returned in eax
C is the final carry or borrow, as 0 or 1.  The nc form starts the chain
C from bit 0 of its carry argument; the plain form starts with no carry.
C NOTE(review): a size of 0 is not handled by the code below (the loop
C counter would start at 0 and wrap), so callers presumably guarantee
C size >= 1 - confirm against the library contract.
62
C Argument offsets handed to defframe.  NOTE(review): defframe and the FRAME
C bookkeeping are defined outside this file; these look like byte offsets
C from the stack pointer at function entry, corrected by FRAME after each
C push - confirm.
63defframe(PARAM_CARRY,20)
64defframe(PARAM_SIZE, 16)
65defframe(PARAM_SRC2, 12)
66defframe(PARAM_SRC1, 8)
67defframe(PARAM_DST,  4)
68
69	TEXT
70	ALIGN(8)
71
C M4_function_nc -- entry point that takes an initial carry in PARAM_CARRY.
C
C Performs the same setup as M4_function_n, then loads the carry flag from
C bit 0 of PARAM_CARRY and enters the unrolled loop at L(oop), which is
C located in M4_function_n.  There is no ret here: the shared loop tail pops
C esi and edi and returns, which works because both entry points have pushed
C exactly edi and esi by the time they reach the loop.
C
C State on entry to the loop:
C   edi = dst, esi = src1, edx = src2, each moved back by 4*index bytes so
C         that the first limb lines up with the chosen entry slot
C   ecx = number of passes through the 8-limb loop body
C   CF  = incoming carry or borrow
72PROLOGUE(M4_function_nc)
73deflit(`FRAME',0)
74
C Preserve edi and esi; the shared loop tail restores them.
75	pushl	%edi		FRAME_pushl()
76	pushl	%esi		FRAME_pushl()
77
78	movl	PARAM_DST,%edi
79	movl	PARAM_SRC1,%esi
80	movl	PARAM_SRC2,%edx
81	movl	PARAM_SIZE,%ecx
82
C ecx = size / 8 full passes.  eax = (-size) mod 8 is the number of limb
C slots skipped in the first pass, so that pass handles size mod 8 limbs.
C When eax is 0 every pass is a full one and L(oopgo) is used instead of a
C computed entry address.
83	movl	%ecx,%eax
84	shrl	$3,%ecx			C compute count for unrolled loop
85	negl	%eax
86	andl	$7,%eax			C get index where to start loop
87	jz	L(oopgo)		C necessary special case for 0
88	incl	%ecx			C adjust loop count
89	shll	$2,%eax			C adjustment for pointers...
90	subl	%eax,%edi		C ... since they are offset ...
91	subl	%eax,%esi		C ... by a constant when we ...
92	subl	%eax,%edx		C ... enter the loop
93	shrl	$2,%eax			C restore previous value
94
C Compute the loop entry address in eax: L(oop) - 3 + 9*eax.
C NOTE(review): this arithmetic depends on the assembled size of each
C three-instruction limb group in the loop (apparently 9 bytes, with the
C first group 3 bytes shorter because its operands carry no displacement).
C Recheck it before changing any loop instruction or its encoding.
95ifdef(`PIC',`
96	C Calculate start address in loop for PIC.  Due to limitations in
97	C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
98	call	L(0a)			C pushes the address of L(0a)
99L(0a):	leal	(%eax,%eax,8),%eax	C eax = 9 * index
100	addl	(%esp),%eax		C add the run-time address of L(0a)
101	addl	$L(oop)-L(0a)-3,%eax	C add the distance to the entry base
102	addl	$4,%esp			C discard the pushed address
103',`
104	C Calculate start address in loop for non-PIC.
105	leal	L(oop)-3(%eax,%eax,8),%eax
106')
107
C The carry is loaded only now, after the address computation, because the
C addl instructions of the PIC sequence modify the flags.  ebp is just a
C scratch register: it is saved, used to shift bit 0 of the argument into
C CF, and restored by popl, which leaves the flags alone.
108	C These lines initialize carry from the 5th parameter.  Should be
109	C possible to simplify.
110	pushl	%ebp		FRAME_pushl()
111	movl	PARAM_CARRY,%ebp
112	shrl	%ebp			C shift bit 0 into carry
113	popl	%ebp		FRAME_popl()
114
115	jmp	*%eax			C jump into loop
116
117EPILOGUE()
118
119
C M4_function_n -- dst = src1 op src2 over size limbs with no carry in.
C
C This function also holds the 8-way unrolled loop shared with
C M4_function_nc, which jumps either to L(oopgo) or to a computed address
C inside L(oop).  The carry or borrow out of the last limb is returned in
C eax as 0 or 1.
C
C State on entry to the loop:
C   edi = dst, esi = src1, edx = src2, each moved back by 4*index bytes so
C         that the first limb lines up with the chosen entry slot
C   ecx = number of passes through the 8-limb loop body
C   CF  = incoming carry or borrow
120	ALIGN(16)
121PROLOGUE(M4_function_n)
122deflit(`FRAME',0)
123
C Preserve edi and esi; they are restored just before ret.
124	pushl	%edi		FRAME_pushl()
125	pushl	%esi		FRAME_pushl()
126
127	movl	PARAM_DST,%edi
128	movl	PARAM_SRC1,%esi
129	movl	PARAM_SRC2,%edx
130	movl	PARAM_SIZE,%ecx
131
C ecx = size / 8 full passes.  eax = (-size) mod 8 is the number of limb
C slots skipped in the first pass, so that pass handles size mod 8 limbs.
C andl leaves CF clear, so taking the jz branch enters the loop at its top
C with no carry in.
132	movl	%ecx,%eax
133	shrl	$3,%ecx			C compute count for unrolled loop
134	negl	%eax
135	andl	$7,%eax			C get index where to start loop
136	jz	L(oop)			C necessary special case for 0
137	incl	%ecx			C adjust loop count
138	shll	$2,%eax			C adjustment for pointers...
139	subl	%eax,%edi		C ... since they are offset ...
140	subl	%eax,%esi		C ... by a constant when we ...
141	subl	%eax,%edx		C ... enter the loop
142	shrl	$2,%eax			C restore previous value
143
C Compute the loop entry address in eax: L(oop) - 3 + 9*eax.
C NOTE(review): this arithmetic depends on the assembled size of each
C three-instruction limb group in the loop (apparently 9 bytes, with the
C first group 3 bytes shorter because its operands carry no displacement).
C Recheck it before changing any loop instruction or its encoding.
C
C No carry is loaded on this path.  In the non-PIC case the last instruction
C that writes the flags is the shrl above, which shifts out the two zero
C bits shifted in by the shll, so CF is clear.  NOTE(review): in the PIC case
C the last flag-writing instruction is the addl to esp, so a clear CF relies
C on that addition not carrying - confirm.
144ifdef(`PIC',`
145	C Calculate start address in loop for PIC.  Due to limitations in
146	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
147	call	L(0b)			C pushes the address of L(0b)
148L(0b):	leal	(%eax,%eax,8),%eax	C eax = 9 * index
149	addl	(%esp),%eax		C add the run-time address of L(0b)
150	addl	$L(oop)-L(0b)-3,%eax	C add the distance to the entry base
151	addl	$4,%esp			C discard the pushed address
152',`
153	C Calculate start address in loop for non-PIC.
154	leal	L(oop)-3(%eax,%eax,8),%eax
155')
156	jmp	*%eax			C jump into loop
157
C Entry from M4_function_nc when its start index is 0: set CF from bit 0 of
C PARAM_CARRY using ebp as a scratch register, then fall through to the top
C of the loop.
158L(oopgo):
159	pushl	%ebp		FRAME_pushl()
160	movl	PARAM_CARRY,%ebp
161	shrl	%ebp			C shift bit 0 into carry
162	popl	%ebp		FRAME_popl()
163
C Main loop, 8 limbs per pass.  Each group of three instructions loads a
C limb of src1, applies M4_inst with the matching limb of src2 and stores the
C result to dst.  Within a pass only M4_inst writes CF; the pointer updates
C use leal and the counter uses decl, neither of which changes CF, so the
C carry chain continues from one pass to the next.
164	ALIGN(16)
165L(oop):	movl	(%esi),%eax
166	M4_inst	(%edx),%eax
167	movl	%eax,(%edi)
168	movl	4(%esi),%eax
169	M4_inst	4(%edx),%eax
170	movl	%eax,4(%edi)
171	movl	8(%esi),%eax
172	M4_inst	8(%edx),%eax
173	movl	%eax,8(%edi)
174	movl	12(%esi),%eax
175	M4_inst	12(%edx),%eax
176	movl	%eax,12(%edi)
177	movl	16(%esi),%eax
178	M4_inst	16(%edx),%eax
179	movl	%eax,16(%edi)
180	movl	20(%esi),%eax
181	M4_inst	20(%edx),%eax
182	movl	%eax,20(%edi)
183	movl	24(%esi),%eax
184	M4_inst	24(%edx),%eax
185	movl	%eax,24(%edi)
186	movl	28(%esi),%eax
187	M4_inst	28(%edx),%eax
188	movl	%eax,28(%edi)
189	leal	32(%edi),%edi		C advance all three pointers by 8 limbs
190	leal	32(%esi),%esi
191	leal	32(%edx),%edx
192	decl	%ecx			C one pass done; CF is left unchanged
193	jnz	L(oop)
194
C Turn the final carry flag into the return value.
195	sbbl	%eax,%eax		C eax = 0 - CF, that is 0 or -1
196	negl	%eax			C eax = 1 on carry or borrow out, else 0
197
198	popl	%esi
199	popl	%edi
200	ret
201
202EPILOGUE()
203