xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium/aors_n.asm (revision 479d8f7d843cc1b22d497efdf1f27a50ee8418d4)
1dnl  Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2
3dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
4dnl  Foundation, Inc.
5dnl
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or
9dnl  modify it under the terms of the GNU Lesser General Public License as
10dnl  published by the Free Software Foundation; either version 3 of the
11dnl  License, or (at your option) any later version.
12dnl
13dnl  The GNU MP Library is distributed in the hope that it will be useful,
14dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
15dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16dnl  Lesser General Public License for more details.
17dnl
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23
24C P5: 2.375 cycles/limb
25
26
C Select the operation from the OPERATION_xxx symbol that is defined:
C   M4_inst         the carry-propagating instruction applied to every limb
C                   (adcl for add_n, sbbl for sub_n)
C   M4_function_n   name of the entry point without a carry-in
C   M4_function_nc  name of the entry point with a carry-in
C If neither symbol is defined, m4_error is invoked with a message.
27ifdef(`OPERATION_add_n',`
28	define(M4_inst,        adcl)
29	define(M4_function_n,  mpn_add_n)
30	define(M4_function_nc, mpn_add_nc)
31
32',`ifdef(`OPERATION_sub_n',`
33	define(M4_inst,        sbbl)
34	define(M4_function_n,  mpn_sub_n)
35	define(M4_function_nc, mpn_sub_nc)
36
37',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
38')')')
39
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it
C declares the four functions this one source can be built as -- confirm.
40MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
41
42
43C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
44C                          mp_size_t size);
45C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
46C                           mp_size_t size, mp_limb_t carry);
47
C Stack argument slots.  The offsets follow the parameter order of the
C prototypes above (dst, src1, src2, size, carry), 4 bytes apart starting
C at 4.  PARAM_CARRY is read only by the M4_function_nc entry point.
C NOTE(review): defframe is defined outside this file; presumably the
C offsets are relative to the stack pointer at entry and are adjusted by
C the current FRAME value -- confirm.
48defframe(PARAM_CARRY,20)
49defframe(PARAM_SIZE, 16)
50defframe(PARAM_SRC2, 12)
51defframe(PARAM_SRC1, 8)
52defframe(PARAM_DST,  4)
53
54	TEXT
55	ALIGN(8)
C M4_function_nc -- same operation as M4_function_n, but the initial carry
C (borrow for the sbbl variant) is taken from bit 0 of PARAM_CARRY.
C
C This entry point only performs the setup.  It then jumps into the body
C of M4_function_n, at L(oop) or L(end), which also contains the shared
C exit code (register restore and ret).
C
C Register use once the setup is done:
C   edi  dst pointer
C   esi  src1 pointer
C   ebp  src2 pointer
C   ebx  next src2 limb, always loaded one step ahead
C   ecx  (size-1)/8, number of passes of the 8-limb loop
C   edx  (size-1)%8, single limbs left before the final limb
C
C size is assumed to be at least 1: the first src2 limb is loaded
C unconditionally and the shared tail always processes one final limb.
56PROLOGUE(M4_function_nc)
57
C These four registers are restored by the pops before ret in
C M4_function_n.
58	pushl	%edi
59	pushl	%esi
60	pushl	%ebx
61	pushl	%ebp
C Four 4-byte pushes so far.
C NOTE(review): FRAME presumably lets the PARAM_xxx names compensate for
C the pushed words -- confirm against the deflit/defframe definitions.
62deflit(`FRAME',16)
63
64	movl	PARAM_DST,%edi
65	movl	PARAM_SRC1,%esi
66	movl	PARAM_SRC2,%ebp
67	movl	PARAM_SIZE,%ecx
68
69	movl	(%ebp),%ebx		C ebx = first src2 limb
70
C Split size-1 into 8-limb loop passes (ecx) and leftover limbs (edx).
C The remaining one limb is the final limb handled at L(end2).
71	decl	%ecx
72	movl	%ecx,%edx
73	shrl	$3,%ecx
74	andl	$7,%edx
C In this entry point the testl is needed only for the zero flag tested by
C jz; the carry flag is loaded from PARAM_CARRY on both paths below.
75	testl	%ecx,%ecx		C zero carry flag
76	jz	L(endgo)
77
C At least one pass of the 8-limb loop.  The loop uses edx as a data
C register, so the leftover count is kept on the stack and popped again
C after the loop.
78	pushl	%edx
79FRAME_pushl()
80	movl	PARAM_CARRY,%eax
81	shrl	%eax			C shift bit 0 into carry
82	jmp	L(oop)
83
C No 8-limb pass.  edx was not pushed on this path, so FRAME is set back
C to 16 before PARAM_CARRY is referenced.
84L(endgo):
85deflit(`FRAME',16)
86	movl	PARAM_CARRY,%eax
87	shrl	%eax			C shift bit 0 into carry
88	jmp	L(end)
89
90EPILOGUE()
91
92
93	ALIGN(8)
C M4_function_n -- apply M4_inst limb by limb to src1 and src2, from the
C lowest address upwards, storing the results at dst.  The return value in
C eax is the carry (borrow for the sbbl variant) out of the last limb,
C 0 or 1.
C
C The labels L(oop) and L(end) are also jump targets of M4_function_nc,
C which enters with the same register setup and with the carry flag
C already loaded.
C
C Register use:
C   edi  dst pointer
C   esi  src1 pointer
C   ebp  src2 pointer
C   ebx  next src2 limb, always loaded one step ahead
C   ecx  (size-1)/8, number of passes of the 8-limb loop
C   edx  (size-1)%8, single limbs left before the final limb; used as a
C        data register inside the 8-limb loop, hence saved on the stack
C   eax  data register, then the return value
C
C size is assumed to be at least 1: the first src2 limb is loaded
C unconditionally and L(end2) always processes one final limb.
C
C Between two M4_inst instructions only mov, lea, dec and inc are used,
C none of which changes the carry flag, so the carry propagates through
C the whole operand, including across loop iterations.
94PROLOGUE(M4_function_n)
95
C Restored by the pops before ret.
96	pushl	%edi
97	pushl	%esi
98	pushl	%ebx
99	pushl	%ebp
C Four 4-byte pushes so far.
C NOTE(review): FRAME presumably lets the PARAM_xxx names compensate for
C the pushed words -- confirm against the deflit/defframe definitions.
100deflit(`FRAME',16)
101
102	movl	PARAM_DST,%edi
103	movl	PARAM_SRC1,%esi
104	movl	PARAM_SRC2,%ebp
105	movl	PARAM_SIZE,%ecx
106
107	movl	(%ebp),%ebx		C ebx = first src2 limb
108
C Split size-1 into 8-limb loop passes (ecx) and leftover limbs (edx).
C The remaining one limb is the final limb handled at L(end2).
109	decl	%ecx
110	movl	%ecx,%edx
111	shrl	$3,%ecx
112	andl	$7,%edx
C testl clears the carry flag for the first M4_inst and sets the zero flag
C for the jz, which skips the unrolled loop when size-1 is below 8.
113	testl	%ecx,%ecx		C zero carry flag
114	jz	L(end)
C Keep the leftover count on the stack while edx is used for data.
115	pushl	%edx
116FRAME_pushl()
117
118	ALIGN(8)
C Main loop, 8 limbs per pass.  The value loaded from 28(%edi) is
C discarded: eax is overwritten by the next load.  edi is advanced first,
C with leal so the carry flag is untouched, which is why the stores below
C use offsets -32 to -4.
119L(oop):	movl	28(%edi),%eax		C fetch destination cache line
120	leal	32(%edi),%edi
121
C Each of the four groups L(1) to L(4) handles two limbs: load two src1
C limbs into eax and edx, combine each with the src2 limb waiting in ebx,
C reload ebx with the following src2 limb after each use, then store both
C results.  These four labels are not branch targets anywhere in this
C file.
122L(1):	movl	(%esi),%eax
123	movl	4(%esi),%edx
124	M4_inst	%ebx,%eax
125	movl	4(%ebp),%ebx
126	M4_inst	%ebx,%edx
127	movl	8(%ebp),%ebx
128	movl	%eax,-32(%edi)
129	movl	%edx,-28(%edi)
130
131L(2):	movl	8(%esi),%eax
132	movl	12(%esi),%edx
133	M4_inst	%ebx,%eax
134	movl	12(%ebp),%ebx
135	M4_inst	%ebx,%edx
136	movl	16(%ebp),%ebx
137	movl	%eax,-24(%edi)
138	movl	%edx,-20(%edi)
139
140L(3):	movl	16(%esi),%eax
141	movl	20(%esi),%edx
142	M4_inst	%ebx,%eax
143	movl	20(%ebp),%ebx
144	M4_inst	%ebx,%edx
145	movl	24(%ebp),%ebx
146	movl	%eax,-16(%edi)
147	movl	%edx,-12(%edi)
148
149L(4):	movl	24(%esi),%eax
150	movl	28(%esi),%edx
151	M4_inst	%ebx,%eax
152	movl	28(%ebp),%ebx
153	M4_inst	%ebx,%edx
C Look-ahead load of the first src2 limb of the next pass or of the tail.
C At least the final limb is still unprocessed here, so the limb read lies
C within the operand.
154	movl	32(%ebp),%ebx
155	movl	%eax,-8(%edi)
156	movl	%edx,-4(%edi)
157
C Advance the source pointers and count down the passes, using leal and
C decl so that the carry flag survives into the next pass.
158	leal	32(%esi),%esi
159	leal	32(%ebp),%ebp
160	decl	%ecx
161	jnz	L(oop)
162
163	popl	%edx			C edx = leftover limb count again
164FRAME_popl()
C Tail: edx limbs (0 to 7) one at a time, then the final limb.  When edx
C is zero the decl makes it negative and the single-limb loop is skipped;
C otherwise incl restores the count.
165L(end):
166	decl	%edx			C test %edx w/o clobbering carry
167	js	L(end2)
168	incl	%edx
C One limb per iteration, with the same one-step-ahead src2 load into ebx
C as in the main loop.
169L(oop2):
170	leal	4(%edi),%edi
171	movl	(%esi),%eax
172	M4_inst	%ebx,%eax
173	movl	4(%ebp),%ebx
174	movl	%eax,-4(%edi)
175	leal	4(%esi),%esi
176	leal	4(%ebp),%ebp
177	decl	%edx
178	jnz	L(oop2)
C Final limb.  Its src2 limb is already in ebx, so no further look-ahead
C load is made.
179L(end2):
180	movl	(%esi),%eax
181	M4_inst	%ebx,%eax
182	movl	%eax,(%edi)
183
C Turn the carry flag into the return value: sbbl gives 0 or -1, negl
C makes that 0 or 1.
184	sbbl	%eax,%eax
185	negl	%eax
186
187	popl	%ebp
188	popl	%ebx
189	popl	%esi
190	popl	%edi
191	ret
192
193EPILOGUE()
194