xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/atom/aorslshC_n.asm (revision 230b95665bbd3a9d1a53658a36b1053f8382a519)
1dnl  Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
2
3dnl  Contributed to the GNU project by Marco Bodrato.
4
5dnl  Copyright 2011 Free Software Foundation, Inc.
6dnl
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
25C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
26C				mp_limb_t carry);
27C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
28C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
29C				mp_signed_limb_t borrow);
30
C Stack arguments of the in-place (_ip1) entry points, matching the
C (dst, src, size, carry-or-borrow) prototypes listed above.  PARAM_CORB is
C read only by the entry point that takes a carry/borrow-in argument.
C NOTE(review): defframe is defined outside this file; the second argument is
C presumably the byte offset of the argument from the stack pointer at entry.
31defframe(PARAM_CORB,	16)
32defframe(PARAM_SIZE,	12)
33defframe(PARAM_SRC,	 8)
34defframe(PARAM_DST,	 4)
35
36C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
37C                          mp_size_t size);
38C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
39C                           mp_size_t size, mp_limb_t carry);
40C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
41C                          mp_size_t size);
42C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
43C                           mp_size_t size, mp_limb_t borrow);
44
45C If src1 == dst, the general entry points branch to the in-place (_ip1) code.
46
47C					cycles/limb
48C				dst!=src1,src2	dst==src1
49C P5
50C P6 model 0-8,10-12
51C P6 model 9  (Banias)
52C P6 model 13 (Dothan)
53C P4 model 0  (Willamette)
54C P4 model 1  (?)
55C P4 model 2  (Northwood)
56C P4 model 3  (Prescott)
57C P4 model 4  (Nocona)
58C Intel Atom			 7		 6
59C AMD K6
60C AMD K7
61C AMD K8
62C AMD K10
63
C Stack arguments that exist only for the general (dst, src1, src2, size)
C entry points.  Their dst and src1 arguments sit at the same offsets as
C PARAM_DST and PARAM_SRC and are read through those names.
64defframe(GPARAM_CORB,	20)
65defframe(GPARAM_SIZE,	16)
66defframe(GPARAM_SRC2,	12)
67
C The routines below keep no separate save area.  Once an argument has been
C loaded into a register, its stack slot is overwritten with a register that
C has to be restored before returning.  Note that SAVE_EBP (offset 12) is the
C size slot of the in-place entry points and the src2 slot of the general
C ones, so either argument must be loaded before %ebp is stored.
68dnl  re-use parameter space
69define(SAVE_EBP,`PARAM_SIZE')
70define(SAVE_EBX,`PARAM_SRC')
71define(SAVE_UP,`PARAM_DST')
72
C M is the scale factor used by the lea instructions that build each shifted
C limb.  rp and up are symbolic names for the destination and source pointer
C registers.
C NOTE(review): LSH and m4_lshift are defined outside this file; M is
C presumably 1 << LSH -- confirm against the including file.
73define(M, eval(m4_lshift(1,LSH)))
74define(`rp',  `%edi')
75define(`up',  `%esi')
76
77ASM_START()
78	TEXT
79	ALIGN(8)
80
C M4_ip_function_c: in-place entry point with a carry/borrow-in argument.
C
C Splits PARAM_CORB across the two registers that L(start_nc) expects and
C then joins the common in-place code:
C   %edx = bit LSH of the argument (0 or 1)
C   %ecx = the argument adjusted by %edx through M4_opp
C Nothing is pushed here, so the frame offset is still 0 when the argument is
C read and when control reaches L(start_nc).
C
C NOTE(review): M4_opp, LSH and the function-name macros are not defined in
C this file; presumably the including file supplies them -- confirm there.
C NOTE(review): %edx leaves here as 0 or 1, whereas the loop re-creates the
C carry flag with "add %edx, %edx", which sets CF only when bit 31 is set
C (the loop itself produces %edx with "sbb %edx, %edx", i.e. 0 or -1).
C Confirm that a non-zero %edx from this entry is honoured when size > 1.
81PROLOGUE(M4_ip_function_c)
82deflit(`FRAME',0)
83	movl	PARAM_CORB, %ecx	C carry/borrow-in argument
84	movl	%ecx, %edx
85	shr	$LSH, %edx
86	andl	$1, %edx		C keep only bit LSH of the argument
87	M4_opp	%edx, %ecx		C adjust the remainder by that bit
88	jmp	L(start_nc)
89EPILOGUE()
90
C M4_ip_function: in-place operation, dst[] = dst[] M4_inst (src[] shifted
C left by LSH bits), processing two limbs per loop iteration.
C
C Entry state at L(start_nc) (also reached from M4_ip_function_c):
C   %ecx = value added into the lowest shifted limb (0 for this entry)
C   %edx = saved carry/borrow state (0 for this entry)
C Entry state at L(inplace) (also reached from the general routine):
C   rp = dst, up = shifted source, %ebx = size, %esi/%ebx already saved
C
C Register use inside the loop:
C   rp   (%edi)  destination pointer
C   up   (%esi)  source pointer
C   %ebx         remaining iterations, starts at (size+1)/2
C   %eax, %ecx   alternately the current source limb and the bits shifted
C                out of the previous one (limb >> RSH)
C   %ebp         shifted limb about to be applied to (rp)
C   %edx         carry/borrow parked between iterations
C
C The result is left in %eax: the bits shifted out of the top source limb
C plus the final carry/borrow.
C
C NOTE(review): M4_inst and RSH are defined outside this file.  The code
C relies on M4_inst both consuming and producing the carry flag (presumably
C adcl or sbbl) -- confirm against the including file.
91PROLOGUE(M4_ip_function)
92deflit(`FRAME',0)
93
94	xor	%ecx, %ecx		C no carry/borrow-in
95	xor	%edx, %edx
96L(start_nc):
97	push	rp			FRAME_pushl()
98	mov	PARAM_DST, rp
99	mov	up, SAVE_UP		C dst slot is free now, reuse it
100	mov	PARAM_SRC, up
101	mov	%ebx, SAVE_EBX		C src slot is free now, reuse it
102	mov	PARAM_SIZE, %ebx	C size
103L(inplace):
104	incl	%ebx			C size + 1
105	shr	%ebx			C (size+1)\2, CF = low bit of size+1
106	mov	%ebp, SAVE_EBP		C mov leaves CF from the shr intact
107	jnc	L(entry)		C size odd
108
C Even size: handle one limb ahead of the loop by entering at its second
C half.  rp is biased by -4 so that 4(rp) addresses dst[0].
109	add	%edx, %edx		C size even; set CF from %edx
110	mov	%ecx, %ebp
111	mov	(up), %ecx
112	lea	-4(rp), rp
113	lea	(%ebp,%ecx,M), %eax	C carry-in value + src[0]*M
114	lea	4(up), up
115	jmp	L(enteven)
116
117	ALIGN(16)
C Loop invariant on entry: %eax = current source limb, %ecx = previous limb
C >> RSH, %edx = parked carry/borrow.  Only mov and lea sit between the
C "add %edx, %edx" and the two M4_inst instructions, so CF flows through.
118L(oop):
119	lea	(%ecx,%eax,M), %ebp	C previous high bits + limb*M
120	shr	$RSH, %eax		C bits shifted out of this limb
121	mov	4(up), %ecx		C next source limb
122	add	%edx, %edx		C bring parked carry/borrow back into CF
123	lea	8(up), up
124	M4_inst	%ebp, (rp)
125	lea	(%eax,%ecx,M), %eax
126
127L(enteven):
128	M4_inst	%eax, 4(rp)
129	lea	8(rp), rp
130
131	sbb	%edx, %edx		C park CF as 0 or -1; shr clobbers flags
132	shr	$RSH, %ecx
133
134L(entry):
135	mov	(up), %eax
136	decl	%ebx
137	jnz	L(oop)
138
C Last limb, then restore the saved registers and return.
139	lea	(%ecx,%eax,M), %ebp
140	shr	$RSH, %eax		C bits shifted out of the top limb
141	shr	%edx			C CF = bit 0 of the parked carry/borrow
142	M4_inst	%ebp, (rp)
143	mov	SAVE_UP, up
144	adc	$0, %eax		C result = shifted-out bits + final CF
145	mov	SAVE_EBP, %ebp
146	mov	SAVE_EBX, %ebx
147	pop	rp			FRAME_popl()
148	ret
149EPILOGUE()
150
C M4_function_c: general (dst, src1, src2, size) entry point with a
C carry/borrow-in argument.
C
C Splits GPARAM_CORB across the two registers that L(generic_nc) expects and
C then joins the common general code:
C   %edx = bit LSH of the argument (0 or 1)
C   %ecx = the argument adjusted by %edx through M4_opp
C Nothing is pushed here, so the frame offset is still 0 at the jump.
C
C NOTE(review): M4_opp and LSH are not defined in this file; presumably the
C including file supplies them -- confirm there.
C NOTE(review): %edx leaves here as 0 or 1, whereas the loops re-create the
C carry flag with "add %edx, %edx", which sets CF only when bit 31 is set.
C Confirm that a non-zero %edx from this entry is honoured when size > 1.
151PROLOGUE(M4_function_c)
152deflit(`FRAME',0)
153	movl	GPARAM_CORB, %ecx	C carry/borrow-in argument
154	movl	%ecx, %edx
155	shr	$LSH, %edx
156	andl	$1, %edx		C keep only bit LSH of the argument
157	M4_opp	%edx, %ecx		C adjust the remainder by that bit
158	jmp	L(generic_nc)
159EPILOGUE()
160
C M4_function: general operation, dst[] = src1[] M4_inst (src2[] shifted left
C by LSH bits), processing two limbs per loop iteration.
C
C Entry state at L(generic_nc) (also reached from M4_function_c):
C   %ecx = value added into the lowest shifted limb (0 for this entry)
C   %edx = saved carry/borrow state (0 for this entry)
C
C When dst == src1 the work is handed to L(inplace) in the in-place routine,
C with %ebx = size and up = src2.  Otherwise the loop below is used.
C
C Register use in the general loop:
C   rp   (%edi)  destination pointer
C   %ebx         src1 pointer (the operand that is not shifted)
C   up   (%esi)  src2 pointer (the operand that is shifted)
C   %eax, %ecx   alternately the current src2 limb and the bits shifted out
C                of the previous one (limb >> RSH)
C   %ebp         shifted limb about to be combined with a src1 limb
C   %edx         src1 limb / result while combining, otherwise the parked
C                carry/borrow
C All registers are in use, so the iteration count (size+1)/2 is kept in the
C GPARAM_SIZE stack slot.
C
C The result is left in %eax: the bits shifted out of the top src2 limb plus
C the final carry/borrow.
C
C NOTE(review): M4_inst and RSH are defined outside this file.  The code
C relies on M4_inst both consuming and producing the carry flag (presumably
C adcl or sbbl) -- confirm against the including file.
161PROLOGUE(M4_function)
162deflit(`FRAME',0)
163
164	xor	%ecx, %ecx		C no carry/borrow-in
165	xor	%edx, %edx
166L(generic_nc):
167	push	rp			FRAME_pushl()
168	mov	PARAM_DST, rp
169	mov	up, SAVE_UP		C dst slot is free now, reuse it
170	mov	PARAM_SRC, up		C src1
171	cmp	rp, up			C dst == src1 ?
172	mov	%ebx, SAVE_EBX		C saved once, for both paths
173	jne	L(general)
174	mov	GPARAM_SIZE, %ebx	C size
175	mov	GPARAM_SRC2, up		C shifted operand
176	jmp	L(inplace)
177
C %ebx was stored to SAVE_EBX ahead of the branch, so it is free to be
C reused as the src1 pointer here without saving it again.
178L(general):
179	mov	GPARAM_SIZE, %eax	C size
181	incl	%eax			C size + 1
182	mov	up, %ebx		C %ebx = src1 (not shifted)
183	mov	GPARAM_SRC2, up		C up = src2 (shifted)
184	shr	%eax			C (size+1)\2, CF = low bit of size+1
185	mov	%ebp, SAVE_EBP		C src2 slot is free now; mov keeps CF
186	mov	%eax, GPARAM_SIZE	C iteration count lives in memory
187	jnc	L(entry2)		C size odd
188
C Even size: handle one limb ahead of the loop by entering at its second
C half.  rp and %ebx are biased by -4 so that 4(rp) and 4(%ebx) address
C element 0.
189	add	%edx, %edx		C size even; set CF from %edx
190	mov	%ecx, %ebp
191	mov	(up), %ecx
192	lea	-4(rp), rp
193	lea	-4(%ebx), %ebx
194	lea	(%ebp,%ecx,M), %eax	C carry-in value + src2[0]*M
195	lea	4(up), up
196	jmp	L(enteven2)
197
198	ALIGN(16)
C Loop invariant on entry: %eax = current src2 limb, %ecx = previous limb
C >> RSH, %edx = parked carry/borrow.  Only mov and lea sit between the
C "add %edx, %edx" and the two M4_inst instructions, so CF flows through.
199L(oop2):
200	lea	(%ecx,%eax,M), %ebp	C previous high bits + limb*M
201	shr	$RSH, %eax		C bits shifted out of this limb
202	mov	4(up), %ecx		C next src2 limb
203	add	%edx, %edx		C bring parked carry/borrow back into CF
204	lea	8(up), up
205	mov	(%ebx), %edx		C src1 limb
206	M4_inst	%ebp, %edx
207	lea	(%eax,%ecx,M), %eax
208	mov	%edx, (rp)
209L(enteven2):
210	mov	4(%ebx), %edx		C src1 limb
211	lea	8(%ebx), %ebx
212	M4_inst	%eax, %edx
213	mov	%edx, 4(rp)
214	sbb	%edx, %edx		C park CF as 0 or -1; shr clobbers flags
215	shr	$RSH, %ecx
216	lea	8(rp), rp
217L(entry2):
218	mov	(up), %eax
219	decl	GPARAM_SIZE
220	jnz	L(oop2)
221
C Last limb, then restore the saved registers and return.
222	lea	(%ecx,%eax,M), %ebp
223	shr	$RSH, %eax		C bits shifted out of the top limb
224	shr	%edx			C CF = bit 0 of the parked carry/borrow
225	mov	(%ebx), %edx
226	M4_inst	%ebp, %edx
227	mov	%edx, (rp)
228	mov	SAVE_UP, up
229	adc	$0, %eax		C result = shifted-out bits + final CF
230	mov	SAVE_EBP, %ebp
231	mov	SAVE_EBX, %ebx
232	pop	rp			FRAME_popl()
233	ret
234EPILOGUE()
235
236ASM_END()
237