xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/atom/aors_n.asm (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1dnl  Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  Contributed to the GNU project by Marco Bodrato.
6
7dnl  This file is part of the GNU MP Library.
8
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of the GNU Lesser General Public License as published
11dnl  by the Free Software Foundation; either version 3 of the License, or (at
12dnl  your option) any later version.
13
14dnl  The GNU MP Library is distributed in the hope that it will be useful, but
15dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17dnl  License for more details.
18
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C			    cycles/limb
25C P5
26C P6 model 0-8,10-12
27C P6 model 9  (Banias)
28C P6 model 13 (Dothan)
29C P4 model 0  (Willamette)
30C P4 model 1  (?)
31C P4 model 2  (Northwood)
32C P4 model 3  (Prescott)
33C P4 model 4  (Nocona)
34C Intel Atom			 3
35C AMD K6
36C AMD K7
37C AMD K8
38C AMD K10
39C Select the carry-propagating instruction and the exported names according to which OPERATION_* symbol is defined; if neither is defined, m4_error is invoked with the message below.
40ifdef(`OPERATION_add_n', `
41	define(M4_inst,        adcl)
42	define(M4_function_n,  mpn_add_n)
43	define(M4_function_nc, mpn_add_nc)
44	define(M4_description, add)
45',`ifdef(`OPERATION_sub_n', `
46	define(M4_inst,        sbbl)
47	define(M4_function_n,  mpn_sub_n)
48	define(M4_function_nc, mpn_sub_nc)
49	define(M4_description, subtract)
50',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
51')')')
52
53MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
54
55C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
56C                         mp_size_t size);
57C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
58C	                   mp_size_t size, mp_limb_t carry);
59C
60C Calculate src1,size M4_description src2,size, and store the result in
61C dst,size.  The return value is the carry bit from the top of the result (1
62C or 0).
63C
64C The _nc version accepts 1 or 0 for an initial carry into the low limb of
65C the calculation.  Note values other than 1 or 0 here will lead to garbage
66C results.
67C Argument slots declared with defframe (presumably byte offsets from %esp at entry; defframe is defined outside this file -- TODO confirm).
68defframe(PARAM_CARRY,20)	C initial carry, read only by M4_function_nc
69defframe(PARAM_SIZE, 16)	C number of limbs
70defframe(PARAM_SRC2, 12)	C second source operand, loaded into vp
71defframe(PARAM_SRC1, 8)	C first source operand, loaded into up
72defframe(PARAM_DST,  4)	C destination, loaded into rp
73
74dnl  Re-use parameter space to save the old %edi, %ebx and %esi; each slot is overwritten only after its parameter has been loaded.
75define(SAVE_RP,`PARAM_SIZE')	C old %edi is stored here once PARAM_SIZE is in %eax
76define(SAVE_VP,`PARAM_SRC1')	C old %ebx is stored here once PARAM_SRC1 is in %esi
77define(SAVE_UP,`PARAM_DST')	C old %esi is stored here once PARAM_DST is in %edi
78C Register roles.  cy and r1 share %ecx: shr moves the carry-in into CF before r1 is loaded.
79define(`rp',  `%edi')	C result pointer
80define(`up',  `%esi')	C first source pointer
81define(`vp',  `%ebx')	C second source pointer
82define(`cy',  `%ecx')	C incoming carry (only bit 0 is used)
83define(`r1',  `%ecx')	C limb accumulator (same register as cy)
84define(`r2',  `%edx')	C second limb accumulator
85
86ASM_START()
87	TEXT
88	ALIGN(16)
89deflit(`FRAME',0)
90C M4_function_n: rp[] = up[] +- vp[] over size limbs with no carry-in; the carry/borrow out is returned in %eax.
91PROLOGUE(M4_function_n)
92	xor	cy, cy			C carry-in = 0 for the plain entry point
93L(start):				C M4_function_nc jumps here with cy = carry-in
94	mov	PARAM_SIZE, %eax	C size
95	mov	rp, SAVE_RP		C save %edi (PARAM_SIZE slot, already read)
96	mov	PARAM_DST, rp		C rp = dst
97	mov	up, SAVE_UP		C save %esi (PARAM_DST slot, already read)
98	mov	PARAM_SRC1, up	C up = src1
99	shr	%eax			C %eax = size >> 1 (pair count); CF = size & 1
100	mov	vp, SAVE_VP		C save %ebx (PARAM_SRC1 slot, already read); mov keeps the shr flags
101	mov	PARAM_SRC2, vp	C vp = src2
102	jz	L(one)			C size >> 1 == 0: size == 1 (assumes size >= 1 -- TODO confirm)
103	jc	L(three)		C size odd and >= 3
104C Even size: limb 0 goes through r2 and the loop is entered at L(entry).
105	shr	cy			C CF = carry-in (bit 0 of cy)
106	mov	(up), r2		C r2 = up[0]
107	lea	4(up), up		C bias the pointers to match the offsets used at L(entry);
108	lea	4(vp), vp		C lea does not touch CF
109	lea	-4(rp), rp
110	jmp	L(entry)
111L(one):					C single limb: %eax is already zero
112	shr	cy			C CF = carry-in (bit 0 of cy)
113	mov	(up), r1		C r1 = up[0]; overwrites cy, whose bit is now in CF
114	jmp	L(end)
115L(three):				C odd size >= 3: limb 0 goes through r1
116	shr	cy			C CF = carry-in (bit 0 of cy)
117	mov	(up), r1		C r1 = up[0]; overwrites cy, whose bit is now in CF
118C Main loop, two limbs per full pass; CF carries between limbs and %eax counts the remaining pairs.
119	ALIGN(16)
120L(oop):
121	M4_inst	(vp), r1		C r1 = r1 +- (vp), with CF in and out
122	lea	8(up), up		C advance up by two limbs (flags untouched)
123	mov	-4(up), r2		C r2 = next up limb
124	lea	8(vp), vp		C advance vp by two limbs
125	mov	r1, (rp)		C store the result limb held in r1
126L(entry):
127	M4_inst	-4(vp), r2		C r2 = r2 +- -4(vp), with CF in and out
128	lea	8(rp), rp		C advance rp by two limbs
129	dec	%eax			C one pair done; dec leaves CF unchanged
130	mov	(up), r1		C preload the limb consumed at L(oop) or L(end)
131	mov	r2, -4(rp)		C store the result limb held in r2
132	jnz	L(oop)			C ZF from dec: repeat while pairs remain
133
134L(end):					C %eax is zero here; the last source limb is in r1
135	mov	SAVE_UP, up		C restore %esi
136	M4_inst	(vp), r1		C final limb
137	mov	SAVE_VP, vp		C restore %ebx
138	mov	r1, (rp)		C store the final result limb
139	adc	%eax, %eax		C return value = CF (0 or 1), since %eax was zero
140	mov	SAVE_RP, rp		C restore %edi
141	ret
142EPILOGUE()
143C M4_function_nc: same operation as M4_function_n, but with an initial carry-in taken from PARAM_CARRY (expected to be 0 or 1).
144PROLOGUE(M4_function_nc)
145	mov	PARAM_CARRY, cy		C cy = carry-in argument
146	jmp	L(start)		C continue in the shared body of M4_function_n
147EPILOGUE()
148ASM_END()
149