xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/p6/aors_n.asm (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1dnl  Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.
2
3dnl  Copyright 2006 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C TODO:
34C  * Avoid indexed addressing, it makes us stall on the two-ported register
35C    file.
36
37C			    cycles/limb
38C P6 model 0-8,10-12		3.17
39C P6 model 9   (Banias)		2.15
40C P6 model 13  (Dothan)		2.25
41
42
C Register roles.  In the limb loop up and vp are only read, rp is only
C written, and n is the scaled index register shared by all three.
C n has to live in %ecx because the loop exit test is jecxz.
43define(`rp',	`%edi')
44define(`up',	`%esi')
45define(`vp',	`%ebx')
46define(`n',	`%ecx')
47
C Pick the carry-chained instruction (adc or sbb) and the two entry point
C names from whichever OPERATION_* symbol is defined.
C NOTE(review): if neither symbol is defined, ADCSBB, func and func_nc stay
C undefined -- presumably the build always defines exactly one; confirm.
48ifdef(`OPERATION_add_n', `
49	define(ADCSBB,	      adc)
50	define(func,	      mpn_add_n)
51	define(func_nc,	      mpn_add_nc)')
52ifdef(`OPERATION_sub_n', `
53	define(ADCSBB,	      sbb)
54	define(func,	      mpn_sub_n)
55	define(func_nc,	      mpn_sub_nc)')
56
C Names all four entry points that the func/func_nc definitions above can
C expand to.
C NOTE(review): MULFUNC_PROLOGUE, ASM_START, TEXT and ALIGN are defined
C outside this file (via the included config.m4); their exact effect is not
C visible here -- confirm before changing.
57MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
58
59ASM_START()
60
61	TEXT
62	ALIGN(16)
63
C func -- mpn_add_n or mpn_sub_n, whichever OPERATION_* symbol selected.
C
C Stack arguments on entry, 4 bytes each, just above the return address:
C   4(%esp) rp, 8(%esp) up, 12(%esp) vp, 16(%esp) n
C (they are read below at 16..28(%esp), i.e. after three 4-byte pushes).
C
C Effect: for i = 0 .. n-1 stores up[i] ADCSBB vp[i] to rp[i] as 32-bit
C limbs, chaining the carry or borrow from limb to limb through the CPU
C carry flag.  Returns the final carry flag in %eax as 0 or 1.
C
C %edx holds the incoming carry: zero for this entry point, or the value
C loaded by func_nc, which joins at L(start).  Only bit 0 is used.
C
C Requires n >= 1: with n = 0 the computed jump lands on L(ent) with a zero
C index and a full pass of 8 limbs is processed.
C
C Clobbers %eax, %ecx, %edx and the flags; %edi, %esi and %ebx are saved
C and restored.
64PROLOGUE(func)
65	xor	%edx, %edx			C carry-in = 0 for the plain entry
66L(start):
67	push	%edi
68	push	%esi
69	push	%ebx
70
	C The three pushes moved the arguments up by 12 bytes.
71	mov	16(%esp), rp
72	mov	20(%esp), up
73	mov	24(%esp), vp
74	mov	28(%esp), n
75
	C Point all three registers at the end of their arrays; a negative
	C index in n then counts up towards zero.
76	lea	(up,n,4), up
77	lea	(vp,n,4), vp
78	lea	(rp,n,4), rp
79
	C Split the negated count into a loop index rounded down to a multiple
	C of 8 (kept in n) and the number of leading limb groups to skip on the
	C first pass through the 8-way unrolled loop (kept in %eax).
80	neg	n
81	mov	n, %eax
82	and	$-8, n				C index = -8 * ceil(count/8)
83	and	$7, %eax			C groups to skip, 0..7
84	shl	$2, %eax			C 4x
	C Entry address = L(ent) + 12 * skipped groups.  This relies on every
	C load/ADCSBB/store group in the loop assembling to exactly 12 bytes.
85ifdef(`PIC',`
86	call	L(pic_calc)
87L(here):
88',`
89	lea	L(ent) (%eax,%eax,2), %eax	C 12x
90')
91
	C The carry flag is loaded last, after all flag-modifying address
	C arithmetic; the indirect jmp leaves it intact for the first ADCSBB.
92	shr	%edx				C set cy flag from bit 0 of carry-in
93	jmp	*%eax
94
C PIC variant of the address computation.  The call above pushes the
C run-time address of L(here); adding the assembly-time constant
C L(ent)-L(here) to it yields the run-time address of L(ent).
C NOTE(review): ret_internal is defined outside this file; presumably a
C plain return to L(here) -- confirm.
95ifdef(`PIC',`
96L(pic_calc):
97	C See mpn/x86/README about old gas bugs
98	lea	(%eax,%eax,2), %eax
99	add	$L(ent)-L(here), %eax
100	add	(%esp), %eax
101	ret_internal
102')
103
C Loop exit: convert the final carry flag into the 0/1 return value and
C restore the saved registers in reverse push order.
104L(end):
105	sbb	%eax, %eax			C %eax = 0 or -1 according to CF
106	neg	%eax				C %eax = 0 or 1
107	pop	%ebx
108	pop	%esi
109	pop	%edi
110	ret
111
112	ALIGN(16)
113L(top):
114	jecxz	L(end)				C done when index is 0; flags untouched
115L(ent):
C Limb group 0.  NOTE(review): Zdisp is defined outside this file;
C presumably it forces an explicit zero displacement byte so that this
C group has the same 12-byte size as the groups below -- confirm.
116Zdisp(	mov,	0,(up,n,4), %eax)
117Zdisp(	ADCSBB,	0,(vp,n,4), %eax)
118Zdisp(	mov,	%eax, 0,(rp,n,4))
119
	C Limb groups 1..7: load from up, ADCSBB with vp, store to rp.
	C Only mov and ADCSBB are used, so CF flows from group to group.
120	mov	4(up,n,4), %edx
121	ADCSBB	4(vp,n,4), %edx
122	mov	%edx, 4(rp,n,4)
123
124	mov	8(up,n,4), %eax
125	ADCSBB	8(vp,n,4), %eax
126	mov	%eax, 8(rp,n,4)
127
128	mov	12(up,n,4), %edx
129	ADCSBB	12(vp,n,4), %edx
130	mov	%edx, 12(rp,n,4)
131
132	mov	16(up,n,4), %eax
133	ADCSBB	16(vp,n,4), %eax
134	mov	%eax, 16(rp,n,4)
135
136	mov	20(up,n,4), %edx
137	ADCSBB	20(vp,n,4), %edx
138	mov	%edx, 20(rp,n,4)
139
140	mov	24(up,n,4), %eax
141	ADCSBB	24(vp,n,4), %eax
142	mov	%eax, 24(rp,n,4)
143
144	mov	28(up,n,4), %edx
145	ADCSBB	28(vp,n,4), %edx
146	mov	%edx, 28(rp,n,4)
147
148	lea	8(n), n				C index += 8; lea leaves CF alone
149	jmp	L(top)
150
151EPILOGUE()
152
C func_nc -- mpn_add_nc or mpn_sub_nc: the same operation as func but with
C a caller-supplied carry-in.  Nothing has been pushed yet at this point,
C so 20(%esp) is the stack word following the four arguments that func
C reads (rp, up, vp, n at 4..16(%esp)).  Bit 0 of that word becomes the
C initial carry flag through the shr %edx in the shared code.
153PROLOGUE(func_nc)
154	movl	20(%esp), %edx			C carry-in argument
155	jmp	L(start)			C join func, skipping its xor %edx
156EPILOGUE()
157