xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/pa64/aorslsh1_n.asm (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1dnl  PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
2
3dnl  Copyright 2003 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C		    cycles/limb
34C 8000,8200:		2
35C 8500,8600,8700:	1.75
36
37C TODO
38C  * Write special feed-in code for each (n mod 8). (See the ia64 code.)
39C  * Try to make this run at closer to 1.5 c/l.
40C  * Set up register aliases (define(`u0',`%r19')).
41C  * Explicitly align loop.
42
43dnl INPUT PARAMETERS
dnl  Register aliases for the four arguments.  Going by the header comment
dnl  (rp[] = up[] +- (vp[] << 1)) and by how the code below uses them:
dnl    rp  destination limb pointer (stored through, advanced)
dnl    up  first source limb pointer (loaded, advanced)
dnl    vp  second source limb pointer, the operand that is shifted left by 1
dnl    n   limb count (its low 3 bits drive loop0, the rest the 8-way loop)
44define(`rp',`%r26')
45define(`up',`%r25')
46define(`vp',`%r24')
47define(`n',`%r23')
48
dnl  Per-operation macros, selected by whichever OPERATION_ symbol is defined:
dnl    ADCSBC  the per-limb instruction: add,dc for addlsh1, sub,db for sublsh1
dnl    INITC   loads the initial saved-carry value into its register operand:
dnl            0 for addlsh1, 1 for sublsh1.  The code restores the flag with
dnl            addi -1, so 1 yields a carry-out and 0 does not; presumably
dnl            carry set means no borrow for sub,db -- confirm against the
dnl            PA-RISC manual.
dnl    func    the entry point name passed to PROLOGUE
49ifdef(`OPERATION_addlsh1_n',`
50  define(ADCSBC,	`add,dc')
51  define(INITC,		`ldi	0,')
52  define(func, mpn_addlsh1_n)
53')
54ifdef(`OPERATION_sublsh1_n',`
55  define(ADCSBC,	`sub,db')
56  define(INITC,		`ldi	1,')
57  define(func, mpn_sublsh1_n)
58')
59
dnl  Lists both entry points this file can produce.
dnl  NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it
dnl  is how the build learns the function names -- confirm.
60MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
61
dnl  Per-ABI macros:
dnl    LEVEL    the .level directive emitted before the function
dnl    RETREG   register that receives the carry-out limb: %r28 for 2.0w,
dnl             %r29 for 2.0n
dnl    CLRRET1  for 2.0n, clears %r28 after RETREG has been written
dnl             (presumably %r28 is the upper half of a 64-bit return pair in
dnl             that ABI -- confirm); for 2.0w it expands to dnl, so its line
dnl             emits no instruction.
62ifdef(`HAVE_ABI_2_0w',`
63  define(LEVEL,		`.level 2.0w')
64  define(RETREG,	`%r28')
65  define(CLRRET1,	`dnl')
66')
67ifdef(`HAVE_ABI_2_0n',`
68  define(LEVEL,		`.level 2.0')
69  define(RETREG,	`%r29')
70  define(CLRRET1,	`ldi	0, %r28')
71')
72
dnl  Emit the .level directive chosen above.
73	LEVEL
74PROLOGUE(func)
C Entry point for mpn_addlsh1_n or mpn_sublsh1_n (func is chosen by the
C OPERATION_ symbol): rp[] = up[] +- (vp[] << 1) over n limbs, with the
C carry-out limb left in RETREG.
C
C Register roles, as used below:
C   %r1		carry saved between blocks (see the save cy / restore cy pairs)
C   %r28	last vp limb loaded; supplies the bit shifted into the next limb
C   %r31, %r4-%r9, %r3	shifted vp limbs fed to ADCSBC
C   %r19-%r22	up limbs (%r22 first serves as the loop0 trip count)
C   %r29	result limb on its way to rp
C
C Stack: the std,ma below stores %r3 and moves %r30 up by 0x100; the unrolled
C path additionally stores %r4-%r9 at -0xf8 .. -0xd0 from the new %r30.  Each
C return path reloads what it saved, and ldd,mb moves %r30 back.
C
C Structure: loop0 handles the first (n mod 8) limbs one at a time, then the
C remaining limbs are processed in blocks of 8 by a software-pipelined loop
C (loads for the next block are issued while the current block is summed).
75	std,ma		%r3, 0x100(%r30)	C save reg
76
77	INITC		%r1			C init saved cy
78
79C Primitive code for the first (n mod 8) limbs:
80	extrd,u		n, 63, 3, %r22		C count for loop0
81	comib,=		0, %r22, L(unrolled)	C skip loop0?
82	copy		%r0, %r28		C delay slot: no previous vp limb yet, shift in 0
83LDEF(loop0)
C One limb per pass:
C   rp[0] = up[0] +- ((vp[0] << 1) | top bit of the previous vp limb)
84	ldd	0(vp), %r21
85	ldo		8(vp), vp
86	ldd	0(up), %r19
87	ldo		8(up), up
88	shrpd	%r21, %r28, 63, %r31		C %r31 = (%r21 << 1) | (%r28 >> 63)
89	addi		-1, %r1, %r0		C restore cy
90	ADCSBC	%r19, %r31, %r29
91	std	%r29, 0(rp)
92	add,dc		%r0, %r0, %r1		C save cy
93	copy	%r21, %r28			C keep this limb for the next shift
94	addib,>		-1, %r22, L(loop0)
95	ldo		8(rp), rp		C delay slot: advance rp on every pass
96
C n still holds the full count here.  n -= 8; if the result is >= 0 there is
C at least one full block of 8 limbs left, so continue in the unrolled code.
97	addib,>=	-8, n, L(unrolled)
98	addi		-1, %r1, %r0		C restore cy
99
C Fewer than 8 limbs in total.  Return value: the bit shifted out of the last
C vp limb, combined with the pending carry by ADCSBC against zero.
100	shrpd	%r0, %r28, 63, %r28		C %r28 = top bit of the last vp limb
101	ADCSBC	%r0, %r28, RETREG
C For sublsh1 the ADCSBC above computed 0 - %r28 - borrow; the sub from %r0
C below negates it so the returned borrow count is non-negative.
102ifdef(`OPERATION_sublsh1_n',
103`	sub	%r0, RETREG, RETREG')
104	CLRRET1
105
106	bve		(%r2)
107	ldd,mb		-0x100(%r30), %r3	C delay slot: restore reg, release frame
108
109
110LDEF(unrolled)
C Feed-in for the 8-way loop: save %r4-%r9, load 8 vp limbs and shift each
C left by one bit (each shrpd pulls in the top bit of the preceding limb,
C starting from %r28), and load the first 4 up limbs.  After this, %r31,
C %r4-%r9 and %r3 hold the 8 shifted limbs in order and %r28 holds the raw
C eighth vp limb for the next block.
111	std		%r4, -0xf8(%r30)	C save reg
112	ldd	0(vp), %r4
113	std		%r5, -0xf0(%r30)	C save reg
114	ldd	8(vp), %r5
115	std		%r6, -0xe8(%r30)	C save reg
116	ldd	16(vp), %r6
117	std		%r7, -0xe0(%r30)	C save reg
118
119	ldd	24(vp), %r7
120	shrpd	%r4, %r28, 63, %r31
121	std		%r8, -0xd8(%r30)	C save reg
122	ldd	32(vp), %r8
123	shrpd	%r5, %r4, 63, %r4
124	std		%r9, -0xd0(%r30)	C save reg
125	ldd	40(vp), %r9
126	shrpd	%r6, %r5, 63, %r5
127	ldd	48(vp), %r3
128	shrpd	%r7, %r6, 63, %r6
129	ldd	56(vp), %r28
130	shrpd	%r8, %r7, 63, %r7
131	ldd	0(up), %r19
132	shrpd	%r9, %r8, 63, %r8
133	ldd	8(up), %r20
134	shrpd	%r3, %r9, 63, %r9
135	ldd	16(up), %r21
136	shrpd	%r28, %r3, 63, %r3
137	ldd	24(up), %r22
138
139	nop					C alignment FIXME
C n -= 8; if the result is <= 0 the block just loaded is the last one, so skip
C the loop (which would preload another block) and finish in L(end).
140	addib,<=	-8, n, L(end)
141	addi		-1, %r1, %r0		C restore cy
142LDEF(loop)
C Sum the current block of 8 limbs.  The first four ADCSBC consume up limbs
C 0-3 from %r19-%r22 while those registers are reloaded with up limbs 4-7;
C the last four consume those.  The carry flag is live across all eight
C ADCSBC, so nothing in between may disturb it.  %r4-%r6 are reloaded with
C raw vp limbs of the next block as soon as their shifted values are used.
143	ADCSBC	%r19, %r31, %r29
144	ldd	32(up), %r19
145	std	%r29, 0(rp)
146	ADCSBC	%r20, %r4, %r29
147	ldd	40(up), %r20
148	std	%r29, 8(rp)
149	ADCSBC	%r21, %r5, %r29
150	ldd	48(up), %r21
151	std	%r29, 16(rp)
152	ADCSBC	%r22, %r6, %r29
153	ldd	56(up), %r22
154	std	%r29, 24(rp)
155	ADCSBC	%r19, %r7, %r29
156	ldd	64(vp), %r4
157	std	%r29, 32(rp)
158	ADCSBC	%r20, %r8, %r29
159	ldd	72(vp), %r5
160	std	%r29, 40(rp)
161	ADCSBC	%r21, %r9, %r29
162	ldd	80(vp), %r6
163	std	%r29, 48(rp)
164	ADCSBC	%r22, %r3, %r29
165	std	%r29, 56(rp)
166
167	add,dc		%r0, %r0, %r1		C save cy
168
C Finish loading and shifting the next block of vp limbs (same pattern as the
C feed-in code; %r28 still holds the last raw limb of the block just summed
C until it is reloaded), and load the first 4 up limbs of the next block.
169	ldd	88(vp), %r7
170	shrpd	%r4, %r28, 63, %r31
171	ldd	96(vp), %r8
172	shrpd	%r5, %r4, 63, %r4
173	ldd	104(vp), %r9
174	shrpd	%r6, %r5, 63, %r5
175	ldd	112(vp), %r3
176	shrpd	%r7, %r6, 63, %r6
177	ldd	120(vp), %r28
178	shrpd	%r8, %r7, 63, %r7
179	ldd	64(up), %r19
180	shrpd	%r9, %r8, 63, %r8
181	ldd	72(up), %r20
182	shrpd	%r3, %r9, 63, %r9
183	ldd	80(up), %r21
184	shrpd	%r28, %r3, 63, %r3
185	ldd	88(up), %r22
186
C Step all three pointers past the block just summed (8 limbs = 64 bytes).
187	ldo		64(vp), vp
188	ldo		64(rp), rp
189	ldo		64(up), up
C n -= 8; loop again only while a further block follows the one now loaded.
190	addib,>		-8, n, L(loop)
191	addi		-1, %r1, %r0		C restore cy
192LDEF(end)
C Wind-down: sum the final, already loaded and shifted block.  Same ADCSBC
C sequence as in the loop, but the slots that preloaded the next vp limbs
C are used to reload the saved %r4-%r7 once each is no longer needed.
193	ADCSBC	%r19, %r31, %r29
194	ldd	32(up), %r19
195	std	%r29, 0(rp)
196	ADCSBC	%r20, %r4, %r29
197	ldd	40(up), %r20
198	std	%r29, 8(rp)
199	ADCSBC	%r21, %r5, %r29
200	ldd	48(up), %r21
201	std	%r29, 16(rp)
202	ADCSBC	%r22, %r6, %r29
203	ldd	56(up), %r22
204	std	%r29, 24(rp)
205	ADCSBC	%r19, %r7, %r29
206	ldd		-0xf8(%r30), %r4	C restore reg
207	std	%r29, 32(rp)
208	ADCSBC	%r20, %r8, %r29
209	ldd		-0xf0(%r30), %r5	C restore reg
210	std	%r29, 40(rp)
211	ADCSBC	%r21, %r9, %r29
212	ldd		-0xe8(%r30), %r6	C restore reg
213	std	%r29, 48(rp)
214	ADCSBC	%r22, %r3, %r29
215	ldd		-0xe0(%r30), %r7	C restore reg
216	std	%r29, 56(rp)
217
C Return value, formed as in the short exit path: top bit of the last vp limb
C combined with the still-live carry, then negated for sublsh1.
218	shrpd	%r0, %r28, 63, %r28
219	ldd		-0xd8(%r30), %r8	C restore reg
220	ADCSBC	%r0, %r28, RETREG
221ifdef(`OPERATION_sublsh1_n',
222`	sub	%r0, RETREG, RETREG')
223	CLRRET1
224
225	ldd		-0xd0(%r30), %r9	C restore reg
226	bve		(%r2)
227	ldd,mb		-0x100(%r30), %r3	C restore reg
228EPILOGUE()
229