dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/pa64/aors_n.asm (revision 4d5abbe83f525258eb479e5fca29f25cb943f379)
dnl  HP-PA 2.0 mpn_add_n, mpn_sub_n

dnl  Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
dnl  where the processor gets confused about where carry comes from.

include(`../config.m4')

dnl INPUT PARAMETERS
dnl   rp  result pointer; every computed limb is stored through it
dnl   up  first source operand pointer (loaded with ldd)
dnl   vp  second source operand pointer (loaded with ldd)
dnl   n   limb count; the unrolled loop consumes it eight at a time
dnl The carry-in used by the _nc entry points has no symbolic name; the code
dnl below keeps it in %r22.
define(`rp',`%r26')
define(`up',`%r25')
define(`vp',`%r24')
define(`n',`%r23')

dnl  Operation selection.  The same source yields the add and the subtract
dnl  variants depending on which OPERATION_ symbol is defined (presumably by
dnl  the build system, one per compilation; TODO confirm).
dnl    ADCSBC   per-limb instruction: add with carry or subtract with borrow
dnl    INITCY   flag-setting instruction that seeds the carry/borrow bit from
dnl             the carry-in in %r22 (result is discarded into %r0)
dnl    func     name of the entry point without carry-in
dnl    func_nc  name of the entry point with carry-in
ifdef(`OPERATION_add_n', `
	define(ADCSBC,	      `add,dc')
	define(INITCY,	      `addi -1,%r22,%r0')
	define(func,	      mpn_add_n)
	define(func_nc,	      mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
	define(ADCSBC,	      `sub,db')
	define(INITCY,	      `subi 0,%r22,%r0')
	define(func,	      mpn_sub_n)
	define(func_nc,	      mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

dnl  Emit the assembler .level directive: 2.0w when HAVE_ABI_2_0w is defined,
dnl  plain 2.0 otherwise.
ifdef(`HAVE_ABI_2_0w',
`       .level  2.0w
',`     .level  2.0
')
dnl  func_nc (mpn_add_nc or mpn_sub_nc): entry point taking a carry-in.
dnl  It only has to get the carry-in into %r22 and then joins the shared code
dnl  at L(com) inside func.
dnl    HAVE_ABI_2_0w:  nothing is fetched, %r22 is used as it arrives
dnl                    (presumably the carry-in argument register in that ABI;
dnl                    TODO confirm), so the branch delay slot is a nop.
dnl    otherwise:      the carry-in is loaded as a 32-bit word from -52(%r30)
dnl                    in the branch delay slot.
PROLOGUE(func_nc)
ifdef(`HAVE_ABI_2_0w',
`	b		L(com)
	nop
',`	b		L(com)
	ldw		-52(%r30), %r22
')
EPILOGUE()
dnl  func (mpn_add_n or mpn_sub_n): limb-wise add or subtract of the n-limb
dnl  operands at up and vp, storing the n result limbs at rp.
dnl
dnl  In:    rp, up, vp, n as defined under INPUT PARAMETERS.
dnl  Out:   the final carry (add) or borrow (subtract) is left in %r29;
dnl         %r28 gets a copy of it under HAVE_ABI_2_0w and 0 otherwise
dnl         (presumably %r28:%r29 form the 64-bit return pair in the latter
dnl         case; TODO confirm against the ABI).
dnl  Uses:  %r19, %r20, %r21, %r22, %r28, %r29, %r31 and the carry/borrow bit;
dnl         rp, up, vp and n are modified.
dnl
dnl  The loop is unrolled 8 ways with exactly 4 instructions per limb slot.
dnl  With k = (-n & 7), the first pass must skip k slots.  The pointers are
dnl  therefore moved back by 8*k bytes and blr jumps k slots past L(loop);
dnl  blr is given 2*k because it scales its index register by 8 bytes and a
dnl  slot is 16 bytes.  Do not add or remove instructions inside a slot.
dnl  NOTE(review): with n = 0, k is 0 and a full 8-limb pass would run, so
dnl  callers are presumably required to pass n >= 1.
PROLOGUE(func)
	ldi		0, %r22			C no carry-in on this entry
LDEF(com)
C Both entry points meet here with the carry-in in r22.
	sub		%r0, n, %r21		C r21 = -n
	depw,z		%r21, 30, 3, %r28	C r28 = 2 * (-n & 7)
	depw,z		%r21, 28, 3, %r21	C r21 = 8 * (-n & 7)
	sub		up, %r21, up		C offset up
	sub		vp, %r21, vp		C offset vp
	sub		rp, %r21, rp		C offset rp
	blr		%r28, %r0		C branch into loop
	INITCY					C delay slot: seed carry/borrow from r22

C Each slot: load one limb from up and vp, combine with carry, store to rp.
C Slots alternate between the register pairs r20/r31 and r21/r19.
LDEF(loop)
	ldd		0(up), %r20
	ldd		0(vp), %r31
	ADCSBC		%r20, %r31, %r20
	std		%r20, 0(rp)
LDEF(7)	ldd		8(up), %r21
	ldd		8(vp), %r19
	ADCSBC		%r21, %r19, %r21
	std		%r21, 8(rp)
LDEF(6)	ldd		16(up), %r20
	ldd		16(vp), %r31
	ADCSBC		%r20, %r31, %r20
	std		%r20, 16(rp)
LDEF(5)	ldd		24(up), %r21
	ldd		24(vp), %r19
	ADCSBC		%r21, %r19, %r21
	std		%r21, 24(rp)
LDEF(4)	ldd		32(up), %r20
	ldd		32(vp), %r31
	ADCSBC		%r20, %r31, %r20
	std		%r20, 32(rp)
LDEF(3)	ldd		40(up), %r21
	ldd		40(vp), %r19
	ADCSBC		%r21, %r19, %r21
	std		%r21, 40(rp)
LDEF(2)	ldd		48(up), %r20
	ldd		48(vp), %r31
	ADCSBC		%r20, %r31, %r20
	std		%r20, 48(rp)
C Last slot: the pointer updates and the loop branch are interleaved with it.
C They sit inside the carry chain, so the chain relies on ldo and addib
C leaving the carry/borrow bit untouched.
LDEF(1)	ldd		56(up), %r21
	ldd		56(vp), %r19
	ADCSBC		%r21, %r19, %r21
	ldo		64(up), up		C up += 8 limbs
	std		%r21, 56(rp)		C rp not advanced yet
	ldo		64(vp), vp		C vp += 8 limbs
	addib,>		-8, n, L(loop)		C n -= 8, loop while n > 0
	ldo		64(rp), rp		C delay slot: rp += 8 limbs

	add,dc		%r0, %r0, %r29		C r29 = 0 + 0 + carry bit
C For subtraction r29 is turned into 1 - r29, converting the carry bit left by
C the sub,db chain into the borrow to return.
ifdef(`OPERATION_sub_n',`
	subi		1, %r29, %r29
')
	bve		(%r2)			C return; next insn is the delay slot
ifdef(`HAVE_ABI_2_0w',
`	copy		%r29, %r28
',`	ldi		0, %r28
')
EPILOGUE()
