xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/pa64/aors_n.asm (revision 901e7e84758515fbf39dfc064cb0b45ab146d8b0)
1dnl  HP-PA 2.0 mpn_add_n, mpn_sub_n
2
3dnl  Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
4dnl  Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32
33dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
34dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
35dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
36dnl  where the processor gets confused about where carry comes from.
37
38include(`../config.m4')
39
40dnl INPUT PARAMETERS
41define(`rp',`%r26')	C destination pointer, written with std
42define(`up',`%r25')	C first source pointer, left operand of ADCSBC
43define(`vp',`%r24')	C second source pointer, right operand of ADCSBC
44define(`n',`%r23')	C limb count, reduced by 8 on every loop pass
45
dnl  Operation-specific pieces, chosen by whether OPERATION_add_n or
dnl  OPERATION_sub_n is defined when this file is processed:
dnl    ADCSBC   the carry-propagating instruction applied to every limb pair
dnl             (add,dc for addition, sub,db for subtraction)
dnl    INITCY   one instruction that primes the carry bit from the carry-in held
dnl             in %r22; its arithmetic result is thrown away into %r0.  For add
dnl             it computes -1 + %r22, which carries out exactly when %r22 is
dnl             nonzero; for sub it computes 0 - %r22, which borrows exactly
dnl             when %r22 is nonzero.
dnl    func     name of the entry point that starts with a zero carry-in
dnl    func_nc  name of the entry point that takes a carry-in
46ifdef(`OPERATION_add_n', `
47	define(ADCSBC,	      `add,dc')
48	define(INITCY,	      `addi -1,%r22,%r0')
49	define(func,	      mpn_add_n)
50	define(func_nc,	      mpn_add_nc)')
51ifdef(`OPERATION_sub_n', `
52	define(ADCSBC,	      `sub,db')
53	define(INITCY,	      `subi 0,%r22,%r0')
54	define(func,	      mpn_sub_n)
55	define(func_nc,	      mpn_sub_nc)')
56
dnl  Names the four entry points this source can provide (one pair per build,
dnl  as selected by the OPERATION_ symbols).  NOTE(review): MULFUNC_PROLOGUE is
dnl  defined outside this file; presumably the build machinery reads this list.
57MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
58
dnl  Assembler level: 2.0w when HAVE_ABI_2_0w is defined, plain 2.0 otherwise.
59ifdef(`HAVE_ABI_2_0w',
60`       .level  2.0w
61',`     .level  2.0
62')
dnl  func_nc: entry point with a carry-in.  Its only job is to have the
dnl  carry-in in %r22 and then join the shared code at L(com) inside func.
dnl  With HAVE_ABI_2_0w nothing is loaded, so %r22 is used as it arrives
dnl  (presumably the carry-in is passed in that register -- confirm against the
dnl  ABI).  Otherwise the word at -52(%r30) is loaded into %r22 (presumably the
dnl  stack slot of the carry-in argument -- confirm against the ABI).
63PROLOGUE(func_nc)
64ifdef(`HAVE_ABI_2_0w',
65`	b		L(com)
66	nop					C branch delay slot, nothing to do
67',`	b		L(com)
68	ldw		-52(%r30), %r22		C branch delay slot: load carry-in word
69')
70EPILOGUE()
dnl  func: for each of the n 8-byte limbs, store (up limb) ADCSBC (vp limb) to
dnl  rp, chaining the carry bit from limb to limb.  This entry starts with a
dnl  zero carry-in; func_nc joins at L(com) with its carry-in in %r22.
dnl
dnl  The loop is unrolled 8 times.  The first pass is entered part way through
dnl  by a computed branch so that it handles n mod 8 limbs (a full 8 when n is a
dnl  multiple of 8); every later pass handles 8.  The pointers are moved back
dnl  beforehand so the displacement at the entry point addresses limb 0.
dnl
dnl  Result: the final carry bit for add, or 1 minus the final carry bit for
dnl  sub, is produced in %r29.  With HAVE_ABI_2_0w it is copied to %r28;
dnl  otherwise %r28 is set to 0 and the value is left in %r29.
dnl
dnl  Registers written: %r19, %r20, %r21, %r22, %r28, %r29, %r31, plus the
dnl  argument registers rp, up, vp and n.
71PROLOGUE(func)
72	ldi		0, %r22			C plain entry: carry-in = 0
73LDEF(com)
74	sub		%r0, n, %r21		C r21 = -n
75	depw,z		%r21, 30, 3, %r28	C r28 = 2 * (-n & 7)
76	depw,z		%r21, 28, 3, %r21	C r21 = 8 * (-n & 7)
77	sub		up, %r21, up		C offset up
78	sub		vp, %r21, vp		C offset vp
79	sub		rp, %r21, rp		C offset rp
80	blr		%r28, %r0		C branch into loop
81	INITCY					C delay slot: prime carry from %r22, after the subs above
82
C Each limb below is one group of four instructions (16 bytes).  blr scales its
C index by 8 bytes, hence the index of 2 per skipped group: (-n & 7) groups are
C skipped on the first pass.  LDEF(k) marks where the first pass starts when
C n mod 8 = k; these labels document the entry points and are not branched to
C by name in this file.
C
C Nothing between consecutive ADCSBC instructions may change the carry bit.
C That includes the ldo and addib at the bottom of the loop, which the carry
C chain passes through between passes.
83LDEF(loop)
84	ldd		0(up), %r20
85	ldd		0(vp), %r31
86	ADCSBC		%r20, %r31, %r20
87	std		%r20, 0(rp)
88LDEF(7)	ldd		8(up), %r21
89	ldd		8(vp), %r19
90	ADCSBC		%r21, %r19, %r21
91	std		%r21, 8(rp)
92LDEF(6)	ldd		16(up), %r20
93	ldd		16(vp), %r31
94	ADCSBC		%r20, %r31, %r20
95	std		%r20, 16(rp)
96LDEF(5)	ldd		24(up), %r21
97	ldd		24(vp), %r19
98	ADCSBC		%r21, %r19, %r21
99	std		%r21, 24(rp)
100LDEF(4)	ldd		32(up), %r20
101	ldd		32(vp), %r31
102	ADCSBC		%r20, %r31, %r20
103	std		%r20, 32(rp)
104LDEF(3)	ldd		40(up), %r21
105	ldd		40(vp), %r19
106	ADCSBC		%r21, %r19, %r21
107	std		%r21, 40(rp)
108LDEF(2)	ldd		48(up), %r20
109	ldd		48(vp), %r31
110	ADCSBC		%r20, %r31, %r20
111	std		%r20, 48(rp)
112LDEF(1)	ldd		56(up), %r21
113	ldd		56(vp), %r19
114	ADCSBC		%r21, %r19, %r21
115	ldo		64(up), up		C advance up by 8 limbs
116	std		%r21, 56(rp)		C last store still uses the old rp
117	ldo		64(vp), vp		C advance vp by 8 limbs
118	addib,>		-8, n, L(loop)		C n -= 8, loop again while n > 0
119	ldo		64(rp), rp		C delay slot: advance rp by 8 limbs
120
121	add,dc		%r0, %r0, %r29		C r29 = 0 + 0 + carry bit
122ifdef(`OPERATION_sub_n',`
123	subi		1, %r29, %r29		C sub only: r29 = 1 - carry bit
124')
125	bve		(%r2)			C return through %r2
126ifdef(`HAVE_ABI_2_0w',
127`	copy		%r29, %r28		C delay slot: result into r28
128',`	ldi		0, %r28			C delay slot: r28 = 0, result stays in r29
129')
130EPILOGUE()
131