xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/alpha/aorslsh2_n.asm (revision 924795e69c8bb3f17afd8fcbb799710cc1719dc4)
1dnl  Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
2
3dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C      cycles/limb
34C EV4:     ?
35C EV5:     6
36C EV6:     3.75
37
38C TODO
39C  * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5.
40
C Operand registers.  Per the file header the operation is
C rp[] = up[] +- (vp[] << 2); n is the limb count the loop counts down.
C NOTE(review): r16-r19 are presumably the first four integer argument
C registers of the Alpha calling convention -- confirm against the ABI.
41define(`rp',`r16')
42define(`up',`r17')
43define(`vp',`r18')
44define(`n', `r19')
45
C Source limbs.  u0/u1 are loaded from up and v0/v1 from vp; the two names
C of each pair alternate between consecutive limbs in the unrolled loop.
46define(`u0', `r8')
47define(`u1', `r1')
48define(`v0', `r4')
49define(`v1', `r5')
50
C Working registers.
C   cy0, cy1  carry (borrow) passed from one limb to the next, alternating.
C             cy0 is r0, the register the routine also returns its result in;
C             on entry it briefly holds n & 2 for the dispatch.
C   cy        scratch for the carry out of the carry-in step.
C   rr        result limb that gets stored through rp.
C   ps        partial result, u +- sl, before the carry-in is applied.
C   sl        4*v plus the two bits shifted out of the previous v limb.
C r2 and r3 are used unnamed in the code to hold those two shifted-out bits.
51define(`cy0', `r0')
52define(`cy1', `r20')
53define(`cy', `r22')
54define(`rr', `r24')
55define(`ps', `r25')
56define(`sl', `r28')
57
C Select the flavour to build.  Each branch defines three macros:
C   ADDSUB  the 64-bit add or subtract instruction used for both steps of a limb
C   CARRY   CARRY(x, y, d) emits a cmpult that sets d to 1 when the preceding
C           ADDSUB wrapped around, where x is its result and y its first operand
C   func    the name of the entry point
C There is no fallback branch: if neither OPERATION_ symbol is defined the
C three macros stay undefined.  NOTE(review): presumably the build defines
C exactly one of the two symbols per object -- confirm.
C
C Addition: the sum wrapped iff it is below the addend, so compare x < y.
58ifdef(`OPERATION_addlsh2_n',`
59  define(ADDSUB,       addq)
60  define(CARRY,       `cmpult $1,$2,$3')
61  define(func, mpn_addlsh2_n)
62')
C Subtraction: a borrow occurred iff the minuend is below the difference, so
C the cmpult operands are swapped to compare y < x.
63ifdef(`OPERATION_sublsh2_n',`
64  define(ADDSUB,       subq)
65  define(CARRY,       `cmpult $2,$1,$3')
66  define(func, mpn_sublsh2_n)
67')
68
C MULFUNC_PROLOGUE names the two entry points this one source can be built
C as.  NOTE(review): presumably consumed by the GMP configure machinery --
C confirm in the m4 support files.
69MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
70
71ASM_START()
C func:  rp[0..n-1] = up[0..n-1] +- (vp[0..n-1] << 2)
C
C Inputs:   rp, up, vp = limb pointers, n = limb count (registers as given by
C           the defines earlier in this file).  The first v and u limbs are
C           loaded unconditionally, so n >= 1 is presumably required --
C           confirm against callers.
C Returns:  r0 = carry (borrow) out of the top limb plus the two bits shifted
C           out of the top v limb.
C Modifies: rp, up, vp, n and r0-r5, r8, r20, r22, r24, r25, r28.
C
C The loop is unrolled four limbs deep and software pipelined: every stage
C loads the limbs that the following stage consumes.  Entry dispatches on
C n mod 4 and jumps to the matching stage with rp, up and vp pre-biased so
C that the fixed displacements inside the loop address the right limbs.  All
C paths leave through L(end), which finishes the last limb from v0/u0.
72PROLOGUE(func)
73	and	n, 2, cy0	C cy0 = n & 2, used only for the dispatch below
74	blbs	n, L(bx1)	C n odd
C n even: the first limb pair goes to v1/u1 and no bits come in from below.
75L(bx0):	ldq	v1, 0(vp)
76	ldq	u1, 0(up)
77	bis	r31, r31, r2	C r2 = 0
78	bne	cy0, L(b10)	C n mod 4 == 2
79
C n mod 4 == 0: enter at L(lo0).  sl is formed here because the entry skips
C the s4addq just ahead of L(lo0); cy0 is n & 2 == 0 and so already serves as
C a zero carry-in.
80L(b00):	lda	vp, 48(vp)
81	lda	up, -16(up)
82	lda	rp, -8(rp)
83	s4addq	v1, r31, sl	C sl = 4*v1
84	br	r31, L(lo0)
85
C n mod 4 == 2: enter at L(lo2).  cy0 held n & 2 (nonzero), so clear it
C before it is consumed as the carry-in.
86L(b10):	lda	vp, 32(vp)
87	lda	rp, 8(rp)
88	lda	cy0, 0(r31)	C cy0 = 0
89	br	r31, L(lo2)
90
C n odd: the first limb pair goes to v0/u0, with zero carry-in and no bits
C coming in from below.
91L(bx1):	ldq	v0, 0(vp)
92	ldq	u0, 0(up)
93	lda	cy1, 0(r31)	C cy1 = 0
94	bis	r31, r31, r3	C r3 = 0
95	nop			C NOTE(review): presumably scheduling padding -- confirm
96	beq	cy0, L(b01)	C n mod 4 == 1
97
C n mod 4 == 3: enter at L(lo3).
98L(b11):	lda	vp, 40(vp)
99	lda	up, -24(up)
100	lda	rp, 16(rp)
101	br	r31, L(lo3)
102
C n mod 4 == 1: a single limb goes straight to L(end); otherwise fall into
C the top of the loop.
103L(b01):	lda	n, -4(n)
104	ble	n, L(end)	C n was 1
105	lda	vp, 24(vp)
106	lda	up, -8(up)
107
C Main loop, four limbs per pass.  Each stage computes
C   sl = 4*v + bits from previous v,  ps = u +- sl,  rr = ps +- carry-in
C and adds the carry-outs of the two ADDSUB steps to form the carry-in of the
C next stage.  cy0 and cy1 alternate as that carry, r2 and r3 as the bits.
108	ALIGN(16)
109L(top):	s4addq	v0, r3, sl	C combined vlimb: 4*v0 + high bits of previous limb
110	ldq	v1, -16(vp)
111	ADDSUB	u0, sl, ps	C ulimb +- (vlimb << 2)
112	ldq	u1, 16(up)
113	srl	v0, 62, r2	C high v bits
114	ADDSUB	ps, cy1, rr	C consume carry from previous operation
115	CARRY(	ps, u0, cy0)	C carry out of the ulimb +- sl step
116	stq	rr, 0(rp)
117	CARRY(	rr, ps, cy)	C carry out of the carry-in step
118	lda	vp, 32(vp)	C bookkeeping
119	addq	cy, cy0, cy0	C final carry out
120	s4addq	v1, r2, sl	C combined vlimb for the L(lo0) stage
121L(lo0):	ldq	v0, -40(vp)	C entry point for n mod 4 == 0
122	ADDSUB	u1, sl, ps
123	ldq	u0, 24(up)
124	srl	v1, 62, r3	C high v bits
125	ADDSUB	ps, cy0, rr
126	CARRY(	ps, u1, cy1)
127	stq	rr, 8(rp)
128	CARRY(	rr, ps, cy)
129	lda	rp, 32(rp)	C bookkeeping
130	addq	cy, cy1, cy1
131L(lo3):	s4addq	v0, r3, sl	C entry point for n mod 4 == 3
132	ldq	v1, -32(vp)
133	ADDSUB	u0, sl, ps
134	ldq	u1, 32(up)
135	srl	v0, 62, r2	C high v bits
136	ADDSUB	ps, cy1, rr
137	CARRY(	ps, u0, cy0)
138	stq	rr, -16(rp)
139	CARRY(	rr, ps, cy)
140	lda	up, 32(up)	C bookkeeping
141	addq	cy, cy0, cy0
142L(lo2):	s4addq	v1, r2, sl	C entry point for n mod 4 == 2
143	ldq	v0, -24(vp)
144	ADDSUB	u1, sl, ps
145	ldq	u0, 8(up)
146	srl	v1, 62, r3	C high v bits
147	ADDSUB	ps, cy0, rr
148	CARRY(	ps, u1, cy1)
149	stq	rr, -8(rp)
150	CARRY(	rr, ps, cy)
151	lda	n, -4(n)	C bookkeeping
152	addq	cy, cy1, cy1
153	bgt	n, L(top)	C loop while the decremented count is still positive
154
C Tail: finish the limb pending in v0/u0 with carry-in cy1 and bits r3, then
C return the final carry plus the two bits shifted out of the top v limb.
155L(end):	s4addq	v0, r3, sl
156	ADDSUB	u0, sl, ps
157	srl	v0, 62, r2	C bits shifted out of the top v limb
158	ADDSUB	ps, cy1, rr
159	CARRY(	ps, u0, cy0)
160	stq	rr, 0(rp)
161	CARRY(	rr, ps, cy)
162	addq	cy, cy0, cy0
163	addq	cy0, r2, r0	C return value
164
165	ret	r31,(r26),1	C return through the address in r26
166EPILOGUE()
167ASM_END()
168