xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/alpha/sub_n.asm (revision 567219e1d7461bff1b180e494a9674a287b057a7)
1dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
2dnl  and store difference in a third limb vector.
3
4dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C      cycles/limb
24C EV4:     ?
25C EV5:     4.75
26C EV6:     3
27
28dnl  INPUT PARAMETERS
29dnl  res_ptr	r16
30dnl  s1_ptr	r17
31dnl  s2_ptr	r18
32dnl  size	r19
33
34ASM_START()
35PROLOGUE(mpn_sub_n)
36	bis	r31,r31,r25		C clear cy (r25 carries the borrow between limbs)
37	subq	r19,4,r19		C decr loop cnt (r19 = size - 4)
38	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
39C Start software pipeline for 1st loop: load 4 limb pairs (s2 in r0-r3, s1 in r4-r7) and subtract the first two
40	ldq	r0,0(r18)		C s2 limb 0
41	ldq	r4,0(r17)		C s1 limb 0
42	ldq	r1,8(r18)		C s2 limb 1
43	ldq	r5,8(r17)		C s1 limb 1
44	addq	r17,32,r17		C update s1_ptr
45	ldq	r2,16(r18)		C s2 limb 2
46	subq	r4,r0,r20		C 1st main subtract (no incoming borrow yet)
47	ldq	r3,24(r18)		C s2 limb 3
48	subq	r19,4,r19		C decr loop cnt
49	ldq	r6,-16(r17)		C s1 limb 2 (s1_ptr already advanced)
50	cmpult	r4,r0,r25		C cy = borrow from 1st main subtract
51	ldq	r7,-8(r17)		C s1 limb 3 (s1_ptr already advanced)
52	subq	r5,r1,r28		C 2nd main subtract
53	addq	r18,32,r18		C update s2_ptr
54	subq	r28,r25,r21		C 2nd carry subtract
55	cmpult	r5,r1,r8		C r8 = borrow from 2nd main subtract
56	blt	r19,$Lend1		C if less than 4 limbs remain, skip loop and drain pipeline
57C 1st loop handles groups of 4 limbs in a software pipeline: each pass finishes limbs 3-4 of the loaded group, stores it, and starts limbs 1-2 of the next
58	ALIGN(16)
59$Loop:	cmpult	r28,r25,r25		C borrow from 2nd carry subtract
60	ldq	r0,0(r18)		C next group: s2 limb 0
61	bis	r8,r25,r25		C combine cy from the two subtracts
62	ldq	r1,8(r18)		C next group: s2 limb 1
63	subq	r6,r2,r28		C 3rd main subtract
64	ldq	r4,0(r17)		C next group: s1 limb 0
65	subq	r28,r25,r22		C 3rd carry subtract
66	ldq	r5,8(r17)		C next group: s1 limb 1
67	cmpult	r6,r2,r8		C borrow from 3rd main subtract
68	cmpult	r28,r25,r25		C borrow from 3rd carry subtract
69	stq	r20,0(r16)		C store 1st difference limb
70	bis	r8,r25,r25		C combine cy from the two subtracts
71	stq	r21,8(r16)		C store 2nd difference limb
72	subq	r7,r3,r28		C 4th main subtract
73	subq	r28,r25,r23		C 4th carry subtract
74	cmpult	r7,r3,r8		C borrow from 4th main subtract
75	cmpult	r28,r25,r25		C borrow from 4th carry subtract
76		addq	r17,32,r17		C update s1_ptr
77	bis	r8,r25,r25		C combine cy from the two subtracts
78		addq	r16,32,r16		C update res_ptr
79	subq	r4,r0,r28		C 1st main subtract (next group)
80	ldq	r2,16(r18)		C next group: s2 limb 2
81	subq	r28,r25,r20		C 1st carry subtract
82	ldq	r3,24(r18)		C next group: s2 limb 3
83	cmpult	r4,r0,r8		C borrow from 1st main subtract
84	ldq	r6,-16(r17)		C next group: s1 limb 2 (s1_ptr already advanced)
85	cmpult	r28,r25,r25		C borrow from 1st carry subtract
86	ldq	r7,-8(r17)		C next group: s1 limb 3 (s1_ptr already advanced)
87	bis	r8,r25,r25		C combine cy from the two subtracts
88	subq	r19,4,r19		C decr loop cnt
89	stq	r22,-16(r16)		C store 3rd difference limb (res_ptr already advanced)
90	subq	r5,r1,r28		C 2nd main subtract
91	stq	r23,-8(r16)		C store 4th difference limb (res_ptr already advanced)
92	subq	r28,r25,r21		C 2nd carry subtract
93		addq	r18,32,r18		C update s2_ptr
94	cmpult	r5,r1,r8		C borrow from 2nd main subtract
95	bge	r19,$Loop		C repeat while another full group of 4 limbs remains
96C Finish software pipeline for 1st loop: complete limbs 3-4 of the last loaded group and store all four results
97$Lend1:	cmpult	r28,r25,r25		C borrow from 2nd carry subtract
98	bis	r8,r25,r25		C combine cy from the two subtracts
99	subq	r6,r2,r28		C 3rd main subtract
100	subq	r28,r25,r22		C 3rd carry subtract
101	cmpult	r6,r2,r8		C borrow from 3rd main subtract
102	cmpult	r28,r25,r25		C borrow from 3rd carry subtract
103	stq	r20,0(r16)		C store 1st difference limb
104	bis	r8,r25,r25		C combine cy from the two subtracts
105	stq	r21,8(r16)		C store 2nd difference limb
106	subq	r7,r3,r28		C 4th main subtract
107	subq	r28,r25,r23		C 4th carry subtract
108	cmpult	r7,r3,r8		C borrow from 4th main subtract
109	cmpult	r28,r25,r25		C borrow from 4th carry subtract
110	bis	r8,r25,r25		C combine cy from the two subtracts
111	addq	r16,32,r16		C update res_ptr
112	stq	r22,-16(r16)		C store 3rd difference limb (res_ptr already advanced)
113	stq	r23,-8(r16)		C store 4th difference limb (res_ptr already advanced)
114$Lend2:	addq	r19,4,r19		C restore loop cnt (r19 = limbs still to process, 0-3)
115	beq	r19,$Lret		C nothing left: return cy as is
116C Start software pipeline for 2nd loop: preload the first remaining limb pair
117	ldq	r0,0(r18)		C s2 limb
118	ldq	r4,0(r17)		C s1 limb
119	subq	r19,1,r19		C decr loop cnt
120	beq	r19,$Lend0		C only one limb left: it is already loaded, skip loop
121C 2nd loop handles remaining 1-3 limbs, one per pass; the last limb is always finished at $Lend0
122	ALIGN(16)
123$Loop0:	subq	r4,r0,r28		C main subtract
124	cmpult	r4,r0,r8		C compute cy from last subtract
125	ldq	r0,8(r18)		C preload next s2 limb (before s2_ptr is advanced)
126	ldq	r4,8(r17)		C preload next s1 limb (before s1_ptr is advanced)
127	subq	r28,r25,r20		C carry subtract
128	addq	r18,8,r18		C update s2_ptr
129	addq	r17,8,r17		C update s1_ptr
130	stq	r20,0(r16)		C store difference limb
131	cmpult	r28,r25,r25		C compute cy from last subtract
132	subq	r19,1,r19		C decr loop cnt
133	bis	r8,r25,r25		C combine cy from the two subtracts
134	addq	r16,8,r16		C update res_ptr
135	bne	r19,$Loop0		C repeat until only the preloaded limb pair is left
136$Lend0:	subq	r4,r0,r28		C main subtract (final limb)
137	subq	r28,r25,r20		C carry subtract
138	cmpult	r4,r0,r8		C compute cy from last subtract
139	cmpult	r28,r25,r25		C compute cy from last subtract
140	stq	r20,0(r16)		C store final difference limb
141	bis	r8,r25,r25		C combine cy from the two subtracts
142
143$Lret:	bis	r25,r31,r0		C return cy (borrow out of the most significant limb) in r0
144	ret	r31,(r26),1		C return to the address held in r26
145EPILOGUE(mpn_sub_n)
146ASM_END()
147