xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/alpha/sub_n.asm (revision c3ab26950fe8540fb553d1d1dcae454bc98e5a25)
1dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
2dnl  and store difference in a third limb vector.
3
4dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C      cycles/limb
24C EV4:     ?
25C EV5:     4.75
26C EV6:     3
27
28dnl  INPUT PARAMETERS
29dnl  res_ptr	r16
30dnl  s1_ptr	r17
31dnl  s2_ptr	r18
32dnl  size	r19
33
34ASM_START()
C mpn_sub_nc: entry point that performs the same subtraction as mpn_sub_n but
C starts from a borrow supplied in r20 instead of zero.  r20 is presumably the
C fifth argument, following size in r19 -- TODO confirm against the C prototype.
C NOTE(review): the shared body looks written for a borrow of 0 or 1; confirm
C that callers never pass anything else.
35PROLOGUE(mpn_sub_nc)
36	bis	r31,r20,r25		C cy = r20 (r31 reads as zero, so this is a plain move)
37	br	L(com)			C continue in the shared body inside mpn_sub_n
38EPILOGUE()
C mpn_sub_n: for each of the size limbs compute res = s1 - s2 - borrow, store
C it through res_ptr, and return the final borrow in r0.
C
C Register use in this routine:
C   r16      res_ptr
C   r17      s1_ptr (minuend limbs)
C   r18      s2_ptr (subtrahend limbs)
C   r19      limb counter
C   r4-r7    limbs loaded from s1
C   r0-r3    limbs loaded from s2
C   r28      s1 limb minus s2 limb, before the incoming borrow is applied
C   r8       borrow out of that main subtract
C   r25      running borrow (called cy in the comments below)
C   r20-r23  finished result limbs waiting to be stored
C
C Every limb takes two subtracts: the main subtract (s1 limb - s2 limb) and
C the carry subtract (minus cy).  Each one gets its own cmpult borrow test and
C the two flags are merged with bis (logical OR) to form the next cy.
C
C L(com) is also the target of the branch in mpn_sub_nc, which arrives here
C with r25 already loaded instead of cleared.
39PROLOGUE(mpn_sub_n)
40	bis	r31,r31,r25		C clear cy
41L(com):	subq	r19,4,r19		C decr loop cnt: r19 = size - 4
42	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
43C Start software pipeline for 1st loop
44	ldq	r0,0(r18)		C s2 limb 0
45	ldq	r4,0(r17)		C s1 limb 0
46	ldq	r1,8(r18)		C s2 limb 1
47	ldq	r5,8(r17)		C s1 limb 1
48	addq	r17,32,r17		C update s1_ptr (later s1 loads use negative offsets)
49	subq	r4,r0,r28		C 1st main subtract
50	ldq	r2,16(r18)		C s2 limb 2
51	subq	r28,r25,r20		C 1st carry subtract
52	ldq	r3,24(r18)		C s2 limb 3
53	cmpult	r4,r0,r8		C cy from 1st main subtract
54	ldq	r6,-16(r17)		C s1 limb 2
55	cmpult	r28,r25,r25		C cy from 1st carry subtract
56	ldq	r7,-8(r17)		C s1 limb 3
57	bis	r8,r25,r25		C combine cy from the two subtracts
58	subq	r19,4,r19		C decr loop cnt
59	subq	r5,r1,r28		C 2nd main subtract
60	addq	r18,32,r18		C update s2_ptr
61	subq	r28,r25,r21		C 2nd carry subtract
62	cmpult	r5,r1,r8		C cy from 2nd main subtract
63	blt	r19,$Lend1		C if less than 4 limbs remain, jump
64C 1st loop handles groups of 4 limbs in a software pipeline
C On entry to each iteration: r20,r21 hold the first two results of the current
C group (not yet stored), r2,r3,r6,r7 hold its last two limb pairs, and the
C borrow of the 2nd limb is still split between r8 and the pending cmpult.
65	ALIGN(16)
66$Loop:	cmpult	r28,r25,r25		C cy from 2nd carry subtract
67	ldq	r0,0(r18)		C next group: s2 limb 0
68	bis	r8,r25,r25		C combine cy from the two subtracts
69	ldq	r1,8(r18)		C next group: s2 limb 1
70	subq	r6,r2,r28		C 3rd main subtract
71	ldq	r4,0(r17)		C next group: s1 limb 0
72	subq	r28,r25,r22		C 3rd carry subtract
73	ldq	r5,8(r17)		C next group: s1 limb 1
74	cmpult	r6,r2,r8		C cy from 3rd main subtract
75	cmpult	r28,r25,r25		C cy from 3rd carry subtract
76	stq	r20,0(r16)		C store 1st result limb
77	bis	r8,r25,r25		C combine cy from the two subtracts
78	stq	r21,8(r16)		C store 2nd result limb
79	subq	r7,r3,r28		C 4th main subtract
80	subq	r28,r25,r23		C 4th carry subtract
81	cmpult	r7,r3,r8		C cy from 4th main subtract
82	cmpult	r28,r25,r25		C cy from 4th carry subtract
83		addq	r17,32,r17		C update s1_ptr
84	bis	r8,r25,r25		C combine cy from the two subtracts
85		addq	r16,32,r16		C update res_ptr
86	subq	r4,r0,r28		C 1st main subtract
87	ldq	r2,16(r18)		C next group: s2 limb 2
88	subq	r28,r25,r20		C 1st carry subtract
89	ldq	r3,24(r18)		C next group: s2 limb 3
90	cmpult	r4,r0,r8		C cy from 1st main subtract
91	ldq	r6,-16(r17)		C next group: s1 limb 2
92	cmpult	r28,r25,r25		C cy from 1st carry subtract
93	ldq	r7,-8(r17)		C next group: s1 limb 3
94	bis	r8,r25,r25		C combine cy from the two subtracts
95	subq	r19,4,r19		C decr loop cnt
96	stq	r22,-16(r16)		C store 3rd result limb
97	subq	r5,r1,r28		C 2nd main subtract
98	stq	r23,-8(r16)		C store 4th result limb
99	subq	r28,r25,r21		C 2nd carry subtract
100		addq	r18,32,r18		C update s2_ptr
101	cmpult	r5,r1,r8		C cy from 2nd main subtract
102	bge	r19,$Loop		C another full group of 4 is available
103C Finish software pipeline for 1st loop
104$Lend1:	cmpult	r28,r25,r25		C cy from 2nd carry subtract
105	bis	r8,r25,r25		C combine cy from the two subtracts
106	subq	r6,r2,r28		C 3rd main subtract
107	subq	r28,r25,r22		C 3rd carry subtract
108	cmpult	r6,r2,r8		C cy from 3rd main subtract
109	cmpult	r28,r25,r25		C cy from 3rd carry subtract
110	stq	r20,0(r16)		C store 1st result limb
111	bis	r8,r25,r25		C combine cy from the two subtracts
112	stq	r21,8(r16)		C store 2nd result limb
113	subq	r7,r3,r28		C 4th main subtract
114	subq	r28,r25,r23		C 4th carry subtract
115	cmpult	r7,r3,r8		C cy from 4th main subtract
116	cmpult	r28,r25,r25		C cy from 4th carry subtract
117	bis	r8,r25,r25		C combine cy from the two subtracts
118	addq	r16,32,r16		C update res_ptr
119	stq	r22,-16(r16)		C store 3rd result limb
120	stq	r23,-8(r16)		C store 4th result limb
121$Lend2:	addq	r19,4,r19		C restore loop cnt: r19 = limbs still to do
122	beq	r19,$Lret		C nothing left, return cy
123C Start software pipeline for 2nd loop
124	ldq	r0,0(r18)		C s2 limb
125	ldq	r4,0(r17)		C s1 limb
126	subq	r19,1,r19		C r19 = limbs left after the one just loaded
127	beq	r19,$Lend0		C that was the last limb, skip the loop
128C 2nd loop handles remaining 1-3 limbs
129	ALIGN(16)
130$Loop0:	subq	r4,r0,r28		C main subtract
131	cmpult	r4,r0,r8		C cy from main subtract
132	ldq	r0,8(r18)		C preload next s2 limb
133	ldq	r4,8(r17)		C preload next s1 limb
134	subq	r28,r25,r20		C carry subtract
135	addq	r18,8,r18		C advance s2_ptr
136	addq	r17,8,r17		C advance s1_ptr
137	stq	r20,0(r16)		C store result limb
138	cmpult	r28,r25,r25		C cy from carry subtract
139	subq	r19,1,r19		C decr loop cnt
140	bis	r8,r25,r25		C combine cy from the two subtracts
141	addq	r16,8,r16		C advance res_ptr
142	bne	r19,$Loop0		C loop while limbs remain beyond the preloaded one
143$Lend0:	subq	r4,r0,r28		C main subtract
144	subq	r28,r25,r20		C carry subtract
145	cmpult	r4,r0,r8		C cy from main subtract
146	cmpult	r28,r25,r25		C cy from carry subtract
147	stq	r20,0(r16)		C store final result limb
148	bis	r8,r25,r25		C combine cy from the two subtracts
149
150$Lret:	bis	r25,r31,r0		C return cy
151	ret	r31,(r26),1		C return through the address in r26
152EPILOGUE()
153ASM_END()
154