xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/alpha/sub_n.asm (revision 15a984a0d95c8f96abe9717ee6241762c55dc106)
1dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
2dnl  and store difference in a third limb vector.
3
4dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32include(`../config.m4')
33
34C      cycles/limb
35C EV4:     ?
36C EV5:     4.75
37C EV6:     3
38
39dnl  INPUT PARAMETERS
40dnl  res_ptr	r16
41dnl  s1_ptr	r17
42dnl  s2_ptr	r18
43dnl  size	r19
44
45ASM_START()
C mpn_sub_nc -- entry point that starts from a caller-supplied borrow.
C Copies r20 into the borrow register r25, then branches to the com label
C inside mpn_sub_n, so the only instruction of mpn_sub_n that is skipped is
C the one that clears r25.
C NOTE(review): r20 is presumably the fifth (carry-in) argument and is
C expected to be 0 or 1 -- confirm against the callers.
46PROLOGUE(mpn_sub_nc)
47	bis	r31,r20,r25		C cy = incoming borrow (r20 OR zero)
48	br	L(com)			C continue in mpn_sub_n past the cy clear
49EPILOGUE()
C mpn_sub_n -- subtract the s2 limb vector from the s1 limb vector, store the
C difference limbs at res_ptr, and return the final borrow in r0.
C
C Register roles, as used below:
C   r16 res_ptr   r17 s1_ptr   r18 s2_ptr   r19 size, then loop counter
C   r0-r3    limbs loaded from s2
C   r4-r7    limbs loaded from s1
C   r20-r23  difference limbs waiting to be stored
C   r28      s1 limb minus s2 limb, before the borrow is applied
C   r8       borrow out of the main subtract
C   r25      running borrow (cy); also borrow out of the carry subtract
C
C Every limb goes through the same three steps: main subtract (s1 - s2),
C carry subtract (minus cy), then the two cmpult results are ORed together
C to form the borrow for the next limb.
50PROLOGUE(mpn_sub_n)
51	bis	r31,r31,r25		C clear cy
C mpn_sub_nc joins here with cy already loaded into r25
52L(com):	subq	r19,4,r19		C decr loop cnt
53	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
54C Start software pipeline for 1st loop
C Loads all 4 limb pairs of the first group and completes limb 1; limb 2 is
C left with its last two borrow steps pending (done at Loop or Lend1).
55	ldq	r0,0(r18)
56	ldq	r4,0(r17)
57	ldq	r1,8(r18)
58	ldq	r5,8(r17)
59	addq	r17,32,r17		C update s1_ptr
60	subq	r4,r0,r28		C 1st main subtract
61	ldq	r2,16(r18)
62	subq	r28,r25,r20		C 1st carry subtract
63	ldq	r3,24(r18)
64	cmpult	r4,r0,r8		C compute cy from last subtract
65	ldq	r6,-16(r17)
66	cmpult	r28,r25,r25		C compute cy from last subtract
67	ldq	r7,-8(r17)
68	bis	r8,r25,r25		C combine cy from the two subtracts
69	subq	r19,4,r19		C decr loop cnt
70	subq	r5,r1,r28		C 2nd main subtract
71	addq	r18,32,r18		C update s2_ptr
72	subq	r28,r25,r21		C 2nd carry subtract
73	cmpult	r5,r1,r8		C compute cy from last subtract
74	blt	r19,$Lend1		C if less than 4 limbs remain, jump
75C 1st loop handles groups of 4 limbs in a software pipeline
C Each pass finishes limbs 3 and 4 of the current group and stores all four
C of its results, while loading the next group and starting its limbs 1 and 2.
76	ALIGN(16)
77$Loop:	cmpult	r28,r25,r25		C compute cy from last subtract
78	ldq	r0,0(r18)
79	bis	r8,r25,r25		C combine cy from the two subtracts
80	ldq	r1,8(r18)
81	subq	r6,r2,r28		C 3rd main subtract
82	ldq	r4,0(r17)
83	subq	r28,r25,r22		C 3rd carry subtract
84	ldq	r5,8(r17)
85	cmpult	r6,r2,r8		C compute cy from last subtract
86	cmpult	r28,r25,r25		C compute cy from last subtract
87	stq	r20,0(r16)
88	bis	r8,r25,r25		C combine cy from the two subtracts
89	stq	r21,8(r16)
90	subq	r7,r3,r28		C 4th main subtract
91	subq	r28,r25,r23		C 4th carry subtract
92	cmpult	r7,r3,r8		C compute cy from last subtract
93	cmpult	r28,r25,r25		C compute cy from last subtract
94		addq	r17,32,r17		C update s1_ptr
95	bis	r8,r25,r25		C combine cy from the two subtracts
96		addq	r16,32,r16		C update res_ptr
97	subq	r4,r0,r28		C 1st main subtract
98	ldq	r2,16(r18)
99	subq	r28,r25,r20		C 1st carry subtract
100	ldq	r3,24(r18)
101	cmpult	r4,r0,r8		C compute cy from last subtract
102	ldq	r6,-16(r17)
103	cmpult	r28,r25,r25		C compute cy from last subtract
104	ldq	r7,-8(r17)
105	bis	r8,r25,r25		C combine cy from the two subtracts
106	subq	r19,4,r19		C decr loop cnt
107	stq	r22,-16(r16)
108	subq	r5,r1,r28		C 2nd main subtract
109	stq	r23,-8(r16)
110	subq	r28,r25,r21		C 2nd carry subtract
111		addq	r18,32,r18		C update s2_ptr
112	cmpult	r5,r1,r8		C compute cy from last subtract
113	bge	r19,$Loop
114C Finish software pipeline for 1st loop
C Drains the last group: completes limbs 3 and 4 and stores all four results.
C No further loads are issued here.
115$Lend1:	cmpult	r28,r25,r25		C compute cy from last subtract
116	bis	r8,r25,r25		C combine cy from the two subtracts
117	subq	r6,r2,r28		C 3rd main subtract
118	subq	r28,r25,r22		C 3rd carry subtract
119	cmpult	r6,r2,r8		C compute cy from last subtract
120	cmpult	r28,r25,r25		C compute cy from last subtract
121	stq	r20,0(r16)
122	bis	r8,r25,r25		C combine cy from the two subtracts
123	stq	r21,8(r16)
124	subq	r7,r3,r28		C 4th main subtract
125	subq	r28,r25,r23		C 4th carry subtract
126	cmpult	r7,r3,r8		C compute cy from last subtract
127	cmpult	r28,r25,r25		C compute cy from last subtract
128	bis	r8,r25,r25		C combine cy from the two subtracts
129	addq	r16,32,r16		C update res_ptr
130	stq	r22,-16(r16)
131	stq	r23,-8(r16)
C r19 is negative on every path into Lend2; adding 4 back gives the number
C of limbs still to process.
132$Lend2:	addq	r19,4,r19		C restore loop cnt
133	beq	r19,$Lret
134C Start software pipeline for 2nd loop
135	ldq	r0,0(r18)
136	ldq	r4,0(r17)
137	subq	r19,1,r19
138	beq	r19,$Lend0
139C 2nd loop handles remaining 1-3 limbs
C One limb per pass; the limb pair for the following pass is loaded early,
C and the final limb is finished at Lend0.
140	ALIGN(16)
141$Loop0:	subq	r4,r0,r28		C main subtract
142	cmpult	r4,r0,r8		C compute cy from last subtract
143	ldq	r0,8(r18)
144	ldq	r4,8(r17)
145	subq	r28,r25,r20		C carry subtract
146	addq	r18,8,r18
147	addq	r17,8,r17
148	stq	r20,0(r16)
149	cmpult	r28,r25,r25		C compute cy from last subtract
150	subq	r19,1,r19		C decr loop cnt
151	bis	r8,r25,r25		C combine cy from the two subtracts
152	addq	r16,8,r16
153	bne	r19,$Loop0
154$Lend0:	subq	r4,r0,r28		C main subtract
155	subq	r28,r25,r20		C carry subtract
156	cmpult	r4,r0,r8		C compute cy from last subtract
157	cmpult	r28,r25,r25		C compute cy from last subtract
158	stq	r20,0(r16)
159	bis	r8,r25,r25		C combine cy from the two subtracts
160
C r25 holds the borrow out of the last limb processed
161$Lret:	bis	r25,r31,r0		C return cy
162	ret	r31,(r26),1
163EPILOGUE()
164ASM_END()
165