xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/m88k/mc88110/sub_n.S (revision 62f324d0121177eaf2e0384f92fd9ca2a751c795)
1; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2; store difference in a third limb vector.
3
4; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
5
6; This file is part of the GNU MP Library.
7
8; The GNU MP Library is free software; you can redistribute it and/or modify
9; it under the terms of the GNU Lesser General Public License as published by
10; the Free Software Foundation; either version 3 of the License, or (at your
11; option) any later version.
12
13; The GNU MP Library is distributed in the hope that it will be useful, but
14; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16; License for more details.
17
18; You should have received a copy of the GNU Lesser General Public License
19; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21
; INPUT PARAMETERS
; m88k argument registers map straight onto the C prototype
;   mp_limb_t __gmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
; res_ptr (r2): destination limb vector
; s1_ptr  (r3): minuend limb vector      (result = s1 - s2)
; s2_ptr  (r4): subtrahend limb vector
; size    (r5): number of limbs; callers guarantee size > 0
; (comments kept on their own lines so they do not end up inside the
; cpp macro bodies)
#define res_ptr	r2
#define s1_ptr	r3
#define s2_ptr	r4
#define size	r5

#include "sysdep.h"
29
;-----------------------------------------------------------------------
; mp_limb_t __gmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
;
; Computes {res_ptr,size} = {s1_ptr,size} - {s2_ptr,size} and returns
; the borrow-out (0 or 1) in r2.  Return address is in r1 (jmp r1).
;
; The inner loops move limbs two at a time with ld.d/st.d double-word
; accesses, which this code only issues at 8-byte-aligned addresses.
; It therefore dispatches on the relative alignment (address bit 2)
; of the three pointers:
;   V1a: s2_ptr == res_ptr (mod 8): ld.d from s2_ptr, st.d to res_ptr
;   V1b: s1_ptr == res_ptr (mod 8): ld.d from s1_ptr, st.d to res_ptr
;   V2:  s1_ptr == s2_ptr  (mod 8): ld.d both sources, 32-bit stores
;
; On the 88k, carry after a subtract means "no borrow", so the borrow
; chain runs through subu.co/subu.cio and the final xor inverts the
; carry into the GMP borrow convention.
;-----------------------------------------------------------------------
	text
	align	16
	global	C_SYMBOL_NAME(__gmpn_sub_n)
C_SYMBOL_NAME(__gmpn_sub_n):
	subu.co	 r0,r0,r0		; 0-0 sets cy = 1, i.e. no borrow-in
	xor	 r12,s2_ptr,res_ptr	; do s2_ptr/res_ptr agree in bit 2?
	bb1	 2,r12,L1		; no: try the other pointer pairings
; **  V1a  **  (s2_ptr and res_ptr can be 8-aligned simultaneously)
L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is aligned
/* Subtract least significant limb separately to align res_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	subu.co	 r6,r10,r8		; r6 = s1[0] - s2[0], start borrow chain
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2		; <2 limbs left: use the 32-bit tail

	ld	 r10,s1_ptr,0		; prime the software pipeline:
	ld	 r12,s1_ptr,4		; pre-load first two limbs of s1
	ld.d	 r8,s2_ptr,0		; ...and of s2 (r8/r9 pair)
	subu	 size,size,10		; 8 (unroll) + 2 (already loaded)
	bcnd	 lt0,size,Lfin1
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop1:	subu	 size,size,8
	subu.cio r6,r10,r8		; borrow propagates via .cio all loop
	ld	 r10,s1_ptr,8		; loads for next pair overlap the
	subu.cio r7,r12,r9		; subtracts of the current one
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,16
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,20
	ld.d	 r8,s2_ptr,16
	st.d	 r6,res_ptr,8
	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,24
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,28
	ld.d	 r8,s2_ptr,24
	st.d	 r6,res_ptr,16
	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,32		; pre-load for the next iteration
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,36
	addu	 s1_ptr,s1_ptr,32
	ld.d	 r8,s2_ptr,32
	addu	 s2_ptr,s2_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1

Lfin1:	addu	 size,size,8-2		; undo unroll bias; count 2-limb steps
	bcnd	 lt0,size,Lend1
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope1:	subu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	subu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1
Lend1:	subu.cio r6,r10,r8		; drain the two pre-loaded limbs
	subu.cio r7,r12,r9
	st.d	 r6,res_ptr,0

	bb0	 0,size,Lret1		; even remainder: no odd limb left
/* Subtract last limb */
	ld	 r10,s1_ptr,8
	ld	 r8,s2_ptr,8
	subu.cio r6,r10,r8
	st	 r6,res_ptr,8

Lret1:	addu.ci r2,r0,r0		; r2 = carry-out from most sign. limb
	jmp.n	 r1
	 xor	r2,r2,1			; (delay slot) invert carry -> borrow

L1:	xor	 r12,s1_ptr,res_ptr	; do s1_ptr/res_ptr agree in bit 2?
	bb1	 2,r12,L2		; no: s1/s2 must then agree -> V2
; **  V1b  **  (as V1a, but with the roles of s1_ptr and s2_ptr swapped:
; ld.d reads s1_ptr into r8/r9, single ld reads s2_ptr into r10/r12)
	bb0	 2,res_ptr,L_v1b	; branch if res_ptr is aligned
/* Subtract least significant limb separately to align res_ptr and s1_ptr */
	ld	 r10,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	ld	 r8,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	subu	 size,size,1
	subu.co	 r6,r8,r10		; note operand order: still s1 - s2
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1b:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2		; <2 limbs left: use the 32-bit tail

	ld	 r10,s2_ptr,0		; prime the software pipeline
	ld	 r12,s2_ptr,4
	ld.d	 r8,s1_ptr,0
	subu	 size,size,10		; 8 (unroll) + 2 (already loaded)
	bcnd	 lt0,size,Lfin1b
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop1b:	subu	 size,size,8
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,8
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,12
	ld.d	 r8,s1_ptr,8
	st.d	 r6,res_ptr,0
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,16
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,20
	ld.d	 r8,s1_ptr,16
	st.d	 r6,res_ptr,8
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,24
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,28
	ld.d	 r8,s1_ptr,24
	st.d	 r6,res_ptr,16
	subu.cio r6,r8,r10
	ld	 r10,s2_ptr,32		; pre-load for the next iteration
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,36
	addu	 s2_ptr,s2_ptr,32
	ld.d	 r8,s1_ptr,32
	addu	 s1_ptr,s1_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1b

Lfin1b:	addu	 size,size,8-2		; undo unroll bias; count 2-limb steps
	bcnd	 lt0,size,Lend1b
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope1b:subu.cio r6,r8,r10
	ld	 r10,s2_ptr,8
	subu.cio r7,r9,r12
	ld	 r12,s2_ptr,12
	ld.d	 r8,s1_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1b
Lend1b:	subu.cio r6,r8,r10		; drain the two pre-loaded limbs
	subu.cio r7,r9,r12
	st.d	 r6,res_ptr,0

	bb0	 0,size,Lret1b		; even remainder: no odd limb left
/* Subtract last limb */
	ld	 r10,s2_ptr,8
	ld	 r8,s1_ptr,8
	subu.cio r6,r8,r10
	st	 r6,res_ptr,8

Lret1b:	addu.ci r2,r0,r0		; r2 = carry-out from most sign. limb
	jmp.n	 r1
	 xor	r2,r2,1			; (delay slot) invert carry -> borrow

; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	cmp	 r12,size,1
	bb1	 eq,r12,Ljone		; single limb: handle it directly
	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	subu.co	 r6,r10,r8		; start the borrow chain
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4

L_v2:	subu	 size,size,8
	bcnd	 lt0,size,Lfin2
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop2:	subu	 size,size,8
	ld.d	 r8,s1_ptr,0		; double-word loads from both sources,
	ld.d	 r6,s2_ptr,0		; 32-bit stores (res_ptr misaligned)
	subu.cio r8,r8,r6
	st	 r8,res_ptr,0
	subu.cio r9,r9,r7
	st	 r9,res_ptr,4
	ld.d	 r8,s1_ptr,8
	ld.d	 r6,s2_ptr,8
	subu.cio r8,r8,r6
	st	 r8,res_ptr,8
	subu.cio r9,r9,r7
	st	 r9,res_ptr,12
	ld.d	 r8,s1_ptr,16
	ld.d	 r6,s2_ptr,16
	subu.cio r8,r8,r6
	st	 r8,res_ptr,16
	subu.cio r9,r9,r7
	st	 r9,res_ptr,20
	ld.d	 r8,s1_ptr,24
	ld.d	 r6,s2_ptr,24
	subu.cio r8,r8,r6
	st	 r8,res_ptr,24
	subu.cio r9,r9,r7
	st	 r9,res_ptr,28
	addu	 s1_ptr,s1_ptr,32
	addu	 s2_ptr,s2_ptr,32
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop2

Lfin2:	addu	 size,size,8-2		; undo unroll bias; count 2-limb steps
	bcnd	 lt0,size,Lend2
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope2:	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	subu.cio r8,r8,r6
	st	 r8,res_ptr,0
	subu.cio r9,r9,r7
	st	 r9,res_ptr,4
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope2
Lend2:	bb0	 0,size,Lret2		; even remainder: no odd limb left
/* Subtract last limb.  Ljone is also the direct entry for size == 1,
   and Lend2 the 32-bit tail shared by all three variants. */
Ljone:	ld	 r10,s1_ptr,0
	ld	 r8,s2_ptr,0
	subu.cio r6,r10,r8
	st	 r6,res_ptr,0

Lret2:	addu.ci r2,r0,r0		; r2 = carry-out from most sign. limb
	jmp.n	 r1
	 xor	r2,r2,1			; (delay slot) invert carry -> borrow
275