xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/m88k/mc88110/add_n.S (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
2; sum in a third limb vector.
3
4; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
5
6;  This file is part of the GNU MP Library.
7;
8;  The GNU MP Library is free software; you can redistribute it and/or modify
9;  it under the terms of either:
10;
11;    * the GNU Lesser General Public License as published by the Free
12;      Software Foundation; either version 3 of the License, or (at your
13;      option) any later version.
14;
15;  or
16;
17;    * the GNU General Public License as published by the Free Software
18;      Foundation; either version 2 of the License, or (at your option) any
19;      later version.
20;
21;  or both in parallel, as here.
22;
23;  The GNU MP Library is distributed in the hope that it will be useful, but
24;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26;  for more details.
27;
28;  You should have received copies of the GNU General Public License and the
29;  GNU Lesser General Public License along with the GNU MP Library.  If not,
30;  see https://www.gnu.org/licenses/.
31
32
33; INPUT PARAMETERS
; Register aliases for the four arguments, as the code in this file uses them:
;   res_ptr  address of the result limb vector (the only memory written)
;   s1_ptr   address of the first source limb vector (read only)
;   s2_ptr   address of the second source limb vector (read only)
;   size     number of limbs in each vector; the file header requires > 0
; All four registers are modified by the routine, and r2 (res_ptr) also
; carries the return value.
; NOTE(review): r2..r5 are presumably the first four argument registers of
; the m88k calling convention -- confirm against the ABI document.
; Keep comments off the define lines themselves: anything after the register
; name would become part of the macro expansion.
34#define res_ptr	r2
35#define s1_ptr	r3
36#define s2_ptr	r4
37#define size	r5
38
39#include "sysdep.h"
40
; __gmpn_add_n -- add the size-limb vectors at s1_ptr and s2_ptr, store the
; size-limb sum at res_ptr, and return the carry out of the most significant
; limb in r2.  A limb is one 32-bit word (pointers advance by 4 per limb).
;
; Strategy: ld.d/st.d move two limbs at a time, and the code only issues them
; through pointers it has first brought to 8-byte alignment.  Bit 2 of a limb
; pointer says which half of an 8-byte unit it addresses, so comparing bit 2
; of the three pointers selects one of three variants:
;   V1a  res_ptr and s2_ptr agree: ld.d from s2_ptr, st.d to res_ptr,
;        single-word loads from s1_ptr.
;   V1b  res_ptr and s1_ptr agree: swap s1_ptr and s2_ptr, then run V1a.
;   V2   only s1_ptr and s2_ptr agree: ld.d from both sources, single-word
;        stores to res_ptr.
;
; Carry handling: only the .co/.ci/.cio forms of addu are used to produce or
; consume the carry.  The code relies on the plain addu/subu/cmp/ld/st
; instructions and the branches in between leaving the carry bit untouched.
;
; Registers written: r2-r5 (the arguments, updated in place), r6-r10 and r12
; (temporaries).  r0 is used as a constant zero and r1 as the return address.
41	text
42	align	16
43	global	C_SYMBOL_NAME(__gmpn_add_n)
44C_SYMBOL_NAME(__gmpn_add_n):
45	addu.co	 r0,r0,r0		; clear cy flag
; Bit 2 of (s2_ptr xor res_ptr) is set when the two pointers differ in their
; 8-byte alignment; in that case go and try the other pairings at L1.
46	xor	 r12,s2_ptr,res_ptr
47	bb1	 2,r12,L1
48; **  V1a  **  res_ptr and s2_ptr have the same alignment
49L0:	bb0	 2,res_ptr,L_v1		; skip the fix-up if res_ptr is already 8-byte aligned
/* Add least significant limb separately to align res_ptr and s2_ptr */
51	ld	 r10,s1_ptr,0
52	addu	 s1_ptr,s1_ptr,4
53	ld	 r8,s2_ptr,0
54	addu	 s2_ptr,s2_ptr,4
55	subu	 size,size,1
56	addu.co	 r6,r10,r8
57	st	 r6,res_ptr,0
58	addu	 res_ptr,res_ptr,4
; With fewer than 2 limbs left, jump to the shared tail at Lend2, which adds
; the final limb if there is one.  r12 is only a scratch register here.
59L_v1:	cmp	 r12,size,2
60	bb1	 lt,r12,Lend2
61
; Preload the first limb pair of each source: r10,r12 from s1_ptr with two
; single-word loads, r8:r9 from s2_ptr with one double-word load.  Every pass
; below adds the pair loaded by the previous pass while loading the next one.
62	ld	 r10,s1_ptr,0
63	ld	 r12,s1_ptr,4
64	ld.d	 r8,s2_ptr,0
; Bias size by 10: one pass of Loop1 adds 8 limbs and preloads 2 more.
65	subu	 size,size,10
66	bcnd	 lt0,size,Lfin1
/* Add blocks of 8 limbs while at least 10 limbs (8 to add plus 2 to preload) remain */
68	align	 8
69Loop1:	subu	 size,size,8
70	addu.cio r6,r10,r8
71	ld	 r10,s1_ptr,8
72	addu.cio r7,r12,r9
73	ld	 r12,s1_ptr,12
74	ld.d	 r8,s2_ptr,8
75	st.d	 r6,res_ptr,0
76	addu.cio r6,r10,r8
77	ld	 r10,s1_ptr,16
78	addu.cio r7,r12,r9
79	ld	 r12,s1_ptr,20
80	ld.d	 r8,s2_ptr,16
81	st.d	 r6,res_ptr,8
82	addu.cio r6,r10,r8
83	ld	 r10,s1_ptr,24
84	addu.cio r7,r12,r9
85	ld	 r12,s1_ptr,28
86	ld.d	 r8,s2_ptr,24
87	st.d	 r6,res_ptr,16
88	addu.cio r6,r10,r8
89	ld	 r10,s1_ptr,32
90	addu.cio r7,r12,r9
; The loads at offsets 32 and 36 fetch the pair for the next pass; they are
; issued before the matching pointer is advanced by 32.
91	ld	 r12,s1_ptr,36
92	addu	 s1_ptr,s1_ptr,32
93	ld.d	 r8,s2_ptr,32
94	addu	 s2_ptr,s2_ptr,32
95	st.d	 r6,res_ptr,24
96	addu	 res_ptr,res_ptr,32
97	bcnd	 ge0,size,Loop1
98
; Here size = (limbs not yet stored) - 10, which is negative.  Rebias it to
; (limbs not yet stored) - 4 so that Loope1 runs while at least 4 limbs
; remain (2 to add plus 2 to preload).
99Lfin1:	addu	 size,size,8-2
100	bcnd	 lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
102Loope1:	addu.cio r6,r10,r8
103	ld	 r10,s1_ptr,8
104	addu.cio r7,r12,r9
105	ld	 r12,s1_ptr,12
106	ld.d	 r8,s2_ptr,8
107	st.d	 r6,res_ptr,0
108	subu	 size,size,2
109	addu	 s1_ptr,s1_ptr,8
110	addu	 s2_ptr,s2_ptr,8
111	addu	 res_ptr,res_ptr,8
112	bcnd	 ge0,size,Loope1
; Add and store the last preloaded pair.  size is now -2 or -1, so bit 0 is
; set exactly when one odd limb is still left.
113Lend1:	addu.cio r6,r10,r8
114	addu.cio r7,r12,r9
115	st.d	 r6,res_ptr,0
116
117	bb0	 0,size,Lret1
/* Add last limb */
; Offset 8 because the pointers still address the pair that was just stored.
119	ld	 r10,s1_ptr,8
120	ld	 r8,s2_ptr,8
121	addu.cio r6,r10,r8
122	st	 r6,res_ptr,8
123
; Return through r1.  The instruction after jmp.n still executes (delay slot)
; and forms the result r2 = 0 + 0 + carry.
124Lret1:	jmp.n	 r1
125	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
126
; res_ptr and s2_ptr differ in alignment.  If s1_ptr matches res_ptr instead,
; exchange the two source pointers (the sum does not depend on their order)
; and reuse the V1a code; otherwise fall to V2.
127L1:	xor	 r12,s1_ptr,res_ptr
128	bb1	 2,r12,L2
129; **  V1b  **  res_ptr and s1_ptr have the same alignment
130	or	 r12,r0,s2_ptr
131	or	 s2_ptr,r0,s1_ptr
132	or	 s1_ptr,r0,r12
133	br	 L0
134
135; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
137   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
138   things can be aligned (that we care about) we now know that the alignment
139   of s1_ptr and s2_ptr are the same.  */
140
; A single limb needs no alignment work: add it directly at Ljone.
141L2:	cmp	 r12,size,1
142	bb1	 eq,r12,Ljone
143	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
/* Add least significant limb separately to align s1_ptr and s2_ptr */
145	ld	 r10,s1_ptr,0
146	addu	 s1_ptr,s1_ptr,4
147	ld	 r8,s2_ptr,0
148	addu	 s2_ptr,s2_ptr,4
149	subu	 size,size,1
150	addu.co	 r6,r10,r8
151	st	 r6,res_ptr,0
152	addu	 res_ptr,res_ptr,4
153
; Bias size by 8: Loop2 has no preloading, each pass loads, adds and stores
; exactly 8 limbs.
154L_v2:	subu	 size,size,8
155	bcnd	 lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
157	align	 8
; Per pair: r8:r9 = two limbs from s1_ptr, r6:r7 = two limbs from s2_ptr.
; The sums go out with single-word stores because res_ptr does not share the
; alignment of the sources in this variant.
158Loop2:	subu	 size,size,8
159	ld.d	 r8,s1_ptr,0
160	ld.d	 r6,s2_ptr,0
161	addu.cio r8,r8,r6
162	st	 r8,res_ptr,0
163	addu.cio r9,r9,r7
164	st	 r9,res_ptr,4
165	ld.d	 r8,s1_ptr,8
166	ld.d	 r6,s2_ptr,8
167	addu.cio r8,r8,r6
168	st	 r8,res_ptr,8
169	addu.cio r9,r9,r7
170	st	 r9,res_ptr,12
171	ld.d	 r8,s1_ptr,16
172	ld.d	 r6,s2_ptr,16
173	addu.cio r8,r8,r6
174	st	 r8,res_ptr,16
175	addu.cio r9,r9,r7
176	st	 r9,res_ptr,20
177	ld.d	 r8,s1_ptr,24
178	ld.d	 r6,s2_ptr,24
179	addu.cio r8,r8,r6
180	st	 r8,res_ptr,24
181	addu.cio r9,r9,r7
182	st	 r9,res_ptr,28
183	addu	 s1_ptr,s1_ptr,32
184	addu	 s2_ptr,s2_ptr,32
185	addu	 res_ptr,res_ptr,32
186	bcnd	 ge0,size,Loop2
187
; Here size = (limbs left) - 8, which is negative.  Rebias it to
; (limbs left) - 2 so that Loope2 runs while at least 2 limbs remain.
188Lfin2:	addu	 size,size,8-2
189	bcnd	 lt0,size,Lend2
190Loope2:	ld.d	 r8,s1_ptr,0
191	ld.d	 r6,s2_ptr,0
192	addu.cio r8,r8,r6
193	st	 r8,res_ptr,0
194	addu.cio r9,r9,r7
195	st	 r9,res_ptr,4
196	subu	 size,size,2
197	addu	 s1_ptr,s1_ptr,8
198	addu	 s2_ptr,s2_ptr,8
199	addu	 res_ptr,res_ptr,8
200	bcnd	 ge0,size,Loope2
; Shared tail.  Reached from the V2 code with size = -2 or -1, and from L_v1
; with size = 0 or 1 (given size > 0 on entry); in every case bit 0 is set
; exactly when one limb is left.  Ljone is also entered directly from L2.
201Lend2:	bb0	 0,size,Lret2
/* Add last limb */
203Ljone:	ld	 r10,s1_ptr,0
204	ld	 r8,s2_ptr,0
205	addu.cio r6,r10,r8
206	st	 r6,res_ptr,0
207
; Return through r1; the delay-slot instruction forms r2 = 0 + 0 + carry.
208Lret2:	jmp.n	 r1
209	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
210