; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.

; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.

; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 3 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


; INPUT PARAMETERS
#define res_ptr	r2
#define s1_ptr	r3
#define s2_ptr	r4
#define size	r5

#include "sysdep.h"
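
; For reference, the routine computes {res_ptr, size} = {s1_ptr, size} +
; {s2_ptr, size} and returns the carry-out.  A minimal C sketch of the
; same operation (assuming a 32-bit mp_limb_t, as on m88k) might look
; like this; the names follow the parameter defines above:
;
;	mp_limb_t
;	__gmpn_add_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
;		      const mp_limb_t *s2_ptr, mp_size_t size)
;	{
;	  mp_limb_t cy = 0;
;	  mp_size_t i;
;	  for (i = 0; i < size; i++)
;	    {
;	      mp_limb_t a = s1_ptr[i];
;	      mp_limb_t sum = a + s2_ptr[i] + cy;
;	      cy = cy ? (sum <= a) : (sum < a);	/* detect wrap-around */
;	      res_ptr[i] = sum;
;	    }
;	  return cy;	/* carry-out from the most significant limb */
;	}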

	text
	align	16
	global	C_SYMBOL_NAME(__gmpn_add_n)
C_SYMBOL_NAME(__gmpn_add_n):
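/* Dispatch on pointer alignment: if bit 2 of s2_ptr and res_ptr agree
   (same alignment modulo 8), fall through to V1a, which uses ld.d/st.d
   on s2 and res.  Otherwise, if s1_ptr and res_ptr agree, V1b swaps the
   two source pointers and reuses V1a.  Failing both, s1_ptr and s2_ptr
   must agree with each other, and V2 uses ld.d on both sources with
   single-word stores.  */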
	addu.co	 r0,r0,r0		; clear cy flag
	xor	 r12,s2_ptr,res_ptr
	bb1	 2,r12,L1
; **  V1a  **
L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is doubleword aligned
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2

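/* Preload two limbs from s1 and a doubleword (r8/r9) from s2.  size is
   reduced by 10: 8 for the unrolled loop stride plus 2 for the limbs
   already in flight.  */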
	ld	 r10,s1_ptr,0
	ld	 r12,s1_ptr,4
	ld.d	 r8,s2_ptr,0
	subu	 size,size,10
	bcnd	 lt0,size,Lfin1
/* Add blocks of 8 limbs until less than 8 limbs remain */
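/* The loop is software pipelined: each addu.cio consumes limbs loaded
   on the previous round while the ld/ld.d instructions fetch ahead for
   the next one.  */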
	align	 8
Loop1:	subu	 size,size,8
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,16
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,20
	ld.d	 r8,s2_ptr,16
	st.d	 r6,res_ptr,8
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,24
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,28
	ld.d	 r8,s2_ptr,24
	st.d	 r6,res_ptr,16
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,32
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,36
	addu	 s1_ptr,s1_ptr,32
	ld.d	 r8,s2_ptr,32
	addu	 s2_ptr,s2_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1

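/* size is negative here.  Adding back 8-2 = 6 re-biases it so the
   2-limb loop below runs while size stays non-negative; one limb pair
   is still in flight from the loads above.  */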
Lfin1:	addu	 size,size,8-2
	bcnd	 lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1:	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1
Lend1:	addu.cio r6,r10,r8
	addu.cio r7,r12,r9
	st.d	 r6,res_ptr,0

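/* Bit 0 of the (negative) size says whether one odd limb remains.  */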
	bb0	 0,size,Lret1
/* Add last limb */
	ld	 r10,s1_ptr,8
	ld	 r8,s2_ptr,8
	addu.cio r6,r10,r8
	st	 r6,res_ptr,8

Lret1:	jmp.n	 r1
	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb

L1:	xor	 r12,s1_ptr,res_ptr
	bb1	 2,r12,L2
; **  V1b  **
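/* Addition is commutative, so just swap s1_ptr and s2_ptr (via r12)
   and reuse the V1a code path above.  */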
	or	 r12,r0,s2_ptr
	or	 s2_ptr,r0,s1_ptr
	or	 s1_ptr,r0,r12
	br	 L0

; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the
   alignment of s1_ptr and s2_ptr is the same.  */

L2:	cmp	 r12,size,1
	bb1	 eq,r12,Ljone
	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is doubleword aligned
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4

L_v2:	subu	 size,size,8
	bcnd	 lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
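/* Both source pointers are now doubleword aligned, but res_ptr is not,
   so sums are stored as two single words rather than with st.d.  */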
	align	 8
Loop2:	subu	 size,size,8
	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	ld.d	 r8,s1_ptr,8
	ld.d	 r6,s2_ptr,8
	addu.cio r8,r8,r6
	st	 r8,res_ptr,8
	addu.cio r9,r9,r7
	st	 r9,res_ptr,12
	ld.d	 r8,s1_ptr,16
	ld.d	 r6,s2_ptr,16
	addu.cio r8,r8,r6
	st	 r8,res_ptr,16
	addu.cio r9,r9,r7
	st	 r9,res_ptr,20
	ld.d	 r8,s1_ptr,24
	ld.d	 r6,s2_ptr,24
	addu.cio r8,r8,r6
	st	 r8,res_ptr,24
	addu.cio r9,r9,r7
	st	 r9,res_ptr,28
	addu	 s1_ptr,s1_ptr,32
	addu	 s2_ptr,s2_ptr,32
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop2

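/* Re-bias size as at Lfin1, then finish remaining limbs two at a time.  */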
Lfin2:	addu	 size,size,8-2
	bcnd	 lt0,size,Lend2
Loope2:	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope2
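/* Lend2 is also the tail for V1a when fewer than 2 limbs remain (see
   L_v1 above); bit 0 of size says whether one limb is left to add.  */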
Lend2:	bb0	 0,size,Lret2
/* Add last limb */
Ljone:	ld	 r10,s1_ptr,0
	ld	 r8,s2_ptr,0
	addu.cio r6,r10,r8
	st	 r6,res_ptr,0

Lret2:	jmp.n	 r1
	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
