; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.

; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.

; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 3 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.


; INPUT PARAMETERS
#define res_ptr	r2
#define s1_ptr	r3
#define s2_ptr	r4
#define size	r5

#include "sysdep.h"

/* __gmpn_add_n (res_ptr, s1_ptr, s2_ptr, size)

   Adds the size-limb vector at s1_ptr to the size-limb vector at s2_ptr,
   stores the size-limb sum at res_ptr, and returns the carry out of the
   most significant limb in r2.  Limbs are 32-bit words (every pointer
   advances 4 bytes per limb) and the least significant limb is at the
   lowest address.  size must be > 0.

   In:       r2 = res_ptr, r3 = s1_ptr, r4 = s2_ptr, r5 = size
   Out:      r2 = carry out of the most significant limb
   Scratch:  r6-r10, r12, the carry bit; r3-r5 are modified as well
   Return:   through r1

   Strategy.  Double-word accesses (ld.d / st.d) are only issued through
   pointers whose bit 2 is clear, i.e. that are 8-byte aligned
   (presumably a hardware requirement for double-word accesses -- confirm
   against the MC88110 manual).  Bit 2 of the XOR of two pointers tells
   whether they have different 8-byte alignment, which selects one of
   three variants:

     V1a  res_ptr and s2_ptr agree:  ld.d from s2, st.d to res,
          single-word ld from s1.
     V1b  res_ptr and s1_ptr agree:  swap s1_ptr and s2_ptr, then run V1a.
     V2   res_ptr agrees with neither, so s1_ptr and s2_ptr agree with
          each other:  ld.d from both sources, single-word st to res.

   The carry is threaded through the whole routine with addu.co / addu.cio.
   The pointer and counter updates in between use plain addu / subu / cmp;
   the code relies on those forms leaving the carry bit untouched.  */

	text
	align	16
	global	C_SYMBOL_NAME(__gmpn_add_n)
C_SYMBOL_NAME(__gmpn_add_n):
	addu.co	r0,r0,r0		; clear cy flag
	xor	r12,s2_ptr,res_ptr
	bb1	2,r12,L1		; s2_ptr/res_ptr alignment differs: try V1b or V2
; **  V1a  **
L0:	bb0	2,res_ptr,L_v1		; branch if res_ptr is 8-byte aligned (bit 2 clear)
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	r10,s1_ptr,0
	addu	s1_ptr,s1_ptr,4
	ld	r8,s2_ptr,0
	addu	s2_ptr,s2_ptr,4
	subu	size,size,1
	addu.co	r6,r10,r8
	st	r6,res_ptr,0
	addu	res_ptr,res_ptr,4
/* With fewer than 2 limbs left there is nothing to preload; the shared
   tail at Lend2 handles the 0-or-1 limb case.  */
L_v1:	cmp	r12,size,2
	bb1	lt,r12,Lend2

/* Preload the first limb pair.  From here on the pair in r10/r12 (s1) and
   r8/r9 (s2) is always loaded one step ahead of the add that uses it.  */
	ld	r10,s1_ptr,0
	ld	r12,s1_ptr,4
	ld.d	r8,s2_ptr,0
	subu	size,size,10		; bias: 2 limbs preloaded + 8 needed per Loop1 pass
	bcnd	lt0,size,Lfin1
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	8
Loop1:	subu	size,size,8
	addu.cio r6,r10,r8
	ld	r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	r12,s1_ptr,12
	ld.d	r8,s2_ptr,8
	st.d	r6,res_ptr,0
	addu.cio r6,r10,r8
	ld	r10,s1_ptr,16
	addu.cio r7,r12,r9
	ld	r12,s1_ptr,20
	ld.d	r8,s2_ptr,16
	st.d	r6,res_ptr,8
	addu.cio r6,r10,r8
	ld	r10,s1_ptr,24
	addu.cio r7,r12,r9
	ld	r12,s1_ptr,28
	ld.d	r8,s2_ptr,24
	st.d	r6,res_ptr,16
	addu.cio r6,r10,r8
	ld	r10,s1_ptr,32
	addu.cio r7,r12,r9
	ld	r12,s1_ptr,36
	addu	s1_ptr,s1_ptr,32
	ld.d	r8,s2_ptr,32
	addu	s2_ptr,s2_ptr,32
	st.d	r6,res_ptr,24
	addu	res_ptr,res_ptr,32
	bcnd	ge0,size,Loop1

/* Re-bias the counter from 8-limb to 2-limb steps.  size >= 0 now means
   at least two more limbs follow the pair that is already loaded.  */
Lfin1:	addu	size,size,8-2
	bcnd	lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1:	addu.cio r6,r10,r8
	ld	r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	r12,s1_ptr,12
	ld.d	r8,s2_ptr,8
	st.d	r6,res_ptr,0
	subu	size,size,2
	addu	s1_ptr,s1_ptr,8
	addu	s2_ptr,s2_ptr,8
	addu	res_ptr,res_ptr,8
	bcnd	ge0,size,Loope1
/* Drain the pipeline: add and store the pair that is still loaded */
Lend1:	addu.cio r6,r10,r8
	addu.cio r7,r12,r9
	st.d	r6,res_ptr,0

/* Only even amounts were added to or subtracted from size since L_v1, so
   bit 0 still tells whether one odd limb is left over.  */
	bb0	0,size,Lret1
/* Add last limb */
	ld	r10,s1_ptr,8		; offset 8: pointers still address the pair just stored
	ld	r8,s2_ptr,8
	addu.cio r6,r10,r8
	st	r6,res_ptr,8

Lret1:	jmp.n	r1			; jmp.n: the next instruction executes before the return
	addu.ci	r2,r0,r0		; return carry-out from most sign. limb

L1:	xor	r12,s1_ptr,res_ptr
	bb1	2,r12,L2		; s1_ptr/res_ptr alignment differs too: V2
; **  V1b  **
/* s1_ptr agrees with res_ptr.  Exchange the two source pointers (r12 is
   the temporary) so that V1a can be reused unchanged.  */
	or	r12,r0,s2_ptr
	or	s2_ptr,r0,s1_ptr
	or	s1_ptr,r0,r12
	br	L0

; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	cmp	r12,size,1
	bb1	eq,r12,Ljone		; exactly one limb: go straight to the single-limb add
	bb0	2,s1_ptr,L_v2		; branch if s1_ptr is 8-byte aligned (bit 2 clear)
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	r10,s1_ptr,0
	addu	s1_ptr,s1_ptr,4
	ld	r8,s2_ptr,0
	addu	s2_ptr,s2_ptr,4
	subu	size,size,1
	addu.co	r6,r10,r8
	st	r6,res_ptr,0
	addu	res_ptr,res_ptr,4

/* No preloading in this variant, so the counter is biased by 8 only */
L_v2:	subu	size,size,8
	bcnd	lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	8
Loop2:	subu	size,size,8
	ld.d	r8,s1_ptr,0
	ld.d	r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	r8,res_ptr,0
	addu.cio r9,r9,r7
	st	r9,res_ptr,4
	ld.d	r8,s1_ptr,8
	ld.d	r6,s2_ptr,8
	addu.cio r8,r8,r6
	st	r8,res_ptr,8
	addu.cio r9,r9,r7
	st	r9,res_ptr,12
	ld.d	r8,s1_ptr,16
	ld.d	r6,s2_ptr,16
	addu.cio r8,r8,r6
	st	r8,res_ptr,16
	addu.cio r9,r9,r7
	st	r9,res_ptr,20
	ld.d	r8,s1_ptr,24
	ld.d	r6,s2_ptr,24
	addu.cio r8,r8,r6
	st	r8,res_ptr,24
	addu.cio r9,r9,r7
	st	r9,res_ptr,28
	addu	s1_ptr,s1_ptr,32
	addu	s2_ptr,s2_ptr,32
	addu	res_ptr,res_ptr,32
	bcnd	ge0,size,Loop2

/* Re-bias the counter from 8-limb to 2-limb steps */
Lfin2:	addu	size,size,8-2
	bcnd	lt0,size,Lend2
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope2:	ld.d	r8,s1_ptr,0
	ld.d	r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	r8,res_ptr,0
	addu.cio r9,r9,r7
	st	r9,res_ptr,4
	subu	size,size,2
	addu	s1_ptr,s1_ptr,8
	addu	s2_ptr,s2_ptr,8
	addu	res_ptr,res_ptr,8
	bcnd	ge0,size,Loope2
/* Shared tail, also reached from L_v1 with 0 or 1 limbs left.  Bit 0 of
   size is the parity of the remaining count on every path that gets here.  */
Lend2:	bb0	0,size,Lret2
/* Add last limb */
Ljone:	ld	r10,s1_ptr,0
	ld	r8,s2_ptr,0
	addu.cio r6,r10,r8
	st	r6,res_ptr,0

Lret2:	jmp.n	r1			; jmp.n: the next instruction executes before the return
	addu.ci	r2,r0,r0		; return carry-out from most sign. limb