; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
; store difference in a third limb vector.

; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.

; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 3 of the License, or (at your
; option) any later version.

; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


; INPUT PARAMETERS
#define res_ptr r2
#define s1_ptr  r3
#define s2_ptr  r4
#define size    r5

#include "sysdep.h"

        text
        align    16
        global   C_SYMBOL_NAME(__gmpn_sub_n)
C_SYMBOL_NAME(__gmpn_sub_n):
        subu.co  r0,r0,r0               ; set cy flag
        xor      r12,s2_ptr,res_ptr
        bb1      2,r12,L1
; **  V1a  **
L0:     bb0      2,res_ptr,L_v1         ; branch if res_ptr is aligned
/* Subtract least significant limb separately to align res_ptr and s2_ptr */
        ld       r10,s1_ptr,0
        addu     s1_ptr,s1_ptr,4
        ld       r8,s2_ptr,0
        addu     s2_ptr,s2_ptr,4
        subu     size,size,1
        subu.co  r6,r10,r8
        st       r6,res_ptr,0
        addu     res_ptr,res_ptr,4
L_v1:   cmp      r12,size,2
        bb1      lt,r12,Lend2

        ld       r10,s1_ptr,0
        ld       r12,s1_ptr,4
        ld.d     r8,s2_ptr,0
        subu     size,size,10
        bcnd     lt0,size,Lfin1
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
        align    8
Loop1:  subu     size,size,8
        subu.cio r6,r10,r8
        ld       r10,s1_ptr,8
        subu.cio r7,r12,r9
        ld       r12,s1_ptr,12
        ld.d     r8,s2_ptr,8
        st.d     r6,res_ptr,0
        subu.cio r6,r10,r8
        ld       r10,s1_ptr,16
        subu.cio r7,r12,r9
        ld       r12,s1_ptr,20
        ld.d     r8,s2_ptr,16
        st.d     r6,res_ptr,8
        subu.cio r6,r10,r8
        ld       r10,s1_ptr,24
        subu.cio r7,r12,r9
        ld       r12,s1_ptr,28
        ld.d     r8,s2_ptr,24
        st.d     r6,res_ptr,16
        subu.cio r6,r10,r8
        ld       r10,s1_ptr,32
        subu.cio r7,r12,r9
        ld       r12,s1_ptr,36
        addu     s1_ptr,s1_ptr,32
        ld.d     r8,s2_ptr,32
        addu     s2_ptr,s2_ptr,32
        st.d     r6,res_ptr,24
        addu     res_ptr,res_ptr,32
        bcnd     ge0,size,Loop1

Lfin1:  addu     size,size,8-2
        bcnd     lt0,size,Lend1
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope1: subu.cio r6,r10,r8
        ld       r10,s1_ptr,8
        subu.cio r7,r12,r9
        ld       r12,s1_ptr,12
        ld.d     r8,s2_ptr,8
        st.d     r6,res_ptr,0
        subu     size,size,2
        addu     s1_ptr,s1_ptr,8
        addu     s2_ptr,s2_ptr,8
        addu     res_ptr,res_ptr,8
        bcnd     ge0,size,Loope1
Lend1:  subu.cio r6,r10,r8
        subu.cio r7,r12,r9
        st.d     r6,res_ptr,0

        bb0      0,size,Lret1
/* Subtract last limb */
        ld       r10,s1_ptr,8
        ld       r8,s2_ptr,8
        subu.cio r6,r10,r8
        st       r6,res_ptr,8

Lret1:  addu.ci  r2,r0,r0               ; return carry-out from most significant limb
        jmp.n    r1
        xor      r2,r2,1
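
; Note on the exit sequence above (and the identical ones at Lret1b and
; Lret2): subu.co r0,r0,r0 at entry sets the carry flag, so on this CPU
; carry=1 after a subu means "no borrow".  addu.ci r2,r0,r0 copies the
; final carry flag into r2, and the xor in the jmp.n delay slot inverts it,
; so the routine returns 1 exactly when the full subtraction produced a
; borrow.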

L1:     xor      r12,s1_ptr,res_ptr
        bb1      2,r12,L2
; **  V1b  **
        bb0      2,res_ptr,L_v1b        ; branch if res_ptr is aligned
/* Subtract least significant limb separately to align res_ptr and s1_ptr */
        ld       r10,s2_ptr,0
        addu     s2_ptr,s2_ptr,4
        ld       r8,s1_ptr,0
        addu     s1_ptr,s1_ptr,4
        subu     size,size,1
        subu.co  r6,r8,r10
        st       r6,res_ptr,0
        addu     res_ptr,res_ptr,4
L_v1b:  cmp      r12,size,2
        bb1      lt,r12,Lend2

        ld       r10,s2_ptr,0
        ld       r12,s2_ptr,4
        ld.d     r8,s1_ptr,0
        subu     size,size,10
        bcnd     lt0,size,Lfin1b
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
        align    8
Loop1b: subu     size,size,8
        subu.cio r6,r8,r10
        ld       r10,s2_ptr,8
        subu.cio r7,r9,r12
        ld       r12,s2_ptr,12
        ld.d     r8,s1_ptr,8
        st.d     r6,res_ptr,0
        subu.cio r6,r8,r10
        ld       r10,s2_ptr,16
        subu.cio r7,r9,r12
        ld       r12,s2_ptr,20
        ld.d     r8,s1_ptr,16
        st.d     r6,res_ptr,8
        subu.cio r6,r8,r10
        ld       r10,s2_ptr,24
        subu.cio r7,r9,r12
        ld       r12,s2_ptr,28
        ld.d     r8,s1_ptr,24
        st.d     r6,res_ptr,16
        subu.cio r6,r8,r10
        ld       r10,s2_ptr,32
        subu.cio r7,r9,r12
        ld       r12,s2_ptr,36
        addu     s2_ptr,s2_ptr,32
        ld.d     r8,s1_ptr,32
        addu     s1_ptr,s1_ptr,32
        st.d     r6,res_ptr,24
        addu     res_ptr,res_ptr,32
        bcnd     ge0,size,Loop1b

Lfin1b: addu     size,size,8-2
        bcnd     lt0,size,Lend1b
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope1b: subu.cio r6,r8,r10
        ld       r10,s2_ptr,8
        subu.cio r7,r9,r12
        ld       r12,s2_ptr,12
        ld.d     r8,s1_ptr,8
        st.d     r6,res_ptr,0
        subu     size,size,2
        addu     s1_ptr,s1_ptr,8
        addu     s2_ptr,s2_ptr,8
        addu     res_ptr,res_ptr,8
        bcnd     ge0,size,Loope1b
Lend1b: subu.cio r6,r8,r10
        subu.cio r7,r9,r12
        st.d     r6,res_ptr,0

        bb0      0,size,Lret1b
/* Subtract last limb */
        ld       r10,s2_ptr,8
        ld       r8,s1_ptr,8
        subu.cio r6,r8,r10
        st       r6,res_ptr,8

Lret1b: addu.ci  r2,r0,r0               ; return carry-out from most significant limb
        jmp.n    r1
        xor      r2,r2,1

; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr is the same.  */
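
; Concretely: limb pointers are 4-byte aligned, so only bit 2 distinguishes
; the two possible phases within an 8-byte double word.  We reach L2 only
; when bit 2 of (s2_ptr xor res_ptr) and of (s1_ptr xor res_ptr) are both
; set, hence bit 2 of (s1_ptr xor s2_ptr) is clear: s1_ptr and s2_ptr share
; the same double-word phase, and after peeling at most one limb both can
; be accessed with ld.d.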

L2:     cmp      r12,size,1
        bb1      eq,r12,Ljone
        bb0      2,s1_ptr,L_v2          ; branch if s1_ptr is aligned
/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
        ld       r10,s1_ptr,0
        addu     s1_ptr,s1_ptr,4
        ld       r8,s2_ptr,0
        addu     s2_ptr,s2_ptr,4
        subu     size,size,1
        subu.co  r6,r10,r8
        st       r6,res_ptr,0
        addu     res_ptr,res_ptr,4

L_v2:   subu     size,size,8
        bcnd     lt0,size,Lfin2
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
        align    8
Loop2:  subu     size,size,8
        ld.d     r8,s1_ptr,0
        ld.d     r6,s2_ptr,0
        subu.cio r8,r8,r6
        st       r8,res_ptr,0
        subu.cio r9,r9,r7
        st       r9,res_ptr,4
        ld.d     r8,s1_ptr,8
        ld.d     r6,s2_ptr,8
        subu.cio r8,r8,r6
        st       r8,res_ptr,8
        subu.cio r9,r9,r7
        st       r9,res_ptr,12
        ld.d     r8,s1_ptr,16
        ld.d     r6,s2_ptr,16
        subu.cio r8,r8,r6
        st       r8,res_ptr,16
        subu.cio r9,r9,r7
        st       r9,res_ptr,20
        ld.d     r8,s1_ptr,24
        ld.d     r6,s2_ptr,24
        subu.cio r8,r8,r6
        st       r8,res_ptr,24
        subu.cio r9,r9,r7
        st       r9,res_ptr,28
        addu     s1_ptr,s1_ptr,32
        addu     s2_ptr,s2_ptr,32
        addu     res_ptr,res_ptr,32
        bcnd     ge0,size,Loop2

Lfin2:  addu     size,size,8-2
        bcnd     lt0,size,Lend2
Loope2: ld.d     r8,s1_ptr,0
        ld.d     r6,s2_ptr,0
        subu.cio r8,r8,r6
        st       r8,res_ptr,0
        subu.cio r9,r9,r7
        st       r9,res_ptr,4
        subu     size,size,2
        addu     s1_ptr,s1_ptr,8
        addu     s2_ptr,s2_ptr,8
        addu     res_ptr,res_ptr,8
        bcnd     ge0,size,Loope2
Lend2:  bb0      0,size,Lret2
/* Subtract last limb */
Ljone:  ld       r10,s1_ptr,0
        ld       r8,s2_ptr,0
        subu.cio r6,r10,r8
        st       r6,res_ptr,0

Lret2:  addu.ci  r2,r0,r0               ; return carry-out from most significant limb
        jmp.n    r1
        xor      r2,r2,1
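
; For reference, a minimal C sketch of what this routine computes, assuming
; the usual mpn calling convention and that mp_limb_t is the 32-bit limb
; type used on this target:
;
;       mp_limb_t
;       __gmpn_sub_n (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
;                     const mp_limb_t *s2_ptr, mp_size_t size)
;       {
;         mp_limb_t borrow = 0;
;         mp_size_t i;
;         for (i = 0; i < size; i++)
;           {
;             mp_limb_t a = s1_ptr[i], b = s2_ptr[i];
;             mp_limb_t d = a - b - borrow;
;             borrow = (a < b) || (a == b && borrow);
;             res_ptr[i] = d;
;           }
;         return borrow;
;       }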