dnl  SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl  store difference in a third limb vector.

dnl  Copyright 2001-2003, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C UltraSPARC 1&2:     4
C UltraSPARC 3:	      4.5

C Compute carry-out from the most significant bits of u,v, and r, where
C r=u-v-carry_in, using logic operations.

C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
C recurrency, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
C Therefore, it seems futile to try to optimize this any further...

C INPUT PARAMETERS
C rp is the base address the difference limbs are stored through, up and vp
C are the base addresses the two source operands are loaded from, and n is
C the limb count.  Limbs are 8 bytes each, accessed with ldx/stx.
define(`rp',`%i0')
define(`up',`%i1')
define(`vp',`%i2')
define(`n',`%i3')

C Working registers: four u limbs and four v limbs are held at a time.
define(`u0',`%l0')
define(`u1',`%l2')
define(`u2',`%l4')
define(`u3',`%l6')
define(`v0',`%l1')
define(`v1',`%l3')
define(`v2',`%l5')
define(`v3',`%l7')

C cy carries the borrow from one limb to the next.  mpn_sub_nc uses whatever
C value is in this register on entry; mpn_sub_n clears it before use.
define(`cy',`%i4')

define(`fanop',`fitod %f0,%f2')		dnl  A quasi nop running in the FA pipe
define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe

C Borrow logic applied to every limb below, with r = u - v - cy:
C	%g2 = u | ~v		orn u,v
C	%g2 = r | ~%g2		orn r,%g2	which equals r | (~u & v)
C	%g3 = u & ~v		andn u,v
C	%g2 = %g2 & ~%g3	andn %g2,%g3
C	cy  = %g2 >> 63		srlx, leaves 0 or 1
C Only plain sub is used, so the integer condition codes are free for the
C loop counter tests.

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)

C mpn_sub_nc: entry point that does not initialise cy.  It performs the same
C setup as mpn_sub_n and then joins the shared code at L(com), or at .Loop0
C when n < 4.
PROLOGUE(mpn_sub_nc)
	save	%sp,-160,%sp

	fitod	%f0,%f0		C make sure f0 contains small, quiet number
	subcc	n,4,%g0		C compare n with 4, result discarded
	bl,pn	%xcc,.Loop0	C n < 4: use the one limb at a time loop
	nop			C delay slot, cy is left untouched
	b,a	L(com)		C annulled branch, so no delay slot is executed
EPILOGUE()

C mpn_sub_n: same operation with the incoming borrow forced to zero.
PROLOGUE(mpn_sub_n)
	save	%sp,-160,%sp

	fitod	%f0,%f0		C make sure f0 contains small, quiet number
	subcc	n,4,%g0		C compare n with 4, result discarded
	bl,pn	%xcc,.Loop0	C n < 4: use the one limb at a time loop
	mov	0,cy		C delay slot, executed on both paths
L(com):
C Here n >= 4.  Load the first four limb pairs and step up and vp past them.
	ldx	[up+0],u0
	ldx	[vp+0],v0
	add	up,32,up
	ldx	[up-24],u1
	ldx	[vp+8],v1
	add	vp,32,vp
	ldx	[up-16],u2
	ldx	[vp-16],v2
	ldx	[up-8],u3
	ldx	[vp-8],v3
	subcc	n,8,n		C negative when fewer than 8 limbs were requested
	sub	u0,v0,%g1	C main sub
	sub	%g1,cy,%g5	C carry sub
	orn	u0,v0,%g2
	bl,pn	%xcc,.Lend4567	C 4 to 7 limbs: no further block to preload
	fanop
	b,a	.Loop

	.align	16
C START MAIN LOOP
C Four limbs per iteration.  On entry to each iteration %g5 already holds
C the difference for limb 0 and %g2 holds u0 | ~v0.  While the limbs in
C registers are finished and stored, the next four limb pairs are loaded
C into the same registers.  rp is advanced part way through, so the last two
C stores use negative offsets.  The C -- separators presumably mark the
C intended instruction issue groups, padded with fanop and fmnop.
.Loop:	orn	%g5,%g2,%g2
	andn	u0,v0,%g3
	ldx	[up+0],u0
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp+0],v0
	add	up,32,up
	fanop
C --
	srlx	%g2,63,cy
	sub	u1,v1,%g1
	stx	%g5,[rp+0]
	fanop
C --
	sub	%g1,cy,%g5
	orn	u1,v1,%g2
	fmnop
	fanop
C --
	orn	%g5,%g2,%g2
	andn	u1,v1,%g3
	ldx	[up-24],u1
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp+8],v1
	add	vp,32,vp
	fanop
C --
	srlx	%g2,63,cy
	sub	u2,v2,%g1
	stx	%g5,[rp+8]
	fanop
C --
	sub	%g1,cy,%g5
	orn	u2,v2,%g2
	fmnop
	fanop
C --
	orn	%g5,%g2,%g2
	andn	u2,v2,%g3
	ldx	[up-16],u2
	fanop
C --
	andn	%g2,%g3,%g2
	ldx	[vp-16],v2
	add	rp,32,rp
	fanop
C --
	srlx	%g2,63,cy
	sub	u3,v3,%g1
	stx	%g5,[rp-16]
	fanop
C --
	sub	%g1,cy,%g5
	orn	u3,v3,%g2
	fmnop
	fanop
C --
	orn	%g5,%g2,%g2
	andn	u3,v3,%g3
	ldx	[up-8],u3
	fanop
C --
	andn	%g2,%g3,%g2
	subcc	n,4,n		C sets the flags tested by bge below
	ldx	[vp-8],v3
	fanop
C --
	srlx	%g2,63,cy
	sub	u0,v0,%g1
	stx	%g5,[rp-8]
	fanop
C --
	sub	%g1,cy,%g5
	orn	u0,v0,%g2
	bge,pt	%xcc,.Loop	C continue while n stayed non-negative
	fanop
C END MAIN LOOP
C Wind down: finish and store the four limb pairs still held in registers,
C without loading any more.  The first sub and orn for limb 0 have already
C been done, either before the loop or at the end of its last iteration.
.Lend4567:
	orn	%g5,%g2,%g2
	andn	u0,v0,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	sub	u1,v1,%g1
	stx	%g5,[rp+0]
	sub	%g1,cy,%g5
	orn	u1,v1,%g2
	orn	%g5,%g2,%g2
	andn	u1,v1,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	sub	u2,v2,%g1
	stx	%g5,[rp+8]
	sub	%g1,cy,%g5
	orn	u2,v2,%g2
	orn	%g5,%g2,%g2
	andn	u2,v2,%g3
	andn	%g2,%g3,%g2
	add	rp,32,rp
	srlx	%g2,63,cy
	sub	u3,v3,%g1
	stx	%g5,[rp-16]
	sub	%g1,cy,%g5
	orn	u3,v3,%g2
	orn	%g5,%g2,%g2
	andn	u3,v3,%g3
	andn	%g2,%g3,%g2
	srlx	%g2,63,cy
	stx	%g5,[rp-8]

C n is negative here, in the range -4 to -1.  Adding 4 gives the number of
C limbs still to do, 0 to 3.
	addcc	n,4,n
	bz,pn	%xcc,.Lret	C nothing left, return the borrow
	fanop

C One limb per iteration.  Used for the leftover limbs above and for the
C whole operation when the entry code found n < 4.  The loop body runs
C before n is tested, so it must be entered with n nonzero.
.Loop0:	ldx	[up],u0
	add	up,8,up
	ldx	[vp],v0
	add	vp,8,vp
	add	rp,8,rp
	subcc	n,1,n
	sub	u0,v0,%g1
	orn	u0,v0,%g2
	sub	%g1,cy,%g5
	andn	u0,v0,%g3
	orn	%g5,%g2,%g2
	stx	%g5,[rp-8]	C rp was already advanced, hence the -8
	andn	%g2,%g3,%g2
	bnz,pt	%xcc,.Loop0
	srlx	%g2,63,cy	C delay slot: borrow for the next limb or for return

C Return the final borrow, 0 or 1.  restore sits in the delay slot of ret.
.Lret:	mov	cy,%i0
	ret
	restore
EPILOGUE(mpn_sub_n)