/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"


/*                  64-bit divisor       32-bit divisor
                     cycles/limb          cycles/limb
                      (approx)             (approx)
                   integer  fraction    integer  fraction
   Ultrasparc 2i:    160      160         122       96
*/


/* 32-bit divisors are treated in special case code.  This requires 4 mulx
   per limb instead of 8 in the general case.

   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
   addressing, to get the two halves of each limb read in the correct order.
   This is kept in an adj variable.  Doing that measures about 4 c/l faster
   than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter
   shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
   (on gcc 3.2.1 at least).  The fraction loop doesn't seem affected, but we
   still use a variable since that ought to work out best.  */

/* Divide {ap_limbptr, size_limbs} by d_limb, extending the quotient with
   xsize_limbs additional "fraction" limbs (as if xsize_limbs zero limbs were
   appended below the dividend).  The size_limbs+xsize_limbs quotient limbs
   are stored at {qp_limbptr, xsize_limbs+size_limbs}, fraction part in the
   low xsize_limbs positions, and the remainder is returned.

   Two main paths:
     - d_limb <= 0xFFFFFFFF: work in 32-bit half limbs via
       udiv_qrnnd_half_preinv, halving the number of mulx needed.
     - otherwise: the usual full-limb udiv_qrnnd_preinv loop.
   Both paths normalize d to the high bit set and use a precomputed inverse
   (invert_half_limb / invert_limb) rather than hardware division.  */

mp_limb_t
mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
              mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
{
  mp_size_t  total_size_limbs;
  mp_size_t  i;

  ASSERT (xsize_limbs >= 0);
  ASSERT (size_limbs >= 0);
  ASSERT (d_limb != 0);
  /* FIXME: What's the correct overlap rule when xsize!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
                                  ap_limbptr, size_limbs));

  total_size_limbs = size_limbs + xsize_limbs;
  if (UNLIKELY (total_size_limbs == 0))
    return 0;

  /* udivx is good for total_size==1, and no need to bother checking
     limb<divisor, since if that's likely the caller should check */
  if (UNLIKELY (total_size_limbs == 1))
    {
      mp_limb_t  a, q;
      /* size_limbs==0 means a pure fraction step on a zero dividend limb.  */
      a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
      q = a / d_limb;
      qp_limbptr[0] = q;
      return a - q*d_limb;
    }

  if (d_limb <= CNST_LIMB(0xFFFFFFFF))
    {
      /* ---- 32-bit divisor: operate on 32-bit half limbs ---- */
      mp_size_t  size, xsize, total_size, adj;
      unsigned   *qp, n1, n0, q, r, nshift, norm_rmask;
      mp_limb_t  dinv_limb;
      const unsigned *ap;
      int        norm, norm_rshift;

      /* Counts are doubled: each 64-bit limb is two 32-bit halves.  */
      size = 2 * size_limbs;
      xsize = 2 * xsize_limbs;
      total_size = size + xsize;

      ap = (unsigned *) ap_limbptr;
      qp = (unsigned *) qp_limbptr;

      qp += xsize;
      r = 0;        /* initial remainder */

      if (LIKELY (size != 0))
        {
          /* Most significant half of the top limb.  */
          n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];

          /* If the length of the source is uniformly distributed, then
             there's a 50% chance of the high 32-bits being zero, which we
             can skip.  */
          if (n1 == 0)
            {
              n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
              total_size--;
              size--;
              ASSERT (size > 0);  /* because always even */
              qp[size + HALF_ENDIAN_ADJ(1)] = 0;
            }

          /* Skip a division if high < divisor (high quotient 0).  Testing
             here before normalizing will still skip as often as
             possible.  */
          if (n1 < d_limb)
            {
              r = n1;
              size--;
              qp[size + HALF_ENDIAN_ADJ(size)] = 0;
              total_size--;
              if (total_size == 0)
                return r;
            }
        }

      /* Normalize the 32-bit divisor to the top of a 32-bit word.
         count_leading_zeros_32 presumably counts within 64 bits, hence
         the -32 adjustment — behavior defined in sparc64.h.  */
      count_leading_zeros_32 (norm, d_limb);
      norm -= 32;
      d_limb <<= norm;
      r <<= norm;

      /* norm_rmask guards the (x >> 32) case when norm == 0, which would
         otherwise be an undefined full-width shift.  */
      norm_rshift = 32 - norm;
      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);

      invert_half_limb (dinv_limb, d_limb);

      if (LIKELY (size != 0))
        {
          /* Integer part: high to low, feeding each new half-limb shifted
             into normalized position.  adj alternates sign so that on big
             endian systems successive halves are read in the right order.  */
          i = size - 1;
          adj = HALF_ENDIAN_ADJ (i);
          n1 = ap[i + adj];
          adj = -adj;
          r |= ((n1 >> norm_rshift) & norm_rmask);
          for ( ; i > 0; i--)
            {
              n0 = ap[i-1 + adj];
              adj = -adj;
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
              udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
              qp[i + adj] = q;
              n1 = n0;
            }
          /* Lowest half-limb: nothing below it to shift in.  */
          nshift = n1 << norm;
          udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
          qp[0 + HALF_ENDIAN_ADJ(0)] = q;
        }

      /* Fraction part: divide zero half-limbs, remainder propagating.  */
      qp -= xsize;
      adj = HALF_ENDIAN_ADJ (0);
      for (i = xsize-1; i >= 0; i--)
        {
          udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
          adj = -adj;
          qp[i + adj] = q;
        }

      /* Undo the normalization to get the true remainder.  */
      return r >> norm;
    }
  else
    {
      /* ---- full 64-bit divisor: standard preinv loop ---- */
      mp_srcptr  ap;
      mp_ptr     qp;
      mp_size_t  size, xsize, total_size;
      mp_limb_t  d, n1, n0, q, r, dinv, nshift, norm_rmask;
      int        norm, norm_rshift;

      ap = ap_limbptr;
      qp = qp_limbptr;
      size = size_limbs;
      xsize = xsize_limbs;
      total_size = total_size_limbs;
      d = d_limb;

      qp += total_size;   /* above high limb */
      r = 0;              /* initial remainder */

      if (LIKELY (size != 0))
        {
          /* Skip a division if high < divisor (high quotient 0).  Testing
             here before normalizing will still skip as often as
             possible.  */
          n1 = ap[size-1];
          if (n1 < d)
            {
              r = n1;
              *--qp = 0;
              total_size--;
              if (total_size == 0)
                return r;
              size--;
            }
        }

      /* Normalize divisor so its high bit is set, as required by the
         precomputed-inverse division.  */
      count_leading_zeros (norm, d);
      d <<= norm;
      r <<= norm;

      /* norm_rmask makes the right shift a no-op when norm == 0, avoiding
         an undefined shift by GMP_LIMB_BITS.  */
      norm_rshift = GMP_LIMB_BITS - norm;
      norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));

      invert_limb (dinv, d);

      if (LIKELY (size != 0))
        {
          /* Integer part, high to low.  */
          n1 = ap[size-1];
          r |= ((n1 >> norm_rshift) & norm_rmask);
          for (i = size-2; i >= 0; i--)
            {
              n0 = ap[i];
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
              udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
              *--qp = q;
              n1 = n0;
            }
          /* Lowest limb: nothing below it to shift in.  */
          nshift = n1 << norm;
          udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
          *--qp = q;
        }

      /* Fraction part: divide zero limbs, remainder propagating.  */
      for (i = 0; i < xsize; i++)
        {
          udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
          *--qp = q;
        }

      /* Undo the normalization to get the true remainder.  */
      return r >> norm;
    }
}