/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"


/*                 64-bit divisor       32-bit divisor
                    cycles/limb           cycles/limb
                      (approx)              (approx)
                   integer  fraction    integer  fraction
   Ultrasparc 2i:    160       160        122       96
*/


/* 32-bit divisors are treated in special case code.  This requires 4 mulx
   per limb instead of 8 in the general case.

   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
   addressing, to get the two halves of each limb read in the correct order.
   This is kept in an adj variable.  Doing that measures about 4 c/l faster
   than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter
   shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
   (on gcc 3.2.1 at least).  The fraction loop doesn't seem affected, but we
   still use a variable since that ought to work out best.  */

mp_limb_t
mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
              mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
{
  mp_size_t  total_size_limbs;
  mp_size_t  i;

  ASSERT (xsize_limbs >= 0);
  ASSERT (size_limbs >= 0);
  ASSERT (d_limb != 0);
  /* FIXME: What's the correct overlap rule when xsize!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
                                  ap_limbptr, size_limbs));

  total_size_limbs = size_limbs + xsize_limbs;
  if (UNLIKELY (total_size_limbs == 0))
    return 0;

  /* udivx is good for total_size==1, and no need to bother checking
     limb<divisor, since if that's likely the caller should check */
  if (UNLIKELY (total_size_limbs == 1))
    {
      mp_limb_t  a, q;
      a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
      q = a / d_limb;
      qp_limbptr[0] = q;
      return a - q*d_limb;
    }

  if (d_limb <= CNST_LIMB(0xFFFFFFFF))
    {
      mp_size_t  size, xsize, total_size, adj;
      unsigned   *qp, n1, n0, q, r, nshift, norm_rmask;
      mp_limb_t  dinv_limb;
      const unsigned *ap;
      int        norm, norm_rshift;

      size = 2 * size_limbs;
      xsize = 2 * xsize_limbs;
      total_size = size + xsize;

      ap = (unsigned *) ap_limbptr;
      qp = (unsigned *) qp_limbptr;

      qp += xsize;
      r = 0;        /* initial remainder */

      if (LIKELY (size != 0))
        {
          n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];

          /* If the length of the source is uniformly distributed, then
             there's a 50% chance of the high 32-bits being zero, which we
             can skip.  */
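          /* Skipping costs no division: store a zero quotient half and
             drop to the low half of the top limb.  size started out even
             (2*size_limbs), so it stays >= 1 after the decrement below.  */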
          if (n1 == 0)
            {
              n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
              total_size--;
              size--;
              ASSERT (size > 0);  /* because always even */
              qp[size + HALF_ENDIAN_ADJ(1)] = 0;
            }

          /* Skip a division if high < divisor (high quotient 0).  Testing
             here before normalizing will still skip as often as
             possible.  */
          if (n1 < d_limb)
            {
              r = n1;
              size--;
              qp[size + HALF_ENDIAN_ADJ(size)] = 0;
              total_size--;
              if (total_size == 0)
                return r;
            }
        }

      count_leading_zeros_32 (norm, d_limb);
      norm -= 32;
      d_limb <<= norm;
      r <<= norm;

      norm_rshift = 32 - norm;
      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);

      invert_half_limb (dinv_limb, d_limb);

      if (LIKELY (size != 0))
        {
          i = size - 1;
          adj = HALF_ENDIAN_ADJ (i);
          n1 = ap[i + adj];
          adj = -adj;
          r |= ((n1 >> norm_rshift) & norm_rmask);
          for ( ; i > 0; i--)
            {
              n0 = ap[i-1 + adj];
              adj = -adj;
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
              udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
              qp[i + adj] = q;
              n1 = n0;
            }
          nshift = n1 << norm;
          udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
          qp[0 + HALF_ENDIAN_ADJ(0)] = q;
        }
      qp -= xsize;
      adj = HALF_ENDIAN_ADJ (0);
      for (i = xsize-1; i >= 0; i--)
        {
          udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
          adj = -adj;
          qp[i + adj] = q;
        }

      return r >> norm;
    }
  else
    {
      mp_srcptr  ap;
      mp_ptr     qp;
      mp_size_t  size, xsize, total_size;
      mp_limb_t  d, n1, n0, q, r, dinv, nshift, norm_rmask;
      int        norm, norm_rshift;

      ap = ap_limbptr;
      qp = qp_limbptr;
      size = size_limbs;
      xsize = xsize_limbs;
      total_size = total_size_limbs;
      d = d_limb;

      qp += total_size;   /* above high limb */
      r = 0;              /* initial remainder */

      if (LIKELY (size != 0))
        {
          /* Skip a division if high < divisor (high quotient 0).  Testing
             here before normalizing will still skip as often as
             possible.  */
          n1 = ap[size-1];
          if (n1 < d)
            {
              r = n1;
              *--qp = 0;
              total_size--;
              if (total_size == 0)
                return r;
              size--;
            }
        }

      count_leading_zeros (norm, d);
      d <<= norm;
      r <<= norm;

      norm_rshift = GMP_LIMB_BITS - norm;
      norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));

      invert_limb (dinv, d);

      if (LIKELY (size != 0))
        {
          n1 = ap[size-1];
          r |= ((n1 >> norm_rshift) & norm_rmask);
          for (i = size-2; i >= 0; i--)
            {
              n0 = ap[i];
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
              udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
              *--qp = q;
              n1 = n0;
            }
          nshift = n1 << norm;
          udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
          *--qp = q;
        }
      for (i = 0; i < xsize; i++)
        {
          udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
          *--qp = q;
        }
      return r >> norm;
    }
}
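
/* Usage sketch, not part of the library build: one plausible way a caller
   might exercise mpn_divrem_1, dividing a two-limb operand by a single limb
   with no fraction limbs (xsize == 0).  The guard macro DIVREM_1_EXAMPLE is
   arbitrary (not a GMP macro) and only keeps this illustration out of a
   normal compile; it relies on the gmp.h/gmp-impl.h includes above.  */
#ifdef DIVREM_1_EXAMPLE
#include <stdio.h>

int
main (void)
{
  mp_limb_t  np[2] = { 5, 1 };   /* 2-limb operand, least significant limb first */
  mp_limb_t  qp[2];              /* quotient needs size + xsize = 2 limbs */
  mp_limb_t  r;

  /* Quotient of {np,2} divided by 7 goes to {qp,2}; the remainder is
     returned.  */
  r = mpn_divrem_1 (qp, 0, np, 2, CNST_LIMB(7));

  printf ("remainder = %lu\n", (unsigned long) r);
  return 0;
}
#endif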