/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
   store the truncated integer part at rootp and the remainder at remp.

   Contributed by Paul Zimmermann (algorithm) and
   Paul Zimmermann and Torbjorn Granlund (implementation).

   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2002, 2005, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

/* FIXME:
   This implementation is not optimal when remp == NULL, since the complexity
   is M(n), whereas it should be M(n/k) on average.
*/

#include <stdio.h>		/* for NULL */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
				       mp_limb_t, int);

#define MPN_RSHIFT(cy,rp,up,un,cnt)		\
  do {						\
    if ((cnt) != 0)				\
      cy = mpn_rshift (rp, up, un, cnt);	\
    else					\
      {						\
	MPN_COPY_INCR (rp, up, un);		\
	cy = 0;					\
      }						\
  } while (0)

#define MPN_LSHIFT(cy,rp,up,un,cnt)		\
  do {						\
    if ((cnt) != 0)				\
      cy = mpn_lshift (rp, up, un, cnt);	\
    else					\
      {						\
	MPN_COPY_DECR (rp, up, un);		\
	cy = 0;					\
      }						\
  } while (0)


/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
   If remp != NULL, put in {remp, un} the remainder.
   Return the size (in limbs) of the remainder if remp != NULL,
   or a non-zero value iff the remainder is non-zero when remp == NULL.
   Assumes:
   (a) up[un-1] is not zero
   (b) rootp has at least space for ceil(un/k) limbs
   (c) remp has at least space for un limbs (in case remp != NULL)
   (d) the operands do not overlap.

   The auxiliary memory usage is 3*un+2 if remp == NULL,
   and 2*un+2 if remp != NULL.  FIXME: This is an incorrect comment.
*/
mp_size_t
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
	     mp_srcptr up, mp_size_t un, mp_limb_t k)
{
  mp_size_t m;
  ASSERT (un > 0);
  ASSERT (up[un - 1] != 0);
  ASSERT (k > 1);

  m = (un - 1) / k;		/* ceil(un/k) - 1 */
  if (remp == NULL && m > 2)
    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
       with one more limb, allowing us to compute the exact integral result
       (see the decimal analogue after this function). */
    {
      mp_ptr sp, wp;
      mp_size_t rn, sn, wn;
      TMP_DECL;
      TMP_MARK;
      wn = un + k;
      wp = TMP_ALLOC_LIMBS (wn);	/* will contain the padded input */
      sn = m + 2;			/* ceil(un/k) + 1 */
      sp = TMP_ALLOC_LIMBS (sn);	/* approximate root of padded input */
      MPN_COPY (wp + k, up, un);
      MPN_ZERO (wp, k);
      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
      /* The approximate root S = {sp,sn} is either the correct root of the
	 padded input {wp,wn}, or 1 too large.  Thus unless the least
	 significant limb of S is 0 or 1, we can deduce that the root of
	 {up,un} is S truncated by one limb.  (In case sp[0] = 1, we can
	 deduce the root, but not decide whether it is exact or not.) */
      MPN_COPY (rootp, sp + 1, sn - 1);
      TMP_FREE;
      return rn;
    }
  else
    {
      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
    }
}
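
/* A decimal analogue of the padding trick above (illustration only, with
   base 10 playing the role of a limb): for U = 99 and k = 2, the padded
   input is 9900, whose square root truncates to 99; dropping the low digit
   gives 9 = floor(sqrt(99)).  An approximation 1 too large, here 100, has
   low digit 0 and would wrongly truncate to 10, which is why a least
   significant limb of 0 or 1 forces the exact computation (the approx
   handling in mpn_rootrem_internal below).  */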

/* if approx is non-zero, does not compute the final remainder */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy;
  unsigned long int unb;	/* number of significant bits of {up,un} */
  unsigned long int xnb;	/* number of significant bits of the result */
  unsigned long b, kk;
  unsigned long sizes[GMP_NUMB_BITS + 1];
  int ni, i;
  int c;
  int logk;
  TMP_DECL;

  TMP_MARK;

  if (remp == NULL)
    {
      rp = TMP_ALLOC_LIMBS (un + 1);	/* will contain the remainder */
      scratch = rp;			/* used by mpn_div_q */
    }
  else
    {
      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
      rp = remp;
    }
  sp = rootp;

  MPN_SIZEINBASE_2EXP (unb, up, un, 1);
  /* unb is the number of bits of the input U */

  xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
  /* xnb is the number of bits of the root R */

  if (xnb == 1)			/* root is 1 */
    {
      if (remp == NULL)
	remp = rp;
      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
      MPN_NORMALIZE (remp, un);	/* There should be at most one zero limb,
				   if we require U to be normalized.  */
      rootp[0] = 1;
      TMP_FREE;
      return un;
    }

  /* We initialize the algorithm with a 1-bit approximation: since we know
     the root has exactly xnb bits, we take r0 = 2^(xnb-1), so that
     r0^k = 2^(k*(xnb-1)), which we subtract from the input. */
  kk = k * (xnb - 1);		/* number of truncated bits in the input */
  rn = un - kk / GMP_NUMB_BITS;	/* number of limbs of the non-truncated part */
  MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
  mpn_sub_1 (rp, rp, rn, 1);	/* subtract the initial approximation: since
				   the non-truncated part is less than 2^k, it
				   has at most k bits: rn <= ceil(k/GMP_NUMB_BITS) */
  sp[0] = 1;			/* initial approximation */
  sn = 1;			/* it has one limb */

  for (logk = 1; ((k - 1) >> logk) != 0; logk++)
    ;
  /* logk = ceil(log(k)/log(2)) */

  b = xnb - 1;	/* number of remaining bits to determine in the kth root */
  ni = 0;
  while (b != 0)
    {
      /* invariant: here we want b+1 total bits for the kth root */
      sizes[ni] = b;
      /* if c is the new value of b, this means that we'll go from a root
	 of c+1 bits (say s') to a root of b+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" by Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((b + log2(k))/2). */
      b = (b + logk + 1) / 2;
      if (b >= sizes[ni])
	b = sizes[ni] - 1;	/* add just one bit at a time */
      ni++;
    }
  sizes[ni] = 0;
  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] = xnb - 1 > sizes[1] > ... > sizes[ni] = 0, with
     sizes[i] <= 2 * sizes[i+1] for i+1 < ni.
     Newton iteration will first extend the root to 1+sizes[ni-1] bits,
     then to 1+sizes[ni-2] bits, ..., and finally to 1+sizes[0] = xnb
     bits. */
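
  /* Worked example (illustration only): for k = 3 (logk = 2) and xnb = 10,
     the loop above yields ni = 6 and sizes[] = {9, 6, 4, 3, 2, 1, 0}, so
     successive iterations extend the root from 1 bit to 2, 3, 4, 5, 7 and
     finally 10 bits. */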

  /* qp and wp need enough space to store S'^k where S' is an approximate
     root.  Since S' can be as large as S+2, the worst case is when S = 2 and
     S' = 4.  But then since we know the number of bits of S in advance, S'
     can only be 3 at most.  Similarly for S = 4, then S' can be 6 at most.
     So the worst case is S'/S = 3/2, thus S'^k <= (3/2)^k * S^k.  Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
#define EXTRA (2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS))
  qp = TMP_ALLOC_LIMBS (un + EXTRA);	/* will contain quotient and remainder
					   of R/(k*S^(k-1)), and S^k */
  wp = TMP_ALLOC_LIMBS (un + EXTRA);	/* will contain S^(k-1), k*S^(k-1),
					   and temporary for mpn_pow_1 */
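
  /* For instance (illustration only), with GMP_NUMB_BITS = 64, EXTRA
     evaluates to 2 for k = 3 (0.585*3/64 truncates to 0) and to 11 for
     k = 1000 (0.585*1000/64 = 9.14... truncates to 9). */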

  wp[0] = 1;			/* {sp,sn}^(k-1) = 1 */
  wn = 1;
  for (i = ni; i != 0; i--)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[i] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */
      b = sizes[i - 1] - sizes[i];	/* number of bits to compute in this
					   iteration */

      /* Reinsert a low zero limb if we normalized away the entire
	 remainder */
      if (rn == 0)
	{
	  rp[0] = 0;
	  rn = 1;
	}

      /* first multiply the remainder by 2^b */
      MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
      rn = rn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  rp[rn] = cy;
	  rn++;
	}

      kk = kk - b;

      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      bn = b / GMP_NUMB_BITS;	/* lowest limb from the high part of rp[] */
      save = rp[bn];
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
	       - floor(kk / GMP_NUMB_BITS)
	    <= 1 + (kk + b - 1) / GMP_NUMB_BITS
	       - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
      /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
      if (nl - 1 > bn)
	save2 = rp[bn + 1];
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero the high bits of rp[bn] */
      rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
      /* restore the corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2;	/* the low b bits go in rp[0..bn] only, since
				   they start at bit 0 of rp[0], so they use
				   at most ceil(b/GMP_NUMB_BITS) limbs */

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (rn < wn)
	{
	  qn = 0;
	}
      else
	{
	  qn = rn - wn;		/* expected quotient size */
	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
	  qn += qp[qn] != 0;
	}

      /* 5: current buffers: {sp,sn}, {qp,qn}.
	 Note: {rp,rn} is not needed any more since we'll recompute it from
	 the input at the end of the loop.
      */

      /* Number of limbs used by b bits, when the least significant bit is
	 aligned to the least significant limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* the quotient should be smaller than 2^b, since the previous
	 approximation was correctly rounded toward zero */
      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
		      qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
	{
	  qn = b / GMP_NUMB_BITS + 1;	/* b+1 bits */
	  MPN_ZERO (qp, qn);
	  qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
	  MPN_DECR_U (qp, qn, 1);
	  qn -= qp[qn - 1] == 0;
	}

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      /* 7: current buffers: {sp,sn}, {qp,qn} */

      ASSERT_ALWAYS (bn >= qn);	/* this is ok since in the case qn > bn
				   above, q is set to 2^b-1, which has
				   exactly bn limbs */

      /* Combine sB and q to form sB + q. */
      save = sp[b / GMP_NUMB_BITS];
      MPN_COPY (sp, qp, qn);
      MPN_ZERO (sp + qn, bn - qn);
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b;	/* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS,
		  kk % GMP_NUMB_BITS);
      rn = un - kk / GMP_NUMB_BITS;
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */
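
      /* Worked example of the correction step (illustration only): for
	 U = 8 and k = 2, the refinement above proposes S = 3 (from s = 1,
	 b = 1, q = floor(2/2) = 1), but 3^2 = 9 > 8 = floor(U/2^kk) with
	 kk = 0, so the loop below decrements once, giving the true root
	 S = 2 = floor(sqrt(8)). */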

      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  if (i == 1)
	    {
	      /* Last iteration: we don't need W anymore. */
	      /* mpn_pow_1 requires that both qp and wp have enough space to
		 store the result {sp,sn}^k plus 1 extra limb */
	      approx = approx && (sp[0] > 1);
	      qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
	    }
	  else
	    {
	      /* W <- S^(k-1) for the next iteration,
		 and S^k = W * S. */
	      wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	      mpn_mul (qp, wp, wn, sp, sn);
	      qn = wn + sn;
	      qn -= qp[qn - 1] == 0;
	    }

	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      ASSERT_ALWAYS (c <= 1);
      ASSERT_ALWAYS (rn >= qn);

      /* R = R - Q = floor(U/2^kk) - S^k */
      if (i > 1 || approx == 0)
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE (rp, rn);
	}
      /* otherwise we have rn > 0, thus the return value is ok */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
    }

  TMP_FREE;
  return rn;
}
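
/* Usage sketch (illustration only; reaches the code above through the
   documented mpz interface rather than the internal mpn one):

     #include <stdio.h>
     #include <gmp.h>

     int
     main (void)
     {
       mpz_t u, root, rem;
       mpz_init_set_ui (u, 1000000);
       mpz_init (root);
       mpz_init (rem);
       mpz_rootrem (root, rem, u, 3);   // root = 100, rem = 0
       gmp_printf ("root = %Zd, remainder = %Zd\n", root, rem);
       mpz_clear (u);
       mpz_clear (root);
       mpz_clear (rem);
       return 0;
     }
*/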