/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
   store the truncated integer part at rootp and the remainder at remp.

   Contributed by Paul Zimmermann (algorithm) and
   Paul Zimmermann and Torbjorn Granlund (implementation).
   Marco Bodrato wrote logbased_root to seed the loop.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

/* FIXME:
     This implementation is not optimal when remp == NULL, since the complexity
     is M(n), whereas it should be M(n/k) on average.
*/

#include <stdio.h>		/* for NULL */

#include "gmp-impl.h"
#include "longlong.h"
static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
				       mp_limb_t, int);

#define MPN_RSHIFT(rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      mpn_rshift (rp, up, un, cnt);					\
    else								\
      {									\
	MPN_COPY_INCR (rp, up, un);					\
      }									\
  } while (0)

#define MPN_LSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      cy = mpn_lshift (rp, up, un, cnt);				\
    else								\
      {									\
	MPN_COPY_DECR (rp, up, un);					\
	cy = 0;								\
      }									\
  } while (0)

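/* Note: these wrappers exist because mpn_rshift and mpn_lshift require a
   shift count in [1, GMP_NUMB_BITS-1]; a count of zero must degrade to a
   plain limb copy.  A minimal sketch of the intended use (illustrative
   only; dst, src and n are hypothetical):

     mp_limb_t cy;
     MPN_RSHIFT (dst, src, n, bits % GMP_NUMB_BITS);     // dst = src >> bits
     MPN_LSHIFT (cy, dst, src, n, bits % GMP_NUMB_BITS); // dst = src << bits

   MPN_COPY_INCR/MPN_COPY_DECR are used so that the copy direction matches
   the operand overlap allowed by the corresponding shift.  */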

/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
   If remp <> NULL, put in {remp, un} the remainder.
   Return the size (in limbs) of the remainder if remp <> NULL,
	  or a non-zero value iff the remainder is non-zero when remp = NULL.
   Assumes:
   (a) up[un-1] is not zero
   (b) rootp has at least space for ceil(un/k) limbs
   (c) remp has at least space for un limbs (in case remp <> NULL)
   (d) the operands do not overlap.

   The auxiliary memory usage is 3*un+2 if remp = NULL,
   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
*/
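
/* A minimal calling sketch, assuming a caller that has already normalized
   its operand (the variable names below are illustrative, not part of GMP):

     mp_srcptr up;        // operand, up[un-1] != 0
     mp_size_t un;        // number of limbs of the operand
     mp_limb_t k = 3;     // take the cube root
     mp_ptr rootp = ...;  // ceil(un/k) limbs of space
     mp_ptr remp = ...;   // un limbs of space, or NULL
     mp_size_t rn = mpn_rootrem (rootp, remp, up, un, k);
     // rn is the limb size of the remainder (remp != NULL), or merely
     // non-zero iff the root is inexact (remp == NULL).

   Outside of GMP itself, the documented route to this code is mpz_root /
   mpz_rootrem.  */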
mp_size_t
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
	     mp_srcptr up, mp_size_t un, mp_limb_t k)
{
  ASSERT (un > 0);
  ASSERT (up[un - 1] != 0);
  ASSERT (k > 1);

  if (UNLIKELY (k == 2))
    return mpn_sqrtrem (rootp, remp, up, un);
  /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
  if (remp == NULL && (un + 2) / 3 > k)
    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
       with one more limb, allowing us to compute the exact integral result. */
    {
      mp_ptr sp, wp;
      mp_size_t rn, sn, wn;
      TMP_DECL;
      TMP_MARK;
      wn = un + k;
      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
      TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
			 sp, sn); /* approximate root of padded input */
      MPN_COPY (wp + k, up, un);
      MPN_FILL (wp, k, 0);
      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
      /* The approximate root S = {sp,sn} is either the correct root of
	 {wp,wn}, or 1 too large.  Thus unless the least significant limb of
	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
	 whether it is exact or not.) */
      MPN_COPY (rootp, sp + 1, sn - 1);
      TMP_FREE;
      return rn;
    }
  else
    {
      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
    }
}
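
/* Worked illustration of the padding trick above (numbers hypothetical):
   for un = 10 and k = 3, the input is extended to wn = 13 limbs by
   prepending k = 3 zero low limbs, i.e. W = U * 2^(3*GMP_NUMB_BITS).  Then

     root(W) = root(U) * 2^GMP_NUMB_BITS  (exactly, up to truncation),

   so the approximate root S of W computed with approx=1 carries
   sn = ceil(un/k) + 1 = 5 limbs, and dropping its least significant limb
   yields the ceil(un/k) = 4 limbs of root(U).  The one-unit uncertainty of
   S is absorbed by the dropped limb whenever sp[0] >= 2.  */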

#define LOGROOT_USED_BITS 8
#define LOGROOT_NEEDS_TWO_CORRECTIONS 1
#define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
/* Puts in *rootp some bits of the k-th root of the number
   2^bitn * 1.op; where op represents the "fractional" bits.

   The returned value is the number of bits of the root minus one;
   i.e. an approximation of the root will be
   (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).

   Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
   one is not counted).
 */
static unsigned
logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
{
  /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
  static const
  unsigned char vlog[] = {1,   2,   4,   5,   7,   8,   9,  11,  12,  14,  15,  16,  18,  19,  21,  22,
			 23,  25,  26,  27,  29,  30,  31,  33,  34,  35,  37,  38,  39,  40,  42,  43,
			 44,  46,  47,  48,  49,  51,  52,  53,  54,  56,  57,  58,  59,  61,  62,  63,
			 64,  65,  67,  68,  69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,
			 83,  84,  85,  87,  88,  89,  90,  91,  92,  93,  94,  96,  97,  98,  99, 100,
			101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
			118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
			135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
			150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
			165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
			180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
			194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
			207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
			220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
			232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
			245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};

  /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
  static const
  unsigned char vexp[] = {0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,   9,  10,  11,
			 12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  22,  23,
			 23,  24,  25,  26,  26,  27,  28,  29,  30,  30,  31,  32,  33,  33,  34,  35,
			 36,  37,  37,  38,  39,  40,  41,  41,  42,  43,  44,  45,  45,  46,  47,  48,
			 49,  50,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,
			 62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  73,  74,  75,  75,
			 76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  86,  87,  88,  89,  90,
			 91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
			107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
			123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
			139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
			157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
			175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
			194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
			214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
			235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
  mp_bitcnt_t retval;

  if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
    {
      /* In the unlikely case, we use two divisions and a modulo. */
      retval = bitn / k;
      bitn %= k;
      bitn = (bitn << LOGROOT_USED_BITS |
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
    }
  else
    {
      bitn = (bitn << LOGROOT_USED_BITS |
	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
      retval = bitn >> LOGROOT_USED_BITS;
      bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
    }
  ASSERT (bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
  *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
    | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
  return retval;
}
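
/* Worked example of the seed (values traced by hand, not part of the code):
   take the cube root, k = 3, of an operand U ~ 2^30, so bitn = 30 and the
   top fraction bits op are zero.  Then vlog[0] = 1 and

     bitn   = (30 << 8 | 1) / 3 = 7681 / 3 = 2560
     retval = 2560 >> 8 = 10,  frac = 2560 & 255 = 0
     *rootp = 1 << 8 | vexp[0] = 0x100

   and the approximation is (*rootp) * 2^(retval - LOGROOT_RETURNED_BITS + 1)
   = 256 * 2^2 = 1024 = 2^10, i.e. exactly cbrt(2^30).  The tables are only
   8 bits wide, hence the result may still need the (at most two) downward
   corrections accounted for by LOGROOT_NEEDS_TWO_CORRECTIONS.  */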

/* if approx is non-zero, does not compute the final remainder */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy, uh;
  mp_bitcnt_t unb; /* number of significant bits of {up,un} */
  mp_bitcnt_t xnb; /* number of significant bits of the result */
  mp_bitcnt_t b, kk;
  mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
  int ni;
  int perf_pow;
  unsigned ulz, snb, c, logk;
  TMP_DECL;

  /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
  uh = up[un - 1];
  count_leading_zeros (ulz, uh);
  ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
  unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
  /* unb is the (truncated) logarithm of the input U in base 2 */

  if (unb < k) /* root is 1, since U < 2^(unb+1) <= 2^k */
    {
      rootp[0] = 1;
      if (remp == NULL)
	un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
      else
	{
	  mpn_sub_1 (remp, up, un, CNST_LIMB (1));
	  un -= (remp [un - 1] == 0);	/* There should be at most one zero
					   limb, if we demand u to be
					   normalized.  */
	}
      return un;
    }
  /* if (unb - k < k/2 + k/16) // root is 2 */

  if (ulz == GMP_NUMB_BITS)
    uh = up[un - 2];
  else
    uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
  ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);

  xnb = logbased_root (rootp, uh, unb, k);
  snb = LOGROOT_RETURNED_BITS - 1;
  /* xnb+1 is the number of bits of the root R */
  /* snb+1 is the number of bits of the current approximation S */

  kk = k * xnb;		/* number of truncated bits in the input */

  /* FIXME: Should we skip the next two loops when xnb <= snb ? */
  for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk)
    ;
  /* logk = ceil(log(k)/log(2)) + 1 */

  /* xnb is the number of remaining bits to determine in the kth root */
  for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
    {
      /* invariant: here we want xnb+1 total bits for the kth root */

      /* if c is the new value of xnb, this means that we'll go from a
	 root of c+1 bits (say s') to a root of xnb+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" by Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((xnb + log2(k))/2). */
      if (xnb > logk)
	xnb = (xnb + logk) / 2;
      else
	--xnb;	/* add just one bit at a time */
    }
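
  /* Illustrative trace of the loop above (hand-computed, hypothetical
     parameters): for a root of xnb+1 = 101 bits and logk = 4 (e.g. k = 5),
     with snb = LOGROOT_RETURNED_BITS - 1 = 8, the recorded schedule is

       sizes[] = { 100, 52, 28, 16, 10, 7 },  ni = 5,

     i.e. the seed provides 7+1 bits and each Newton step roughly doubles
     the known bits: 8 -> 11 -> 17 -> 29 -> 53 -> 101.  */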

  *rootp >>= snb - xnb;
  kk -= xnb;

  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] > sizes[1] > ... > sizes[ni] = xnb <= snb, with
     sizes[i] <= 2 * sizes[i+1].
     Newton iteration will first compute sizes[ni-1] - sizes[ni] extra bits,
     then sizes[ni-2] - sizes[ni-1], ..., then sizes[0] - sizes[1]. */
  TMP_MARK;
  /* qp and wp need enough space to store S'^k where S' is an approximate
     root. Since S' can be as large as S+2, the worst case is when S=2 and
     S'=4. But then since we know the number of bits of S in advance, S'
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
  /* THINK: with the use of logbased_root, maybe the constant is
     258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS) /* 0.585 > log2(3/2) */
  TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
		     qp, un + EXTRA,  /* will contain quotient and remainder
					 of R/(k*S^(k-1)), and S^k */
		     wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
					 and temporary for mpn_pow_1 */

  if (remp == NULL)
    rp = scratch;	/* will contain the remainder */
  else
    rp = remp;
  sp = rootp;

  sn = 1;		/* Initial approximation has one limb */

  for (b = xnb; ni != 0; --ni)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[ni] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b; /* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      rn = un - kk / GMP_NUMB_BITS;
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */

      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  /* W <- S^(k-1) for the next iteration,
	     and S^k = W * S. */
	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	  mpn_mul (qp, wp, wn, sp, sn);
	  qn = wn + sn;
	  qn -= qp[qn - 1] == 0;

	  perf_pow = 1;
	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && (perf_pow = mpn_cmp (qp, rp, rn)) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      /* sometimes two corrections are needed with logbased_root */
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
      ASSERT_ALWAYS (rn >= qn);

      b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
					next iteration */
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */

      kk = kk - b;
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
	       - floor(kk / GMP_NUMB_BITS)
	    <= 1 + (kk + b - 1) / GMP_NUMB_BITS
		 - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* R = R - Q = floor(U/2^kk) - S^k */
      if (perf_pow != 0)
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE_NOT_ZERO (rp, rn);

	  /* first multiply the remainder by 2^b */
	  MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
	  rn = rn + bn;
	  if (cy != 0)
	    {
	      rp[rn] = cy;
	      rn++;
	    }

	  save = rp[bn];
	  /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
	  if (nl - 1 > bn)
	    save2 = rp[bn + 1];
	}
      else
	{
	  rn = bn;
	  save2 = save = 0;
	}
      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero high bits of rp[bn] */
      rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
      /* restore corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
			       they start by bit 0 in rp[0], so they use
			       at most ceil(b/GMP_NUMB_BITS) limbs */
      /* FIXME: Should we normalise {rp,rn} here? */

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      save = sp[b / GMP_NUMB_BITS];

      /* Number of limbs used by b bits, when least significant bit is
	 aligned to least limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (UNLIKELY (rn < wn))
	{
	  MPN_FILL (sp, bn, 0);
	}
      else
	{
	  qn = rn - wn; /* expected quotient size */
	  if (qn <= bn) { /* Divide only if result is not too big. */
	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
	    qn += qp[qn] != 0;
	  }

	  /* 5: current buffers: {sp,sn}, {qp,qn}.
	     Note: {rp,rn} is not needed any more since we'll compute it from
	     scratch at the end of the loop.
	   */

	  /* the quotient should be smaller than 2^b, since the previous
	     approximation was correctly rounded toward zero */
	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
	    {
	      for (qn = 1; qn < bn; ++qn)
		sp[qn - 1] = GMP_NUMB_MAX;
	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
	    }
	  else
	    {
	      /* 7: current buffers: {sp,sn}, {qp,qn} */

	      /* Combine sB and q to form sB + q.  */
	      MPN_COPY (sp, qp, qn);
	      MPN_ZERO (sp + qn, bn - qn);
	    }
	}
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

    }

  /* otherwise (approx and sp[0] >= 2) we have rn > 0, thus the return
     value is ok */
  if (!approx || sp[0] <= CNST_LIMB (1))
    {
      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  /* Last iteration: we don't need W anymore. */
	  /* mpn_pow_1 requires that both qp and wp have enough
	     space to store the result {sp,sn}^k + 1 limb */
	  qn = mpn_pow_1 (qp, sp, sn, k, wp);

	  perf_pow = 1;
	  if (qn > un || (qn == un && (perf_pow = mpn_cmp (qp, up, un)) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* sometimes two corrections are needed with logbased_root */
      ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);

      rn = perf_pow != 0;
      if (rn != 0 && remp != NULL)
	{
	  mpn_sub (remp, up, un, qp, qn);
	  rn = un;
	  MPN_NORMALIZE_NOT_ZERO (remp, rn);
	}
    }

  TMP_FREE;
  return rn;
}
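
/* The documented way to reach this code from user programs is the mpz
   layer; a minimal sketch (standard GMP API, values hypothetical):

     #include <gmp.h>

     mpz_t u, root, rem;
     mpz_inits (u, root, rem, NULL);
     mpz_set_ui (u, 1000001);
     mpz_rootrem (root, rem, u, 3);   // root = 100, rem = 1000001 - 100^3 = 1
     gmp_printf ("%Zd %Zd\n", root, rem);
     mpz_clears (u, root, rem, NULL);
*/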