xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/get_str.c (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1 /* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE
6    INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN
7    FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
8    GNU MP RELEASE.
9 
10 Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2011, 2012 Free Software
11 Foundation, Inc.
12 
13 This file is part of the GNU MP Library.
14 
15 The GNU MP Library is free software; you can redistribute it and/or modify
16 it under the terms of either:
17 
18   * the GNU Lesser General Public License as published by the Free
19     Software Foundation; either version 3 of the License, or (at your
20     option) any later version.
21 
22 or
23 
24   * the GNU General Public License as published by the Free Software
25     Foundation; either version 2 of the License, or (at your option) any
26     later version.
27 
28 or both in parallel, as here.
29 
30 The GNU MP Library is distributed in the hope that it will be useful, but
31 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
32 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
33 for more details.
34 
35 You should have received copies of the GNU General Public License and the
36 GNU Lesser General Public License along with the GNU MP Library.  If not,
37 see https://www.gnu.org/licenses/.  */
38 
39 #include "gmp.h"
40 #include "gmp-impl.h"
41 #include "longlong.h"
42 
43 /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
44    base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
45 
46   A) Divide U repeatedly by B, generating a quotient and remainder, until the
47      quotient becomes zero.  The remainders hold the converted digits.  Digits
48      come out from right to left.  (Used in mpn_sb_get_str.)
49 
50   B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
51      Then develop digits by multiplying the fraction repeatedly by b.  Digits
52      come out from left to right.  (Currently not used herein, except for in
53      code for converting single limbs to individual digits.)
54 
55   C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
56      sqrt(U).  Then divide U by B^s, generating quotient and remainder.
57      Recursively convert the quotient, then the remainder, using the
58      precomputed powers.  Digits come out from left to right.  (Used in
59      mpn_dc_get_str.)
60 
61   When using algorithm C, algorithm B might be suitable for basecase code,
62   since the required b^g power will be readily accessible.
63 
64   Optimization ideas:
65   1. The recursive function of (C) could use less temporary memory.  The powtab
66      allocation could be trimmed with some computation, and the tmp area could
67      be reduced, or perhaps eliminated if up is reused for both quotient and
68      remainder (it is currently used just for remainder).
69   2. Store the powers of (C) in normalized form, with the normalization count.
70      Quotients will usually need to be left-shifted before each divide, and
71      remainders will either need to be left-shifted of right-shifted.
72   3. In the code for developing digits from a single limb, we could avoid using
73      a full umul_ppmm except for the first (or first few) digits, provided base
74      is even.  Subsequent digits can be developed using plain multiplication.
75      (This saves on register-starved machines (read x86) and on all machines
76      that generate the upper product half using a separate instruction (alpha,
77      powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
78   4. Separate mpn_dc_get_str basecase code from code for small conversions. The
79      former code will have the exact right power readily available in the
80      powtab parameter for dividing the current number into a fraction.  Convert
81      that using algorithm B.
82   5. Completely avoid division.  Compute the inverses of the powers now in
83      powtab instead of the actual powers.
84   6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
85      about 30% (1-log(5)/log(10)).
86 
87   Basic structure of (C):
88     mpn_get_str:
89       if POW2_P (n)
90 	...
91       else
92 	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
93 	  mpn_sb_get_str (str, base, up, un);
94 	else
95 	  precompute_power_tables
96 	  mpn_dc_get_str
97 
98     mpn_dc_get_str:
99 	mpn_tdiv_qr
100 	if (qn < GET_STR_DC_THRESHOLD)
101 	  mpn_sb_get_str
102 	else
103 	  mpn_dc_get_str
104 	if (rn < GET_STR_DC_THRESHOLD)
105 	  mpn_sb_get_str
106 	else
107 	  mpn_dc_get_str
108 
109 
110   The reason for the two threshold values is the cost of
111   precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
112   larger than GET_STR_PRECOMPUTE_THRESHOLD.  */
113 
114 
115 /* The x86s and m68020 have a quotient and remainder "div" instruction and
116    gcc recognises an adjacent "/" and "%" can be combined using that.
117    Elsewhere "/" and "%" are either separate instructions, or separate
118    libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
119    A multiply and subtract should be faster than a "%" in those cases.  */
120 #if HAVE_HOST_CPU_FAMILY_x86            \
121   || HAVE_HOST_CPU_m68020               \
122   || HAVE_HOST_CPU_m68030               \
123   || HAVE_HOST_CPU_m68040               \
124   || HAVE_HOST_CPU_m68060               \
125   || HAVE_HOST_CPU_m68360 /* CPU32 */
126 #define udiv_qrnd_unnorm(q,r,n,d)       \
127   do {                                  \
128     mp_limb_t  __q = (n) / (d);         \
129     mp_limb_t  __r = (n) % (d);         \
130     (q) = __q;                          \
131     (r) = __r;                          \
132   } while (0)
133 #else
134 #define udiv_qrnd_unnorm(q,r,n,d)       \
135   do {                                  \
136     mp_limb_t  __q = (n) / (d);         \
137     mp_limb_t  __r = (n) - __q*(d);     \
138     (q) = __q;                          \
139     (r) = __r;                          \
140   } while (0)
141 #endif
142 
143 
144 /* Convert {up,un} to a string in base base, and put the result in str.
145    Generate len characters, possibly padding with zeros to the left.  If len is
146    zero, generate as many characters as required.  Return a pointer immediately
147    after the last digit of the result string.  Complexity is O(un^2); intended
148    for small conversions.  */
149 static unsigned char *
150 mpn_sb_get_str (unsigned char *str, size_t len,
151 		mp_ptr up, mp_size_t un, int base)
152 {
153   mp_limb_t rl, ul;
154   unsigned char *s;
155   size_t l;
156   /* Allocate memory for largest possible string, given that we only get here
157      for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
158      base is 3.  7/11 is an approximation to 1/log2(3).  */
159 #if TUNE_PROGRAM_BUILD
160 #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
161 #else
162 #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
163 #endif
164   unsigned char buf[BUF_ALLOC];
165 #if TUNE_PROGRAM_BUILD
166   mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
167 #else
168   mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
169 #endif
170 
171   if (base == 10)
172     {
173       /* Special case code for base==10 so that the compiler has a chance to
174 	 optimize things.  */
175 
176       MPN_COPY (rp + 1, up, un);
177 
178       s = buf + BUF_ALLOC;
179       while (un > 1)
180 	{
181 	  int i;
182 	  mp_limb_t frac, digit;
183 	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
184 					 MP_BASES_BIG_BASE_10,
185 					 MP_BASES_BIG_BASE_INVERTED_10,
186 					 MP_BASES_NORMALIZATION_STEPS_10);
187 	  un -= rp[un] == 0;
188 	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
189 	  s -= MP_BASES_CHARS_PER_LIMB_10;
190 #if HAVE_HOST_CPU_FAMILY_x86
191 	  /* The code below turns out to be a bit slower for x86 using gcc.
192 	     Use plain code.  */
193 	  i = MP_BASES_CHARS_PER_LIMB_10;
194 	  do
195 	    {
196 	      umul_ppmm (digit, frac, frac, 10);
197 	      *s++ = digit;
198 	    }
199 	  while (--i);
200 #else
201 	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
202 	     After a few umul_ppmm, we will have accumulated enough low zeros
203 	     to use a plain multiply.  */
204 	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
205 	    {
206 	      umul_ppmm (digit, frac, frac, 10);
207 	      *s++ = digit;
208 	    }
209 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
210 	    {
211 	      umul_ppmm (digit, frac, frac, 10);
212 	      *s++ = digit;
213 	    }
214 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
215 	    {
216 	      umul_ppmm (digit, frac, frac, 10);
217 	      *s++ = digit;
218 	    }
219 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
220 	    {
221 	      umul_ppmm (digit, frac, frac, 10);
222 	      *s++ = digit;
223 	    }
224 	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
225 					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
226 					     : 0));
227 	  frac = (frac + 0xf) >> 4;
228 	  do
229 	    {
230 	      frac *= 10;
231 	      digit = frac >> (GMP_LIMB_BITS - 4);
232 	      *s++ = digit;
233 	      frac &= (~(mp_limb_t) 0) >> 4;
234 	    }
235 	  while (--i);
236 #endif
237 	  s -= MP_BASES_CHARS_PER_LIMB_10;
238 	}
239 
240       ul = rp[1];
241       while (ul != 0)
242 	{
243 	  udiv_qrnd_unnorm (ul, rl, ul, 10);
244 	  *--s = rl;
245 	}
246     }
247   else /* not base 10 */
248     {
249       unsigned chars_per_limb;
250       mp_limb_t big_base, big_base_inverted;
251       unsigned normalization_steps;
252 
253       chars_per_limb = mp_bases[base].chars_per_limb;
254       big_base = mp_bases[base].big_base;
255       big_base_inverted = mp_bases[base].big_base_inverted;
256       count_leading_zeros (normalization_steps, big_base);
257 
258       MPN_COPY (rp + 1, up, un);
259 
260       s = buf + BUF_ALLOC;
261       while (un > 1)
262 	{
263 	  int i;
264 	  mp_limb_t frac;
265 	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
266 					 big_base, big_base_inverted,
267 					 normalization_steps);
268 	  un -= rp[un] == 0;
269 	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
270 	  s -= chars_per_limb;
271 	  i = chars_per_limb;
272 	  do
273 	    {
274 	      mp_limb_t digit;
275 	      umul_ppmm (digit, frac, frac, base);
276 	      *s++ = digit;
277 	    }
278 	  while (--i);
279 	  s -= chars_per_limb;
280 	}
281 
282       ul = rp[1];
283       while (ul != 0)
284 	{
285 	  udiv_qrnd_unnorm (ul, rl, ul, base);
286 	  *--s = rl;
287 	}
288     }
289 
290   l = buf + BUF_ALLOC - s;
291   while (l < len)
292     {
293       *str++ = 0;
294       len--;
295     }
296   while (l != 0)
297     {
298       *str++ = *s++;
299       l--;
300     }
301   return str;
302 }
303 
304 
305 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
306    the string in STR.  Generate LEN characters, possibly padding with zeros to
307    the left.  If LEN is zero, generate as many characters as required.
308    Return a pointer immediately after the last digit of the result string.
309    This uses divide-and-conquer and is intended for large conversions.  */
310 static unsigned char *
311 mpn_dc_get_str (unsigned char *str, size_t len,
312 		mp_ptr up, mp_size_t un,
313 		const powers_t *powtab, mp_ptr tmp)
314 {
315   if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
316     {
317       if (un != 0)
318 	str = mpn_sb_get_str (str, len, up, un, powtab->base);
319       else
320 	{
321 	  while (len != 0)
322 	    {
323 	      *str++ = 0;
324 	      len--;
325 	    }
326 	}
327     }
328   else
329     {
330       mp_ptr pwp, qp, rp;
331       mp_size_t pwn, qn;
332       mp_size_t sn;
333 
334       pwp = powtab->p;
335       pwn = powtab->n;
336       sn = powtab->shift;
337 
338       if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
339 	{
340 	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
341 	}
342       else
343 	{
344 	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
345 	  rp = up;		/* pwn limbs for rp; overwrite up area */
346 
347 	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
348 	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
349 
350 	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
351 
352 	  if (len != 0)
353 	    len = len - powtab->digits_in_base;
354 
355 	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
356 	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
357 	}
358     }
359   return str;
360 }
361 
362 
363 /* There are no leading zeros on the digits generated at str, but that's not
364    currently a documented feature.  The current mpz_out_str and mpz_get_str
365    rely on it.  */
366 
367 size_t
368 mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
369 {
370   mp_ptr powtab_mem, powtab_mem_ptr;
371   mp_limb_t big_base;
372   size_t digits_in_base;
373   powers_t powtab[GMP_LIMB_BITS];
374   int pi;
375   mp_size_t n;
376   mp_ptr p, t;
377   size_t out_len;
378   mp_ptr tmp;
379   TMP_DECL;
380 
381   /* Special case zero, as the code below doesn't handle it.  */
382   if (un == 0)
383     {
384       str[0] = 0;
385       return 1;
386     }
387 
388   if (POW2_P (base))
389     {
390       /* The base is a power of 2.  Convert from most significant end.  */
391       mp_limb_t n1, n0;
392       int bits_per_digit = mp_bases[base].big_base;
393       int cnt;
394       int bit_pos;
395       mp_size_t i;
396       unsigned char *s = str;
397       mp_bitcnt_t bits;
398 
399       n1 = up[un - 1];
400       count_leading_zeros (cnt, n1);
401 
402       /* BIT_POS should be R when input ends in least significant nibble,
403 	 R + bits_per_digit * n when input ends in nth least significant
404 	 nibble. */
405 
406       bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
407       cnt = bits % bits_per_digit;
408       if (cnt != 0)
409 	bits += bits_per_digit - cnt;
410       bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
411 
412       /* Fast loop for bit output.  */
413       i = un - 1;
414       for (;;)
415 	{
416 	  bit_pos -= bits_per_digit;
417 	  while (bit_pos >= 0)
418 	    {
419 	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
420 	      bit_pos -= bits_per_digit;
421 	    }
422 	  i--;
423 	  if (i < 0)
424 	    break;
425 	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
426 	  n1 = up[i];
427 	  bit_pos += GMP_NUMB_BITS;
428 	  *s++ = n0 | (n1 >> bit_pos);
429 	}
430 
431       return s - str;
432     }
433 
434   /* General case.  The base is not a power of 2.  */
435 
436   if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
437     return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;
438 
439   TMP_MARK;
440 
441   /* Allocate one large block for the powers of big_base.  */
442   powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un));
443   powtab_mem_ptr = powtab_mem;
444 
445   /* Compute a table of powers, were the largest power is >= sqrt(U).  */
446 
447   big_base = mp_bases[base].big_base;
448   digits_in_base = mp_bases[base].chars_per_limb;
449 
450   {
451     mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
452     mp_limb_t cy;
453     mp_size_t shift;
454     size_t ndig;
455 
456     DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
457     xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
458 
459     n_pows = 0;
460     for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
461       {
462 	exptab[n_pows] = pn;
463 	n_pows++;
464       }
465     exptab[n_pows] = 1;
466 
467     powtab[0].p = &big_base;
468     powtab[0].n = 1;
469     powtab[0].digits_in_base = digits_in_base;
470     powtab[0].base = base;
471     powtab[0].shift = 0;
472 
473     powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;
474     powtab[1].p[0] = big_base;
475     powtab[1].n = 1;
476     powtab[1].digits_in_base = digits_in_base;
477     powtab[1].base = base;
478     powtab[1].shift = 0;
479 
480     n = 1;
481     p = &big_base;
482     bexp = 1;
483     shift = 0;
484     for (pi = 2; pi < n_pows; pi++)
485       {
486 	t = powtab_mem_ptr;
487 	powtab_mem_ptr += 2 * n + 2;
488 
489 	ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
490 
491 	mpn_sqr (t, p, n);
492 
493 	digits_in_base *= 2;
494 	n *= 2;  n -= t[n - 1] == 0;
495 	bexp *= 2;
496 
497 	if (bexp + 1 < exptab[n_pows - pi])
498 	  {
499 	    digits_in_base += mp_bases[base].chars_per_limb;
500 	    cy = mpn_mul_1 (t, t, n, big_base);
501 	    t[n] = cy;
502 	    n += cy != 0;
503 	    bexp += 1;
504 	  }
505 	shift *= 2;
506 	/* Strip low zero limbs.  */
507 	while (t[0] == 0)
508 	  {
509 	    t++;
510 	    n--;
511 	    shift++;
512 	  }
513 	p = t;
514 	powtab[pi].p = p;
515 	powtab[pi].n = n;
516 	powtab[pi].digits_in_base = digits_in_base;
517 	powtab[pi].base = base;
518 	powtab[pi].shift = shift;
519       }
520 
521     for (pi = 1; pi < n_pows; pi++)
522       {
523 	t = powtab[pi].p;
524 	n = powtab[pi].n;
525 	cy = mpn_mul_1 (t, t, n, big_base);
526 	t[n] = cy;
527 	n += cy != 0;
528 	if (t[0] == 0)
529 	  {
530 	    powtab[pi].p = t + 1;
531 	    n--;
532 	    powtab[pi].shift++;
533 	  }
534 	powtab[pi].n = n;
535 	powtab[pi].digits_in_base += mp_bases[base].chars_per_limb;
536       }
537 
538 #if 0
539     { int i;
540       printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn);
541       for (i = 0; i < n_pows; i++)
542 	printf ("%2d: %10ld %10ld %11ld %ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift);
543     }
544 #endif
545   }
546 
547   /* Using our precomputed powers, now in powtab[], convert our number.  */
548   tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
549   out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
550   TMP_FREE;
551 
552   return out_len;
553 }
554