xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/get_str.c (revision 1daf83e636cd998f45e5597a8f995a540e2d5b4a)
1 /* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH MUTABLE
6    INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
7    IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
8    FUTURE GNU MP RELEASE.
9 
10 Copyright 1991-2017 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 #include "gmp-impl.h"
39 #include "longlong.h"
40 
41 /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
42    base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
43 
44   A) Divide U repeatedly by B, generating a quotient and remainder, until the
45      quotient becomes zero.  The remainders hold the converted digits.  Digits
46      come out from right to left.  (Used in mpn_bc_get_str.)
47 
48   B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
49      Then develop digits by multiplying the fraction repeatedly by b.  Digits
50      come out from left to right.  (Currently not used herein, except for in
51      code for converting single limbs to individual digits.)
52 
53   C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
54      sqrt(U).  Then divide U by B^s, generating quotient and remainder.
55      Recursively convert the quotient, then the remainder, using the
56      precomputed powers.  Digits come out from left to right.  (Used in
57      mpn_dc_get_str.)
58 
59   When using algorithm C, algorithm B might be suitable for basecase code,
60   since the required b^g power will be readily accessible.
61 
62   Optimization ideas:
63   1. The recursive function of (C) could use less temporary memory.  The powtab
64      allocation could be trimmed with some computation, and the tmp area could
65      be reduced, or perhaps eliminated if up is reused for both quotient and
66      remainder (it is currently used just for remainder).
67   2. Store the powers of (C) in normalized form, with the normalization count.
68      Quotients will usually need to be left-shifted before each divide, and
69      remainders will either need to be left-shifted of right-shifted.
70   3. In the code for developing digits from a single limb, we could avoid using
71      a full umul_ppmm except for the first (or first few) digits, provided base
72      is even.  Subsequent digits can be developed using plain multiplication.
73      (This saves on register-starved machines (read x86) and on all machines
74      that generate the upper product half using a separate instruction (alpha,
75      powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
76   4. Separate mpn_dc_get_str basecase code from code for small conversions. The
77      former code will have the exact right power readily available in the
78      powtab parameter for dividing the current number into a fraction.  Convert
79      that using algorithm B.
80   5. Completely avoid division.  Compute the inverses of the powers now in
81      powtab instead of the actual powers.
82   6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
83      about 30% (1-log(5)/log(10)).
84 
85   Basic structure of (C):
86     mpn_get_str:
87       if POW2_P (n)
88 	...
89       else
90 	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
91 	  mpn_bx_get_str (str, base, up, un);
92 	else
93 	  precompute_power_tables
94 	  mpn_dc_get_str
95 
96     mpn_dc_get_str:
97 	mpn_tdiv_qr
98 	if (qn < GET_STR_DC_THRESHOLD)
99 	  mpn_bc_get_str
100 	else
101 	  mpn_dc_get_str
102 	if (rn < GET_STR_DC_THRESHOLD)
103 	  mpn_bc_get_str
104 	else
105 	  mpn_dc_get_str
106 
107 
108   The reason for the two threshold values is the cost of
109   precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be
110   considerably larger than GET_STR_DC_THRESHOLD.  */
111 
112 
113 /* The x86s and m68020 have a quotient and remainder "div" instruction and
114    gcc recognises an adjacent "/" and "%" can be combined using that.
115    Elsewhere "/" and "%" are either separate instructions, or separate
116    libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
117    A multiply and subtract should be faster than a "%" in those cases.  */
118 #if HAVE_HOST_CPU_FAMILY_x86            \
119   || HAVE_HOST_CPU_m68020               \
120   || HAVE_HOST_CPU_m68030               \
121   || HAVE_HOST_CPU_m68040               \
122   || HAVE_HOST_CPU_m68060               \
123   || HAVE_HOST_CPU_m68360 /* CPU32 */
124 #define udiv_qrnd_unnorm(q,r,n,d)       \
125   do {                                  \
126     mp_limb_t  __q = (n) / (d);         \
127     mp_limb_t  __r = (n) % (d);         \
128     (q) = __q;                          \
129     (r) = __r;                          \
130   } while (0)
131 #else
132 #define udiv_qrnd_unnorm(q,r,n,d)       \
133   do {                                  \
134     mp_limb_t  __q = (n) / (d);         \
135     mp_limb_t  __r = (n) - __q*(d);     \
136     (q) = __q;                          \
137     (r) = __r;                          \
138   } while (0)
139 #endif
140 
141 
142 /* Convert {up,un} to a string in base base, and put the result in str.
143    Generate len characters, possibly padding with zeros to the left.  If len is
144    zero, generate as many characters as required.  Return a pointer immediately
145    after the last digit of the result string.  Complexity is O(un^2); intended
146    for small conversions.  */
147 static unsigned char *
mpn_bc_get_str(unsigned char * str,size_t len,mp_ptr up,mp_size_t un,int base)148 mpn_bc_get_str (unsigned char *str, size_t len,
149 		mp_ptr up, mp_size_t un, int base)
150 {
151   mp_limb_t rl, ul;
152   unsigned char *s;
153   size_t l;
154   /* Allocate memory for largest possible string, given that we only get here
155      for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
156      base is 3.  7/11 is an approximation to 1/log2(3).  */
157 #if TUNE_PROGRAM_BUILD
158 #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
159 #else
160 #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
161 #endif
162   unsigned char buf[BUF_ALLOC];
163 #if TUNE_PROGRAM_BUILD
164   mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
165 #else
166   mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
167 #endif
168 
169   if (base == 10)
170     {
171       /* Special case code for base==10 so that the compiler has a chance to
172 	 optimize things.  */
173 
174       MPN_COPY (rp + 1, up, un);
175 
176       s = buf + BUF_ALLOC;
177       while (un > 1)
178 	{
179 	  int i;
180 	  mp_limb_t frac, digit;
181 	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
182 					 MP_BASES_BIG_BASE_10,
183 					 MP_BASES_BIG_BASE_INVERTED_10,
184 					 MP_BASES_NORMALIZATION_STEPS_10);
185 	  un -= rp[un] == 0;
186 	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
187 	  s -= MP_BASES_CHARS_PER_LIMB_10;
188 #if HAVE_HOST_CPU_FAMILY_x86
189 	  /* The code below turns out to be a bit slower for x86 using gcc.
190 	     Use plain code.  */
191 	  i = MP_BASES_CHARS_PER_LIMB_10;
192 	  do
193 	    {
194 	      umul_ppmm (digit, frac, frac, 10);
195 	      *s++ = digit;
196 	    }
197 	  while (--i);
198 #else
199 	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
200 	     After a few umul_ppmm, we will have accumulated enough low zeros
201 	     to use a plain multiply.  */
202 	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
203 	    {
204 	      umul_ppmm (digit, frac, frac, 10);
205 	      *s++ = digit;
206 	    }
207 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
208 	    {
209 	      umul_ppmm (digit, frac, frac, 10);
210 	      *s++ = digit;
211 	    }
212 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
213 	    {
214 	      umul_ppmm (digit, frac, frac, 10);
215 	      *s++ = digit;
216 	    }
217 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
218 	    {
219 	      umul_ppmm (digit, frac, frac, 10);
220 	      *s++ = digit;
221 	    }
222 	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
223 					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
224 					     : 0));
225 	  frac = (frac + 0xf) >> 4;
226 	  do
227 	    {
228 	      frac *= 10;
229 	      digit = frac >> (GMP_LIMB_BITS - 4);
230 	      *s++ = digit;
231 	      frac &= (~(mp_limb_t) 0) >> 4;
232 	    }
233 	  while (--i);
234 #endif
235 	  s -= MP_BASES_CHARS_PER_LIMB_10;
236 	}
237 
238       ul = rp[1];
239       while (ul != 0)
240 	{
241 	  udiv_qrnd_unnorm (ul, rl, ul, 10);
242 	  *--s = rl;
243 	}
244     }
245   else /* not base 10 */
246     {
247       unsigned chars_per_limb;
248       mp_limb_t big_base, big_base_inverted;
249       unsigned normalization_steps;
250 
251       chars_per_limb = mp_bases[base].chars_per_limb;
252       big_base = mp_bases[base].big_base;
253       big_base_inverted = mp_bases[base].big_base_inverted;
254       count_leading_zeros (normalization_steps, big_base);
255 
256       MPN_COPY (rp + 1, up, un);
257 
258       s = buf + BUF_ALLOC;
259       while (un > 1)
260 	{
261 	  int i;
262 	  mp_limb_t frac;
263 	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
264 					 big_base, big_base_inverted,
265 					 normalization_steps);
266 	  un -= rp[un] == 0;
267 	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
268 	  s -= chars_per_limb;
269 	  i = chars_per_limb;
270 	  do
271 	    {
272 	      mp_limb_t digit;
273 	      umul_ppmm (digit, frac, frac, base);
274 	      *s++ = digit;
275 	    }
276 	  while (--i);
277 	  s -= chars_per_limb;
278 	}
279 
280       ul = rp[1];
281       while (ul != 0)
282 	{
283 	  udiv_qrnd_unnorm (ul, rl, ul, base);
284 	  *--s = rl;
285 	}
286     }
287 
288   l = buf + BUF_ALLOC - s;
289   while (l < len)
290     {
291       *str++ = 0;
292       len--;
293     }
294   while (l != 0)
295     {
296       *str++ = *s++;
297       l--;
298     }
299   return str;
300 }
301 
302 
303 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
304    the string in STR.  Generate LEN characters, possibly padding with zeros to
305    the left.  If LEN is zero, generate as many characters as required.
306    Return a pointer immediately after the last digit of the result string.
307    This uses divide-and-conquer and is intended for large conversions.  */
308 static unsigned char *
mpn_dc_get_str(unsigned char * str,size_t len,mp_ptr up,mp_size_t un,const powers_t * powtab,mp_ptr tmp)309 mpn_dc_get_str (unsigned char *str, size_t len,
310 		mp_ptr up, mp_size_t un,
311 		const powers_t *powtab, mp_ptr tmp)
312 {
313   if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
314     {
315       if (un != 0)
316 	str = mpn_bc_get_str (str, len, up, un, powtab->base);
317       else
318 	{
319 	  while (len != 0)
320 	    {
321 	      *str++ = 0;
322 	      len--;
323 	    }
324 	}
325     }
326   else
327     {
328       mp_ptr pwp, qp, rp;
329       mp_size_t pwn, qn;
330       mp_size_t sn;
331 
332       pwp = powtab->p;
333       pwn = powtab->n;
334       sn = powtab->shift;
335 
336       if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
337 	{
338 	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
339 	}
340       else
341 	{
342 	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
343 	  rp = up;		/* pwn limbs for rp; overwrite up area */
344 
345 	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
346 	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
347 
348 	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
349 
350 	  if (len != 0)
351 	    len = len - powtab->digits_in_base;
352 
353 	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
354 	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
355 	}
356     }
357   return str;
358 }
359 
360 /* There are no leading zeros on the digits generated at str, but that's not
361    currently a documented feature.  The current mpz_out_str and mpz_get_str
362    rely on it.  */
363 
364 size_t
mpn_get_str(unsigned char * str,int base,mp_ptr up,mp_size_t un)365 mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
366 {
367   mp_ptr powtab_mem;
368   powers_t powtab[GMP_LIMB_BITS];
369   int pi;
370   size_t out_len;
371   mp_ptr tmp;
372   size_t ndig;
373   mp_size_t xn;
374   TMP_DECL;
375 
376   /* Special case zero, as the code below doesn't handle it.  */
377   if (un == 0)
378     {
379       str[0] = 0;
380       return 1;
381     }
382 
383   if (POW2_P (base))
384     {
385       /* The base is a power of 2.  Convert from most significant end.  */
386       mp_limb_t n1, n0;
387       int bits_per_digit = mp_bases[base].big_base;
388       int cnt;
389       int bit_pos;
390       mp_size_t i;
391       unsigned char *s = str;
392       mp_bitcnt_t bits;
393 
394       n1 = up[un - 1];
395       count_leading_zeros (cnt, n1);
396 
397       /* BIT_POS should be R when input ends in least significant nibble,
398 	 R + bits_per_digit * n when input ends in nth least significant
399 	 nibble. */
400 
401       bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
402       cnt = bits % bits_per_digit;
403       if (cnt != 0)
404 	bits += bits_per_digit - cnt;
405       bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
406 
407       /* Fast loop for bit output.  */
408       i = un - 1;
409       for (;;)
410 	{
411 	  bit_pos -= bits_per_digit;
412 	  while (bit_pos >= 0)
413 	    {
414 	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
415 	      bit_pos -= bits_per_digit;
416 	    }
417 	  i--;
418 	  if (i < 0)
419 	    break;
420 	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
421 	  n1 = up[i];
422 	  bit_pos += GMP_NUMB_BITS;
423 	  *s++ = n0 | (n1 >> bit_pos);
424 	}
425 
426       return s - str;
427     }
428 
429   /* General case.  The base is not a power of 2.  */
430 
431   if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
432     return mpn_bc_get_str (str, (size_t) 0, up, un, base) - str;
433 
434   TMP_MARK;
435 
436   /* Allocate one large block for the powers of big_base.  */
437   powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
438 
439   /* Compute a table of powers, were the largest power is >= sqrt(U).  */
440   DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
441   xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
442 
443   pi = 1 + mpn_compute_powtab (powtab, powtab_mem, xn, base);
444 
445   /* Using our precomputed powers, now in powtab[], convert our number.  */
446   tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
447   out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
448   TMP_FREE;
449 
450   return out_len;
451 }
452