xref: /netbsd-src/external/lgpl3/gmp/dist/mpf/set_str.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpf_set_str (dest, string, base) -- Convert the string STRING
2    in base BASE to a float in dest.  If BASE is zero, base 10 is used; the
3    base is never inferred from the leading characters of STRING.
4 
5 Copyright 1993-1997, 2000-2003, 2005, 2007, 2008, 2011, 2013, 2019 Free
6 Software Foundation, Inc.
7 
8 This file is part of the GNU MP Library.
9 
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of either:
12 
13   * the GNU Lesser General Public License as published by the Free
14     Software Foundation; either version 3 of the License, or (at your
15     option) any later version.
16 
17 or
18 
19   * the GNU General Public License as published by the Free Software
20     Foundation; either version 2 of the License, or (at your option) any
21     later version.
22 
23 or both in parallel, as here.
24 
25 The GNU MP Library is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28 for more details.
29 
30 You should have received copies of the GNU General Public License and the
31 GNU Lesser General Public License along with the GNU MP Library.  If not,
32 see https://www.gnu.org/licenses/.  */
33 
34 /*
35   This still needs work, as suggested by some FIXME comments.
36   1. Don't depend on superfluous mantissa digits.
37   2. Allocate temp space more cleverly.
38   3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
39 */
40 
41 #define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
42 
43 #include "config.h"
44 
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>    /* for LONG_MAX */
48 
49 #if HAVE_LANGINFO_H
50 #include <langinfo.h>  /* for nl_langinfo */
51 #endif
52 
53 #if HAVE_LOCALE_H
54 #include <locale.h>    /* for localeconv */
55 #endif
56 
57 #include "gmp-impl.h"
58 #include "longlong.h"
59 
60 
61 #define digit_value_tab __gmp_digit_value_tab
62 
63 /* Compute base^exp and return the most significant prec limbs in rp[].
64    Put the count of omitted low limbs in *ign.
65    Return the actual size (which might be less than prec).  */
66 static mp_size_t
mpn_pow_1_highpart(mp_ptr rp,mp_size_t * ignp,mp_limb_t base,mp_exp_t exp,mp_size_t prec,mp_ptr tp)67 mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
68 		    mp_limb_t base, mp_exp_t exp,
69 		    mp_size_t prec, mp_ptr tp)
70 {
71   mp_size_t ign;		/* counts number of ignored low limbs in r */
72   mp_size_t off;		/* keeps track of offset where value starts */
73   mp_ptr passed_rp = rp;
74   mp_size_t rn;
75   int cnt;
76   int i;
77 
78   rp[0] = base;
79   rn = 1;
80   off = 0;
81   ign = 0;
82   count_leading_zeros (cnt, exp);
83   for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
84     {
85       mpn_sqr (tp, rp + off, rn);
86       rn = 2 * rn;
87       rn -= tp[rn - 1] == 0;
88       ign <<= 1;
89 
90       off = 0;
91       if (rn > prec)
92 	{
93 	  ign += rn - prec;
94 	  off = rn - prec;
95 	  rn = prec;
96 	}
97       MP_PTR_SWAP (rp, tp);
98 
99       if (((exp >> i) & 1) != 0)
100 	{
101 	  mp_limb_t cy;
102 	  cy = mpn_mul_1 (rp, rp + off, rn, base);
103 	  rp[rn] = cy;
104 	  rn += cy != 0;
105 	  off = 0;
106 	}
107     }
108 
109   if (rn > prec)
110     {
111       ign += rn - prec;
112       rp += rn - prec;
113       rn = prec;
114     }
115 
116   MPN_COPY_INCR (passed_rp, rp + off, rn);
117   *ignp = ign;
118   return rn;
119 }
120 
121 int
mpf_set_str(mpf_ptr x,const char * str,int base)122 mpf_set_str (mpf_ptr x, const char *str, int base)
123 {
124   size_t str_size;
125   char *s, *begs;
126   size_t i, j;
127   int c;
128   int negative;
129   char *dotpos;
130   const char *expptr;
131   int exp_base;
132   const char  *point = GMP_DECIMAL_POINT;
133   size_t      pointlen = strlen (point);
134   const unsigned char *digit_value;
135   int incr;
136   size_t n_zeros_skipped;
137 
138   TMP_DECL;
139 
140   c = (unsigned char) *str;
141 
142   /* Skip whitespace.  */
143   while (isspace (c))
144     c = (unsigned char) *++str;
145 
146   negative = 0;
147   if (c == '-')
148     {
149       negative = 1;
150       c = (unsigned char) *++str;
151     }
152 
153   /* Default base to decimal.  */
154   if (base == 0)
155     base = 10;
156 
157   exp_base = base;
158 
159   if (base < 0)
160     {
161       exp_base = 10;
162       base = -base;
163     }
164 
165   digit_value = digit_value_tab;
166   if (base > 36)
167     {
168       /* For bases > 36, use the collating sequence
169 	 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
170       digit_value += 208;
171       if (base > 62)
172 	return -1;		/* too large base */
173     }
174 
175   /* Require at least one digit, possibly after an initial decimal point.  */
176   if (digit_value[c] >= base)
177     {
178       /* not a digit, must be a decimal point */
179       for (i = 0; i < pointlen; i++)
180 	if (str[i] != point[i])
181 	  return -1;
182       if (digit_value[(unsigned char) str[pointlen]] >= base)
183 	return -1;
184     }
185 
186   /* Locate exponent part of the input.  Look from the right of the string,
187      since the exponent is usually a lot shorter than the mantissa.  */
188   expptr = NULL;
189   str_size = strlen (str);
190   for (i = str_size - 1; i > 0; i--)
191     {
192       c = (unsigned char) str[i];
193       if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
194 	{
195 	  expptr = str + i + 1;
196 	  str_size = i;
197 	  break;
198 	}
199     }
200 
201   TMP_MARK;
202   s = begs = (char *) TMP_ALLOC (str_size + 1);
203 
204   incr = 0;
205   n_zeros_skipped = 0;
206   dotpos = NULL;
207 
208   /* Loop through mantissa, converting it from ASCII to raw byte values.  */
209   for (i = 0; i < str_size; i++)
210     {
211       c = (unsigned char) *str;
212       if (!isspace (c))
213 	{
214 	  int dig;
215 
216 	  for (j = 0; j < pointlen; j++)
217 	    if (str[j] != point[j])
218 	      goto not_point;
219 	  if (1)
220 	    {
221 	      if (dotpos != 0)
222 		{
223 		  /* already saw a decimal point, another is invalid */
224 		  TMP_FREE;
225 		  return -1;
226 		}
227 	      dotpos = s;
228 	      str += pointlen - 1;
229 	      i += pointlen - 1;
230 	    }
231 	  else
232 	    {
233 	    not_point:
234 	      dig = digit_value[c];
235 	      if (dig >= base)
236 		{
237 		  TMP_FREE;
238 		  return -1;
239 		}
240 	      *s = dig;
241 	      incr |= dig != 0;
242 	      s += incr;	/* Increment after first non-0 digit seen. */
243 	      if (dotpos != NULL)
244 		/* Count skipped zeros between radix point and first non-0
245 		   digit. */
246 		n_zeros_skipped += 1 - incr;
247 	    }
248 	}
249       c = (unsigned char) *++str;
250     }
251 
252   str_size = s - begs;
253 
254   {
255     long exp_in_base;
256     mp_size_t ra, ma, rn, mn;
257     int cnt;
258     mp_ptr mp, tp, rp;
259     mp_exp_t exp_in_limbs;
260     mp_size_t prec = PREC(x) + 1;
261     int divflag;
262     mp_size_t madj, radj;
263 
264 #if 0
265     size_t n_chars_needed;
266 
267     /* This needs careful testing.  Leave disabled for now.  */
268     /* Just consider the relevant leading digits of the mantissa.  */
269     LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
270     if (str_size > n_chars_needed)
271       str_size = n_chars_needed;
272 #endif
273 
274     if (str_size == 0)
275       {
276 	SIZ(x) = 0;
277 	EXP(x) = 0;
278 	TMP_FREE;
279 	return 0;
280       }
281 
282     LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
283     mp = TMP_ALLOC_LIMBS (ma);
284     mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
285 
286     madj = 0;
287     /* Ignore excess limbs in MP,MSIZE.  */
288     if (mn > prec)
289       {
290 	madj = mn - prec;
291 	mp += mn - prec;
292 	mn = prec;
293       }
294 
295     if (expptr != 0)
296       {
297 	/* Scan and convert the exponent, in base exp_base.  */
298 	long dig, minus, plusminus;
299 	c = (unsigned char) *expptr;
300 	minus = -(long) (c == '-');
301 	plusminus = minus | -(long) (c == '+');
302 	expptr -= plusminus;			/* conditional increment */
303 	c = (unsigned char) *expptr++;
304 	dig = digit_value[c];
305 	if (dig >= exp_base)
306 	  {
307 	    TMP_FREE;
308 	    return -1;
309 	  }
310 	exp_in_base = dig;
311 	c = (unsigned char) *expptr++;
312 	dig = digit_value[c];
313 	while (dig < exp_base)
314 	  {
315 	    exp_in_base = exp_in_base * exp_base;
316 	    exp_in_base += dig;
317 	    c = (unsigned char) *expptr++;
318 	    dig = digit_value[c];
319 	  }
320 	exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
321       }
322     else
323       exp_in_base = 0;
324     if (dotpos != 0)
325       exp_in_base -= s - dotpos + n_zeros_skipped;
326     divflag = exp_in_base < 0;
327     exp_in_base = ABS (exp_in_base);
328 
329     if (exp_in_base == 0)
330       {
331 	MPN_COPY (PTR(x), mp, mn);
332 	SIZ(x) = negative ? -mn : mn;
333 	EXP(x) = mn + madj;
334 	TMP_FREE;
335 	return 0;
336       }
337 
338     ra = 2 * (prec + 1);
339     TMP_ALLOC_LIMBS_2 (rp, ra, tp, ra);
340     rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
341 
342     if (divflag)
343       {
344 #if 0
345 	/* FIXME: Should use mpn_div_q here.  */
346 	...
347 	mpn_div_q (tp, mp, mn, rp, rn, scratch);
348 	...
349 #else
350 	mp_ptr qp;
351 	mp_limb_t qlimb;
352 	if (mn < rn)
353 	  {
354 	    /* Pad out MP,MSIZE for current divrem semantics.  */
355 	    mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
356 	    MPN_ZERO (tmp, rn - mn);
357 	    MPN_COPY (tmp + rn - mn, mp, mn);
358 	    mp = tmp;
359 	    madj -= rn - mn;
360 	    mn = rn;
361 	  }
362 	if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
363 	  {
364 	    mp_limb_t cy;
365 	    count_leading_zeros (cnt, rp[rn - 1]);
366 	    cnt -= GMP_NAIL_BITS;
367 	    mpn_lshift (rp, rp, rn, cnt);
368 	    cy = mpn_lshift (mp, mp, mn, cnt);
369 	    if (cy)
370 	      mp[mn++] = cy;
371 	  }
372 
373 	qp = TMP_ALLOC_LIMBS (prec + 1);
374 	qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
375 	tp = qp;
376 	exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
377 	rn = prec;
378 	if (qlimb != 0)
379 	  {
380 	    tp[prec] = qlimb;
381 	    /* Skip the least significant limb not to overrun the destination
382 	       variable.  */
383 	    tp++;
384 	  }
385 #endif
386       }
387     else
388       {
389 	tp = TMP_ALLOC_LIMBS (rn + mn);
390 	if (rn > mn)
391 	  mpn_mul (tp, rp, rn, mp, mn);
392 	else
393 	  mpn_mul (tp, mp, mn, rp, rn);
394 	rn += mn;
395 	rn -= tp[rn - 1] == 0;
396 	exp_in_limbs = rn + madj + radj;
397 
398 	if (rn > prec)
399 	  {
400 	    tp += rn - prec;
401 	    rn = prec;
402 	    exp_in_limbs += 0;
403 	  }
404       }
405 
406     MPN_COPY (PTR(x), tp, rn);
407     SIZ(x) = negative ? -rn : rn;
408     EXP(x) = exp_in_limbs;
409     TMP_FREE;
410     return 0;
411   }
412 }
413