xref: /openbsd-src/gnu/usr.bin/perl/numeric.c (revision 799f675f6700f14e59124f9825c723e9f2ce19dc)
1 /*    numeric.c
2  *
3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * "That only makes eleven (plus one mislaid) and not fourteen, unless
13  * wizards count differently to other people."
14  */
15 
16 /*
17 =head1 Numeric functions
18 
19 This file contains all the stuff needed by perl for manipulating numeric
20 values, including such things as replacements for the OS's atof() function
21 
22 =cut
23 
24 */
25 
26 #include "EXTERN.h"
27 #define PERL_IN_NUMERIC_C
28 #include "perl.h"
29 
30 U32
31 Perl_cast_ulong(pTHX_ NV f)
32 {
33   if (f < 0.0)
34     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
35   if (f < U32_MAX_P1) {
36 #if CASTFLAGS & 2
37     if (f < U32_MAX_P1_HALF)
38       return (U32) f;
39     f -= U32_MAX_P1_HALF;
40     return ((U32) f) | (1 + U32_MAX >> 1);
41 #else
42     return (U32) f;
43 #endif
44   }
45   return f > 0 ? U32_MAX : 0 /* NaN */;
46 }
47 
48 I32
49 Perl_cast_i32(pTHX_ NV f)
50 {
51   if (f < I32_MAX_P1)
52     return f < I32_MIN ? I32_MIN : (I32) f;
53   if (f < U32_MAX_P1) {
54 #if CASTFLAGS & 2
55     if (f < U32_MAX_P1_HALF)
56       return (I32)(U32) f;
57     f -= U32_MAX_P1_HALF;
58     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
59 #else
60     return (I32)(U32) f;
61 #endif
62   }
63   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
64 }
65 
66 IV
67 Perl_cast_iv(pTHX_ NV f)
68 {
69   if (f < IV_MAX_P1)
70     return f < IV_MIN ? IV_MIN : (IV) f;
71   if (f < UV_MAX_P1) {
72 #if CASTFLAGS & 2
73     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
74     if (f < UV_MAX_P1_HALF)
75       return (IV)(UV) f;
76     f -= UV_MAX_P1_HALF;
77     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
78 #else
79     return (IV)(UV) f;
80 #endif
81   }
82   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
83 }
84 
85 UV
86 Perl_cast_uv(pTHX_ NV f)
87 {
88   if (f < 0.0)
89     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
90   if (f < UV_MAX_P1) {
91 #if CASTFLAGS & 2
92     if (f < UV_MAX_P1_HALF)
93       return (UV) f;
94     f -= UV_MAX_P1_HALF;
95     return ((UV) f) | (1 + UV_MAX >> 1);
96 #else
97     return (UV) f;
98 #endif
99   }
100   return f > 0 ? UV_MAX : 0 /* NaN */;
101 }
102 
103 #if defined(HUGE_VAL) || (defined(USE_LONG_DOUBLE) && defined(HUGE_VALL))
104 /*
105  * This hack is to force load of "huge" support from libm.a
106  * So it is in perl for (say) POSIX to use.
107  * Needed for SunOS with Sun's 'acc' for example.
108  */
109 NV
110 Perl_huge(void)
111 {
112 #   if defined(USE_LONG_DOUBLE) && defined(HUGE_VALL)
113     return HUGE_VALL;
114 #   endif
115     return HUGE_VAL;
116 }
117 #endif
118 
119 /*
120 =for apidoc grok_bin
121 
122 converts a string representing a binary number to numeric form.
123 
124 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
125 conversion flags, and I<result> should be NULL or a pointer to an NV.
126 The scan stops at the end of the string, or the first invalid character.
127 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
128 invalid character will also trigger a warning.
129 On return I<*len> is set to the length of the scanned string,
130 and I<*flags> gives output flags.
131 
132 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
133 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
134 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
135 and writes the value to I<*result> (or the value is discarded if I<result>
136 is NULL).
137 
138 The binary number may optionally be prefixed with "0b" or "b" unless
139 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
140 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
141 number may use '_' characters to separate digits.
142 
143 =cut
144  */
145 
146 UV
147 Perl_grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
148     const char *s = start;
149     STRLEN len = *len_p;
150     UV value = 0;
151     NV value_nv = 0;
152 
153     const UV max_div_2 = UV_MAX / 2;
154     const bool allow_underscores = (bool)(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
155     bool overflowed = FALSE;
156     char bit;
157 
158     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
159         /* strip off leading b or 0b.
160            for compatibility silently suffer "b" and "0b" as valid binary
161            numbers. */
162         if (len >= 1) {
163             if (s[0] == 'b') {
164                 s++;
165                 len--;
166             }
167             else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
168                 s+=2;
169                 len-=2;
170             }
171         }
172     }
173 
174     for (; len-- && (bit = *s); s++) {
175         if (bit == '0' || bit == '1') {
176             /* Write it in this wonky order with a goto to attempt to get the
177                compiler to make the common case integer-only loop pretty tight.
178                With gcc seems to be much straighter code than old scan_bin.  */
179           redo:
180             if (!overflowed) {
181                 if (value <= max_div_2) {
182                     value = (value << 1) | (bit - '0');
183                     continue;
184                 }
185                 /* Bah. We're just overflowed.  */
186                 if (ckWARN_d(WARN_OVERFLOW))
187                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
188                                 "Integer overflow in binary number");
189                 overflowed = TRUE;
190                 value_nv = (NV) value;
191             }
192             value_nv *= 2.0;
193 	    /* If an NV has not enough bits in its mantissa to
194 	     * represent a UV this summing of small low-order numbers
195 	     * is a waste of time (because the NV cannot preserve
196 	     * the low-order bits anyway): we could just remember when
197 	     * did we overflow and in the end just multiply value_nv by the
198 	     * right amount. */
199             value_nv += (NV)(bit - '0');
200             continue;
201         }
202         if (bit == '_' && len && allow_underscores && (bit = s[1])
203             && (bit == '0' || bit == '1'))
204 	    {
205 		--len;
206 		++s;
207                 goto redo;
208 	    }
209         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
210             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
211                         "Illegal binary digit '%c' ignored", *s);
212         break;
213     }
214 
215     if (   ( overflowed && value_nv > 4294967295.0)
216 #if UVSIZE > 4
217 	|| (!overflowed && value > 0xffffffff  )
218 #endif
219 	) {
220 	if (ckWARN(WARN_PORTABLE))
221 	    Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
222 			"Binary number > 0b11111111111111111111111111111111 non-portable");
223     }
224     *len_p = s - start;
225     if (!overflowed) {
226         *flags = 0;
227         return value;
228     }
229     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
230     if (result)
231         *result = value_nv;
232     return UV_MAX;
233 }
234 
235 /*
236 =for apidoc grok_hex
237 
238 converts a string representing a hex number to numeric form.
239 
240 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
241 conversion flags, and I<result> should be NULL or a pointer to an NV.
242 The scan stops at the end of the string, or the first invalid character.
243 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
244 invalid character will also trigger a warning.
245 On return I<*len> is set to the length of the scanned string,
246 and I<*flags> gives output flags.
247 
248 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
249 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
250 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
251 and writes the value to I<*result> (or the value is discarded if I<result>
252 is NULL).
253 
254 The hex number may optionally be prefixed with "0x" or "x" unless
255 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
256 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
257 number may use '_' characters to separate digits.
258 
259 =cut
260  */
261 
262 UV
263 Perl_grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
264     const char *s = start;
265     STRLEN len = *len_p;
266     UV value = 0;
267     NV value_nv = 0;
268 
269     const UV max_div_16 = UV_MAX / 16;
270     const bool allow_underscores = (bool)(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
271     bool overflowed = FALSE;
272 
273     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
274         /* strip off leading x or 0x.
275            for compatibility silently suffer "x" and "0x" as valid hex numbers.
276         */
277         if (len >= 1) {
278             if (s[0] == 'x') {
279                 s++;
280                 len--;
281             }
282             else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
283                 s+=2;
284                 len-=2;
285             }
286         }
287     }
288 
289     for (; len-- && *s; s++) {
290 	const char *hexdigit = strchr(PL_hexdigit, *s);
291         if (hexdigit) {
292             /* Write it in this wonky order with a goto to attempt to get the
293                compiler to make the common case integer-only loop pretty tight.
294                With gcc seems to be much straighter code than old scan_hex.  */
295           redo:
296             if (!overflowed) {
297                 if (value <= max_div_16) {
298                     value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
299                     continue;
300                 }
301                 /* Bah. We're just overflowed.  */
302                 if (ckWARN_d(WARN_OVERFLOW))
303                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
304                                 "Integer overflow in hexadecimal number");
305                 overflowed = TRUE;
306                 value_nv = (NV) value;
307             }
308             value_nv *= 16.0;
309 	    /* If an NV has not enough bits in its mantissa to
310 	     * represent a UV this summing of small low-order numbers
311 	     * is a waste of time (because the NV cannot preserve
312 	     * the low-order bits anyway): we could just remember when
313 	     * did we overflow and in the end just multiply value_nv by the
314 	     * right amount of 16-tuples. */
315             value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
316             continue;
317         }
318         if (*s == '_' && len && allow_underscores && s[1]
319 		&& (hexdigit = strchr(PL_hexdigit, s[1])))
320 	    {
321 		--len;
322 		++s;
323                 goto redo;
324 	    }
325         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
326             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
327                         "Illegal hexadecimal digit '%c' ignored", *s);
328         break;
329     }
330 
331     if (   ( overflowed && value_nv > 4294967295.0)
332 #if UVSIZE > 4
333 	|| (!overflowed && value > 0xffffffff  )
334 #endif
335 	) {
336 	if (ckWARN(WARN_PORTABLE))
337 	    Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
338 			"Hexadecimal number > 0xffffffff non-portable");
339     }
340     *len_p = s - start;
341     if (!overflowed) {
342         *flags = 0;
343         return value;
344     }
345     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
346     if (result)
347         *result = value_nv;
348     return UV_MAX;
349 }
350 
351 /*
352 =for apidoc grok_oct
353 
354 converts a string representing an octal number to numeric form.
355 
356 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
357 conversion flags, and I<result> should be NULL or a pointer to an NV.
358 The scan stops at the end of the string, or the first invalid character.
359 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
360 invalid character will also trigger a warning.
361 On return I<*len> is set to the length of the scanned string,
362 and I<*flags> gives output flags.
363 
364 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
365 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
366 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
367 and writes the value to I<*result> (or the value is discarded if I<result>
368 is NULL).
369 
370 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
371 number may use '_' characters to separate digits.
372 
373 =cut
374  */
375 
376 UV
377 Perl_grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
378     const char *s = start;
379     STRLEN len = *len_p;
380     UV value = 0;
381     NV value_nv = 0;
382 
383     const UV max_div_8 = UV_MAX / 8;
384     const bool allow_underscores = (bool)(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
385     bool overflowed = FALSE;
386 
387     for (; len-- && *s; s++) {
388          /* gcc 2.95 optimiser not smart enough to figure that this subtraction
389             out front allows slicker code.  */
390         int digit = *s - '0';
391         if (digit >= 0 && digit <= 7) {
392             /* Write it in this wonky order with a goto to attempt to get the
393                compiler to make the common case integer-only loop pretty tight.
394             */
395           redo:
396             if (!overflowed) {
397                 if (value <= max_div_8) {
398                     value = (value << 3) | digit;
399                     continue;
400                 }
401                 /* Bah. We're just overflowed.  */
402                 if (ckWARN_d(WARN_OVERFLOW))
403                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
404                                 "Integer overflow in octal number");
405                 overflowed = TRUE;
406                 value_nv = (NV) value;
407             }
408             value_nv *= 8.0;
409 	    /* If an NV has not enough bits in its mantissa to
410 	     * represent a UV this summing of small low-order numbers
411 	     * is a waste of time (because the NV cannot preserve
412 	     * the low-order bits anyway): we could just remember when
413 	     * did we overflow and in the end just multiply value_nv by the
414 	     * right amount of 8-tuples. */
415             value_nv += (NV)digit;
416             continue;
417         }
418         if (digit == ('_' - '0') && len && allow_underscores
419             && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
420 	    {
421 		--len;
422 		++s;
423                 goto redo;
424 	    }
425         /* Allow \octal to work the DWIM way (that is, stop scanning
426          * as soon as non-octal characters are seen, complain only if
427          * someone seems to want to use the digits eight and nine). */
428         if (digit == 8 || digit == 9) {
429             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
430                 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
431                             "Illegal octal digit '%c' ignored", *s);
432         }
433         break;
434     }
435 
436     if (   ( overflowed && value_nv > 4294967295.0)
437 #if UVSIZE > 4
438 	|| (!overflowed && value > 0xffffffff  )
439 #endif
440 	) {
441 	if (ckWARN(WARN_PORTABLE))
442 	    Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
443 			"Octal number > 037777777777 non-portable");
444     }
445     *len_p = s - start;
446     if (!overflowed) {
447         *flags = 0;
448         return value;
449     }
450     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
451     if (result)
452         *result = value_nv;
453     return UV_MAX;
454 }
455 
456 /*
457 =for apidoc scan_bin
458 
459 For backwards compatibility. Use C<grok_bin> instead.
460 
461 =for apidoc scan_hex
462 
463 For backwards compatibility. Use C<grok_hex> instead.
464 
465 =for apidoc scan_oct
466 
467 For backwards compatibility. Use C<grok_oct> instead.
468 
469 =cut
470  */
471 
472 NV
473 Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen)
474 {
475     NV rnv;
476     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
477     const UV ruv = grok_bin (start, &len, &flags, &rnv);
478 
479     *retlen = len;
480     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
481 }
482 
483 NV
484 Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen)
485 {
486     NV rnv;
487     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
488     const UV ruv = grok_oct (start, &len, &flags, &rnv);
489 
490     *retlen = len;
491     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
492 }
493 
494 NV
495 Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen)
496 {
497     NV rnv;
498     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
499     const UV ruv = grok_hex (start, &len, &flags, &rnv);
500 
501     *retlen = len;
502     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
503 }
504 
505 /*
506 =for apidoc grok_numeric_radix
507 
508 Scan and skip for a numeric decimal separator (radix).
509 
510 =cut
511  */
512 bool
513 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
514 {
515 #ifdef USE_LOCALE_NUMERIC
516     if (PL_numeric_radix_sv && IN_LOCALE) {
517         STRLEN len;
518         const char* radix = SvPV(PL_numeric_radix_sv, len);
519         if (*sp + len <= send && memEQ(*sp, radix, len)) {
520             *sp += len;
521             return TRUE;
522         }
523     }
524     /* always try "." if numeric radix didn't match because
525      * we may have data from different locales mixed */
526 #endif
527     if (*sp < send && **sp == '.') {
528         ++*sp;
529         return TRUE;
530     }
531     return FALSE;
532 }
533 
534 /*
535 =for apidoc grok_number
536 
537 Recognise (or not) a number.  The type of the number is returned
538 (0 if unrecognised), otherwise it is a bit-ORed combination of
539 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
540 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
541 
542 If the value of the number can fit an in UV, it is returned in the *valuep
543 IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
544 will never be set unless *valuep is valid, but *valuep may have been assigned
545 to during processing even though IS_NUMBER_IN_UV is not set on return.
546 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
547 valuep is non-NULL, but no actual assignment (or SEGV) will occur.
548 
549 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
550 seen (in which case *valuep gives the true value truncated to an integer), and
551 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
552 absolute value).  IS_NUMBER_IN_UV is not set if e notation was used or the
553 number is larger than a UV.
554 
555 =cut
556  */
557 int
558 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
559 {
560   const char *s = pv;
561   const char *send = pv + len;
562   const UV max_div_10 = UV_MAX / 10;
563   const char max_mod_10 = UV_MAX % 10;
564   int numtype = 0;
565   int sawinf = 0;
566   int sawnan = 0;
567 
568   while (s < send && isSPACE(*s))
569     s++;
570   if (s == send) {
571     return 0;
572   } else if (*s == '-') {
573     s++;
574     numtype = IS_NUMBER_NEG;
575   }
576   else if (*s == '+')
577   s++;
578 
579   if (s == send)
580     return 0;
581 
582   /* next must be digit or the radix separator or beginning of infinity */
583   if (isDIGIT(*s)) {
584     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
585        overflow.  */
586     UV value = *s - '0';
587     /* This construction seems to be more optimiser friendly.
588        (without it gcc does the isDIGIT test and the *s - '0' separately)
589        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
590        In theory the optimiser could deduce how far to unroll the loop
591        before checking for overflow.  */
592     if (++s < send) {
593       int digit = *s - '0';
594       if (digit >= 0 && digit <= 9) {
595         value = value * 10 + digit;
596         if (++s < send) {
597           digit = *s - '0';
598           if (digit >= 0 && digit <= 9) {
599             value = value * 10 + digit;
600             if (++s < send) {
601               digit = *s - '0';
602               if (digit >= 0 && digit <= 9) {
603                 value = value * 10 + digit;
604 		if (++s < send) {
605                   digit = *s - '0';
606                   if (digit >= 0 && digit <= 9) {
607                     value = value * 10 + digit;
608                     if (++s < send) {
609                       digit = *s - '0';
610                       if (digit >= 0 && digit <= 9) {
611                         value = value * 10 + digit;
612                         if (++s < send) {
613                           digit = *s - '0';
614                           if (digit >= 0 && digit <= 9) {
615                             value = value * 10 + digit;
616                             if (++s < send) {
617                               digit = *s - '0';
618                               if (digit >= 0 && digit <= 9) {
619                                 value = value * 10 + digit;
620                                 if (++s < send) {
621                                   digit = *s - '0';
622                                   if (digit >= 0 && digit <= 9) {
623                                     value = value * 10 + digit;
624                                     if (++s < send) {
625                                       /* Now got 9 digits, so need to check
626                                          each time for overflow.  */
627                                       digit = *s - '0';
628                                       while (digit >= 0 && digit <= 9
629                                              && (value < max_div_10
630                                                  || (value == max_div_10
631                                                      && digit <= max_mod_10))) {
632                                         value = value * 10 + digit;
633                                         if (++s < send)
634                                           digit = *s - '0';
635                                         else
636                                           break;
637                                       }
638                                       if (digit >= 0 && digit <= 9
639                                           && (s < send)) {
640                                         /* value overflowed.
641                                            skip the remaining digits, don't
642                                            worry about setting *valuep.  */
643                                         do {
644                                           s++;
645                                         } while (s < send && isDIGIT(*s));
646                                         numtype |=
647                                           IS_NUMBER_GREATER_THAN_UV_MAX;
648                                         goto skip_value;
649                                       }
650                                     }
651                                   }
652 				}
653                               }
654                             }
655                           }
656                         }
657                       }
658                     }
659                   }
660                 }
661               }
662             }
663           }
664 	}
665       }
666     }
667     numtype |= IS_NUMBER_IN_UV;
668     if (valuep)
669       *valuep = value;
670 
671   skip_value:
672     if (GROK_NUMERIC_RADIX(&s, send)) {
673       numtype |= IS_NUMBER_NOT_INT;
674       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
675         s++;
676     }
677   }
678   else if (GROK_NUMERIC_RADIX(&s, send)) {
679     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
680     /* no digits before the radix means we need digits after it */
681     if (s < send && isDIGIT(*s)) {
682       do {
683         s++;
684       } while (s < send && isDIGIT(*s));
685       if (valuep) {
686         /* integer approximation is valid - it's 0.  */
687         *valuep = 0;
688       }
689     }
690     else
691       return 0;
692   } else if (*s == 'I' || *s == 'i') {
693     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
694     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
695     s++; if (s < send && (*s == 'I' || *s == 'i')) {
696       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
697       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
698       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
699       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
700       s++;
701     }
702     sawinf = 1;
703   } else if (*s == 'N' || *s == 'n') {
704     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
705     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
706     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
707     s++;
708     sawnan = 1;
709   } else
710     return 0;
711 
712   if (sawinf) {
713     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
714     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
715   } else if (sawnan) {
716     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
717     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
718   } else if (s < send) {
719     /* we can have an optional exponent part */
720     if (*s == 'e' || *s == 'E') {
721       /* The only flag we keep is sign.  Blow away any "it's UV"  */
722       numtype &= IS_NUMBER_NEG;
723       numtype |= IS_NUMBER_NOT_INT;
724       s++;
725       if (s < send && (*s == '-' || *s == '+'))
726         s++;
727       if (s < send && isDIGIT(*s)) {
728         do {
729           s++;
730         } while (s < send && isDIGIT(*s));
731       }
732       else
733       return 0;
734     }
735   }
736   while (s < send && isSPACE(*s))
737     s++;
738   if (s >= send)
739     return numtype;
740   if (len == 10 && memEQ(pv, "0 but true", 10)) {
741     if (valuep)
742       *valuep = 0;
743     return IS_NUMBER_IN_UV;
744   }
745   return 0;
746 }
747 
748 STATIC NV
749 S_mulexp10(NV value, I32 exponent)
750 {
751     NV result = 1.0;
752     NV power = 10.0;
753     bool negative = 0;
754     I32 bit;
755 
756     if (exponent == 0)
757 	return value;
758     if (value == 0)
759 	return (NV)0;
760 
761     /* On OpenVMS VAX we by default use the D_FLOAT double format,
762      * and that format does not have *easy* capabilities [1] for
763      * overflowing doubles 'silently' as IEEE fp does.  We also need
764      * to support G_FLOAT on both VAX and Alpha, and though the exponent
765      * range is much larger than D_FLOAT it still doesn't do silent
766      * overflow.  Therefore we need to detect early whether we would
767      * overflow (this is the behaviour of the native string-to-float
768      * conversion routines, and therefore of native applications, too).
769      *
770      * [1] Trying to establish a condition handler to trap floating point
771      *     exceptions is not a good idea. */
772 
773     /* In UNICOS and in certain Cray models (such as T90) there is no
774      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
775      * There is something you can do if you are willing to use some
776      * inline assembler: the instruction is called DFI-- but that will
777      * disable *all* floating point interrupts, a little bit too large
778      * a hammer.  Therefore we need to catch potential overflows before
779      * it's too late. */
780 
781 #if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
782     STMT_START {
783 	NV exp_v = log10(value);
784 	if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
785 	    return NV_MAX;
786 	if (exponent < 0) {
787 	    if (-(exponent + exp_v) >= NV_MAX_10_EXP)
788 		return 0.0;
789 	    while (-exponent >= NV_MAX_10_EXP) {
790 		/* combination does not overflow, but 10^(-exponent) does */
791 		value /= 10;
792 		++exponent;
793 	    }
794 	}
795     } STMT_END;
796 #endif
797 
798     if (exponent < 0) {
799 	negative = 1;
800 	exponent = -exponent;
801     }
802     for (bit = 1; exponent; bit <<= 1) {
803 	if (exponent & bit) {
804 	    exponent ^= bit;
805 	    result *= power;
806 	    /* Floating point exceptions are supposed to be turned off,
807 	     *  but if we're obviously done, don't risk another iteration.
808 	     */
809 	     if (exponent == 0) break;
810 	}
811 	power *= power;
812     }
813     return negative ? value / result : value * result;
814 }
815 
816 NV
817 Perl_my_atof(pTHX_ const char* s)
818 {
819     NV x = 0.0;
820 #ifdef USE_LOCALE_NUMERIC
821     if (PL_numeric_local && IN_LOCALE) {
822 	NV y;
823 
824 	/* Scan the number twice; once using locale and once without;
825 	 * choose the larger result (in absolute value). */
826 	Perl_atof2(s, x);
827 	SET_NUMERIC_STANDARD();
828 	Perl_atof2(s, y);
829 	SET_NUMERIC_LOCAL();
830 	if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
831 	    return y;
832     }
833     else
834 	Perl_atof2(s, x);
835 #else
836     Perl_atof2(s, x);
837 #endif
838     return x;
839 }
840 
841 char*
842 Perl_my_atof2(pTHX_ const char* orig, NV* value)
843 {
844     NV result[3] = {0.0, 0.0, 0.0};
845     const char* s = orig;
846 #ifdef USE_PERL_ATOF
847     UV accumulator[2] = {0,0};	/* before/after dp */
848     bool negative = 0;
849     const char* send = s + strlen(orig) - 1;
850     bool seen_digit = 0;
851     I32 exp_adjust[2] = {0,0};
852     I32 exp_acc[2] = {-1, -1};
853     /* the current exponent adjust for the accumulators */
854     I32 exponent = 0;
855     I32	seen_dp  = 0;
856     I32 digit = 0;
857     I32 old_digit = 0;
858     I32 sig_digits = 0; /* noof significant digits seen so far */
859 
860 /* There is no point in processing more significant digits
861  * than the NV can hold. Note that NV_DIG is a lower-bound value,
862  * while we need an upper-bound value. We add 2 to account for this;
863  * since it will have been conservative on both the first and last digit.
864  * For example a 32-bit mantissa with an exponent of 4 would have
865  * exact values in the set
866  *               4
867  *               8
868  *              ..
869  *     17179869172
870  *     17179869176
871  *     17179869180
872  *
873  * where for the purposes of calculating NV_DIG we would have to discount
874  * both the first and last digit, since neither can hold all values from
875  * 0..9; but for calculating the value we must examine those two digits.
876  */
877 #define MAX_SIG_DIGITS (NV_DIG+2)
878 
879 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
880 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
881 
882     /* leading whitespace */
883     while (isSPACE(*s))
884 	++s;
885 
886     /* sign */
887     switch (*s) {
888 	case '-':
889 	    negative = 1;
890 	    /* fall through */
891 	case '+':
892 	    ++s;
893     }
894 
895     /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
896 
897 #ifdef HAS_STRTOD
898     if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
899         const char *p = negative ? s - 1 : s;
900         char *endp;
901         NV rslt;
902         rslt = strtod(p, &endp);
903         if (endp != p) {
904             *value = rslt;
905             return (char *)endp;
906         }
907     }
908 #endif
909 
910     /* we accumulate digits into an integer; when this becomes too
911      * large, we add the total to NV and start again */
912 
913     while (1) {
914 	if (isDIGIT(*s)) {
915 	    seen_digit = 1;
916 	    old_digit = digit;
917 	    digit = *s++ - '0';
918 	    if (seen_dp)
919 		exp_adjust[1]++;
920 
921 	    /* don't start counting until we see the first significant
922 	     * digit, eg the 5 in 0.00005... */
923 	    if (!sig_digits && digit == 0)
924 		continue;
925 
926 	    if (++sig_digits > MAX_SIG_DIGITS) {
927 		/* limits of precision reached */
928 	        if (digit > 5) {
929 		    ++accumulator[seen_dp];
930 		} else if (digit == 5) {
931 		    if (old_digit % 2) { /* round to even - Allen */
932 			++accumulator[seen_dp];
933 		    }
934 		}
935 		if (seen_dp) {
936 		    exp_adjust[1]--;
937 		} else {
938 		    exp_adjust[0]++;
939 		}
940 		/* skip remaining digits */
941 		while (isDIGIT(*s)) {
942 		    ++s;
943 		    if (! seen_dp) {
944 			exp_adjust[0]++;
945 		    }
946 		}
947 		/* warn of loss of precision? */
948 	    }
949 	    else {
950 		if (accumulator[seen_dp] > MAX_ACCUMULATE) {
951 		    /* add accumulator to result and start again */
952 		    result[seen_dp] = S_mulexp10(result[seen_dp],
953 						 exp_acc[seen_dp])
954 			+ (NV)accumulator[seen_dp];
955 		    accumulator[seen_dp] = 0;
956 		    exp_acc[seen_dp] = 0;
957 		}
958 		accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
959 		++exp_acc[seen_dp];
960 	    }
961 	}
962 	else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
963 	    seen_dp = 1;
964 	    if (sig_digits > MAX_SIG_DIGITS) {
965 		++s;
966 		while (isDIGIT(*s)) {
967 		    ++s;
968 		}
969 		break;
970 	    }
971 	}
972 	else {
973 	    break;
974 	}
975     }
976 
977     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
978     if (seen_dp) {
979 	result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
980     }
981 
982     if (seen_digit && (*s == 'e' || *s == 'E')) {
983 	bool expnegative = 0;
984 
985 	++s;
986 	switch (*s) {
987 	    case '-':
988 		expnegative = 1;
989 		/* fall through */
990 	    case '+':
991 		++s;
992 	}
993 	while (isDIGIT(*s))
994 	    exponent = exponent * 10 + (*s++ - '0');
995 	if (expnegative)
996 	    exponent = -exponent;
997     }
998 
999 
1000 
1001     /* now apply the exponent */
1002 
1003     if (seen_dp) {
1004 	result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1005 		+ S_mulexp10(result[1],exponent-exp_adjust[1]);
1006     } else {
1007 	result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1008     }
1009 
1010     /* now apply the sign */
1011     if (negative)
1012 	result[2] = -result[2];
1013 #endif /* USE_PERL_ATOF */
1014     *value = result[2];
1015     return (char *)s;
1016 }
1017 
1018 #if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1019 long double
1020 Perl_my_modfl(long double x, long double *ip)
1021 {
1022 	*ip = aintl(x);
1023 	return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1024 }
1025 #endif
1026 
1027 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1028 long double
1029 Perl_my_frexpl(long double x, int *e) {
1030 	*e = x == 0.0L ? 0 : ilogbl(x) + 1;
1031 	return (scalbnl(x, -*e));
1032 }
1033 #endif
1034 
1035 /*
1036  * Local variables:
1037  * c-indentation-style: bsd
1038  * c-basic-offset: 4
1039  * indent-tabs-mode: t
1040  * End:
1041  *
1042  * ex: set ts=8 sts=4 sw=4 noet:
1043  */
1044