xref: /plan9/sys/src/ape/lib/fmt/strtod.c (revision b85a83648eec38fe82b6f00adfd7828ceec5ee8d)
1 /*
2  * The authors of this software are Rob Pike and Ken Thompson.
3  *              Copyright (c) 2002 by Lucent Technologies.
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose without fee is hereby granted, provided that this entire notice
6  * is included in all copies of any software which is or includes a copy
7  * or modification of this software and in all copies of the supporting
8  * documentation for such software.
9  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10  * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
11  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13  */
14 #include <stdlib.h>
15 #include <math.h>
16 #include <ctype.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <errno.h>
20 #include "fmt.h"
21 #include "nan.h"
22 
23 #ifndef nelem
24 #define nelem(x)	(sizeof(x)/sizeof *(x))
25 #endif
26 #define nil ((void*)0)
27 #define ulong _fmtulong
28 typedef unsigned long ulong;
29 
30 static ulong
umuldiv(ulong a,ulong b,ulong c)31 umuldiv(ulong a, ulong b, ulong c)
32 {
33 	double d;
34 
35 	d = ((double)a * (double)b) / (double)c;
36 	if(d >= 4294967295.)
37 		d = 4294967295.;
38 	return (ulong)d;
39 }
40 
41 /*
42  * This routine will convert to arbitrary precision
43  * floating point entirely in multi-precision fixed.
44  * The answer is the closest floating point number to
45  * the given decimal number. Exactly half way are
46  * rounded ala ieee rules.
47  * Method is to scale input decimal between .500 and .999...
48  * with external power of 2, then binary search for the
49  * closest mantissa to this decimal number.
50  * Nmant is is the required precision. (53 for ieee dp)
51  * Nbits is the max number of bits/word. (must be <= 28)
52  * Prec is calculated - the number of words of fixed mantissa.
53  */
54 enum
55 {
56 	Nbits	= 28,				/* bits safely represented in a ulong */
57 	Nmant	= 53,				/* bits of precision required */
58 	Prec	= (Nmant+Nbits+1)/Nbits,	/* words of Nbits each to represent mantissa */
59 	Sigbit	= 1<<(Prec*Nbits-Nmant),	/* first significant bit of Prec-th word */
60 	Ndig	= 1500,
61 	One	= (ulong)(1<<Nbits),
62 	Half	= (ulong)(One>>1),
63 	Maxe	= 310,
64 
65 	Fsign	= 1<<0,		/* found - */
66 	Fesign	= 1<<1,		/* found e- */
67 	Fdpoint	= 1<<2,		/* found . */
68 
69 	S0	= 0,		/* _		_S0	+S1	#S2	.S3 */
70 	S1,			/* _+		#S2	.S3 */
71 	S2,			/* _+#		#S2	.S4	eS5 */
72 	S3,			/* _+.		#S4 */
73 	S4,			/* _+#.#	#S4	eS5 */
74 	S5,			/* _+#.#e	+S6	#S7 */
75 	S6,			/* _+#.#e+	#S7 */
76 	S7,			/* _+#.#e+#	#S7 */
77 };
78 
79 static	int	xcmp(char*, char*);
80 static	int	fpcmp(char*, ulong*);
81 static	void	frnorm(ulong*);
82 static	void	divascii(char*, int*, int*, int*);
83 static	void	mulascii(char*, int*, int*, int*);
84 
85 typedef	struct	Tab	Tab;
86 struct	Tab
87 {
88 	int	bp;
89 	int	siz;
90 	char*	cmp;
91 };
92 
93 double
fmtstrtod(const char * as,char ** aas)94 fmtstrtod(const char *as, char **aas)
95 {
96 	int na, ex, dp, bp, c, i, flag, state;
97 	ulong low[Prec], hig[Prec], mid[Prec];
98 	double d;
99 	char *s, a[Ndig];
100 
101 	flag = 0;	/* Fsign, Fesign, Fdpoint */
102 	na = 0;		/* number of digits of a[] */
103 	dp = 0;		/* na of decimal point */
104 	ex = 0;		/* exonent */
105 
106 	state = S0;
107 	for(s=(char*)as;; s++) {
108 		c = *s;
109 		if(c >= '0' && c <= '9') {
110 			switch(state) {
111 			case S0:
112 			case S1:
113 			case S2:
114 				state = S2;
115 				break;
116 			case S3:
117 			case S4:
118 				state = S4;
119 				break;
120 
121 			case S5:
122 			case S6:
123 			case S7:
124 				state = S7;
125 				ex = ex*10 + (c-'0');
126 				continue;
127 			}
128 			if(na == 0 && c == '0') {
129 				dp--;
130 				continue;
131 			}
132 			if(na < Ndig-50)
133 				a[na++] = c;
134 			continue;
135 		}
136 		switch(c) {
137 		case '\t':
138 		case '\n':
139 		case '\v':
140 		case '\f':
141 		case '\r':
142 		case ' ':
143 			if(state == S0)
144 				continue;
145 			break;
146 		case '-':
147 			if(state == S0)
148 				flag |= Fsign;
149 			else
150 				flag |= Fesign;
151 		case '+':
152 			if(state == S0)
153 				state = S1;
154 			else
155 			if(state == S5)
156 				state = S6;
157 			else
158 				break;	/* syntax */
159 			continue;
160 		case '.':
161 			flag |= Fdpoint;
162 			dp = na;
163 			if(state == S0 || state == S1) {
164 				state = S3;
165 				continue;
166 			}
167 			if(state == S2) {
168 				state = S4;
169 				continue;
170 			}
171 			break;
172 		case 'e':
173 		case 'E':
174 			if(state == S2 || state == S4) {
175 				state = S5;
176 				continue;
177 			}
178 			break;
179 		}
180 		break;
181 	}
182 
183 	/*
184 	 * clean up return char-pointer
185 	 */
186 	switch(state) {
187 	case S0:
188 		if(xcmp(s, "nan") == 0) {
189 			if(aas != nil)
190 				*aas = s+3;
191 			goto retnan;
192 		}
193 	case S1:
194 		if(xcmp(s, "infinity") == 0) {
195 			if(aas != nil)
196 				*aas = s+8;
197 			goto retinf;
198 		}
199 		if(xcmp(s, "inf") == 0) {
200 			if(aas != nil)
201 				*aas = s+3;
202 			goto retinf;
203 		}
204 	case S3:
205 		if(aas != nil)
206 			*aas = (char*)as;
207 		goto ret0;	/* no digits found */
208 	case S6:
209 		s--;		/* back over +- */
210 	case S5:
211 		s--;		/* back over e */
212 		break;
213 	}
214 	if(aas != nil)
215 		*aas = s;
216 
217 	if(flag & Fdpoint)
218 	while(na > 0 && a[na-1] == '0')
219 		na--;
220 	if(na == 0)
221 		goto ret0;	/* zero */
222 	a[na] = 0;
223 	if(!(flag & Fdpoint))
224 		dp = na;
225 	if(flag & Fesign)
226 		ex = -ex;
227 	dp += ex;
228 	if(dp < -Maxe){
229 		errno = ERANGE;
230 		goto ret0;	/* underflow by exp */
231 	} else
232 	if(dp > +Maxe)
233 		goto retinf;	/* overflow by exp */
234 
235 	/*
236 	 * normalize the decimal ascii number
237 	 * to range .[5-9][0-9]* e0
238 	 */
239 	bp = 0;		/* binary exponent */
240 	while(dp > 0)
241 		divascii(a, &na, &dp, &bp);
242 	while(dp < 0 || a[0] < '5')
243 		mulascii(a, &na, &dp, &bp);
244 
245 	/* close approx by naive conversion */
246 	mid[0] = 0;
247 	mid[1] = 1;
248 	for(i=0; c=a[i]; i++) {
249 		mid[0] = mid[0]*10 + (c-'0');
250 		mid[1] = mid[1]*10;
251 		if(i >= 8)
252 			break;
253 	}
254 	low[0] = umuldiv(mid[0], One, mid[1]);
255 	hig[0] = umuldiv(mid[0]+1, One, mid[1]);
256 	for(i=1; i<Prec; i++) {
257 		low[i] = 0;
258 		hig[i] = One-1;
259 	}
260 
261 	/* binary search for closest mantissa */
262 	for(;;) {
263 		/* mid = (hig + low) / 2 */
264 		c = 0;
265 		for(i=0; i<Prec; i++) {
266 			mid[i] = hig[i] + low[i];
267 			if(c)
268 				mid[i] += One;
269 			c = mid[i] & 1;
270 			mid[i] >>= 1;
271 		}
272 		frnorm(mid);
273 
274 		/* compare */
275 		c = fpcmp(a, mid);
276 		if(c > 0) {
277 			c = 1;
278 			for(i=0; i<Prec; i++)
279 				if(low[i] != mid[i]) {
280 					c = 0;
281 					low[i] = mid[i];
282 				}
283 			if(c)
284 				break;	/* between mid and hig */
285 			continue;
286 		}
287 		if(c < 0) {
288 			for(i=0; i<Prec; i++)
289 				hig[i] = mid[i];
290 			continue;
291 		}
292 
293 		/* only hard part is if even/odd roundings wants to go up */
294 		c = mid[Prec-1] & (Sigbit-1);
295 		if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
296 			mid[Prec-1] -= c;
297 		break;	/* exactly mid */
298 	}
299 
300 	/* normal rounding applies */
301 	c = mid[Prec-1] & (Sigbit-1);
302 	mid[Prec-1] -= c;
303 	if(c >= Sigbit/2) {
304 		mid[Prec-1] += Sigbit;
305 		frnorm(mid);
306 	}
307 	goto out;
308 
309 ret0:
310 	return 0;
311 
312 retnan:
313 	return __NaN();
314 
315 retinf:
316 	/*
317 	 * Unix strtod requires these.  Plan 9 would return Inf(0) or Inf(-1). */
318 	errno = ERANGE;
319 	if(flag & Fsign)
320 		return -HUGE_VAL;
321 	return HUGE_VAL;
322 
323 out:
324 	d = 0;
325 	for(i=0; i<Prec; i++)
326 		d = d*One + mid[i];
327 	if(flag & Fsign)
328 		d = -d;
329 	d = ldexp(d, bp - Prec*Nbits);
330 	if(d == 0){	/* underflow */
331 		errno = ERANGE;
332 	}
333 	return d;
334 }
335 
336 static void
frnorm(ulong * f)337 frnorm(ulong *f)
338 {
339 	int i, c;
340 
341 	c = 0;
342 	for(i=Prec-1; i>0; i--) {
343 		f[i] += c;
344 		c = f[i] >> Nbits;
345 		f[i] &= One-1;
346 	}
347 	f[0] += c;
348 }
349 
350 static int
fpcmp(char * a,ulong * f)351 fpcmp(char *a, ulong* f)
352 {
353 	ulong tf[Prec];
354 	int i, d, c;
355 
356 	for(i=0; i<Prec; i++)
357 		tf[i] = f[i];
358 
359 	for(;;) {
360 		/* tf *= 10 */
361 		for(i=0; i<Prec; i++)
362 			tf[i] = tf[i]*10;
363 		frnorm(tf);
364 		d = (tf[0] >> Nbits) + '0';
365 		tf[0] &= One-1;
366 
367 		/* compare next digit */
368 		c = *a;
369 		if(c == 0) {
370 			if('0' < d)
371 				return -1;
372 			if(tf[0] != 0)
373 				goto cont;
374 			for(i=1; i<Prec; i++)
375 				if(tf[i] != 0)
376 					goto cont;
377 			return 0;
378 		}
379 		if(c > d)
380 			return +1;
381 		if(c < d)
382 			return -1;
383 		a++;
384 	cont:;
385 	}
386 }
387 
388 static void
divby(char * a,int * na,int b)389 divby(char *a, int *na, int b)
390 {
391 	int n, c;
392 	char *p;
393 
394 	p = a;
395 	n = 0;
396 	while(n>>b == 0) {
397 		c = *a++;
398 		if(c == 0) {
399 			while(n) {
400 				c = n*10;
401 				if(c>>b)
402 					break;
403 				n = c;
404 			}
405 			goto xx;
406 		}
407 		n = n*10 + c-'0';
408 		(*na)--;
409 	}
410 	for(;;) {
411 		c = n>>b;
412 		n -= c<<b;
413 		*p++ = c + '0';
414 		c = *a++;
415 		if(c == 0)
416 			break;
417 		n = n*10 + c-'0';
418 	}
419 	(*na)++;
420 xx:
421 	while(n) {
422 		n = n*10;
423 		c = n>>b;
424 		n -= c<<b;
425 		*p++ = c + '0';
426 		(*na)++;
427 	}
428 	*p = 0;
429 }
430 
431 static	Tab	tab1[] =
432 {
433 	 1,  0, "",
434 	 3,  1, "7",
435 	 6,  2, "63",
436 	 9,  3, "511",
437 	13,  4, "8191",
438 	16,  5, "65535",
439 	19,  6, "524287",
440 	23,  7, "8388607",
441 	26,  8, "67108863",
442 	27,  9, "134217727",
443 };
444 
445 static void
divascii(char * a,int * na,int * dp,int * bp)446 divascii(char *a, int *na, int *dp, int *bp)
447 {
448 	int b, d;
449 	Tab *t;
450 
451 	d = *dp;
452 	if(d >= (int)(nelem(tab1)))
453 		d = (int)(nelem(tab1))-1;
454 	t = tab1 + d;
455 	b = t->bp;
456 	if(memcmp(a, t->cmp, t->siz) > 0)
457 		d--;
458 	*dp -= d;
459 	*bp += b;
460 	divby(a, na, b);
461 }
462 
463 static void
mulby(char * a,char * p,char * q,int b)464 mulby(char *a, char *p, char *q, int b)
465 {
466 	int n, c;
467 
468 	n = 0;
469 	*p = 0;
470 	for(;;) {
471 		q--;
472 		if(q < a)
473 			break;
474 		c = *q - '0';
475 		c = (c<<b) + n;
476 		n = c/10;
477 		c -= n*10;
478 		p--;
479 		*p = c + '0';
480 	}
481 	while(n) {
482 		c = n;
483 		n = c/10;
484 		c -= n*10;
485 		p--;
486 		*p = c + '0';
487 	}
488 }
489 
490 static	Tab	tab2[] =
491 {
492 	 1,  1, "",				/* dp = 0-0 */
493 	 3,  3, "125",
494 	 6,  5, "15625",
495 	 9,  7, "1953125",
496 	13, 10, "1220703125",
497 	16, 12, "152587890625",
498 	19, 14, "19073486328125",
499 	23, 17, "11920928955078125",
500 	26, 19, "1490116119384765625",
501 	27, 19, "7450580596923828125",		/* dp 8-9 */
502 };
503 
504 static void
mulascii(char * a,int * na,int * dp,int * bp)505 mulascii(char *a, int *na, int *dp, int *bp)
506 {
507 	char *p;
508 	int d, b;
509 	Tab *t;
510 
511 	d = -*dp;
512 	if(d >= (int)(nelem(tab2)))
513 		d = (int)(nelem(tab2))-1;
514 	t = tab2 + d;
515 	b = t->bp;
516 	if(memcmp(a, t->cmp, t->siz) < 0)
517 		d--;
518 	p = a + *na;
519 	*bp -= b;
520 	*dp += d;
521 	*na += d;
522 	mulby(a, p+d, p, b);
523 }
524 
525 static int
xcmp(char * a,char * b)526 xcmp(char *a, char *b)
527 {
528 	int c1, c2;
529 
530 	while(c1 = *b++) {
531 		c2 = *a++;
532 		if(isupper(c2))
533 			c2 = tolower(c2);
534 		if(c1 != c2)
535 			return 1;
536 	}
537 	return 0;
538 }
539