xref: /plan9-contrib/sys/src/ape/lib/fmt/strtod.c (revision 9b943567965ba040fd275927fbe088656eb8ce4f)
1 /*
2  * The authors of this software are Rob Pike and Ken Thompson.
3  *              Copyright (c) 2002 by Lucent Technologies.
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose without fee is hereby granted, provided that this entire notice
6  * is included in all copies of any software which is or includes a copy
7  * or modification of this software and in all copies of the supporting
8  * documentation for such software.
9  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10  * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
11  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13  */
14 #include <stdlib.h>
15 #include <math.h>
16 #include <ctype.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <errno.h>
20 #include "fmt.h"
21 #include "nan.h"
22 
23 #ifndef nelem
24 #define nelem(x)	(sizeof(x)/sizeof *(x))
25 #endif
26 #define nil ((void*)0)
27 #define ulong _fmtulong
28 typedef unsigned long ulong;
29 
30 static ulong
31 umuldiv(ulong a, ulong b, ulong c)
32 {
33 	double d;
34 
35 	d = ((double)a * (double)b) / (double)c;
36 	if(d >= 4294967295.)
37 		d = 4294967295.;
38 	return (ulong)d;
39 }
40 
41 /*
42  * This routine will convert to arbitrary precision
43  * floating point entirely in multi-precision fixed.
44  * The answer is the closest floating point number to
45  * the given decimal number. Exactly half way are
46  * rounded ala ieee rules.
47  * Method is to scale input decimal between .500 and .999...
48  * with external power of 2, then binary search for the
49  * closest mantissa to this decimal number.
50  * Nmant is is the required precision. (53 for ieee dp)
51  * Nbits is the max number of bits/word. (must be <= 28)
52  * Prec is calculated - the number of words of fixed mantissa.
53  */
54 enum
55 {
56 	Nbits	= 28,				/* bits safely represented in a ulong */
57 	Nmant	= 53,				/* bits of precision required */
58 	Prec	= (Nmant+Nbits+1)/Nbits,	/* words of Nbits each to represent mantissa */
59 	Sigbit	= 1<<(Prec*Nbits-Nmant),	/* first significant bit of Prec-th word */
60 	Ndig	= 1500,
61 	One	= (ulong)(1<<Nbits),
62 	Half	= (ulong)(One>>1),
63 	Maxe	= 310,
64 
65 	Fsign	= 1<<0,		/* found - */
66 	Fesign	= 1<<1,		/* found e- */
67 	Fdpoint	= 1<<2,		/* found . */
68 
69 	S0	= 0,		/* _		_S0	+S1	#S2	.S3 */
70 	S1,			/* _+		#S2	.S3 */
71 	S2,			/* _+#		#S2	.S4	eS5 */
72 	S3,			/* _+.		#S4 */
73 	S4,			/* _+#.#	#S4	eS5 */
74 	S5,			/* _+#.#e	+S6	#S7 */
75 	S6,			/* _+#.#e+	#S7 */
76 	S7,			/* _+#.#e+#	#S7 */
77 };
78 
79 static	int	xcmp(char*, char*);
80 static	int	fpcmp(char*, ulong*);
81 static	void	frnorm(ulong*);
82 static	void	divascii(char*, int*, int*, int*);
83 static	void	mulascii(char*, int*, int*, int*);
84 
85 typedef	struct	Tab	Tab;
86 struct	Tab
87 {
88 	int	bp;
89 	int	siz;
90 	char*	cmp;
91 };
92 
93 double
94 fmtstrtod(const char *as, char **aas)
95 {
96 	int na, ex, dp, bp, c, i, flag, state;
97 	ulong low[Prec], hig[Prec], mid[Prec];
98 	double d;
99 	char *s, a[Ndig];
100 
101 	flag = 0;	/* Fsign, Fesign, Fdpoint */
102 	na = 0;		/* number of digits of a[] */
103 	dp = 0;		/* na of decimal point */
104 	ex = 0;		/* exonent */
105 
106 	state = S0;
107 	for(s=(char*)as;; s++) {
108 		c = *s;
109 		if(c >= '0' && c <= '9') {
110 			switch(state) {
111 			case S0:
112 			case S1:
113 			case S2:
114 				state = S2;
115 				break;
116 			case S3:
117 			case S4:
118 				state = S4;
119 				break;
120 
121 			case S5:
122 			case S6:
123 			case S7:
124 				state = S7;
125 				ex = ex*10 + (c-'0');
126 				continue;
127 			}
128 			if(na == 0 && c == '0') {
129 				dp--;
130 				continue;
131 			}
132 			if(na < Ndig-50)
133 				a[na++] = c;
134 			continue;
135 		}
136 		switch(c) {
137 		case '\t':
138 		case '\n':
139 		case '\v':
140 		case '\f':
141 		case '\r':
142 		case ' ':
143 			if(state == S0)
144 				continue;
145 			break;
146 		case '-':
147 			if(state == S0)
148 				flag |= Fsign;
149 			else
150 				flag |= Fesign;
151 		case '+':
152 			if(state == S0)
153 				state = S1;
154 			else
155 			if(state == S5)
156 				state = S6;
157 			else
158 				break;	/* syntax */
159 			continue;
160 		case '.':
161 			flag |= Fdpoint;
162 			dp = na;
163 			if(state == S0 || state == S1) {
164 				state = S3;
165 				continue;
166 			}
167 			if(state == S2) {
168 				state = S4;
169 				continue;
170 			}
171 			break;
172 		case 'e':
173 		case 'E':
174 			if(state == S2 || state == S4) {
175 				state = S5;
176 				continue;
177 			}
178 			break;
179 		}
180 		break;
181 	}
182 
183 	/*
184 	 * clean up return char-pointer
185 	 */
186 	switch(state) {
187 	case S0:
188 		if(xcmp(s, "nan") == 0) {
189 			if(aas != nil)
190 				*aas = s+3;
191 			goto retnan;
192 		}
193 	case S1:
194 		if(xcmp(s, "infinity") == 0) {
195 			if(aas != nil)
196 				*aas = s+8;
197 			goto retinf;
198 		}
199 		if(xcmp(s, "inf") == 0) {
200 			if(aas != nil)
201 				*aas = s+3;
202 			goto retinf;
203 		}
204 	case S3:
205 		if(aas != nil)
206 			*aas = (char*)as;
207 		goto ret0;	/* no digits found */
208 	case S6:
209 		s--;		/* back over +- */
210 	case S5:
211 		s--;		/* back over e */
212 		break;
213 	}
214 	if(aas != nil)
215 		*aas = s;
216 
217 	if(flag & Fdpoint)
218 	while(na > 0 && a[na-1] == '0')
219 		na--;
220 	if(na == 0)
221 		goto ret0;	/* zero */
222 	a[na] = 0;
223 	if(!(flag & Fdpoint))
224 		dp = na;
225 	if(flag & Fesign)
226 		ex = -ex;
227 	dp += ex;
228 	if(dp < -Maxe){
229 		errno = ERANGE;
230 		goto ret0;	/* underflow by exp */
231 	} else
232 	if(dp > +Maxe)
233 		goto retinf;	/* overflow by exp */
234 
235 	/*
236 	 * normalize the decimal ascii number
237 	 * to range .[5-9][0-9]* e0
238 	 */
239 	bp = 0;		/* binary exponent */
240 	while(dp > 0)
241 		divascii(a, &na, &dp, &bp);
242 	while(dp < 0 || a[0] < '5')
243 		mulascii(a, &na, &dp, &bp);
244 
245 	/* close approx by naive conversion */
246 	mid[0] = 0;
247 	mid[1] = 1;
248 	for(i=0; c=a[i]; i++) {
249 		mid[0] = mid[0]*10 + (c-'0');
250 		mid[1] = mid[1]*10;
251 		if(i >= 8)
252 			break;
253 	}
254 	low[0] = umuldiv(mid[0], One, mid[1]);
255 	hig[0] = umuldiv(mid[0]+1, One, mid[1]);
256 	for(i=1; i<Prec; i++) {
257 		low[i] = 0;
258 		hig[i] = One-1;
259 	}
260 
261 	/* binary search for closest mantissa */
262 	for(;;) {
263 		/* mid = (hig + low) / 2 */
264 		c = 0;
265 		for(i=0; i<Prec; i++) {
266 			mid[i] = hig[i] + low[i];
267 			if(c)
268 				mid[i] += One;
269 			c = mid[i] & 1;
270 			mid[i] >>= 1;
271 		}
272 		frnorm(mid);
273 
274 		/* compare */
275 		c = fpcmp(a, mid);
276 		if(c > 0) {
277 			c = 1;
278 			for(i=0; i<Prec; i++)
279 				if(low[i] != mid[i]) {
280 					c = 0;
281 					low[i] = mid[i];
282 				}
283 			if(c)
284 				break;	/* between mid and hig */
285 			continue;
286 		}
287 		if(c < 0) {
288 			for(i=0; i<Prec; i++)
289 				hig[i] = mid[i];
290 			continue;
291 		}
292 
293 		/* only hard part is if even/odd roundings wants to go up */
294 		c = mid[Prec-1] & (Sigbit-1);
295 		if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
296 			mid[Prec-1] -= c;
297 		break;	/* exactly mid */
298 	}
299 
300 	/* normal rounding applies */
301 	c = mid[Prec-1] & (Sigbit-1);
302 	mid[Prec-1] -= c;
303 	if(c >= Sigbit/2) {
304 		mid[Prec-1] += Sigbit;
305 		frnorm(mid);
306 	}
307 	goto out;
308 
309 ret0:
310 	return 0;
311 
312 retnan:
313 	return __NaN();
314 
315 retinf:
316 	/*
317 	 * Unix strtod requires these.  Plan 9 would return Inf(0) or Inf(-1). */
318 	errno = ERANGE;
319 	if(flag & Fsign)
320 		return -HUGE_VAL;
321 	return HUGE_VAL;
322 
323 out:
324 	d = 0;
325 	for(i=0; i<Prec; i++)
326 		d = d*One + mid[i];
327 	if(flag & Fsign)
328 		d = -d;
329 	d = ldexp(d, bp - Prec*Nbits);
330 	if(d == 0){	/* underflow */
331 		errno = ERANGE;
332 	}
333 	return d;
334 }
335 
336 static void
337 frnorm(ulong *f)
338 {
339 	int i, c;
340 
341 	c = 0;
342 	for(i=Prec-1; i>0; i--) {
343 		f[i] += c;
344 		c = f[i] >> Nbits;
345 		f[i] &= One-1;
346 	}
347 	f[0] += c;
348 }
349 
350 static int
351 fpcmp(char *a, ulong* f)
352 {
353 	ulong tf[Prec];
354 	int i, d, c;
355 
356 	for(i=0; i<Prec; i++)
357 		tf[i] = f[i];
358 
359 	for(;;) {
360 		/* tf *= 10 */
361 		for(i=0; i<Prec; i++)
362 			tf[i] = tf[i]*10;
363 		frnorm(tf);
364 		d = (tf[0] >> Nbits) + '0';
365 		tf[0] &= One-1;
366 
367 		/* compare next digit */
368 		c = *a;
369 		if(c == 0) {
370 			if('0' < d)
371 				return -1;
372 			if(tf[0] != 0)
373 				goto cont;
374 			for(i=1; i<Prec; i++)
375 				if(tf[i] != 0)
376 					goto cont;
377 			return 0;
378 		}
379 		if(c > d)
380 			return +1;
381 		if(c < d)
382 			return -1;
383 		a++;
384 	cont:;
385 	}
386 	return 0;
387 }
388 
389 static void
390 divby(char *a, int *na, int b)
391 {
392 	int n, c;
393 	char *p;
394 
395 	p = a;
396 	n = 0;
397 	while(n>>b == 0) {
398 		c = *a++;
399 		if(c == 0) {
400 			while(n) {
401 				c = n*10;
402 				if(c>>b)
403 					break;
404 				n = c;
405 			}
406 			goto xx;
407 		}
408 		n = n*10 + c-'0';
409 		(*na)--;
410 	}
411 	for(;;) {
412 		c = n>>b;
413 		n -= c<<b;
414 		*p++ = c + '0';
415 		c = *a++;
416 		if(c == 0)
417 			break;
418 		n = n*10 + c-'0';
419 	}
420 	(*na)++;
421 xx:
422 	while(n) {
423 		n = n*10;
424 		c = n>>b;
425 		n -= c<<b;
426 		*p++ = c + '0';
427 		(*na)++;
428 	}
429 	*p = 0;
430 }
431 
432 static	Tab	tab1[] =
433 {
434 	 1,  0, "",
435 	 3,  1, "7",
436 	 6,  2, "63",
437 	 9,  3, "511",
438 	13,  4, "8191",
439 	16,  5, "65535",
440 	19,  6, "524287",
441 	23,  7, "8388607",
442 	26,  8, "67108863",
443 	27,  9, "134217727",
444 };
445 
446 static void
447 divascii(char *a, int *na, int *dp, int *bp)
448 {
449 	int b, d;
450 	Tab *t;
451 
452 	d = *dp;
453 	if(d >= (int)(nelem(tab1)))
454 		d = (int)(nelem(tab1))-1;
455 	t = tab1 + d;
456 	b = t->bp;
457 	if(memcmp(a, t->cmp, t->siz) > 0)
458 		d--;
459 	*dp -= d;
460 	*bp += b;
461 	divby(a, na, b);
462 }
463 
464 static void
465 mulby(char *a, char *p, char *q, int b)
466 {
467 	int n, c;
468 
469 	n = 0;
470 	*p = 0;
471 	for(;;) {
472 		q--;
473 		if(q < a)
474 			break;
475 		c = *q - '0';
476 		c = (c<<b) + n;
477 		n = c/10;
478 		c -= n*10;
479 		p--;
480 		*p = c + '0';
481 	}
482 	while(n) {
483 		c = n;
484 		n = c/10;
485 		c -= n*10;
486 		p--;
487 		*p = c + '0';
488 	}
489 }
490 
491 static	Tab	tab2[] =
492 {
493 	 1,  1, "",				/* dp = 0-0 */
494 	 3,  3, "125",
495 	 6,  5, "15625",
496 	 9,  7, "1953125",
497 	13, 10, "1220703125",
498 	16, 12, "152587890625",
499 	19, 14, "19073486328125",
500 	23, 17, "11920928955078125",
501 	26, 19, "1490116119384765625",
502 	27, 19, "7450580596923828125",		/* dp 8-9 */
503 };
504 
505 static void
506 mulascii(char *a, int *na, int *dp, int *bp)
507 {
508 	char *p;
509 	int d, b;
510 	Tab *t;
511 
512 	d = -*dp;
513 	if(d >= (int)(nelem(tab2)))
514 		d = (int)(nelem(tab2))-1;
515 	t = tab2 + d;
516 	b = t->bp;
517 	if(memcmp(a, t->cmp, t->siz) < 0)
518 		d--;
519 	p = a + *na;
520 	*bp -= b;
521 	*dp += d;
522 	*na += d;
523 	mulby(a, p+d, p, b);
524 }
525 
526 static int
527 xcmp(char *a, char *b)
528 {
529 	int c1, c2;
530 
531 	while(c1 = *b++) {
532 		c2 = *a++;
533 		if(isupper(c2))
534 			c2 = tolower(c2);
535 		if(c1 != c2)
536 			return 1;
537 	}
538 	return 0;
539 }
540