xref: /plan9/sys/src/cmd/unix/drawterm/libc/strtod.c (revision 8ccd4a6360d974db7bd7bbd4f37e7018419ea908)
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 
5 /*
6  * This routine will convert to arbitrary precision
7  * floating point entirely in multi-precision fixed.
8  * The answer is the closest floating point number to
9  * the given decimal number. Exactly half way are
10  * rounded ala ieee rules.
11  * Method is to scale input decimal between .500 and .999...
12  * with external power of 2, then binary search for the
13  * closest mantissa to this decimal number.
14  * Nmant is is the required precision. (53 for ieee dp)
15  * Nbits is the max number of bits/word. (must be <= 28)
16  * Prec is calculated - the number of words of fixed mantissa.
17  */
18 enum
19 {
20 	Nbits	= 28,				// bits safely represented in a ulong
21 	Nmant	= 53,				// bits of precision required
22 	Bias		= 1022,
23 	Prec	= (Nmant+Nbits+1)/Nbits,	// words of Nbits each to represent mantissa
24 	Sigbit	= 1<<(Prec*Nbits-Nmant),	// first significant bit of Prec-th word
25 	Ndig	= 1500,
26 	One	= (ulong)(1<<Nbits),
27 	Half	= (ulong)(One>>1),
28 	Maxe	= 310,
29 	Fsign	= 1<<0,		// found -
30 	Fesign	= 1<<1,		// found e-
31 	Fdpoint	= 1<<2,		// found .
32 
33 	S0	= 0,		// _		_S0	+S1	#S2	.S3
34 	S1,			// _+		#S2	.S3
35 	S2,			// _+#		#S2	.S4	eS5
36 	S3,			// _+.		#S4
37 	S4,			// _+#.#	#S4	eS5
38 	S5,			// _+#.#e	+S6	#S7
39 	S6,			// _+#.#e+	#S7
40 	S7,			// _+#.#e+#	#S7
41 };
42 
43 static ulong
44 umuldiv(ulong a, ulong b, ulong c)
45 {
46 	double d;
47 
48 	d = ((double)a * (double)b) / (double)c;
49 	if(d >= 4294967295.)
50 		d = 4294967295.;
51 	return d;
52 }
53 
54 static	int	xcmp(char*, char*);
55 static	int	fpcmp(char*, ulong*);
56 static	void	frnorm(ulong*);
57 static	void	divascii(char*, int*, int*, int*);
58 static	void	mulascii(char*, int*, int*, int*);
59 static	void	divby(char*, int*, int);
60 
61 typedef	struct	Tab	Tab;
62 struct	Tab
63 {
64 	int	bp;
65 	int	siz;
66 	char*	cmp;
67 };
68 
69 double
70 strtod(char *as, char **aas)
71 {
72 	int na, ona, ex, dp, bp, c, i, flag, state;
73 	ulong low[Prec], hig[Prec], mid[Prec], num, den;
74 	double d;
75 	char *s, a[Ndig];
76 
77 	flag = 0;	// Fsign, Fesign, Fdpoint
78 	na = 0;		// number of digits of a[]
79 	dp = 0;		// na of decimal point
80 	ex = 0;		// exonent
81 
82 	state = S0;
83 	for(s=as;; s++) {
84 		c = *s;
85 		if(c >= '0' && c <= '9') {
86 			switch(state) {
87 			case S0:
88 			case S1:
89 			case S2:
90 				state = S2;
91 				break;
92 			case S3:
93 			case S4:
94 				state = S4;
95 				break;
96 
97 			case S5:
98 			case S6:
99 			case S7:
100 				state = S7;
101 				ex = ex*10 + (c-'0');
102 				continue;
103 			}
104 			if(na == 0 && c == '0') {
105 				dp--;
106 				continue;
107 			}
108 			if(na < Ndig-50)
109 				a[na++] = c;
110 			continue;
111 		}
112 		switch(c) {
113 		case '\t':
114 		case '\n':
115 		case '\v':
116 		case '\f':
117 		case '\r':
118 		case ' ':
119 			if(state == S0)
120 				continue;
121 			break;
122 		case '-':
123 			if(state == S0)
124 				flag |= Fsign;
125 			else
126 				flag |= Fesign;
127 		case '+':
128 			if(state == S0)
129 				state = S1;
130 			else
131 			if(state == S5)
132 				state = S6;
133 			else
134 				break;	// syntax
135 			continue;
136 		case '.':
137 			flag |= Fdpoint;
138 			dp = na;
139 			if(state == S0 || state == S1) {
140 				state = S3;
141 				continue;
142 			}
143 			if(state == S2) {
144 				state = S4;
145 				continue;
146 			}
147 			break;
148 		case 'e':
149 		case 'E':
150 			if(state == S2 || state == S4) {
151 				state = S5;
152 				continue;
153 			}
154 			break;
155 		}
156 		break;
157 	}
158 
159 	/*
160 	 * clean up return char-pointer
161 	 */
162 	switch(state) {
163 	case S0:
164 		if(xcmp(s, "nan") == 0) {
165 			if(aas != nil)
166 				*aas = s+3;
167 			goto retnan;
168 		}
169 	case S1:
170 		if(xcmp(s, "infinity") == 0) {
171 			if(aas != nil)
172 				*aas = s+8;
173 			goto retinf;
174 		}
175 		if(xcmp(s, "inf") == 0) {
176 			if(aas != nil)
177 				*aas = s+3;
178 			goto retinf;
179 		}
180 	case S3:
181 		if(aas != nil)
182 			*aas = as;
183 		goto ret0;	// no digits found
184 	case S6:
185 		s--;		// back over +-
186 	case S5:
187 		s--;		// back over e
188 		break;
189 	}
190 	if(aas != nil)
191 		*aas = s;
192 
193 	if(flag & Fdpoint)
194 	while(na > 0 && a[na-1] == '0')
195 		na--;
196 	if(na == 0)
197 		goto ret0;	// zero
198 	a[na] = 0;
199 	if(!(flag & Fdpoint))
200 		dp = na;
201 	if(flag & Fesign)
202 		ex = -ex;
203 	dp += ex;
204 	if(dp < -Maxe-Nmant/3)	/* actually -Nmant*log(2)/log(10), but Nmant/3 close enough */
205 		goto ret0;	// underflow by exp
206 	else
207 	if(dp > +Maxe)
208 		goto retinf;	// overflow by exp
209 
210 	/*
211 	 * normalize the decimal ascii number
212 	 * to range .[5-9][0-9]* e0
213 	 */
214 	bp = 0;		// binary exponent
215 	while(dp > 0)
216 		divascii(a, &na, &dp, &bp);
217 	while(dp < 0 || a[0] < '5')
218 		mulascii(a, &na, &dp, &bp);
219 	a[na] = 0;
220 
221 	/*
222 	 * very small numbers are represented using
223 	 * bp = -Bias+1.  adjust accordingly.
224 	 */
225 	if(bp < -Bias+1){
226 		ona = na;
227 		divby(a, &na, -bp-Bias+1);
228 		if(na < ona){
229 			memmove(a+ona-na, a, na);
230 			memset(a, '0', ona-na);
231 			na = ona;
232 		}
233 		a[na] = 0;
234 		bp = -Bias+1;
235 	}
236 
237 	/* close approx by naive conversion */
238 	num = 0;
239 	den = 1;
240 	for(i=0; i<9 && (c=a[i]); i++) {
241 		num = num*10 + (c-'0');
242 		den *= 10;
243 	}
244 	low[0] = umuldiv(num, One, den);
245 	hig[0] = umuldiv(num+1, One, den);
246 	for(i=1; i<Prec; i++) {
247 		low[i] = 0;
248 		hig[i] = One-1;
249 	}
250 
251 	/* binary search for closest mantissa */
252 	for(;;) {
253 		/* mid = (hig + low) / 2 */
254 		c = 0;
255 		for(i=0; i<Prec; i++) {
256 			mid[i] = hig[i] + low[i];
257 			if(c)
258 				mid[i] += One;
259 			c = mid[i] & 1;
260 			mid[i] >>= 1;
261 		}
262 		frnorm(mid);
263 
264 		/* compare */
265 		c = fpcmp(a, mid);
266 		if(c > 0) {
267 			c = 1;
268 			for(i=0; i<Prec; i++)
269 				if(low[i] != mid[i]) {
270 					c = 0;
271 					low[i] = mid[i];
272 				}
273 			if(c)
274 				break;	// between mid and hig
275 			continue;
276 		}
277 		if(c < 0) {
278 			for(i=0; i<Prec; i++)
279 				hig[i] = mid[i];
280 			continue;
281 		}
282 
283 		/* only hard part is if even/odd roundings wants to go up */
284 		c = mid[Prec-1] & (Sigbit-1);
285 		if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
286 			mid[Prec-1] -= c;
287 		break;	// exactly mid
288 	}
289 
290 	/* normal rounding applies */
291 	c = mid[Prec-1] & (Sigbit-1);
292 	mid[Prec-1] -= c;
293 	if(c >= Sigbit/2) {
294 		mid[Prec-1] += Sigbit;
295 		frnorm(mid);
296 	}
297 	d = 0;
298 	for(i=0; i<Prec; i++)
299 		d = d*One + mid[i];
300 	if(flag & Fsign)
301 		d = -d;
302 	d = ldexp(d, bp - Prec*Nbits);
303 	return d;
304 
305 ret0:
306 	return 0;
307 
308 retnan:
309 	return __NaN();
310 
311 retinf:
312 	if(flag & Fsign)
313 		return __Inf(-1);
314 	return __Inf(+1);
315 }
316 
317 static void
318 frnorm(ulong *f)
319 {
320 	int i, c;
321 
322 	c = 0;
323 	for(i=Prec-1; i>0; i--) {
324 		f[i] += c;
325 		c = f[i] >> Nbits;
326 		f[i] &= One-1;
327 	}
328 	f[0] += c;
329 }
330 
331 static int
332 fpcmp(char *a, ulong* f)
333 {
334 	ulong tf[Prec];
335 	int i, d, c;
336 
337 	for(i=0; i<Prec; i++)
338 		tf[i] = f[i];
339 
340 	for(;;) {
341 		/* tf *= 10 */
342 		for(i=0; i<Prec; i++)
343 			tf[i] = tf[i]*10;
344 		frnorm(tf);
345 		d = (tf[0] >> Nbits) + '0';
346 		tf[0] &= One-1;
347 
348 		/* compare next digit */
349 		c = *a;
350 		if(c == 0) {
351 			if('0' < d)
352 				return -1;
353 			if(tf[0] != 0)
354 				goto cont;
355 			for(i=1; i<Prec; i++)
356 				if(tf[i] != 0)
357 					goto cont;
358 			return 0;
359 		}
360 		if(c > d)
361 			return +1;
362 		if(c < d)
363 			return -1;
364 		a++;
365 	cont:;
366 	}
367 	return 0;
368 }
369 
370 static void
371 _divby(char *a, int *na, int b)
372 {
373 	int n, c;
374 	char *p;
375 
376 	p = a;
377 	n = 0;
378 	while(n>>b == 0) {
379 		c = *a++;
380 		if(c == 0) {
381 			while(n) {
382 				c = n*10;
383 				if(c>>b)
384 					break;
385 				n = c;
386 			}
387 			goto xx;
388 		}
389 		n = n*10 + c-'0';
390 		(*na)--;
391 	}
392 	for(;;) {
393 		c = n>>b;
394 		n -= c<<b;
395 		*p++ = c + '0';
396 		c = *a++;
397 		if(c == 0)
398 			break;
399 		n = n*10 + c-'0';
400 	}
401 	(*na)++;
402 xx:
403 	while(n) {
404 		n = n*10;
405 		c = n>>b;
406 		n -= c<<b;
407 		*p++ = c + '0';
408 		(*na)++;
409 	}
410 	*p = 0;
411 }
412 
413 static void
414 divby(char *a, int *na, int b)
415 {
416 	while(b > 9){
417 		_divby(a, na, 9);
418 		a[*na] = 0;
419 		b -= 9;
420 	}
421 	if(b > 0)
422 		_divby(a, na, b);
423 }
424 
425 static	Tab	tab1[] =
426 {
427 	 1,  0, "",
428 	 3,  1, "7",
429 	 6,  2, "63",
430 	 9,  3, "511",
431 	13,  4, "8191",
432 	16,  5, "65535",
433 	19,  6, "524287",
434 	23,  7, "8388607",
435 	26,  8, "67108863",
436 	27,  9, "134217727",
437 };
438 
439 static void
440 divascii(char *a, int *na, int *dp, int *bp)
441 {
442 	int b, d;
443 	Tab *t;
444 
445 	d = *dp;
446 	if(d >= nelem(tab1))
447 		d = nelem(tab1)-1;
448 	t = tab1 + d;
449 	b = t->bp;
450 	if(memcmp(a, t->cmp, t->siz) > 0)
451 		d--;
452 	*dp -= d;
453 	*bp += b;
454 	divby(a, na, b);
455 }
456 
457 static void
458 mulby(char *a, char *p, char *q, int b)
459 {
460 	int n, c;
461 
462 	n = 0;
463 	*p = 0;
464 	for(;;) {
465 		q--;
466 		if(q < a)
467 			break;
468 		c = *q - '0';
469 		c = (c<<b) + n;
470 		n = c/10;
471 		c -= n*10;
472 		p--;
473 		*p = c + '0';
474 	}
475 	while(n) {
476 		c = n;
477 		n = c/10;
478 		c -= n*10;
479 		p--;
480 		*p = c + '0';
481 	}
482 }
483 
484 static	Tab	tab2[] =
485 {
486 	 1,  1, "",				// dp = 0-0
487 	 3,  3, "125",
488 	 6,  5, "15625",
489 	 9,  7, "1953125",
490 	13, 10, "1220703125",
491 	16, 12, "152587890625",
492 	19, 14, "19073486328125",
493 	23, 17, "11920928955078125",
494 	26, 19, "1490116119384765625",
495 	27, 19, "7450580596923828125",		// dp 8-9
496 };
497 
498 static void
499 mulascii(char *a, int *na, int *dp, int *bp)
500 {
501 	char *p;
502 	int d, b;
503 	Tab *t;
504 
505 	d = -*dp;
506 	if(d >= nelem(tab2))
507 		d = nelem(tab2)-1;
508 	t = tab2 + d;
509 	b = t->bp;
510 	if(memcmp(a, t->cmp, t->siz) < 0)
511 		d--;
512 	p = a + *na;
513 	*bp -= b;
514 	*dp += d;
515 	*na += d;
516 	mulby(a, p+d, p, b);
517 }
518 
519 static int
520 xcmp(char *a, char *b)
521 {
522 	int c1, c2;
523 
524 	while((c1 = *b++)) {
525 		c2 = *a++;
526 		if(isupper(c2))
527 			c2 = tolower(c2);
528 		if(c1 != c2)
529 			return 1;
530 	}
531 	return 0;
532 }
533