xref: /plan9/sys/src/libc/port/strtod.c (revision b85a83648eec38fe82b6f00adfd7828ceec5ee8d)
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 
5 /*
6  * This routine will convert to arbitrary precision
7  * floating point entirely in multi-precision fixed.
8  * The answer is the closest floating point number to
9  * the given decimal number. Exactly half way are
10  * rounded ala ieee rules.
11  * Method is to scale input decimal between .500 and .999...
12  * with external power of 2, then binary search for the
13  * closest mantissa to this decimal number.
14  * Nmant is is the required precision. (53 for ieee dp)
15  * Nbits is the max number of bits/word. (must be <= 28)
16  * Prec is calculated - the number of words of fixed mantissa.
17  */
18 enum
19 {
20 	Nbits	= 28,				// bits safely represented in a ulong
21 	Nmant	= 53,				// bits of precision required
22 	Bias		= 1022,
23 	Prec	= (Nmant+Nbits+1)/Nbits,	// words of Nbits each to represent mantissa
24 	Sigbit	= 1<<(Prec*Nbits-Nmant),	// first significant bit of Prec-th word
25 	Ndig	= 1500,
26 	One	= (ulong)(1<<Nbits),
27 	Half	= (ulong)(One>>1),
28 	Maxe	= 310,
29 	Fsign	= 1<<0,		// found -
30 	Fesign	= 1<<1,		// found e-
31 	Fdpoint	= 1<<2,		// found .
32 
33 	S0	= 0,		// _		_S0	+S1	#S2	.S3
34 	S1,			// _+		#S2	.S3
35 	S2,			// _+#		#S2	.S4	eS5
36 	S3,			// _+.		#S4
37 	S4,			// _+#.#	#S4	eS5
38 	S5,			// _+#.#e	+S6	#S7
39 	S6,			// _+#.#e+	#S7
40 	S7,			// _+#.#e+#	#S7
41 };
42 
43 static	int	xcmp(char*, char*);
44 static	int	fpcmp(char*, ulong*);
45 static	void	frnorm(ulong*);
46 static	void	divascii(char*, int*, int*, int*);
47 static	void	mulascii(char*, int*, int*, int*);
48 static	void	divby(char*, int*, int);
49 
50 typedef	struct	Tab	Tab;
51 struct	Tab
52 {
53 	int	bp;
54 	int	siz;
55 	char*	cmp;
56 };
57 
58 double
strtod(char * as,char ** aas)59 strtod(char *as, char **aas)
60 {
61 	int na, ona, ex, dp, bp, c, i, flag, state;
62 	ulong low[Prec], hig[Prec], mid[Prec], num, den;
63 	double d;
64 	char *s, a[Ndig];
65 
66 	flag = 0;	// Fsign, Fesign, Fdpoint
67 	na = 0;		// number of digits of a[]
68 	dp = 0;		// na of decimal point
69 	ex = 0;		// exonent
70 
71 	state = S0;
72 	for(s=as;; s++) {
73 		c = *s;
74 		if(c >= '0' && c <= '9') {
75 			switch(state) {
76 			case S0:
77 			case S1:
78 			case S2:
79 				state = S2;
80 				break;
81 			case S3:
82 			case S4:
83 				state = S4;
84 				break;
85 
86 			case S5:
87 			case S6:
88 			case S7:
89 				state = S7;
90 				ex = ex*10 + (c-'0');
91 				continue;
92 			}
93 			if(na == 0 && c == '0') {
94 				dp--;
95 				continue;
96 			}
97 			if(na < Ndig-50)
98 				a[na++] = c;
99 			continue;
100 		}
101 		switch(c) {
102 		case '\t':
103 		case '\n':
104 		case '\v':
105 		case '\f':
106 		case '\r':
107 		case ' ':
108 			if(state == S0)
109 				continue;
110 			break;
111 		case '-':
112 			if(state == S0)
113 				flag |= Fsign;
114 			else
115 				flag |= Fesign;
116 		case '+':
117 			if(state == S0)
118 				state = S1;
119 			else
120 			if(state == S5)
121 				state = S6;
122 			else
123 				break;	// syntax
124 			continue;
125 		case '.':
126 			flag |= Fdpoint;
127 			dp = na;
128 			if(state == S0 || state == S1) {
129 				state = S3;
130 				continue;
131 			}
132 			if(state == S2) {
133 				state = S4;
134 				continue;
135 			}
136 			break;
137 		case 'e':
138 		case 'E':
139 			if(state == S2 || state == S4) {
140 				state = S5;
141 				continue;
142 			}
143 			break;
144 		}
145 		break;
146 	}
147 
148 	/*
149 	 * clean up return char-pointer
150 	 */
151 	switch(state) {
152 	case S0:
153 		if(xcmp(s, "nan") == 0) {
154 			if(aas != nil)
155 				*aas = s+3;
156 			goto retnan;
157 		}
158 	case S1:
159 		if(xcmp(s, "infinity") == 0) {
160 			if(aas != nil)
161 				*aas = s+8;
162 			goto retinf;
163 		}
164 		if(xcmp(s, "inf") == 0) {
165 			if(aas != nil)
166 				*aas = s+3;
167 			goto retinf;
168 		}
169 	case S3:
170 		if(aas != nil)
171 			*aas = as;
172 		goto ret0;	// no digits found
173 	case S6:
174 		s--;		// back over +-
175 	case S5:
176 		s--;		// back over e
177 		break;
178 	}
179 	if(aas != nil)
180 		*aas = s;
181 
182 	if(flag & Fdpoint)
183 	while(na > 0 && a[na-1] == '0')
184 		na--;
185 	if(na == 0)
186 		goto ret0;	// zero
187 	a[na] = 0;
188 	if(!(flag & Fdpoint))
189 		dp = na;
190 	if(flag & Fesign)
191 		ex = -ex;
192 	dp += ex;
193 	if(dp < -Maxe-Nmant/3)	/* actually -Nmant*log(2)/log(10), but Nmant/3 close enough */
194 		goto ret0;	// underflow by exp
195 	else
196 	if(dp > +Maxe)
197 		goto retinf;	// overflow by exp
198 
199 	/*
200 	 * normalize the decimal ascii number
201 	 * to range .[5-9][0-9]* e0
202 	 */
203 	bp = 0;		// binary exponent
204 	while(dp > 0)
205 		divascii(a, &na, &dp, &bp);
206 	while(dp < 0 || a[0] < '5')
207 		mulascii(a, &na, &dp, &bp);
208 	a[na] = 0;
209 
210 	/*
211 	 * very small numbers are represented using
212 	 * bp = -Bias+1.  adjust accordingly.
213 	 */
214 	if(bp < -Bias+1){
215 		ona = na;
216 		divby(a, &na, -bp-Bias+1);
217 		if(na < ona){
218 			memmove(a+ona-na, a, na);
219 			memset(a, '0', ona-na);
220 			na = ona;
221 		}
222 		a[na] = 0;
223 		bp = -Bias+1;
224 	}
225 
226 	/* close approx by naive conversion */
227 	num = 0;
228 	den = 1;
229 	for(i=0; i<9 && (c=a[i]); i++) {
230 		num = num*10 + (c-'0');
231 		den *= 10;
232 	}
233 	low[0] = umuldiv(num, One, den);
234 	hig[0] = umuldiv(num+1, One, den);
235 	for(i=1; i<Prec; i++) {
236 		low[i] = 0;
237 		hig[i] = One-1;
238 	}
239 
240 	/* binary search for closest mantissa */
241 	for(;;) {
242 		/* mid = (hig + low) / 2 */
243 		c = 0;
244 		for(i=0; i<Prec; i++) {
245 			mid[i] = hig[i] + low[i];
246 			if(c)
247 				mid[i] += One;
248 			c = mid[i] & 1;
249 			mid[i] >>= 1;
250 		}
251 		frnorm(mid);
252 
253 		/* compare */
254 		c = fpcmp(a, mid);
255 		if(c > 0) {
256 			c = 1;
257 			for(i=0; i<Prec; i++)
258 				if(low[i] != mid[i]) {
259 					c = 0;
260 					low[i] = mid[i];
261 				}
262 			if(c)
263 				break;	// between mid and hig
264 			continue;
265 		}
266 		if(c < 0) {
267 			for(i=0; i<Prec; i++)
268 				hig[i] = mid[i];
269 			continue;
270 		}
271 
272 		/* only hard part is if even/odd roundings wants to go up */
273 		c = mid[Prec-1] & (Sigbit-1);
274 		if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
275 			mid[Prec-1] -= c;
276 		break;	// exactly mid
277 	}
278 
279 	/* normal rounding applies */
280 	c = mid[Prec-1] & (Sigbit-1);
281 	mid[Prec-1] -= c;
282 	if(c >= Sigbit/2) {
283 		mid[Prec-1] += Sigbit;
284 		frnorm(mid);
285 	}
286 	d = 0;
287 	for(i=0; i<Prec; i++)
288 		d = d*One + mid[i];
289 	if(flag & Fsign)
290 		d = -d;
291 	d = ldexp(d, bp - Prec*Nbits);
292 	return d;
293 
294 ret0:
295 	return 0;
296 
297 retnan:
298 	return NaN();
299 
300 retinf:
301 	if(flag & Fsign)
302 		return Inf(-1);
303 	return Inf(+1);
304 }
305 
306 static void
frnorm(ulong * f)307 frnorm(ulong *f)
308 {
309 	int i, c;
310 
311 	c = 0;
312 	for(i=Prec-1; i>0; i--) {
313 		f[i] += c;
314 		c = f[i] >> Nbits;
315 		f[i] &= One-1;
316 	}
317 	f[0] += c;
318 }
319 
320 static int
fpcmp(char * a,ulong * f)321 fpcmp(char *a, ulong* f)
322 {
323 	ulong tf[Prec];
324 	int i, d, c;
325 
326 	for(i=0; i<Prec; i++)
327 		tf[i] = f[i];
328 
329 	for(;;) {
330 		/* tf *= 10 */
331 		for(i=0; i<Prec; i++)
332 			tf[i] = tf[i]*10;
333 		frnorm(tf);
334 		d = (tf[0] >> Nbits) + '0';
335 		tf[0] &= One-1;
336 
337 		/* compare next digit */
338 		c = *a;
339 		if(c == 0) {
340 			if('0' < d)
341 				return -1;
342 			if(tf[0] != 0)
343 				goto cont;
344 			for(i=1; i<Prec; i++)
345 				if(tf[i] != 0)
346 					goto cont;
347 			return 0;
348 		}
349 		if(c > d)
350 			return +1;
351 		if(c < d)
352 			return -1;
353 		a++;
354 	cont:;
355 	}
356 }
357 
358 static void
_divby(char * a,int * na,int b)359 _divby(char *a, int *na, int b)
360 {
361 	int n, c;
362 	char *p;
363 
364 	p = a;
365 	n = 0;
366 	while(n>>b == 0) {
367 		c = *a++;
368 		if(c == 0) {
369 			while(n) {
370 				c = n*10;
371 				if(c>>b)
372 					break;
373 				n = c;
374 			}
375 			goto xx;
376 		}
377 		n = n*10 + c-'0';
378 		(*na)--;
379 	}
380 	for(;;) {
381 		c = n>>b;
382 		n -= c<<b;
383 		*p++ = c + '0';
384 		c = *a++;
385 		if(c == 0)
386 			break;
387 		n = n*10 + c-'0';
388 	}
389 	(*na)++;
390 xx:
391 	while(n) {
392 		n = n*10;
393 		c = n>>b;
394 		n -= c<<b;
395 		*p++ = c + '0';
396 		(*na)++;
397 	}
398 	*p = 0;
399 }
400 
401 static void
divby(char * a,int * na,int b)402 divby(char *a, int *na, int b)
403 {
404 	while(b > 9){
405 		_divby(a, na, 9);
406 		a[*na] = 0;
407 		b -= 9;
408 	}
409 	if(b > 0)
410 		_divby(a, na, b);
411 }
412 
413 static	Tab	tab1[] =
414 {
415 	 1,  0, "",
416 	 3,  1, "7",
417 	 6,  2, "63",
418 	 9,  3, "511",
419 	13,  4, "8191",
420 	16,  5, "65535",
421 	19,  6, "524287",
422 	23,  7, "8388607",
423 	26,  8, "67108863",
424 	27,  9, "134217727",
425 };
426 
427 static void
divascii(char * a,int * na,int * dp,int * bp)428 divascii(char *a, int *na, int *dp, int *bp)
429 {
430 	int b, d;
431 	Tab *t;
432 
433 	d = *dp;
434 	if(d >= nelem(tab1))
435 		d = nelem(tab1)-1;
436 	t = tab1 + d;
437 	b = t->bp;
438 	if(memcmp(a, t->cmp, t->siz) > 0)
439 		d--;
440 	*dp -= d;
441 	*bp += b;
442 	divby(a, na, b);
443 }
444 
445 static void
mulby(char * a,char * p,char * q,int b)446 mulby(char *a, char *p, char *q, int b)
447 {
448 	int n, c;
449 
450 	n = 0;
451 	*p = 0;
452 	for(;;) {
453 		q--;
454 		if(q < a)
455 			break;
456 		c = *q - '0';
457 		c = (c<<b) + n;
458 		n = c/10;
459 		c -= n*10;
460 		p--;
461 		*p = c + '0';
462 	}
463 	while(n) {
464 		c = n;
465 		n = c/10;
466 		c -= n*10;
467 		p--;
468 		*p = c + '0';
469 	}
470 }
471 
472 static	Tab	tab2[] =
473 {
474 	 1,  1, "",				// dp = 0-0
475 	 3,  3, "125",
476 	 6,  5, "15625",
477 	 9,  7, "1953125",
478 	13, 10, "1220703125",
479 	16, 12, "152587890625",
480 	19, 14, "19073486328125",
481 	23, 17, "11920928955078125",
482 	26, 19, "1490116119384765625",
483 	27, 19, "7450580596923828125",		// dp 8-9
484 };
485 
486 static void
mulascii(char * a,int * na,int * dp,int * bp)487 mulascii(char *a, int *na, int *dp, int *bp)
488 {
489 	char *p;
490 	int d, b;
491 	Tab *t;
492 
493 	d = -*dp;
494 	if(d >= nelem(tab2))
495 		d = nelem(tab2)-1;
496 	t = tab2 + d;
497 	b = t->bp;
498 	if(memcmp(a, t->cmp, t->siz) < 0)
499 		d--;
500 	p = a + *na;
501 	*bp -= b;
502 	*dp += d;
503 	*na += d;
504 	mulby(a, p+d, p, b);
505 }
506 
507 static int
xcmp(char * a,char * b)508 xcmp(char *a, char *b)
509 {
510 	int c1, c2;
511 
512 	while(c1 = *b++) {
513 		c2 = *a++;
514 		if(isupper(c2))
515 			c2 = tolower(c2);
516 		if(c1 != c2)
517 			return 1;
518 	}
519 	return 0;
520 }
521