xref: /plan9/sys/src/libstdio/dtoa.c (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 /* derived from /netlib/fp/dtoa.c assuming IEEE, Standard C */
2 /* kudos to dmg@bell-labs.com, gripes to ehg@bell-labs.com */
3 
4 /* Let x be the exact mathematical number defined by a decimal
5  *	string s.  Then atof(s) is the round-nearest-even IEEE
6  *	floating point value.
7  * Let y be an IEEE floating point value and let s be the string
8  *	printed as %.17g.  Then atof(s) is exactly y.
9  */
10 #include <u.h>
11 #include <libc.h>
12 
13 static Lock _dtoalk[2];
14 #define ACQUIRE_DTOA_LOCK(n)	lock(&_dtoalk[n])
15 #define FREE_DTOA_LOCK(n)	unlock(&_dtoalk[n])
16 #define PRIVATE_mem ((2000+sizeof(double)-1)/sizeof(double))
17 static double private_mem[PRIVATE_mem], *pmem_next = private_mem;
18 #define FLT_ROUNDS	1
19 #define DBL_DIG		15
20 #define DBL_MAX_10_EXP	308
21 #define DBL_MAX_EXP	1024
22 #define FLT_RADIX	2
23 #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff)
24 #define fpword0(x) ((FPdbleword*)&x)->hi
25 #define fpword1(x) ((FPdbleword*)&x)->lo
26 /* Ten_pmax = floor(P*log(2)/log(5)) */
27 /* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
28 /* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */
29 /* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */
30 
31 #define Exp_shift  20
32 #define Exp_shift1 20
33 #define Exp_msk1    0x100000
34 #define Exp_msk11   0x100000
35 #define Exp_mask  0x7ff00000
36 #define P 53
37 #define Bias 1023
38 #define Emin (-1022)
39 #define Exp_1  0x3ff00000
40 #define Exp_11 0x3ff00000
41 #define Ebits 11
42 #define Frac_mask  0xfffff
43 #define Frac_mask1 0xfffff
44 #define Ten_pmax 22
45 #define Bletch 0x10
46 #define Bndry_mask  0xfffff
47 #define Bndry_mask1 0xfffff
48 #define LSB 1
49 #define Sign_bit 0x80000000
50 #define Log2P 1
51 #define Tiny0 0
52 #define Tiny1 1
53 #define Quick_max 14
54 #define Int_max 14
55 #define Avoid_Underflow
56 
57 #define rounded_product(a,b) a *= b
58 #define rounded_quotient(a,b) a /= b
59 
60 #define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
61 #define Big1 0xffffffff
62 
63 #define FFFFFFFF 0xffffffffUL
64 
65 #undef ULint
66 
67 #define Kmax 15
68 
69 struct
70 Bigint {
71 	struct Bigint *next;
72 	int	k, maxwds, sign, wds;
73 	unsigned int x[1];
74 };
75 
76 typedef struct Bigint Bigint;
77 
78 static Bigint *freelist[Kmax+1];
79 
80 static Bigint *
81 Balloc(int k)
82 {
83 	int	x;
84 	Bigint * rv;
85 	unsigned int	len;
86 
87 	ACQUIRE_DTOA_LOCK(0);
88 	if (rv = freelist[k]) {
89 		freelist[k] = rv->next;
90 	} else {
91 		x = 1 << k;
92 		len = (sizeof(Bigint) + (x - 1) * sizeof(unsigned int) + sizeof(double) -1)
93 		 / sizeof(double);
94 		if (pmem_next - private_mem + len <= PRIVATE_mem) {
95 			rv = (Bigint * )pmem_next;
96 			pmem_next += len;
97 		} else
98 			rv = (Bigint * )malloc(len * sizeof(double));
99 		rv->k = k;
100 		rv->maxwds = x;
101 	}
102 	FREE_DTOA_LOCK(0);
103 	rv->sign = rv->wds = 0;
104 	return rv;
105 }
106 
107 static void
108 Bfree(Bigint *v)
109 {
110 	if (v) {
111 		ACQUIRE_DTOA_LOCK(0);
112 		v->next = freelist[v->k];
113 		freelist[v->k] = v;
114 		FREE_DTOA_LOCK(0);
115 	}
116 }
117 
118 #define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
119 y->wds*sizeof(int) + 2*sizeof(int))
120 
121 static Bigint *
122 multadd(Bigint *b, int m, int a)	/* multiply by m and add a */
123 {
124 	int	i, wds;
125 	unsigned int carry, *x, y;
126 	unsigned int xi, z;
127 	Bigint * b1;
128 
129 	wds = b->wds;
130 	x = b->x;
131 	i = 0;
132 	carry = a;
133 	do {
134 		xi = *x;
135 		y = (xi & 0xffff) * m + carry;
136 		z = (xi >> 16) * m + (y >> 16);
137 		carry = z >> 16;
138 		*x++ = (z << 16) + (y & 0xffff);
139 	} while (++i < wds);
140 	if (carry) {
141 		if (wds >= b->maxwds) {
142 			b1 = Balloc(b->k + 1);
143 			Bcopy(b1, b);
144 			Bfree(b);
145 			b = b1;
146 		}
147 		b->x[wds++] = carry;
148 		b->wds = wds;
149 	}
150 	return b;
151 }
152 
153 static Bigint *
154 s2b(const char *s, int nd0, int nd, unsigned int y9)
155 {
156 	Bigint * b;
157 	int	i, k;
158 	int x, y;
159 
160 	x = (nd + 8) / 9;
161 	for (k = 0, y = 1; x > y; y <<= 1, k++)
162 		;
163 	b = Balloc(k);
164 	b->x[0] = y9;
165 	b->wds = 1;
166 
167 	i = 9;
168 	if (9 < nd0) {
169 		s += 9;
170 		do
171 			b = multadd(b, 10, *s++ - '0');
172 		while (++i < nd0);
173 		s++;
174 	} else
175 		s += 10;
176 	for (; i < nd; i++)
177 		b = multadd(b, 10, *s++ - '0');
178 	return b;
179 }
180 
181 static int
182 hi0bits(register unsigned int x)
183 {
184 	register int	k = 0;
185 
186 	if (!(x & 0xffff0000)) {
187 		k = 16;
188 		x <<= 16;
189 	}
190 	if (!(x & 0xff000000)) {
191 		k += 8;
192 		x <<= 8;
193 	}
194 	if (!(x & 0xf0000000)) {
195 		k += 4;
196 		x <<= 4;
197 	}
198 	if (!(x & 0xc0000000)) {
199 		k += 2;
200 		x <<= 2;
201 	}
202 	if (!(x & 0x80000000)) {
203 		k++;
204 		if (!(x & 0x40000000))
205 			return 32;
206 	}
207 	return k;
208 }
209 
210 static int
211 lo0bits(unsigned int *y)
212 {
213 	register int	k;
214 	register unsigned int x = *y;
215 
216 	if (x & 7) {
217 		if (x & 1)
218 			return 0;
219 		if (x & 2) {
220 			*y = x >> 1;
221 			return 1;
222 		}
223 		*y = x >> 2;
224 		return 2;
225 	}
226 	k = 0;
227 	if (!(x & 0xffff)) {
228 		k = 16;
229 		x >>= 16;
230 	}
231 	if (!(x & 0xff)) {
232 		k += 8;
233 		x >>= 8;
234 	}
235 	if (!(x & 0xf)) {
236 		k += 4;
237 		x >>= 4;
238 	}
239 	if (!(x & 0x3)) {
240 		k += 2;
241 		x >>= 2;
242 	}
243 	if (!(x & 1)) {
244 		k++;
245 		x >>= 1;
246 		if (!x & 1)
247 			return 32;
248 	}
249 	*y = x;
250 	return k;
251 }
252 
253 static Bigint *
254 i2b(int i)
255 {
256 	Bigint * b;
257 
258 	b = Balloc(1);
259 	b->x[0] = i;
260 	b->wds = 1;
261 	return b;
262 }
263 
264 static Bigint *
265 mult(Bigint *a, Bigint *b)
266 {
267 	Bigint * c;
268 	int	k, wa, wb, wc;
269 	unsigned int * x, *xa, *xae, *xb, *xbe, *xc, *xc0;
270 	unsigned int y;
271 	unsigned int carry, z;
272 	unsigned int z2;
273 
274 	if (a->wds < b->wds) {
275 		c = a;
276 		a = b;
277 		b = c;
278 	}
279 	k = a->k;
280 	wa = a->wds;
281 	wb = b->wds;
282 	wc = wa + wb;
283 	if (wc > a->maxwds)
284 		k++;
285 	c = Balloc(k);
286 	for (x = c->x, xa = x + wc; x < xa; x++)
287 		*x = 0;
288 	xa = a->x;
289 	xae = xa + wa;
290 	xb = b->x;
291 	xbe = xb + wb;
292 	xc0 = c->x;
293 	for (; xb < xbe; xb++, xc0++) {
294 		if (y = *xb & 0xffff) {
295 			x = xa;
296 			xc = xc0;
297 			carry = 0;
298 			do {
299 				z = (*x & 0xffff) * y + (*xc & 0xffff) + carry;
300 				carry = z >> 16;
301 				z2 = (*x++ >> 16) * y + (*xc >> 16) + carry;
302 				carry = z2 >> 16;
303 				Storeinc(xc, z2, z);
304 			} while (x < xae);
305 			*xc = carry;
306 		}
307 		if (y = *xb >> 16) {
308 			x = xa;
309 			xc = xc0;
310 			carry = 0;
311 			z2 = *xc;
312 			do {
313 				z = (*x & 0xffff) * y + (*xc >> 16) + carry;
314 				carry = z >> 16;
315 				Storeinc(xc, z, z2);
316 				z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry;
317 				carry = z2 >> 16;
318 			} while (x < xae);
319 			*xc = z2;
320 		}
321 	}
322 	for (xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc)
323 		;
324 	c->wds = wc;
325 	return c;
326 }
327 
328 static Bigint *p5s;
329 
330 static Bigint *
331 pow5mult(Bigint *b, int k)
332 {
333 	Bigint * b1, *p5, *p51;
334 	int	i;
335 	static int	p05[3] = {
336 		5, 25, 125 	};
337 
338 	if (i = k & 3)
339 		b = multadd(b, p05[i-1], 0);
340 
341 	if (!(k >>= 2))
342 		return b;
343 	if (!(p5 = p5s)) {
344 		/* first time */
345 		ACQUIRE_DTOA_LOCK(1);
346 		if (!(p5 = p5s)) {
347 			p5 = p5s = i2b(625);
348 			p5->next = 0;
349 		}
350 		FREE_DTOA_LOCK(1);
351 	}
352 	for (; ; ) {
353 		if (k & 1) {
354 			b1 = mult(b, p5);
355 			Bfree(b);
356 			b = b1;
357 		}
358 		if (!(k >>= 1))
359 			break;
360 		if (!(p51 = p5->next)) {
361 			ACQUIRE_DTOA_LOCK(1);
362 			if (!(p51 = p5->next)) {
363 				p51 = p5->next = mult(p5, p5);
364 				p51->next = 0;
365 			}
366 			FREE_DTOA_LOCK(1);
367 		}
368 		p5 = p51;
369 	}
370 	return b;
371 }
372 
373 static Bigint *
374 lshift(Bigint *b, int k)
375 {
376 	int	i, k1, n, n1;
377 	Bigint * b1;
378 	unsigned int * x, *x1, *xe, z;
379 
380 	n = k >> 5;
381 	k1 = b->k;
382 	n1 = n + b->wds + 1;
383 	for (i = b->maxwds; n1 > i; i <<= 1)
384 		k1++;
385 	b1 = Balloc(k1);
386 	x1 = b1->x;
387 	for (i = 0; i < n; i++)
388 		*x1++ = 0;
389 	x = b->x;
390 	xe = x + b->wds;
391 	if (k &= 0x1f) {
392 		k1 = 32 - k;
393 		z = 0;
394 		do {
395 			*x1++ = *x << k | z;
396 			z = *x++ >> k1;
397 		} while (x < xe);
398 		if (*x1 = z)
399 			++n1;
400 	} else
401 		do
402 			*x1++ = *x++;
403 		while (x < xe);
404 	b1->wds = n1 - 1;
405 	Bfree(b);
406 	return b1;
407 }
408 
409 static int
410 cmp(Bigint *a, Bigint *b)
411 {
412 	unsigned int * xa, *xa0, *xb, *xb0;
413 	int	i, j;
414 
415 	i = a->wds;
416 	j = b->wds;
417 	if (i -= j)
418 		return i;
419 	xa0 = a->x;
420 	xa = xa0 + j;
421 	xb0 = b->x;
422 	xb = xb0 + j;
423 	for (; ; ) {
424 		if (*--xa != *--xb)
425 			return * xa < *xb ? -1 : 1;
426 		if (xa <= xa0)
427 			break;
428 	}
429 	return 0;
430 }
431 
432 static Bigint *
433 diff(Bigint *a, Bigint *b)
434 {
435 	Bigint * c;
436 	int	i, wa, wb;
437 	unsigned int * xa, *xae, *xb, *xbe, *xc;
438 	unsigned int borrow, y;
439 	unsigned int z;
440 
441 	i = cmp(a, b);
442 	if (!i) {
443 		c = Balloc(0);
444 		c->wds = 1;
445 		c->x[0] = 0;
446 		return c;
447 	}
448 	if (i < 0) {
449 		c = a;
450 		a = b;
451 		b = c;
452 		i = 1;
453 	} else
454 		i = 0;
455 	c = Balloc(a->k);
456 	c->sign = i;
457 	wa = a->wds;
458 	xa = a->x;
459 	xae = xa + wa;
460 	wb = b->wds;
461 	xb = b->x;
462 	xbe = xb + wb;
463 	xc = c->x;
464 	borrow = 0;
465 	do {
466 		y = (*xa & 0xffff) - (*xb & 0xffff) - borrow;
467 		borrow = (y & 0x10000) >> 16;
468 		z = (*xa++ >> 16) - (*xb++ >> 16) - borrow;
469 		borrow = (z & 0x10000) >> 16;
470 		Storeinc(xc, z, y);
471 	} while (xb < xbe);
472 	while (xa < xae) {
473 		y = (*xa & 0xffff) - borrow;
474 		borrow = (y & 0x10000) >> 16;
475 		z = (*xa++ >> 16) - borrow;
476 		borrow = (z & 0x10000) >> 16;
477 		Storeinc(xc, z, y);
478 	}
479 	while (!*--xc)
480 		wa--;
481 	c->wds = wa;
482 	return c;
483 }
484 
485 static double
486 ulp(double x)
487 {
488 	register int L;
489 	double	a;
490 
491 	L = (fpword0(x) & Exp_mask) - (P - 1) * Exp_msk1;
492 	fpword0(a) = L;
493 	fpword1(a) = 0;
494 	return a;
495 }
496 
497 static double
498 b2d(Bigint *a, int *e)
499 {
500 	unsigned int * xa, *xa0, w, y, z;
501 	int	k;
502 	double	d;
503 #define d0 fpword0(d)
504 #define d1 fpword1(d)
505 
506 	xa0 = a->x;
507 	xa = xa0 + a->wds;
508 	y = *--xa;
509 	k = hi0bits(y);
510 	*e = 32 - k;
511 	if (k < Ebits) {
512 		d0 = Exp_1 | y >> Ebits - k;
513 		w = xa > xa0 ? *--xa : 0;
514 		d1 = y << (32 - Ebits) + k | w >> Ebits - k;
515 		goto ret_d;
516 	}
517 	z = xa > xa0 ? *--xa : 0;
518 	if (k -= Ebits) {
519 		d0 = Exp_1 | y << k | z >> 32 - k;
520 		y = xa > xa0 ? *--xa : 0;
521 		d1 = z << k | y >> 32 - k;
522 	} else {
523 		d0 = Exp_1 | y;
524 		d1 = z;
525 	}
526 ret_d:
527 #undef d0
528 #undef d1
529 	return d;
530 }
531 
532 static Bigint *
533 d2b(double d, int *e, int *bits)
534 {
535 	Bigint * b;
536 	int	de, i, k;
537 	unsigned int * x, y, z;
538 #define d0 fpword0(d)
539 #define d1 fpword1(d)
540 
541 	b = Balloc(1);
542 	x = b->x;
543 
544 	z = d0 & Frac_mask;
545 	d0 &= 0x7fffffff;	/* clear sign bit, which we ignore */
546 	de = (int)(d0 >> Exp_shift);
547 	z |= Exp_msk11;
548 	if (y = d1) {
549 		if (k = lo0bits(&y)) {
550 			x[0] = y | z << 32 - k;
551 			z >>= k;
552 		} else
553 			x[0] = y;
554 		i = b->wds = (x[1] = z) ? 2 : 1;
555 	} else {
556 		k = lo0bits(&z);
557 		x[0] = z;
558 		i = b->wds = 1;
559 		k += 32;
560 	}
561 	*e = de - Bias - (P - 1) + k;
562 	*bits = P - k;
563 	return b;
564 }
565 
566 #undef d0
567 #undef d1
568 
569 static double
570 ratio(Bigint *a, Bigint *b)
571 {
572 	double	da, db;
573 	int	k, ka, kb;
574 
575 	da = b2d(a, &ka);
576 	db = b2d(b, &kb);
577 	k = ka - kb + 32 * (a->wds - b->wds);
578 	if (k > 0)
579 		fpword0(da) += k * Exp_msk1;
580 	else {
581 		k = -k;
582 		fpword0(db) += k * Exp_msk1;
583 	}
584 	return da / db;
585 }
586 
587 static const double
588 tens[] = {
589 	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
590 	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
591 	1e20, 1e21, 1e22
592 };
593 
594 static const double
595 bigtens[] = {
596 	1e16, 1e32, 1e64, 1e128, 1e256 };
597 
598 static const double tinytens[] = {
599 	1e-16, 1e-32, 1e-64, 1e-128,
600 	9007199254740992.e-256
601 };
602 
603 /* The factor of 2^53 in tinytens[4] helps us avoid setting the underflow */
604 /* flag unnecessarily.  It leads to a song and dance at the end of strtod. */
605 #define Scale_Bit 0x10
606 #define n_bigtens 5
607 
608 #define NAN_WORD0 0x7ff80000
609 
610 #define NAN_WORD1 0
611 
612 static int
613 match(const char **sp, char *t)
614 {
615 	int	c, d;
616 	const char * s = *sp;
617 
618 	while (d = *t++) {
619 		if ((c = *++s) >= 'A' && c <= 'Z')
620 			c += 'a' - 'A';
621 		if (c != d)
622 			return 0;
623 	}
624 	*sp = s + 1;
625 	return 1;
626 }
627 
628 static void
629 gethex(double *rvp, const char **sp)
630 {
631 	unsigned int c, x[2];
632 	const char * s;
633 	int	havedig, udx0, xshift;
634 
635 	x[0] = x[1] = 0;
636 	havedig = xshift = 0;
637 	udx0 = 1;
638 	s = *sp;
639 	while (c = *(const unsigned char * )++s) {
640 		if (c >= '0' && c <= '9')
641 			c -= '0';
642 		else if (c >= 'a' && c <= 'f')
643 			c += 10 - 'a';
644 		else if (c >= 'A' && c <= 'F')
645 			c += 10 - 'A';
646 		else if (c <= ' ') {
647 			if (udx0 && havedig) {
648 				udx0 = 0;
649 				xshift = 1;
650 			}
651 			continue;
652 		} else if (/*(*/ c == ')') {
653 			*sp = s + 1;
654 			break;
655 		} else
656 			return;	/* invalid form: don't change *sp */
657 		havedig = 1;
658 		if (xshift) {
659 			xshift = 0;
660 			x[0] = x[1];
661 			x[1] = 0;
662 		}
663 		if (udx0)
664 			x[0] = (x[0] << 4) | (x[1] >> 28);
665 		x[1] = (x[1] << 4) | c;
666 	}
667 	if ((x[0] &= 0xfffff) || x[1]) {
668 		fpword0(*rvp) = Exp_mask | x[0];
669 		fpword1(*rvp) = x[1];
670 	}
671 }
672 
673 static int
674 quorem(Bigint *b, Bigint *S)
675 {
676 	int	n;
677 	unsigned int * bx, *bxe, q, *sx, *sxe;
678 	unsigned int borrow, carry, y, ys;
679 	unsigned int si, z, zs;
680 
681 	n = S->wds;
682 	if (b->wds < n)
683 		return 0;
684 	sx = S->x;
685 	sxe = sx + --n;
686 	bx = b->x;
687 	bxe = bx + n;
688 	q = *bxe / (*sxe + 1);	/* ensure q <= true quotient */
689 	if (q) {
690 		borrow = 0;
691 		carry = 0;
692 		do {
693 			si = *sx++;
694 			ys = (si & 0xffff) * q + carry;
695 			zs = (si >> 16) * q + (ys >> 16);
696 			carry = zs >> 16;
697 			y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
698 			borrow = (y & 0x10000) >> 16;
699 			z = (*bx >> 16) - (zs & 0xffff) - borrow;
700 			borrow = (z & 0x10000) >> 16;
701 			Storeinc(bx, z, y);
702 		} while (sx <= sxe);
703 		if (!*bxe) {
704 			bx = b->x;
705 			while (--bxe > bx && !*bxe)
706 				--n;
707 			b->wds = n;
708 		}
709 	}
710 	if (cmp(b, S) >= 0) {
711 		q++;
712 		borrow = 0;
713 		carry = 0;
714 		bx = b->x;
715 		sx = S->x;
716 		do {
717 			si = *sx++;
718 			ys = (si & 0xffff) + carry;
719 			zs = (si >> 16) + (ys >> 16);
720 			carry = zs >> 16;
721 			y = (*bx & 0xffff) - (ys & 0xffff) - borrow;
722 			borrow = (y & 0x10000) >> 16;
723 			z = (*bx >> 16) - (zs & 0xffff) - borrow;
724 			borrow = (z & 0x10000) >> 16;
725 			Storeinc(bx, z, y);
726 		} while (sx <= sxe);
727 		bx = b->x;
728 		bxe = bx + n;
729 		if (!*bxe) {
730 			while (--bxe > bx && !*bxe)
731 				--n;
732 			b->wds = n;
733 		}
734 	}
735 	return q;
736 }
737 
738 static char	*
739 rv_alloc(int i)
740 {
741 	int	j, k, *r;
742 
743 	j = sizeof(unsigned int);
744 	for (k = 0;
745 	    sizeof(Bigint) - sizeof(unsigned int) - sizeof(int) + j <= i;
746 	    j <<= 1)
747 		k++;
748 	r = (int * )Balloc(k);
749 	*r = k;
750 	return
751 	    (char *)(r + 1);
752 }
753 
754 static char	*
755 nrv_alloc(char *s, char **rve, int n)
756 {
757 	char	*rv, *t;
758 
759 	t = rv = rv_alloc(n);
760 	while (*t = *s++)
761 		t++;
762 	if (rve)
763 		*rve = t;
764 	return rv;
765 }
766 
767 /* freedtoa(s) must be used to free values s returned by dtoa
768  * when MULTIPLE_THREADS is #defined.  It should be used in all cases,
769  * but for consistency with earlier versions of dtoa, it is optional
770  * when MULTIPLE_THREADS is not defined.
771  */
772 
773 void
774 freedtoa(char *s)
775 {
776 	Bigint * b = (Bigint * )((int *)s - 1);
777 	b->maxwds = 1 << (b->k = *(int * )b);
778 	Bfree(b);
779 }
780 
781 /* dtoa for IEEE arithmetic (dmg): convert double to ASCII string.
782  *
783  * Inspired by "How to Print Floating-Point Numbers Accurately" by
784  * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 92-101].
785  *
786  * Modifications:
787  *	1. Rather than iterating, we use a simple numeric overestimate
788  *	   to determine k = floor(log10(d)).  We scale relevant
789  *	   quantities using O(log2(k)) rather than O(k) multiplications.
790  *	2. For some modes > 2 (corresponding to ecvt and fcvt), we don't
791  *	   try to generate digits strictly left to right.  Instead, we
792  *	   compute with fewer bits and propagate the carry if necessary
793  *	   when rounding the final digit up.  This is often faster.
794  *	3. Under the assumption that input will be rounded nearest,
795  *	   mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22.
796  *	   That is, we allow equality in stopping tests when the
797  *	   round-nearest rule will give the same floating-point value
798  *	   as would satisfaction of the stopping test with strict
799  *	   inequality.
800  *	4. We remove common factors of powers of 2 from relevant
801  *	   quantities.
802  *	5. When converting floating-point integers less than 1e16,
803  *	   we use floating-point arithmetic rather than resorting
804  *	   to multiple-precision integers.
805  *	6. When asked to produce fewer than 15 digits, we first try
806  *	   to get by with floating-point arithmetic; we resort to
807  *	   multiple-precision integer arithmetic only if we cannot
808  *	   guarantee that the floating-point calculation has given
809  *	   the correctly rounded result.  For k requested digits and
810  *	   "uniformly" distributed input, the probability is
811  *	   something like 10^(k-15) that we must resort to the int
812  *	   calculation.
813  */
814 
815 char	*
816 dtoa(double d, int mode, int ndigits, int *decpt, int *sign, char **rve)
817 {
818 	/*	Arguments ndigits, decpt, sign are similar to those
819 	of ecvt and fcvt; trailing zeros are suppressed from
820 	the returned string.  If not null, *rve is set to point
821 	to the end of the return value.  If d is +-Infinity or NaN,
822 	then *decpt is set to 9999.
823 
824 	mode:
825 		0 ==> shortest string that yields d when read in
826 			and rounded to nearest.
827 		1 ==> like 0, but with Steele & White stopping rule;
828 			e.g. with IEEE P754 arithmetic , mode 0 gives
829 			1e23 whereas mode 1 gives 9.999999999999999e22.
830 		2 ==> max(1,ndigits) significant digits.  This gives a
831 			return value similar to that of ecvt, except
832 			that trailing zeros are suppressed.
833 		3 ==> through ndigits past the decimal point.  This
834 			gives a return value similar to that from fcvt,
835 			except that trailing zeros are suppressed, and
836 			ndigits can be negative.
837 		4-9 should give the same return values as 2-3, i.e.,
838 			4 <= mode <= 9 ==> same return as mode
839 			2 + (mode & 1).  These modes are mainly for
840 			debugging; often they run slower but sometimes
841 			faster than modes 2-3.
842 		4,5,8,9 ==> left-to-right digit generation.
843 		6-9 ==> don't try fast floating-point estimate
844 			(if applicable).
845 
846 		Values of mode other than 0-9 are treated as mode 0.
847 
848 		Sufficient space is allocated to the return value
849 		to hold the suppressed trailing zeros.
850 	*/
851 
852 	int	bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1,
853 	j, j1, k, k0, k_check, leftright, m2, m5, s2, s5,
854 	spec_case, try_quick;
855 	int L;
856 	Bigint * b, *b1, *delta, *mlo=nil, *mhi, *S;
857 	double	d2, ds, eps;
858 	char	*s, *s0;
859 
860 	if (fpword0(d) & Sign_bit) {
861 		/* set sign for everything, including 0's and NaNs */
862 		*sign = 1;
863 		fpword0(d) &= ~Sign_bit;	/* clear sign bit */
864 	} else
865 		*sign = 0;
866 
867 	if ((fpword0(d) & Exp_mask) == Exp_mask) {
868 		/* Infinity or NaN */
869 		*decpt = 9999;
870 		if (!fpword1(d) && !(fpword0(d) & 0xfffff))
871 			return nrv_alloc("Infinity", rve, 8);
872 		return nrv_alloc("NaN", rve, 3);
873 	}
874 	if (!d) {
875 		*decpt = 1;
876 		return nrv_alloc("0", rve, 1);
877 	}
878 
879 	b = d2b(d, &be, &bbits);
880 	i = (int)(fpword0(d) >> Exp_shift1 & (Exp_mask >> Exp_shift1));
881 	d2 = d;
882 	fpword0(d2) &= Frac_mask1;
883 	fpword0(d2) |= Exp_11;
884 
885 	/* log(x)	~=~ log(1.5) + (x-1.5)/1.5
886 		 * log10(x)	 =  log(x) / log(10)
887 		 *		~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10))
888 		 * log10(d) = (i-Bias)*log(2)/log(10) + log10(d2)
889 		 *
890 		 * This suggests computing an approximation k to log10(d) by
891 		 *
892 		 * k = (i - Bias)*0.301029995663981
893 		 *	+ ( (d2-1.5)*0.289529654602168 + 0.176091259055681 );
894 		 *
895 		 * We want k to be too large rather than too small.
896 		 * The error in the first-order Taylor series approximation
897 		 * is in our favor, so we just round up the constant enough
898 		 * to compensate for any error in the multiplication of
899 		 * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077,
900 		 * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14,
901 		 * adding 1e-13 to the constant term more than suffices.
902 		 * Hence we adjust the constant term to 0.1760912590558.
903 		 * (We could get a more accurate k by invoking log10,
904 		 *  but this is probably not worthwhile.)
905 		 */
906 
907 	i -= Bias;
908 	ds = (d2 - 1.5) * 0.289529654602168 + 0.1760912590558 + i * 0.301029995663981;
909 	k = (int)ds;
910 	if (ds < 0. && ds != k)
911 		k--;	/* want k = floor(ds) */
912 	k_check = 1;
913 	if (k >= 0 && k <= Ten_pmax) {
914 		if (d < tens[k])
915 			k--;
916 		k_check = 0;
917 	}
918 	j = bbits - i - 1;
919 	if (j >= 0) {
920 		b2 = 0;
921 		s2 = j;
922 	} else {
923 		b2 = -j;
924 		s2 = 0;
925 	}
926 	if (k >= 0) {
927 		b5 = 0;
928 		s5 = k;
929 		s2 += k;
930 	} else {
931 		b2 -= k;
932 		b5 = -k;
933 		s5 = 0;
934 	}
935 	if (mode < 0 || mode > 9)
936 		mode = 0;
937 	try_quick = 1;
938 	if (mode > 5) {
939 		mode -= 4;
940 		try_quick = 0;
941 	}
942 	leftright = 1;
943 	switch (mode) {
944 	case 0:
945 	case 1:
946 		ilim = ilim1 = -1;
947 		i = 18;
948 		ndigits = 0;
949 		break;
950 	case 2:
951 		leftright = 0;
952 		/* no break */
953 	case 4:
954 		if (ndigits <= 0)
955 			ndigits = 1;
956 		ilim = ilim1 = i = ndigits;
957 		break;
958 	case 3:
959 		leftright = 0;
960 		/* no break */
961 	case 5:
962 		i = ndigits + k + 1;
963 		ilim = i;
964 		ilim1 = i - 1;
965 		if (i <= 0)
966 			i = 1;
967 	}
968 	s = s0 = rv_alloc(i);
969 
970 	if (ilim >= 0 && ilim <= Quick_max && try_quick) {
971 
972 		/* Try to get by with floating-point arithmetic. */
973 
974 		i = 0;
975 		d2 = d;
976 		k0 = k;
977 		ilim0 = ilim;
978 		ieps = 2; /* conservative */
979 		if (k > 0) {
980 			ds = tens[k&0xf];
981 			j = k >> 4;
982 			if (j & Bletch) {
983 				/* prevent overflows */
984 				j &= Bletch - 1;
985 				d /= bigtens[n_bigtens-1];
986 				ieps++;
987 			}
988 			for (; j; j >>= 1, i++)
989 				if (j & 1) {
990 					ieps++;
991 					ds *= bigtens[i];
992 				}
993 			d /= ds;
994 		} else if (j1 = -k) {
995 			d *= tens[j1 & 0xf];
996 			for (j = j1 >> 4; j; j >>= 1, i++)
997 				if (j & 1) {
998 					ieps++;
999 					d *= bigtens[i];
1000 				}
1001 		}
1002 		if (k_check && d < 1. && ilim > 0) {
1003 			if (ilim1 <= 0)
1004 				goto fast_failed;
1005 			ilim = ilim1;
1006 			k--;
1007 			d *= 10.;
1008 			ieps++;
1009 		}
1010 		eps = ieps * d + 7.;
1011 		fpword0(eps) -= (P - 1) * Exp_msk1;
1012 		if (ilim == 0) {
1013 			S = mhi = 0;
1014 			d -= 5.;
1015 			if (d > eps)
1016 				goto one_digit;
1017 			if (d < -eps)
1018 				goto no_digits;
1019 			goto fast_failed;
1020 		}
1021 		/* Generate ilim digits, then fix them up. */
1022 		eps *= tens[ilim-1];
1023 		for (i = 1; ; i++, d *= 10.) {
1024 			L = d;
1025 			d -= L;
1026 			*s++ = '0' + (int)L;
1027 			if (i == ilim) {
1028 				if (d > 0.5 + eps)
1029 					goto bump_up;
1030 				else if (d < 0.5 - eps) {
1031 					while (*--s == '0')
1032 						;
1033 					s++;
1034 					goto ret1;
1035 				}
1036 				break;
1037 			}
1038 		}
1039 fast_failed:
1040 		s = s0;
1041 		d = d2;
1042 		k = k0;
1043 		ilim = ilim0;
1044 	}
1045 
1046 	/* Do we have a "small" integer? */
1047 
1048 	if (be >= 0 && k <= Int_max) {
1049 		/* Yes. */
1050 		ds = tens[k];
1051 		if (ndigits < 0 && ilim <= 0) {
1052 			S = mhi = 0;
1053 			if (ilim < 0 || d <= 5 * ds)
1054 				goto no_digits;
1055 			goto one_digit;
1056 		}
1057 		for (i = 1; ; i++) {
1058 			L = d / ds;
1059 			d -= L * ds;
1060 			*s++ = '0' + (int)L;
1061 			if (i == ilim) {
1062 				d += d;
1063 				if (d > ds || d == ds && L & 1) {
1064 bump_up:
1065 					while (*--s == '9')
1066 						if (s == s0) {
1067 							k++;
1068 							*s = '0';
1069 							break;
1070 						}
1071 					++ * s++;
1072 				}
1073 				break;
1074 			}
1075 			if (!(d *= 10.))
1076 				break;
1077 		}
1078 		goto ret1;
1079 	}
1080 
1081 	m2 = b2;
1082 	m5 = b5;
1083 	mhi = mlo = 0;
1084 	if (leftright) {
1085 		if (mode < 2) {
1086 			i =
1087 			    1 + P - bbits;
1088 		} else {
1089 			j = ilim - 1;
1090 			if (m5 >= j)
1091 				m5 -= j;
1092 			else {
1093 				s5 += j -= m5;
1094 				b5 += j;
1095 				m5 = 0;
1096 			}
1097 			if ((i = ilim) < 0) {
1098 				m2 -= i;
1099 				i = 0;
1100 			}
1101 		}
1102 		b2 += i;
1103 		s2 += i;
1104 		mhi = i2b(1);
1105 	}
1106 	if (m2 > 0 && s2 > 0) {
1107 		i = m2 < s2 ? m2 : s2;
1108 		b2 -= i;
1109 		m2 -= i;
1110 		s2 -= i;
1111 	}
1112 	if (b5 > 0) {
1113 		if (leftright) {
1114 			if (m5 > 0) {
1115 				mhi = pow5mult(mhi, m5);
1116 				b1 = mult(mhi, b);
1117 				Bfree(b);
1118 				b = b1;
1119 			}
1120 			if (j = b5 - m5)
1121 				b = pow5mult(b, j);
1122 		} else
1123 			b = pow5mult(b, b5);
1124 	}
1125 	S = i2b(1);
1126 	if (s5 > 0)
1127 		S = pow5mult(S, s5);
1128 
1129 	/* Check for special case that d is a normalized power of 2. */
1130 
1131 	spec_case = 0;
1132 	if (mode < 2) {
1133 		if (!fpword1(d) && !(fpword0(d) & Bndry_mask)
1134 		    ) {
1135 			/* The special case */
1136 			b2 += Log2P;
1137 			s2 += Log2P;
1138 			spec_case = 1;
1139 		}
1140 	}
1141 
1142 	/* Arrange for convenient computation of quotients:
1143 	 * shift left if necessary so divisor has 4 leading 0 bits.
1144 	 *
1145 	 * Perhaps we should just compute leading 28 bits of S once
1146 	 * and for all and pass them and a shift to quorem, so it
1147 	 * can do shifts and ors to compute the numerator for q.
1148 	 */
1149 	if (i = ((s5 ? 32 - hi0bits(S->x[S->wds-1]) : 1) + s2) & 0x1f)
1150 		i = 32 - i;
1151 	if (i > 4) {
1152 		i -= 4;
1153 		b2 += i;
1154 		m2 += i;
1155 		s2 += i;
1156 	} else if (i < 4) {
1157 		i += 28;
1158 		b2 += i;
1159 		m2 += i;
1160 		s2 += i;
1161 	}
1162 	if (b2 > 0)
1163 		b = lshift(b, b2);
1164 	if (s2 > 0)
1165 		S = lshift(S, s2);
1166 	if (k_check) {
1167 		if (cmp(b, S) < 0) {
1168 			k--;
1169 			b = multadd(b, 10, 0);	/* we botched the k estimate */
1170 			if (leftright)
1171 				mhi = multadd(mhi, 10, 0);
1172 			ilim = ilim1;
1173 		}
1174 	}
1175 	if (ilim <= 0 && mode > 2) {
1176 		if (ilim < 0 || cmp(b, S = multadd(S, 5, 0)) <= 0) {
1177 			/* no digits, fcvt style */
1178 no_digits:
1179 			k = -1 - ndigits;
1180 			goto ret;
1181 		}
1182 one_digit:
1183 		*s++ = '1';
1184 		k++;
1185 		goto ret;
1186 	}
1187 	if (leftright) {
1188 		if (m2 > 0)
1189 			mhi = lshift(mhi, m2);
1190 
1191 		/* Compute mlo -- check for special case
1192 		 * that d is a normalized power of 2.
1193 		 */
1194 
1195 		mlo = mhi;
1196 		if (spec_case) {
1197 			mhi = Balloc(mhi->k);
1198 			Bcopy(mhi, mlo);
1199 			mhi = lshift(mhi, Log2P);
1200 		}
1201 
1202 		for (i = 1; ; i++) {
1203 			dig = quorem(b, S) + '0';
1204 			/* Do we yet have the shortest decimal string
1205 			 * that will round to d?
1206 			 */
1207 			j = cmp(b, mlo);
1208 			delta = diff(S, mhi);
1209 			j1 = delta->sign ? 1 : cmp(b, delta);
1210 			Bfree(delta);
1211 			if (j1 == 0 && !mode && !(fpword1(d) & 1)) {
1212 				if (dig == '9')
1213 					goto round_9_up;
1214 				if (j > 0)
1215 					dig++;
1216 				*s++ = dig;
1217 				goto ret;
1218 			}
1219 			if (j < 0 || j == 0 && !mode
1220 			     && !(fpword1(d) & 1)
1221 			    ) {
1222 				if (j1 > 0) {
1223 					b = lshift(b, 1);
1224 					j1 = cmp(b, S);
1225 					if ((j1 > 0 || j1 == 0 && dig & 1)
1226 					     && dig++ == '9')
1227 						goto round_9_up;
1228 				}
1229 				*s++ = dig;
1230 				goto ret;
1231 			}
1232 			if (j1 > 0) {
1233 				if (dig == '9') { /* possible if i == 1 */
1234 round_9_up:
1235 					*s++ = '9';
1236 					goto roundoff;
1237 				}
1238 				*s++ = dig + 1;
1239 				goto ret;
1240 			}
1241 			*s++ = dig;
1242 			if (i == ilim)
1243 				break;
1244 			b = multadd(b, 10, 0);
1245 			if (mlo == mhi)
1246 				mlo = mhi = multadd(mhi, 10, 0);
1247 			else {
1248 				mlo = multadd(mlo, 10, 0);
1249 				mhi = multadd(mhi, 10, 0);
1250 			}
1251 		}
1252 	} else
1253 		for (i = 1; ; i++) {
1254 			*s++ = dig = quorem(b, S) + '0';
1255 			if (i >= ilim)
1256 				break;
1257 			b = multadd(b, 10, 0);
1258 		}
1259 
1260 	/* Round off last digit */
1261 
1262 	b = lshift(b, 1);
1263 	j = cmp(b, S);
1264 	if (j > 0 || j == 0 && dig & 1) {
1265 roundoff:
1266 		while (*--s == '9')
1267 			if (s == s0) {
1268 				k++;
1269 				*s++ = '1';
1270 				goto ret;
1271 			}
1272 		++ * s++;
1273 	} else {
1274 		while (*--s == '0')
1275 			;
1276 		s++;
1277 	}
1278 ret:
1279 	Bfree(S);
1280 	if (mhi) {
1281 		if (mlo && mlo != mhi)
1282 			Bfree(mlo);
1283 		Bfree(mhi);
1284 	}
1285 ret1:
1286 	Bfree(b);
1287 	*s = 0;
1288 	*decpt = k + 1;
1289 	if (rve)
1290 		*rve = s;
1291 	return s0;
1292 }
1293 
1294