1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4
5 /*
6 * This routine will convert to arbitrary precision
7 * floating point entirely in multi-precision fixed.
8 * The answer is the closest floating point number to
9 * the given decimal number. Exactly half way are
10 * rounded ala ieee rules.
11 * Method is to scale input decimal between .500 and .999...
12 * with external power of 2, then binary search for the
13 * closest mantissa to this decimal number.
14 * Nmant is is the required precision. (53 for ieee dp)
15 * Nbits is the max number of bits/word. (must be <= 28)
16 * Prec is calculated - the number of words of fixed mantissa.
17 */
18 enum
19 {
20 Nbits = 28, // bits safely represented in a ulong
21 Nmant = 53, // bits of precision required
22 Bias = 1022,
23 Prec = (Nmant+Nbits+1)/Nbits, // words of Nbits each to represent mantissa
24 Sigbit = 1<<(Prec*Nbits-Nmant), // first significant bit of Prec-th word
25 Ndig = 1500,
26 One = (ulong)(1<<Nbits),
27 Half = (ulong)(One>>1),
28 Maxe = 310,
29 Fsign = 1<<0, // found -
30 Fesign = 1<<1, // found e-
31 Fdpoint = 1<<2, // found .
32
33 S0 = 0, // _ _S0 +S1 #S2 .S3
34 S1, // _+ #S2 .S3
35 S2, // _+# #S2 .S4 eS5
36 S3, // _+. #S4
37 S4, // _+#.# #S4 eS5
38 S5, // _+#.#e +S6 #S7
39 S6, // _+#.#e+ #S7
40 S7, // _+#.#e+# #S7
41 };
42
43 static int xcmp(char*, char*);
44 static int fpcmp(char*, ulong*);
45 static void frnorm(ulong*);
46 static void divascii(char*, int*, int*, int*);
47 static void mulascii(char*, int*, int*, int*);
48 static void divby(char*, int*, int);
49
50 typedef struct Tab Tab;
51 struct Tab
52 {
53 int bp;
54 int siz;
55 char* cmp;
56 };
57
58 double
strtod(char * as,char ** aas)59 strtod(char *as, char **aas)
60 {
61 int na, ona, ex, dp, bp, c, i, flag, state;
62 ulong low[Prec], hig[Prec], mid[Prec], num, den;
63 double d;
64 char *s, a[Ndig];
65
66 flag = 0; // Fsign, Fesign, Fdpoint
67 na = 0; // number of digits of a[]
68 dp = 0; // na of decimal point
69 ex = 0; // exonent
70
71 state = S0;
72 for(s=as;; s++) {
73 c = *s;
74 if(c >= '0' && c <= '9') {
75 switch(state) {
76 case S0:
77 case S1:
78 case S2:
79 state = S2;
80 break;
81 case S3:
82 case S4:
83 state = S4;
84 break;
85
86 case S5:
87 case S6:
88 case S7:
89 state = S7;
90 ex = ex*10 + (c-'0');
91 continue;
92 }
93 if(na == 0 && c == '0') {
94 dp--;
95 continue;
96 }
97 if(na < Ndig-50)
98 a[na++] = c;
99 continue;
100 }
101 switch(c) {
102 case '\t':
103 case '\n':
104 case '\v':
105 case '\f':
106 case '\r':
107 case ' ':
108 if(state == S0)
109 continue;
110 break;
111 case '-':
112 if(state == S0)
113 flag |= Fsign;
114 else
115 flag |= Fesign;
116 case '+':
117 if(state == S0)
118 state = S1;
119 else
120 if(state == S5)
121 state = S6;
122 else
123 break; // syntax
124 continue;
125 case '.':
126 flag |= Fdpoint;
127 dp = na;
128 if(state == S0 || state == S1) {
129 state = S3;
130 continue;
131 }
132 if(state == S2) {
133 state = S4;
134 continue;
135 }
136 break;
137 case 'e':
138 case 'E':
139 if(state == S2 || state == S4) {
140 state = S5;
141 continue;
142 }
143 break;
144 }
145 break;
146 }
147
148 /*
149 * clean up return char-pointer
150 */
151 switch(state) {
152 case S0:
153 if(xcmp(s, "nan") == 0) {
154 if(aas != nil)
155 *aas = s+3;
156 goto retnan;
157 }
158 case S1:
159 if(xcmp(s, "infinity") == 0) {
160 if(aas != nil)
161 *aas = s+8;
162 goto retinf;
163 }
164 if(xcmp(s, "inf") == 0) {
165 if(aas != nil)
166 *aas = s+3;
167 goto retinf;
168 }
169 case S3:
170 if(aas != nil)
171 *aas = as;
172 goto ret0; // no digits found
173 case S6:
174 s--; // back over +-
175 case S5:
176 s--; // back over e
177 break;
178 }
179 if(aas != nil)
180 *aas = s;
181
182 if(flag & Fdpoint)
183 while(na > 0 && a[na-1] == '0')
184 na--;
185 if(na == 0)
186 goto ret0; // zero
187 a[na] = 0;
188 if(!(flag & Fdpoint))
189 dp = na;
190 if(flag & Fesign)
191 ex = -ex;
192 dp += ex;
193 if(dp < -Maxe-Nmant/3) /* actually -Nmant*log(2)/log(10), but Nmant/3 close enough */
194 goto ret0; // underflow by exp
195 else
196 if(dp > +Maxe)
197 goto retinf; // overflow by exp
198
199 /*
200 * normalize the decimal ascii number
201 * to range .[5-9][0-9]* e0
202 */
203 bp = 0; // binary exponent
204 while(dp > 0)
205 divascii(a, &na, &dp, &bp);
206 while(dp < 0 || a[0] < '5')
207 mulascii(a, &na, &dp, &bp);
208 a[na] = 0;
209
210 /*
211 * very small numbers are represented using
212 * bp = -Bias+1. adjust accordingly.
213 */
214 if(bp < -Bias+1){
215 ona = na;
216 divby(a, &na, -bp-Bias+1);
217 if(na < ona){
218 memmove(a+ona-na, a, na);
219 memset(a, '0', ona-na);
220 na = ona;
221 }
222 a[na] = 0;
223 bp = -Bias+1;
224 }
225
226 /* close approx by naive conversion */
227 num = 0;
228 den = 1;
229 for(i=0; i<9 && (c=a[i]); i++) {
230 num = num*10 + (c-'0');
231 den *= 10;
232 }
233 low[0] = umuldiv(num, One, den);
234 hig[0] = umuldiv(num+1, One, den);
235 for(i=1; i<Prec; i++) {
236 low[i] = 0;
237 hig[i] = One-1;
238 }
239
240 /* binary search for closest mantissa */
241 for(;;) {
242 /* mid = (hig + low) / 2 */
243 c = 0;
244 for(i=0; i<Prec; i++) {
245 mid[i] = hig[i] + low[i];
246 if(c)
247 mid[i] += One;
248 c = mid[i] & 1;
249 mid[i] >>= 1;
250 }
251 frnorm(mid);
252
253 /* compare */
254 c = fpcmp(a, mid);
255 if(c > 0) {
256 c = 1;
257 for(i=0; i<Prec; i++)
258 if(low[i] != mid[i]) {
259 c = 0;
260 low[i] = mid[i];
261 }
262 if(c)
263 break; // between mid and hig
264 continue;
265 }
266 if(c < 0) {
267 for(i=0; i<Prec; i++)
268 hig[i] = mid[i];
269 continue;
270 }
271
272 /* only hard part is if even/odd roundings wants to go up */
273 c = mid[Prec-1] & (Sigbit-1);
274 if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
275 mid[Prec-1] -= c;
276 break; // exactly mid
277 }
278
279 /* normal rounding applies */
280 c = mid[Prec-1] & (Sigbit-1);
281 mid[Prec-1] -= c;
282 if(c >= Sigbit/2) {
283 mid[Prec-1] += Sigbit;
284 frnorm(mid);
285 }
286 d = 0;
287 for(i=0; i<Prec; i++)
288 d = d*One + mid[i];
289 if(flag & Fsign)
290 d = -d;
291 d = ldexp(d, bp - Prec*Nbits);
292 return d;
293
294 ret0:
295 return 0;
296
297 retnan:
298 return NaN();
299
300 retinf:
301 if(flag & Fsign)
302 return Inf(-1);
303 return Inf(+1);
304 }
305
306 static void
frnorm(ulong * f)307 frnorm(ulong *f)
308 {
309 int i, c;
310
311 c = 0;
312 for(i=Prec-1; i>0; i--) {
313 f[i] += c;
314 c = f[i] >> Nbits;
315 f[i] &= One-1;
316 }
317 f[0] += c;
318 }
319
320 static int
fpcmp(char * a,ulong * f)321 fpcmp(char *a, ulong* f)
322 {
323 ulong tf[Prec];
324 int i, d, c;
325
326 for(i=0; i<Prec; i++)
327 tf[i] = f[i];
328
329 for(;;) {
330 /* tf *= 10 */
331 for(i=0; i<Prec; i++)
332 tf[i] = tf[i]*10;
333 frnorm(tf);
334 d = (tf[0] >> Nbits) + '0';
335 tf[0] &= One-1;
336
337 /* compare next digit */
338 c = *a;
339 if(c == 0) {
340 if('0' < d)
341 return -1;
342 if(tf[0] != 0)
343 goto cont;
344 for(i=1; i<Prec; i++)
345 if(tf[i] != 0)
346 goto cont;
347 return 0;
348 }
349 if(c > d)
350 return +1;
351 if(c < d)
352 return -1;
353 a++;
354 cont:;
355 }
356 }
357
358 static void
_divby(char * a,int * na,int b)359 _divby(char *a, int *na, int b)
360 {
361 int n, c;
362 char *p;
363
364 p = a;
365 n = 0;
366 while(n>>b == 0) {
367 c = *a++;
368 if(c == 0) {
369 while(n) {
370 c = n*10;
371 if(c>>b)
372 break;
373 n = c;
374 }
375 goto xx;
376 }
377 n = n*10 + c-'0';
378 (*na)--;
379 }
380 for(;;) {
381 c = n>>b;
382 n -= c<<b;
383 *p++ = c + '0';
384 c = *a++;
385 if(c == 0)
386 break;
387 n = n*10 + c-'0';
388 }
389 (*na)++;
390 xx:
391 while(n) {
392 n = n*10;
393 c = n>>b;
394 n -= c<<b;
395 *p++ = c + '0';
396 (*na)++;
397 }
398 *p = 0;
399 }
400
401 static void
divby(char * a,int * na,int b)402 divby(char *a, int *na, int b)
403 {
404 while(b > 9){
405 _divby(a, na, 9);
406 a[*na] = 0;
407 b -= 9;
408 }
409 if(b > 0)
410 _divby(a, na, b);
411 }
412
413 static Tab tab1[] =
414 {
415 1, 0, "",
416 3, 1, "7",
417 6, 2, "63",
418 9, 3, "511",
419 13, 4, "8191",
420 16, 5, "65535",
421 19, 6, "524287",
422 23, 7, "8388607",
423 26, 8, "67108863",
424 27, 9, "134217727",
425 };
426
427 static void
divascii(char * a,int * na,int * dp,int * bp)428 divascii(char *a, int *na, int *dp, int *bp)
429 {
430 int b, d;
431 Tab *t;
432
433 d = *dp;
434 if(d >= nelem(tab1))
435 d = nelem(tab1)-1;
436 t = tab1 + d;
437 b = t->bp;
438 if(memcmp(a, t->cmp, t->siz) > 0)
439 d--;
440 *dp -= d;
441 *bp += b;
442 divby(a, na, b);
443 }
444
445 static void
mulby(char * a,char * p,char * q,int b)446 mulby(char *a, char *p, char *q, int b)
447 {
448 int n, c;
449
450 n = 0;
451 *p = 0;
452 for(;;) {
453 q--;
454 if(q < a)
455 break;
456 c = *q - '0';
457 c = (c<<b) + n;
458 n = c/10;
459 c -= n*10;
460 p--;
461 *p = c + '0';
462 }
463 while(n) {
464 c = n;
465 n = c/10;
466 c -= n*10;
467 p--;
468 *p = c + '0';
469 }
470 }
471
472 static Tab tab2[] =
473 {
474 1, 1, "", // dp = 0-0
475 3, 3, "125",
476 6, 5, "15625",
477 9, 7, "1953125",
478 13, 10, "1220703125",
479 16, 12, "152587890625",
480 19, 14, "19073486328125",
481 23, 17, "11920928955078125",
482 26, 19, "1490116119384765625",
483 27, 19, "7450580596923828125", // dp 8-9
484 };
485
486 static void
mulascii(char * a,int * na,int * dp,int * bp)487 mulascii(char *a, int *na, int *dp, int *bp)
488 {
489 char *p;
490 int d, b;
491 Tab *t;
492
493 d = -*dp;
494 if(d >= nelem(tab2))
495 d = nelem(tab2)-1;
496 t = tab2 + d;
497 b = t->bp;
498 if(memcmp(a, t->cmp, t->siz) < 0)
499 d--;
500 p = a + *na;
501 *bp -= b;
502 *dp += d;
503 *na += d;
504 mulby(a, p+d, p, b);
505 }
506
507 static int
xcmp(char * a,char * b)508 xcmp(char *a, char *b)
509 {
510 int c1, c2;
511
512 while(c1 = *b++) {
513 c2 = *a++;
514 if(isupper(c2))
515 c2 = tolower(c2);
516 if(c1 != c2)
517 return 1;
518 }
519 return 0;
520 }
521