1 #include <u.h> 2 #include <libc.h> 3 #include <ctype.h> 4 5 /* 6 * This routine will convert to arbitrary precision 7 * floating point entirely in multi-precision fixed. 8 * The answer is the closest floating point number to 9 * the given decimal number. Exactly half way are 10 * rounded ala ieee rules. 11 * Method is to scale input decimal between .500 and .999... 12 * with external power of 2, then binary search for the 13 * closest mantissa to this decimal number. 14 * Nmant is is the required precision. (53 for ieee dp) 15 * Nbits is the max number of bits/word. (must be <= 28) 16 * Prec is calculated - the number of words of fixed mantissa. 17 */ 18 enum 19 { 20 Nbits = 28, // bits safely represented in a ulong 21 Nmant = 53, // bits of precision required 22 Bias = 1022, 23 Prec = (Nmant+Nbits+1)/Nbits, // words of Nbits each to represent mantissa 24 Sigbit = 1<<(Prec*Nbits-Nmant), // first significant bit of Prec-th word 25 Ndig = 1500, 26 One = (ulong)(1<<Nbits), 27 Half = (ulong)(One>>1), 28 Maxe = 310, 29 Fsign = 1<<0, // found - 30 Fesign = 1<<1, // found e- 31 Fdpoint = 1<<2, // found . 32 33 S0 = 0, // _ _S0 +S1 #S2 .S3 34 S1, // _+ #S2 .S3 35 S2, // _+# #S2 .S4 eS5 36 S3, // _+. #S4 37 S4, // _+#.# #S4 eS5 38 S5, // _+#.#e +S6 #S7 39 S6, // _+#.#e+ #S7 40 S7, // _+#.#e+# #S7 41 }; 42 43 static int xcmp(char*, char*); 44 static int fpcmp(char*, ulong*); 45 static void frnorm(ulong*); 46 static void divascii(char*, int*, int*, int*); 47 static void mulascii(char*, int*, int*, int*); 48 static void divby(char*, int*, int); 49 50 typedef struct Tab Tab; 51 struct Tab 52 { 53 int bp; 54 int siz; 55 char* cmp; 56 }; 57 58 double 59 strtod(char *as, char **aas) 60 { 61 int na, ona, ex, dp, bp, c, i, flag, state; 62 ulong low[Prec], hig[Prec], mid[Prec], num, den; 63 double d; 64 char *s, a[Ndig]; 65 66 flag = 0; // Fsign, Fesign, Fdpoint 67 na = 0; // number of digits of a[] 68 dp = 0; // na of decimal point 69 ex = 0; // exonent 70 71 state = S0; 72 for(s=as;; s++) { 73 c = *s; 74 if(c >= '0' && c <= '9') { 75 switch(state) { 76 case S0: 77 case S1: 78 case S2: 79 state = S2; 80 break; 81 case S3: 82 case S4: 83 state = S4; 84 break; 85 86 case S5: 87 case S6: 88 case S7: 89 state = S7; 90 ex = ex*10 + (c-'0'); 91 continue; 92 } 93 if(na == 0 && c == '0') { 94 dp--; 95 continue; 96 } 97 if(na < Ndig-50) 98 a[na++] = c; 99 continue; 100 } 101 switch(c) { 102 case '\t': 103 case '\n': 104 case '\v': 105 case '\f': 106 case '\r': 107 case ' ': 108 if(state == S0) 109 continue; 110 break; 111 case '-': 112 if(state == S0) 113 flag |= Fsign; 114 else 115 flag |= Fesign; 116 case '+': 117 if(state == S0) 118 state = S1; 119 else 120 if(state == S5) 121 state = S6; 122 else 123 break; // syntax 124 continue; 125 case '.': 126 flag |= Fdpoint; 127 dp = na; 128 if(state == S0 || state == S1) { 129 state = S3; 130 continue; 131 } 132 if(state == S2) { 133 state = S4; 134 continue; 135 } 136 break; 137 case 'e': 138 case 'E': 139 if(state == S2 || state == S4) { 140 state = S5; 141 continue; 142 } 143 break; 144 } 145 break; 146 } 147 148 /* 149 * clean up return char-pointer 150 */ 151 switch(state) { 152 case S0: 153 if(xcmp(s, "nan") == 0) { 154 if(aas != nil) 155 *aas = s+3; 156 goto retnan; 157 } 158 case S1: 159 if(xcmp(s, "infinity") == 0) { 160 if(aas != nil) 161 *aas = s+8; 162 goto retinf; 163 } 164 if(xcmp(s, "inf") == 0) { 165 if(aas != nil) 166 *aas = s+3; 167 goto retinf; 168 } 169 case S3: 170 if(aas != nil) 171 *aas = as; 172 goto ret0; // no digits found 173 case S6: 174 s--; // back over +- 175 case S5: 176 s--; // back over e 177 break; 178 } 179 if(aas != nil) 180 *aas = s; 181 182 if(flag & Fdpoint) 183 while(na > 0 && a[na-1] == '0') 184 na--; 185 if(na == 0) 186 goto ret0; // zero 187 a[na] = 0; 188 if(!(flag & Fdpoint)) 189 dp = na; 190 if(flag & Fesign) 191 ex = -ex; 192 dp += ex; 193 if(dp < -Maxe-Nmant/3) /* actually -Nmant*log(2)/log(10), but Nmant/3 close enough */ 194 goto ret0; // underflow by exp 195 else 196 if(dp > +Maxe) 197 goto retinf; // overflow by exp 198 199 /* 200 * normalize the decimal ascii number 201 * to range .[5-9][0-9]* e0 202 */ 203 bp = 0; // binary exponent 204 while(dp > 0) 205 divascii(a, &na, &dp, &bp); 206 while(dp < 0 || a[0] < '5') 207 mulascii(a, &na, &dp, &bp); 208 a[na] = 0; 209 210 /* 211 * very small numbers are represented using 212 * bp = -Bias+1. adjust accordingly. 213 */ 214 if(bp < -Bias+1){ 215 ona = na; 216 divby(a, &na, -bp-Bias+1); 217 if(na < ona){ 218 memmove(a+ona-na, a, na); 219 memset(a, '0', ona-na); 220 na = ona; 221 } 222 a[na] = 0; 223 bp = -Bias+1; 224 } 225 226 /* close approx by naive conversion */ 227 num = 0; 228 den = 1; 229 for(i=0; i<9 && (c=a[i]); i++) { 230 num = num*10 + (c-'0'); 231 den *= 10; 232 } 233 low[0] = umuldiv(num, One, den); 234 hig[0] = umuldiv(num+1, One, den); 235 for(i=1; i<Prec; i++) { 236 low[i] = 0; 237 hig[i] = One-1; 238 } 239 240 /* binary search for closest mantissa */ 241 for(;;) { 242 /* mid = (hig + low) / 2 */ 243 c = 0; 244 for(i=0; i<Prec; i++) { 245 mid[i] = hig[i] + low[i]; 246 if(c) 247 mid[i] += One; 248 c = mid[i] & 1; 249 mid[i] >>= 1; 250 } 251 frnorm(mid); 252 253 /* compare */ 254 c = fpcmp(a, mid); 255 if(c > 0) { 256 c = 1; 257 for(i=0; i<Prec; i++) 258 if(low[i] != mid[i]) { 259 c = 0; 260 low[i] = mid[i]; 261 } 262 if(c) 263 break; // between mid and hig 264 continue; 265 } 266 if(c < 0) { 267 for(i=0; i<Prec; i++) 268 hig[i] = mid[i]; 269 continue; 270 } 271 272 /* only hard part is if even/odd roundings wants to go up */ 273 c = mid[Prec-1] & (Sigbit-1); 274 if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0) 275 mid[Prec-1] -= c; 276 break; // exactly mid 277 } 278 279 /* normal rounding applies */ 280 c = mid[Prec-1] & (Sigbit-1); 281 mid[Prec-1] -= c; 282 if(c >= Sigbit/2) { 283 mid[Prec-1] += Sigbit; 284 frnorm(mid); 285 } 286 d = 0; 287 for(i=0; i<Prec; i++) 288 d = d*One + mid[i]; 289 if(flag & Fsign) 290 d = -d; 291 d = ldexp(d, bp - Prec*Nbits); 292 return d; 293 294 ret0: 295 return 0; 296 297 retnan: 298 return NaN(); 299 300 retinf: 301 if(flag & Fsign) 302 return Inf(-1); 303 return Inf(+1); 304 } 305 306 static void 307 frnorm(ulong *f) 308 { 309 int i, c; 310 311 c = 0; 312 for(i=Prec-1; i>0; i--) { 313 f[i] += c; 314 c = f[i] >> Nbits; 315 f[i] &= One-1; 316 } 317 f[0] += c; 318 } 319 320 static int 321 fpcmp(char *a, ulong* f) 322 { 323 ulong tf[Prec]; 324 int i, d, c; 325 326 for(i=0; i<Prec; i++) 327 tf[i] = f[i]; 328 329 for(;;) { 330 /* tf *= 10 */ 331 for(i=0; i<Prec; i++) 332 tf[i] = tf[i]*10; 333 frnorm(tf); 334 d = (tf[0] >> Nbits) + '0'; 335 tf[0] &= One-1; 336 337 /* compare next digit */ 338 c = *a; 339 if(c == 0) { 340 if('0' < d) 341 return -1; 342 if(tf[0] != 0) 343 goto cont; 344 for(i=1; i<Prec; i++) 345 if(tf[i] != 0) 346 goto cont; 347 return 0; 348 } 349 if(c > d) 350 return +1; 351 if(c < d) 352 return -1; 353 a++; 354 cont:; 355 } 356 } 357 358 static void 359 _divby(char *a, int *na, int b) 360 { 361 int n, c; 362 char *p; 363 364 p = a; 365 n = 0; 366 while(n>>b == 0) { 367 c = *a++; 368 if(c == 0) { 369 while(n) { 370 c = n*10; 371 if(c>>b) 372 break; 373 n = c; 374 } 375 goto xx; 376 } 377 n = n*10 + c-'0'; 378 (*na)--; 379 } 380 for(;;) { 381 c = n>>b; 382 n -= c<<b; 383 *p++ = c + '0'; 384 c = *a++; 385 if(c == 0) 386 break; 387 n = n*10 + c-'0'; 388 } 389 (*na)++; 390 xx: 391 while(n) { 392 n = n*10; 393 c = n>>b; 394 n -= c<<b; 395 *p++ = c + '0'; 396 (*na)++; 397 } 398 *p = 0; 399 } 400 401 static void 402 divby(char *a, int *na, int b) 403 { 404 while(b > 9){ 405 _divby(a, na, 9); 406 a[*na] = 0; 407 b -= 9; 408 } 409 if(b > 0) 410 _divby(a, na, b); 411 } 412 413 static Tab tab1[] = 414 { 415 1, 0, "", 416 3, 1, "7", 417 6, 2, "63", 418 9, 3, "511", 419 13, 4, "8191", 420 16, 5, "65535", 421 19, 6, "524287", 422 23, 7, "8388607", 423 26, 8, "67108863", 424 27, 9, "134217727", 425 }; 426 427 static void 428 divascii(char *a, int *na, int *dp, int *bp) 429 { 430 int b, d; 431 Tab *t; 432 433 d = *dp; 434 if(d >= nelem(tab1)) 435 d = nelem(tab1)-1; 436 t = tab1 + d; 437 b = t->bp; 438 if(memcmp(a, t->cmp, t->siz) > 0) 439 d--; 440 *dp -= d; 441 *bp += b; 442 divby(a, na, b); 443 } 444 445 static void 446 mulby(char *a, char *p, char *q, int b) 447 { 448 int n, c; 449 450 n = 0; 451 *p = 0; 452 for(;;) { 453 q--; 454 if(q < a) 455 break; 456 c = *q - '0'; 457 c = (c<<b) + n; 458 n = c/10; 459 c -= n*10; 460 p--; 461 *p = c + '0'; 462 } 463 while(n) { 464 c = n; 465 n = c/10; 466 c -= n*10; 467 p--; 468 *p = c + '0'; 469 } 470 } 471 472 static Tab tab2[] = 473 { 474 1, 1, "", // dp = 0-0 475 3, 3, "125", 476 6, 5, "15625", 477 9, 7, "1953125", 478 13, 10, "1220703125", 479 16, 12, "152587890625", 480 19, 14, "19073486328125", 481 23, 17, "11920928955078125", 482 26, 19, "1490116119384765625", 483 27, 19, "7450580596923828125", // dp 8-9 484 }; 485 486 static void 487 mulascii(char *a, int *na, int *dp, int *bp) 488 { 489 char *p; 490 int d, b; 491 Tab *t; 492 493 d = -*dp; 494 if(d >= nelem(tab2)) 495 d = nelem(tab2)-1; 496 t = tab2 + d; 497 b = t->bp; 498 if(memcmp(a, t->cmp, t->siz) < 0) 499 d--; 500 p = a + *na; 501 *bp -= b; 502 *dp += d; 503 *na += d; 504 mulby(a, p+d, p, b); 505 } 506 507 static int 508 xcmp(char *a, char *b) 509 { 510 int c1, c2; 511 512 while(c1 = *b++) { 513 c2 = *a++; 514 if(isupper(c2)) 515 c2 = tolower(c2); 516 if(c1 != c2) 517 return 1; 518 } 519 return 0; 520 } 521