1 /* msexceltables.c Steve Simon 5-Jan-2005 */ 2 #include <u.h> 3 #include <libc.h> 4 #include <bio.h> 5 #include <ctype.h> 6 7 enum { 8 Tillegal = 0, 9 Tnumber, // cell types 10 Tlabel, 11 Tindex, 12 Tbool, 13 Terror, 14 15 Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode 16 17 Nwidths = 4096, 18 }; 19 20 21 typedef struct Biff Biff; 22 typedef struct Col Col; 23 typedef struct Row Row; 24 25 struct Row { 26 Row *next; // next row 27 int r; // row number 28 Col *col; // list of cols in row 29 }; 30 31 struct Col { 32 Col *next; // next col in row 33 int c; // col number 34 int f; // index into formating table (Xf) 35 int type; // type of value for union below 36 union { // value 37 int index; // index into string table (Strtab) 38 int error; 39 int bool; 40 char *label; 41 double number; 42 }; 43 }; 44 45 struct Biff { 46 Biobuf *bp; // input file 47 int op; // current record type 48 int len; // length of current record 49 }; 50 51 // options 52 static int Nopad = 0; // disable padding cells to colum width 53 static int Trunc = 0; // truncate cells to colum width 54 static int All = 0; // dump all sheet types, Worksheets only by default 55 static char *Delim = " "; // field delimiter 56 static int Debug = 0; 57 58 // file scope 59 static int Defwidth = 10; // default colum width if non given 60 static int Biffver; // file vesion 61 static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1 62 static char **Strtab = nil; // label contents heap 63 static int Nstrtab = 0; // # of above 64 static int *Xf; // array of extended format indices 65 static int Nxf = 0; // # of above 66 static Biobuf *bo; // stdout (sic) 67 68 // table scope 69 static int Width[Nwidths]; // array of colum widths 70 static int Ncols = -1; // max colums in table used 71 static int Content = 0; // type code for contents of sheet 72 static Row *Root = nil; // one worksheet's worth of cells 73 74 static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", 75 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; 76 77 static char *Errmsgs[] = { 78 [0x0] "#NULL!", // intersection of two cell ranges is empty 79 [0x7] "#DIV/0!", // division by zero 80 [0xf] "#VALUE!", // wrong type of operand 81 [0x17] "#REF!", // illegal or deleted cell reference 82 [0x1d] "#NAME?", // wrong function or range name 83 [0x24] "#NUM!", // value range overflow 84 [0x2a] "#N/A!", // argument of function not available 85 }; 86 87 88 void 89 cell(int r, int c, int f, int type, void *val) 90 { 91 Row *row, *nrow; 92 Col *col, *ncol; 93 94 if (c > Ncols) 95 Ncols = c; 96 97 if ((ncol = malloc(sizeof(Col))) == nil) 98 sysfatal("no memory\n"); 99 ncol->c = c; 100 ncol->f = f; 101 ncol->type = type; 102 ncol->next = nil; 103 104 switch(type){ 105 case Tnumber: ncol->number = *(double *)val; break; 106 case Tlabel: ncol->label = (char *)val; break; 107 case Tindex: ncol->index = *(int *)val; break; 108 case Tbool: ncol->bool = *(int *)val; break; 109 case Terror: ncol->error = *(int *)val; break; 110 default: sysfatal("can't happen error\n"); 111 } 112 113 if (Root == nil || Root->r > r){ 114 if ((nrow = malloc(sizeof(Row))) == nil) 115 sysfatal("no memory\n"); 116 nrow->col = ncol; 117 ncol->next = nil; 118 nrow->r = r; 119 nrow->next = Root; 120 Root = nrow; 121 return; 122 } 123 124 for (row = Root; row; row = row->next){ 125 if (row->r == r){ 126 if (row->col->c > c){ 127 ncol->next = row->col; 128 row->col = ncol; 129 return; 130 } 131 else{ 132 for (col = row->col; col; col = col->next) 133 if (col->next == nil || col->next->c > c){ 134 ncol->next = col->next; 135 col->next = ncol; 136 return; 137 } 138 } 139 } 140 141 if (row->next == nil || row->next->r > r){ 142 if ((nrow = malloc(sizeof(Row))) == nil) 143 sysfatal("no memory\n"); 144 nrow->col = ncol; 145 nrow->r = r; 146 nrow->next = row->next; 147 row->next = nrow; 148 return; 149 } 150 } 151 sysfatal("cannot happen error\n"); 152 } 153 154 struct Tm * 155 bifftime(double num) 156 { 157 long long t = num; 158 159 /* Beware - These epochs are wrong, this 160 * is due to Excel still remaining compatible 161 * with Lotus-123, which incorrectly believed 1900 162 * was a leap year 163 */ 164 if (Datemode) 165 t -= 24107; // epoch = 1/1/1904 166 else 167 t -= 25569; // epoch = 31/12/1899 168 t *= 60*60*24; 169 170 return localtime((long)t); 171 } 172 173 void 174 numfmt(int fmt, int min, int max, double num) 175 { 176 char buf[1024]; 177 struct Tm *tm; 178 179 if (fmt == 9) 180 snprint(buf, sizeof(buf),"%.0f%%", num); 181 else 182 if (fmt == 10) 183 snprint(buf, sizeof(buf),"%f%%", num); 184 else 185 if (fmt == 11 || fmt == 48) 186 snprint(buf, sizeof(buf),"%e", num); 187 else 188 if (fmt >= 14 && fmt <= 17){ 189 tm = bifftime(num); 190 snprint(buf, sizeof(buf),"%d-%s-%d", 191 tm->mday, Months[tm->mon], tm->year+1900); 192 } 193 else 194 if ((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){ 195 tm = bifftime(num); 196 snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec); 197 198 } 199 else 200 if (fmt == 22){ 201 tm = bifftime(num); 202 snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d", 203 tm->hour, tm->min, tm->sec, 204 tm->mday, Months[tm->mon], tm->year+1900); 205 206 }else 207 snprint(buf, sizeof(buf),"%g", num); 208 209 Bprint(bo, "%-*.*q", min, max, buf); 210 } 211 212 void 213 dump(void) 214 { 215 Row *r; 216 Col *c; 217 int i, min, max; 218 219 for (r = Root; r; r = r->next){ 220 for (c = r->col; c; c = c->next){ 221 if (c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0) 222 min = Defwidth; 223 if ((c->next && c->c == c->next->c) || Nopad) 224 min = 0; 225 max = -1; 226 if (Trunc && min > 2) 227 max = min -2; // FIXME: -2 because of bug %q format ? 228 229 switch(c->type){ 230 case Tnumber: 231 if (Xf[c->f] == 0) 232 Bprint(bo, "%-*.*g", min, max, c->number); 233 else 234 numfmt(Xf[c->f], min, max, c->number); 235 break; 236 case Tlabel: 237 Bprint(bo, "%-*.*q", min, max, c->label); 238 break; 239 case Tbool: 240 Bprint(bo, "%-*.*s", min, max, (c->bool)? "True": "False"); 241 break; 242 case Tindex: 243 if (c->index < 0 || c->index >= Nstrtab) 244 sysfatal("SST string out of range - corrupt file?\n"); 245 Bprint(bo, "%-*.*q", min, max, Strtab[c->index]); 246 break; 247 case Terror: 248 if (c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error]) 249 Bprint(bo, "#ERR=%d", c->index); 250 else 251 Bprint(bo, "%-*.*q", min, max, Errmsgs[c->error]); 252 break; 253 default: 254 sysfatal("cannot happen error\n"); 255 break; 256 } 257 258 if (c->next){ 259 if (c->next->c == c->c) // bar charts 260 Bprint(bo, "="); 261 else{ 262 Bprint(bo, "%s", Delim); 263 for (i = c->c; c->next && i < c->next->c -1; i++) 264 Bprint(bo, "%-*.*s%s", min, max, "", Delim); 265 } 266 } 267 } 268 if (r->next) 269 for (i = r->r; i < r->next->r; i++) 270 Bprint(bo, "\n"); 271 272 } 273 Bprint(bo, "\n"); 274 } 275 276 void 277 release(void) 278 { 279 Row *r, *or; 280 Col *c, *oc; 281 282 r = Root; 283 while(r){ 284 c = r->col; 285 while(c){ 286 if (c->type == Tlabel) 287 free(c->label); 288 oc = c; 289 c = c->next; 290 free(oc); 291 } 292 or = r; 293 r = r->next; 294 free(or); 295 } 296 Root = nil; 297 298 memset(Width, 0, sizeof(Width)); 299 Ncols = -1; 300 } 301 302 void 303 skip(Biff *b, int len) 304 { 305 assert(len <= b->len); 306 if (Bseek(b->bp, len, 1) == -1) 307 sysfatal("seek failed - %r\n"); 308 b->len -= len; 309 } 310 311 void 312 gmem(Biff *b, void *p, int n) 313 { 314 if (b->len < n) 315 sysfatal("short record %d < %d\n", b->len, n); 316 if (Bread(b->bp, p, n) != n) 317 sysfatal("unexpected EOF - %r\n"); 318 b->len -= n; 319 } 320 321 void 322 xd(Biff *b) 323 { 324 uvlong off; 325 uchar buf[16]; 326 int addr, got, n, i, j; 327 328 addr = 0; 329 off = Boffset(b->bp); 330 while (addr < b->len){ 331 n = (b->len >= sizeof(buf))? sizeof(buf): b->len; 332 got = Bread(b->bp, buf, n); 333 334 Bprint(bo, " %6d ", addr); 335 addr += n; 336 337 for (i = 0; i < got; i++) 338 Bprint(bo, "%02x ", buf[i]); 339 for (j = i; j < 16; j++) 340 Bprint(bo, " "); 341 Bprint(bo, " "); 342 for (i = 0; i < got; i++) 343 Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.'); 344 Bprint(bo, "\n"); 345 } 346 Bseek(b->bp, off, 0); 347 } 348 349 static int 350 getrec(Biff *b) 351 { 352 int c; 353 if ((c = Bgetc(b->bp)) == -1) 354 return -1; // real EOF 355 b->op = c; 356 if ((c = Bgetc(b->bp)) == -1) 357 sysfatal("unexpected EOF - %r\n"); 358 b->op |= c << 8; 359 if ((c = Bgetc(b->bp)) == -1) 360 sysfatal("unexpected EOF - %r\n"); 361 b->len = c; 362 if ((c = Bgetc(b->bp)) == -1) 363 sysfatal("unexpected EOF - %r\n"); 364 b->len |= c << 8; 365 if (b->op == 0 && b->len == 0) 366 return -1; 367 if (Debug){ 368 Bprint(bo, "op=0x%x len=%d\n", b->op, b->len); 369 xd(b); 370 } 371 return 0; 372 } 373 374 static uvlong 375 gint(Biff *b, int n) 376 { 377 int i, c; 378 uvlong vl, rc; 379 380 if (b->len < n) 381 return -1; 382 rc = 0; 383 for (i = 0; i < n; i++){ 384 if ((c = Bgetc(b->bp)) == -1) 385 sysfatal("unexpected EOF - %r\n"); 386 b->len--; 387 vl = c; 388 rc |= vl << (8*i); 389 } 390 return rc; 391 } 392 393 double 394 grk(Biff *b) 395 { 396 int f; 397 uvlong n; 398 double d; 399 400 n = gint(b, 4); 401 f = n & 3; 402 n &= ~3LL; 403 if (f & 2){ 404 d = n / 4.0; 405 } 406 else{ 407 n <<= 32; 408 memcpy(&d, &n, sizeof(d)); 409 } 410 411 if (f & 1) 412 d /= 100.0; 413 return d; 414 } 415 416 double 417 gdoub(Biff *b) 418 { 419 double d; 420 uvlong n = gint(b, 8); 421 memcpy(&d, &n, sizeof(n)); 422 return d; 423 } 424 425 char * 426 gstr(Biff *b, int len_width) 427 { 428 Rune r; 429 char *buf, *p; 430 int nch, w, ap, ln, rt, opt; 431 enum { 432 Unicode = 1, 433 Asian_phonetic = 4, 434 Rich_text = 8, 435 }; 436 437 if (b->len < len_width){ 438 if (getrec(b) == -1) 439 sysfatal("starting STRING expected CONTINUE, got EOF\n"); 440 if (b->op != 0x03c) 441 sysfatal("starting STRING expected CONTINUE, got op=0x%x\n", b->op); 442 } 443 444 ln = gint(b, len_width); 445 if (Biffver != Ver8){ 446 if ((buf = calloc(ln+1, sizeof(char))) == nil) 447 sysfatal("no memory\n"); 448 gmem(b, buf, ln); 449 return buf; 450 } 451 452 453 if ((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil) 454 sysfatal("no memory\n"); 455 p = buf; 456 457 if (ln == 0) 458 return buf; 459 nch = 0; 460 *buf = 0; 461 opt = gint(b, 1); 462 if(opt & Rich_text) 463 rt = gint(b, 2); 464 else 465 rt = 0; 466 if(opt & Asian_phonetic) 467 ap = gint(b, 4); 468 else 469 ap = 0; 470 for(;;){ 471 w = (opt & Unicode)? sizeof(Rune): sizeof(char); 472 473 while(b->len > 0){ 474 r = gint(b, w); 475 p += runetochar(p, &r); 476 if (++nch >= ln){ 477 if (rt) 478 skip(b, rt*4); 479 if (ap) 480 skip(b, ap); 481 return buf; 482 } 483 } 484 if (getrec(b) == -1) 485 sysfatal("in STRING expected CONTINUE, got EOF\n"); 486 if (b->op != 0x03c) 487 sysfatal("in STRING expected CONTINUE, got op=0x%x\n", b->op); 488 opt = gint(b, 1); 489 } 490 } 491 492 void 493 sst(Biff *b) 494 { 495 int n; 496 497 skip(b, 4); // total # strings 498 Nstrtab = gint(b, 4); // # unique strings 499 if ((Strtab = calloc(Nstrtab, sizeof(char *))) == nil) 500 sysfatal("no memory\n"); 501 for (n = 0; n < Nstrtab; n++) 502 Strtab[n] = gstr(b, 2); 503 504 } 505 506 void 507 boolerr(Biff *b) 508 { 509 int r = gint(b, 2); // row 510 int c = gint(b, 2); // col 511 int f = gint(b, 2); // formatting ref 512 int v = gint(b, 1); // bool value / err code 513 int t = gint(b, 1); // type 514 cell(r, c, f, (t)? Terror: Tbool, &v); 515 } 516 517 void 518 rk(Biff *b) 519 { 520 int r = gint(b, 2); // row 521 int c = gint(b, 2); // col 522 int f = gint(b, 2); // formatting ref 523 double v = grk(b); // value 524 cell(r, c, f, Tnumber, &v); 525 } 526 527 void 528 mulrk(Biff *b) 529 { 530 int r = gint(b, 2); // row 531 int c = gint(b, 2); // first col 532 while (b->len >= 6){ 533 int f = gint(b, 2); // formatting ref 534 double v = grk(b); // value 535 cell(r, c++, f, Tnumber, &v); 536 } 537 } 538 539 void 540 number(Biff *b) 541 { 542 int r = gint(b, 2); // row 543 int c = gint(b, 2); // col 544 int f = gint(b, 2); // formatting ref 545 double v = gdoub(b); // double 546 cell(r, c, f, Tnumber, &v); 547 } 548 549 void 550 label(Biff *b) 551 { 552 int r = gint(b, 2); // row 553 int c = gint(b, 2); // col 554 int f = gint(b, 2); // formatting ref 555 char *s = gstr(b, 2); // byte string 556 cell(r, c, f, Tlabel, s); 557 } 558 559 560 void 561 labelsst(Biff *b) 562 { 563 int r = gint(b, 2); // row 564 int c = gint(b, 2); // col 565 int f = gint(b, 2); // formatting ref 566 int i = gint(b, 2); // sst string ref 567 cell(r, c, f, Tindex, &i); 568 } 569 570 void 571 bof(Biff *b) 572 { 573 Biffver = gint(b, 2); 574 Content = gint(b, 2); 575 } 576 577 void 578 defcolwidth(Biff *b) 579 { 580 Defwidth = gint(b, 2); 581 } 582 583 void 584 datemode(Biff *b) 585 { 586 Datemode = gint(b, 2); 587 } 588 589 void 590 eof(Biff *b) 591 { 592 int i; 593 struct { 594 int n; 595 char *s; 596 } names[] = { 597 0x005, "Workbook globals", 598 0x006, "Visual Basic module", 599 0x010, "Worksheet", 600 0x020, "Chart", 601 0x040, "Macro sheet", 602 0x100, "Workspace file", 603 }; 604 605 if (Ncols != -1){ 606 if (All){ 607 for (i = 0; i < nelem(names); i++) 608 if (names[i].n == Content){ 609 Bprint(bo, "\n# contents %s\n", names[i].s); 610 dump(); 611 } 612 } 613 else 614 if (Content == 0x10) 615 dump(); 616 } 617 release(); 618 USED(b); 619 } 620 621 void 622 colinfo(Biff *b) 623 { 624 int c; 625 int c1 = gint(b, 2); 626 int c2 = gint(b, 2); 627 int w = gint(b, 2); 628 629 if (c1 < 0) 630 sysfatal("negative column number (%d)\n", c1); 631 if (c2 >= Nwidths) 632 sysfatal("too many columns (%d > %d)\n", c2, Nwidths); 633 w /= 256; 634 635 if (w > 100) 636 w = 100; 637 if (w < 0) 638 w = 0; 639 640 for (c = c1; c <= c2; c++) 641 Width[c] = w; 642 } 643 644 void 645 xf(Biff *b) 646 { 647 int fmt; 648 static int nalloc = 0; 649 650 skip(b, 2); 651 fmt = gint(b, 2); 652 if (nalloc >= Nxf){ 653 nalloc += 20; 654 if ((Xf = realloc(Xf, nalloc*sizeof(int))) == nil) 655 sysfatal("no memory\n"); 656 } 657 Xf[Nxf++] = fmt; 658 } 659 660 void 661 writeaccess(Biff *b) 662 { 663 Bprint(bo, "# author %s\n", gstr(b, 2)); 664 } 665 666 void 667 codepage(Biff *b) 668 { 669 int codepage = gint(b, 2); 670 if (codepage != 1200) // 1200 == UTF-16 671 Bprint(bo, "# codepage %d\n", codepage); 672 } 673 674 void 675 xls2csv(Biobuf *bp) 676 { 677 int i; 678 Biff biff, *b; 679 struct { 680 int op; 681 void (*func)(Biff *); 682 } dispatch[] = { 683 0x000a, eof, 684 0x0022, datemode, 685 0x0042, codepage, 686 0x0055, defcolwidth, 687 0x005c, writeaccess, 688 0x007d, colinfo, 689 0x00bd, mulrk, 690 0x00fc, sst, 691 0x00fd, labelsst, 692 0x0203, number, 693 0x0204, label, 694 0x0205, boolerr, 695 0x027e, rk, 696 0x0809, bof, 697 0x00e0, xf, 698 }; 699 700 b = &biff; 701 b->bp = bp; 702 while(getrec(b) != -1){ 703 for (i = 0; i < nelem(dispatch); i++) 704 if (b->op == dispatch[i].op) 705 (*dispatch[i].func)(b); 706 skip(b, b->len); 707 } 708 } 709 710 void 711 usage(void) 712 { 713 fprint(2, "usage: %s [-aDnt] [-d delim] /mnt/doc/Workbook\n", argv0); 714 exits("usage"); 715 } 716 717 void 718 main(int argc, char *argv[]) 719 { 720 int i; 721 Biobuf bin, bout, *bp; 722 723 ARGBEGIN{ 724 case 'n': 725 Nopad = 1; 726 break; 727 case 't': 728 Trunc = 1; 729 break; 730 case 'a': 731 All = 1; 732 break; 733 case 'd': 734 Delim = EARGF(usage()); 735 break; 736 case 'D': 737 Debug = 1; 738 break; 739 default: 740 usage(); 741 break; 742 }ARGEND; 743 744 if (argc != 1) 745 usage(); 746 747 bo = &bout; 748 quotefmtinstall(); 749 Binit(bo, OWRITE, 1); 750 751 if(argc > 0) { 752 for(i = 0; i < argc; i++){ 753 if ((bp = Bopen(argv[i], OREAD)) == nil) 754 sysfatal("%s cannot open - %r\n", argv[i]); 755 xls2csv(bp); 756 Bterm(bp); 757 } 758 } else { 759 Binit(&bin, 0, OREAD); 760 xls2csv(&bin); 761 } 762 exits(0); 763 } 764 765