1 /* msexceltables.c Steve Simon 5-Jan-2005 */ 2 #include <u.h> 3 #include <libc.h> 4 #include <bio.h> 5 #include <ctype.h> 6 7 enum { 8 Tillegal = 0, 9 Tnumber, // cell types 10 Tlabel, 11 Tindex, 12 Tbool, 13 Terror, 14 15 Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode 16 17 Nwidths = 4096, 18 }; 19 20 21 typedef struct Biff Biff; 22 typedef struct Col Col; 23 typedef struct Row Row; 24 25 struct Row { 26 Row *next; // next row 27 int r; // row number 28 Col *col; // list of cols in row 29 }; 30 31 struct Col { 32 Col *next; // next col in row 33 int c; // col number 34 int f; // index into formating table (Xf) 35 int type; // type of value for union below 36 union { // value 37 int index; // index into string table (Strtab) 38 int error; 39 int bool; 40 char *label; 41 double number; 42 }; 43 }; 44 45 struct Biff { 46 Biobuf *bp; // input file 47 int op; // current record type 48 int len; // length of current record 49 }; 50 51 // options 52 static int Nopad = 0; // disable padding cells to colum width 53 static int Trunc = 0; // truncate cells to colum width 54 static int All = 0; // dump all sheet types, Worksheets only by default 55 static char *Delim = " "; // field delimiter 56 static char *Sheetrange = nil; // range of sheets wanted 57 static char *Columnrange = nil; // range of collums wanted 58 static int Debug = 0; 59 60 // file scope 61 static int Defwidth = 10; // default colum width if non given 62 static int Biffver; // file vesion 63 static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1 64 static char **Strtab = nil; // label contents heap 65 static int Nstrtab = 0; // # of above 66 static int *Xf; // array of extended format indices 67 static int Nxf = 0; // # of above 68 static Biobuf *bo; // stdout (sic) 69 static int Doquote = 1; // quote text fields if they are rc(1) unfriendly 70 71 // table scope 72 static int Width[Nwidths]; // array of colum widths 73 static int Ncols = -1; // max colums in table used 74 static int Content = 0; // type code for contents of sheet 75 static Row *Root = nil; // one worksheet's worth of cells 76 77 static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", 78 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; 79 80 static char *Errmsgs[] = { 81 [0x0] "#NULL!", // intersection of two cell ranges is empty 82 [0x7] "#DIV/0!", // division by zero 83 [0xf] "#VALUE!", // wrong type of operand 84 [0x17] "#REF!", // illegal or deleted cell reference 85 [0x1d] "#NAME?", // wrong function or range name 86 [0x24] "#NUM!", // value range overflow 87 [0x2a] "#N/A!", // argument of function not available 88 }; 89 90 int 91 wanted(char *range, int here) 92 { 93 int n, s; 94 char *p; 95 96 if (! range) 97 return 1; 98 99 s = -1; 100 p = range; 101 while(1){ 102 n = strtol(p, &p, 10); 103 switch(*p){ 104 case 0: 105 if(n == here) 106 return 1; 107 if(s != -1 && here > s && here < n) 108 return 1; 109 return 0; 110 case ',': 111 if(n == here) 112 return 1; 113 if(s != -1 && here > s && here < n) 114 return 1; 115 s = -1; 116 p++; 117 break; 118 case '-': 119 if(n == here) 120 return 1; 121 s = n; 122 p++; 123 break; 124 default: 125 sysfatal("%s malformed range spec", range); 126 break; 127 } 128 } 129 } 130 131 132 void 133 cell(int r, int c, int f, int type, void *val) 134 { 135 Row *row, *nrow; 136 Col *col, *ncol; 137 138 if(c > Ncols) 139 Ncols = c; 140 141 if((ncol = malloc(sizeof(Col))) == nil) 142 sysfatal("no memory"); 143 ncol->c = c; 144 ncol->f = f; 145 ncol->type = type; 146 ncol->next = nil; 147 148 switch(type){ 149 case Tnumber: ncol->number = *(double *)val; break; 150 case Tlabel: ncol->label = (char *)val; break; 151 case Tindex: ncol->index = *(int *)val; break; 152 case Tbool: ncol->bool = *(int *)val; break; 153 case Terror: ncol->error = *(int *)val; break; 154 default: sysfatal("can't happen error"); 155 } 156 157 if(Root == nil || Root->r > r){ 158 if((nrow = malloc(sizeof(Row))) == nil) 159 sysfatal("no memory"); 160 nrow->col = ncol; 161 ncol->next = nil; 162 nrow->r = r; 163 nrow->next = Root; 164 Root = nrow; 165 return; 166 } 167 168 for(row = Root; row; row = row->next){ 169 if(row->r == r){ 170 if(row->col->c > c){ 171 ncol->next = row->col; 172 row->col = ncol; 173 return; 174 } 175 else{ 176 for(col = row->col; col; col = col->next) 177 if(col->next == nil || col->next->c > c){ 178 ncol->next = col->next; 179 col->next = ncol; 180 return; 181 } 182 } 183 } 184 185 if(row->next == nil || row->next->r > r){ 186 if((nrow = malloc(sizeof(Row))) == nil) 187 sysfatal("no memory"); 188 nrow->col = ncol; 189 nrow->r = r; 190 nrow->next = row->next; 191 row->next = nrow; 192 return; 193 } 194 } 195 sysfatal("cannot happen error"); 196 } 197 198 struct Tm * 199 bifftime(double t) 200 { 201 202 /* Beware - These epochs are wrong, this 203 * is due to Excel still remaining compatible 204 * with Lotus-123, which incorrectly believed 1900 205 * was a leap year 206 */ 207 if(Datemode) 208 t -= 24107; // epoch = 1/1/1904 209 else 210 t -= 25569; // epoch = 31/12/1899 211 t *= 60*60*24; 212 213 return localtime((long)t); 214 } 215 216 void 217 numfmt(int fmt, int min, int max, double num) 218 { 219 char buf[1024]; 220 struct Tm *tm; 221 222 if(fmt == 9) 223 snprint(buf, sizeof(buf),"%.0f%%", num); 224 else 225 if(fmt == 10) 226 snprint(buf, sizeof(buf),"%f%%", num); 227 else 228 if(fmt == 11 || fmt == 48) 229 snprint(buf, sizeof(buf),"%e", num); 230 else 231 if(fmt >= 14 && fmt <= 17){ 232 tm = bifftime(num); 233 snprint(buf, sizeof(buf),"%d-%s-%d", 234 tm->mday, Months[tm->mon], tm->year+1900); 235 } 236 else 237 if((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){ 238 tm = bifftime(num); 239 snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec); 240 241 } 242 else 243 if(fmt == 22){ 244 tm = bifftime(num); 245 snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d", 246 tm->hour, tm->min, tm->sec, 247 tm->mday, Months[tm->mon], tm->year+1900); 248 249 }else 250 snprint(buf, sizeof(buf),"%g", num); 251 252 Bprint(bo, "%-*.*q", min, max, buf); 253 } 254 255 void 256 dump(void) 257 { 258 Row *r; 259 Col *c, *c1; 260 char *strfmt; 261 int i, n, last, min, max; 262 263 if(Doquote) 264 strfmt = "%-*.*q"; 265 else 266 strfmt = "%-*.*s"; 267 268 for(r = Root; r; r = r->next){ 269 n = 1; 270 for(c = r->col; c; c = c->next){ 271 n++; 272 if(! wanted(Columnrange, n)) 273 continue; 274 275 if(c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0) 276 min = Defwidth; 277 if((c->next && c->c == c->next->c) || Nopad) 278 min = 0; 279 max = -1; 280 if(Trunc && min > 2) 281 max = min -2; // FIXME: -2 because of bug %q format ? 282 283 switch(c->type){ 284 case Tnumber: 285 if(Xf == nil || Xf[c->f] == 0) 286 Bprint(bo, "%-*.*g", min, max, c->number); 287 else 288 numfmt(Xf[c->f], min, max, c->number); 289 break; 290 case Tlabel: 291 Bprint(bo, strfmt, min, max, c->label); 292 break; 293 case Tbool: 294 Bprint(bo, strfmt, min, max, (c->bool)? "True": "False"); 295 break; 296 case Tindex: 297 if(c->index < 0 || c->index >= Nstrtab) 298 sysfatal("SST string out of range - corrupt file?"); 299 Bprint(bo, strfmt, min, max, Strtab[c->index]); 300 break; 301 case Terror: 302 if(c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error]) 303 Bprint(bo, "#ERR=%d", c->index); 304 else 305 Bprint(bo, strfmt, min, max, Errmsgs[c->error]); 306 break; 307 default: 308 sysfatal("cannot happen error"); 309 break; 310 } 311 312 last = 1; 313 for(i = n+1, c1 = c->next; c1; c1 = c1->next, i++) 314 if(wanted(Columnrange, i)){ 315 last = 0; 316 break; 317 } 318 319 if(! last){ 320 if(c->next->c == c->c) // bar charts 321 Bprint(bo, "="); 322 else{ 323 Bprint(bo, "%s", Delim); 324 for(i = c->c; c->next && i < c->next->c -1; i++) 325 Bprint(bo, "%-*.*s%s", min, max, "", Delim); 326 } 327 } 328 } 329 if(r->next) 330 for(i = r->r; i < r->next->r; i++) 331 Bprint(bo, "\n"); 332 333 } 334 Bprint(bo, "\n"); 335 } 336 337 void 338 release(void) 339 { 340 Row *r, *or; 341 Col *c, *oc; 342 343 r = Root; 344 while(r){ 345 c = r->col; 346 while(c){ 347 if(c->type == Tlabel) 348 free(c->label); 349 oc = c; 350 c = c->next; 351 free(oc); 352 } 353 or = r; 354 r = r->next; 355 free(or); 356 } 357 Root = nil; 358 359 memset(Width, 0, sizeof(Width)); 360 Ncols = -1; 361 } 362 363 void 364 skip(Biff *b, int len) 365 { 366 assert(len <= b->len); 367 if(Bseek(b->bp, len, 1) == -1) 368 sysfatal("seek failed - %r"); 369 b->len -= len; 370 } 371 372 void 373 gmem(Biff *b, void *p, int n) 374 { 375 if(b->len < n) 376 sysfatal("short record %d < %d", b->len, n); 377 if(Bread(b->bp, p, n) != n) 378 sysfatal("unexpected EOF - %r"); 379 b->len -= n; 380 } 381 382 void 383 xd(Biff *b) 384 { 385 uvlong off; 386 uchar buf[16]; 387 int addr, got, n, i, j; 388 389 addr = 0; 390 off = Boffset(b->bp); 391 while(addr < b->len){ 392 n = (b->len >= sizeof(buf))? sizeof(buf): b->len; 393 got = Bread(b->bp, buf, n); 394 395 Bprint(bo, " %6d ", addr); 396 addr += n; 397 398 for(i = 0; i < got; i++) 399 Bprint(bo, "%02x ", buf[i]); 400 for(j = i; j < 16; j++) 401 Bprint(bo, " "); 402 Bprint(bo, " "); 403 for(i = 0; i < got; i++) 404 Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.'); 405 Bprint(bo, "\n"); 406 } 407 Bseek(b->bp, off, 0); 408 } 409 410 static int 411 getrec(Biff *b) 412 { 413 int c; 414 if((c = Bgetc(b->bp)) == -1) 415 return -1; // real EOF 416 b->op = c; 417 if((c = Bgetc(b->bp)) == -1) 418 sysfatal("unexpected EOF - %r"); 419 b->op |= c << 8; 420 if((c = Bgetc(b->bp)) == -1) 421 sysfatal("unexpected EOF - %r"); 422 b->len = c; 423 if((c = Bgetc(b->bp)) == -1) 424 sysfatal("unexpected EOF - %r"); 425 b->len |= c << 8; 426 if(b->op == 0 && b->len == 0) 427 return -1; 428 if(Debug){ 429 Bprint(bo, "op=0x%x len=%d\n", b->op, b->len); 430 xd(b); 431 } 432 return 0; 433 } 434 435 static uvlong 436 gint(Biff *b, int n) 437 { 438 int i, c; 439 uvlong vl, rc; 440 441 if(b->len < n) 442 return -1; 443 rc = 0; 444 for(i = 0; i < n; i++){ 445 if((c = Bgetc(b->bp)) == -1) 446 sysfatal("unexpected EOF - %r"); 447 b->len--; 448 vl = c; 449 rc |= vl << (8*i); 450 } 451 return rc; 452 } 453 454 double 455 grk(Biff *b) 456 { 457 int f; 458 uvlong n; 459 double d; 460 461 n = gint(b, 4); 462 f = n & 3; 463 n &= ~3LL; 464 if(f & 2){ 465 d = n / 4.0; 466 } 467 else{ 468 n <<= 32; 469 memcpy(&d, &n, sizeof(d)); 470 } 471 472 if(f & 1) 473 d /= 100.0; 474 return d; 475 } 476 477 double 478 gdoub(Biff *b) 479 { 480 double d; 481 uvlong n = gint(b, 8); 482 memcpy(&d, &n, sizeof(n)); 483 return d; 484 } 485 486 char * 487 gstr(Biff *b, int len_width) 488 { 489 Rune r; 490 char *buf, *p; 491 int nch, w, ap, ln, rt, opt; 492 enum { 493 Unicode = 1, 494 Asian_phonetic = 4, 495 Rich_text = 8, 496 }; 497 498 if(b->len < len_width){ 499 if(getrec(b) == -1) 500 sysfatal("starting STRING expected CONTINUE, got EOF"); 501 if(b->op != 0x03c) 502 sysfatal("starting STRING expected CONTINUE, got op=0x%x", b->op); 503 } 504 505 ln = gint(b, len_width); 506 if(Biffver != Ver8){ 507 if((buf = calloc(ln+1, sizeof(char))) == nil) 508 sysfatal("no memory"); 509 gmem(b, buf, ln); 510 return buf; 511 } 512 513 514 if((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil) 515 sysfatal("no memory"); 516 p = buf; 517 518 if(ln == 0) 519 return buf; 520 nch = 0; 521 *buf = 0; 522 opt = gint(b, 1); 523 if(opt & Rich_text) 524 rt = gint(b, 2); 525 else 526 rt = 0; 527 if(opt & Asian_phonetic) 528 ap = gint(b, 4); 529 else 530 ap = 0; 531 for(;;){ 532 w = (opt & Unicode)? sizeof(Rune): sizeof(char); 533 534 while(b->len > 0){ 535 r = gint(b, w); 536 p += runetochar(p, &r); 537 if(++nch >= ln){ 538 if(rt) 539 skip(b, rt*4); 540 if(ap) 541 skip(b, ap); 542 return buf; 543 } 544 } 545 if(getrec(b) == -1) 546 sysfatal("in STRING expected CONTINUE, got EOF"); 547 if(b->op != 0x03c) 548 sysfatal("in STRING expected CONTINUE, got op=0x%x", b->op); 549 opt = gint(b, 1); 550 } 551 } 552 553 void 554 sst(Biff *b) 555 { 556 int n; 557 558 skip(b, 4); // total # strings 559 Nstrtab = gint(b, 4); // # unique strings 560 if((Strtab = calloc(Nstrtab, sizeof(char *))) == nil) 561 sysfatal("no memory"); 562 for(n = 0; n < Nstrtab; n++) 563 Strtab[n] = gstr(b, 2); 564 565 } 566 567 void 568 boolerr(Biff *b) 569 { 570 int r = gint(b, 2); // row 571 int c = gint(b, 2); // col 572 int f = gint(b, 2); // formatting ref 573 int v = gint(b, 1); // bool value / err code 574 int t = gint(b, 1); // type 575 cell(r, c, f, (t)? Terror: Tbool, &v); 576 } 577 578 void 579 rk(Biff *b) 580 { 581 int r = gint(b, 2); // row 582 int c = gint(b, 2); // col 583 int f = gint(b, 2); // formatting ref 584 double v = grk(b); // value 585 cell(r, c, f, Tnumber, &v); 586 } 587 588 void 589 mulrk(Biff *b) 590 { 591 int r = gint(b, 2); // row 592 int c = gint(b, 2); // first col 593 while(b->len >= 6){ 594 int f = gint(b, 2); // formatting ref 595 double v = grk(b); // value 596 cell(r, c++, f, Tnumber, &v); 597 } 598 } 599 600 void 601 number(Biff *b) 602 { 603 int r = gint(b, 2); // row 604 int c = gint(b, 2); // col 605 int f = gint(b, 2); // formatting ref 606 double v = gdoub(b); // double 607 cell(r, c, f, Tnumber, &v); 608 } 609 610 void 611 label(Biff *b) 612 { 613 int r = gint(b, 2); // row 614 int c = gint(b, 2); // col 615 int f = gint(b, 2); // formatting ref 616 char *s = gstr(b, 2); // byte string 617 cell(r, c, f, Tlabel, s); 618 } 619 620 621 void 622 labelsst(Biff *b) 623 { 624 int r = gint(b, 2); // row 625 int c = gint(b, 2); // col 626 int f = gint(b, 2); // formatting ref 627 int i = gint(b, 2); // sst string ref 628 cell(r, c, f, Tindex, &i); 629 } 630 631 void 632 bof(Biff *b) 633 { 634 Biffver = gint(b, 2); 635 Content = gint(b, 2); 636 } 637 638 void 639 defcolwidth(Biff *b) 640 { 641 Defwidth = gint(b, 2); 642 } 643 644 void 645 datemode(Biff *b) 646 { 647 Datemode = gint(b, 2); 648 } 649 650 void 651 eof(Biff *b) 652 { 653 int i; 654 struct { 655 int n; 656 char *s; 657 } names[] = { 658 0x005, "Workbook globals", 659 0x006, "Visual Basic module", 660 0x010, "Worksheet", 661 0x020, "Chart", 662 0x040, "Macro sheet", 663 0x100, "Workspace file", 664 }; 665 static int sheet = 0; 666 667 if(! wanted(Sheetrange, ++sheet)){ 668 release(); 669 return; 670 } 671 672 if(Ncols != -1){ 673 if(All){ 674 for(i = 0; i < nelem(names); i++) 675 if(names[i].n == Content){ 676 Bprint(bo, "\n# contents %s\n", names[i].s); 677 dump(); 678 } 679 } 680 else 681 if(Content == 0x10) // Worksheet 682 dump(); 683 } 684 release(); 685 USED(b); 686 } 687 688 void 689 colinfo(Biff *b) 690 { 691 int c; 692 int c1 = gint(b, 2); 693 int c2 = gint(b, 2); 694 int w = gint(b, 2); 695 696 if(c1 < 0) 697 sysfatal("negative column number (%d)", c1); 698 if(c2 >= Nwidths) 699 sysfatal("too many columns (%d > %d)", c2, Nwidths); 700 w /= 256; 701 702 if(w > 100) 703 w = 100; 704 if(w < 0) 705 w = 0; 706 707 for(c = c1; c <= c2; c++) 708 Width[c] = w; 709 } 710 711 void 712 xf(Biff *b) 713 { 714 int fmt; 715 static int nalloc = 0; 716 717 skip(b, 2); 718 fmt = gint(b, 2); 719 if(nalloc >= Nxf){ 720 nalloc += 20; 721 if((Xf = realloc(Xf, nalloc*sizeof(int))) == nil) 722 sysfatal("no memory"); 723 } 724 Xf[Nxf++] = fmt; 725 } 726 727 void 728 writeaccess(Biff *b) 729 { 730 Bprint(bo, "# author %s\n", gstr(b, 2)); 731 } 732 733 void 734 codepage(Biff *b) 735 { 736 int codepage = gint(b, 2); 737 if(codepage != 1200) // 1200 == UTF-16 738 Bprint(bo, "# codepage %d\n", codepage); 739 } 740 741 void 742 xls2csv(Biobuf *bp) 743 { 744 int i; 745 Biff biff, *b; 746 struct { 747 int op; 748 void (*func)(Biff *); 749 } dispatch[] = { 750 0x000a, eof, 751 0x0022, datemode, 752 0x0042, codepage, 753 0x0055, defcolwidth, 754 0x005c, writeaccess, 755 0x007d, colinfo, 756 0x00bd, mulrk, 757 0x00fc, sst, 758 0x00fd, labelsst, 759 0x0203, number, 760 0x0204, label, 761 0x0205, boolerr, 762 0x027e, rk, 763 0x0809, bof, 764 0x00e0, xf, 765 }; 766 767 b = &biff; 768 b->bp = bp; 769 while(getrec(b) != -1){ 770 for(i = 0; i < nelem(dispatch); i++) 771 if(b->op == dispatch[i].op) 772 (*dispatch[i].func)(b); 773 skip(b, b->len); 774 } 775 } 776 777 void 778 usage(void) 779 { 780 fprint(2, "usage: %s [-Danqt] [-w worksheets] [-c columns] [-d delim] /mnt/doc/Workbook\n", argv0); 781 exits("usage"); 782 } 783 784 void 785 main(int argc, char *argv[]) 786 { 787 int i; 788 Biobuf bin, bout, *bp; 789 790 ARGBEGIN{ 791 case 'D': 792 Debug = 1; 793 break; 794 case 'a': 795 All = 1; 796 break; 797 case 'q': 798 Doquote = 0; 799 break; 800 case 'd': 801 Delim = EARGF(usage()); 802 break; 803 case 'n': 804 Nopad = 1; 805 break; 806 case 't': 807 Trunc = 1; 808 break; 809 case 'c': 810 Columnrange = EARGF(usage()); 811 break; 812 case 'w': 813 Sheetrange = EARGF(usage()); 814 break; 815 default: 816 usage(); 817 break; 818 }ARGEND; 819 820 if(argc != 1) 821 usage(); 822 823 bo = &bout; 824 quotefmtinstall(); 825 Binit(bo, OWRITE, 1); 826 827 if(argc > 0) { 828 for(i = 0; i < argc; i++){ 829 if((bp = Bopen(argv[i], OREAD)) == nil) 830 sysfatal("%s cannot open - %r", argv[i]); 831 xls2csv(bp); 832 Bterm(bp); 833 } 834 } else { 835 Binit(&bin, 0, OREAD); 836 xls2csv(&bin); 837 } 838 exits(0); 839 } 840 841