1 /* msexceltables.c Steve Simon 5-Jan-2005 */ 2 #include <u.h> 3 #include <libc.h> 4 #include <bio.h> 5 #include <ctype.h> 6 7 enum { 8 Tillegal = 0, 9 Tnumber, // cell types 10 Tlabel, 11 Tindex, 12 Tbool, 13 Terror, 14 15 Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode 16 17 Nwidths = 4096, 18 }; 19 20 21 typedef struct Biff Biff; 22 typedef struct Col Col; 23 typedef struct Row Row; 24 25 struct Row { 26 Row *next; // next row 27 int r; // row number 28 Col *col; // list of cols in row 29 }; 30 31 struct Col { 32 Col *next; // next col in row 33 int c; // col number 34 int f; // index into formating table (Xf) 35 int type; // type of value for union below 36 union { // value 37 int index; // index into string table (Strtab) 38 int error; 39 int bool; 40 char *label; 41 double number; 42 }; 43 }; 44 45 struct Biff { 46 Biobuf *bp; // input file 47 int op; // current record type 48 int len; // length of current record 49 }; 50 51 // options 52 static int Nopad = 0; // disable padding cells to colum width 53 static int Trunc = 0; // truncate cells to colum width 54 static int All = 0; // dump all sheet types, Worksheets only by default 55 static char *Delim = " "; // field delimiter 56 static char *Sheetrange = nil; // range of sheets wanted 57 static int Debug = 0; 58 59 // file scope 60 static int Defwidth = 10; // default colum width if non given 61 static int Biffver; // file vesion 62 static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1 63 static char **Strtab = nil; // label contents heap 64 static int Nstrtab = 0; // # of above 65 static int *Xf; // array of extended format indices 66 static int Nxf = 0; // # of above 67 static Biobuf *bo; // stdout (sic) 68 69 // table scope 70 static int Width[Nwidths]; // array of colum widths 71 static int Ncols = -1; // max colums in table used 72 static int Content = 0; // type code for contents of sheet 73 static Row *Root = nil; // one worksheet's worth of cells 74 75 static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", 76 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; 77 78 static char *Errmsgs[] = { 79 [0x0] "#NULL!", // intersection of two cell ranges is empty 80 [0x7] "#DIV/0!", // division by zero 81 [0xf] "#VALUE!", // wrong type of operand 82 [0x17] "#REF!", // illegal or deleted cell reference 83 [0x1d] "#NAME?", // wrong function or range name 84 [0x24] "#NUM!", // value range overflow 85 [0x2a] "#N/A!", // argument of function not available 86 }; 87 88 89 void 90 cell(int r, int c, int f, int type, void *val) 91 { 92 Row *row, *nrow; 93 Col *col, *ncol; 94 95 if(c > Ncols) 96 Ncols = c; 97 98 if((ncol = malloc(sizeof(Col))) == nil) 99 sysfatal("no memory"); 100 ncol->c = c; 101 ncol->f = f; 102 ncol->type = type; 103 ncol->next = nil; 104 105 switch(type){ 106 case Tnumber: ncol->number = *(double *)val; break; 107 case Tlabel: ncol->label = (char *)val; break; 108 case Tindex: ncol->index = *(int *)val; break; 109 case Tbool: ncol->bool = *(int *)val; break; 110 case Terror: ncol->error = *(int *)val; break; 111 default: sysfatal("can't happen error"); 112 } 113 114 if(Root == nil || Root->r > r){ 115 if((nrow = malloc(sizeof(Row))) == nil) 116 sysfatal("no memory"); 117 nrow->col = ncol; 118 ncol->next = nil; 119 nrow->r = r; 120 nrow->next = Root; 121 Root = nrow; 122 return; 123 } 124 125 for(row = Root; row; row = row->next){ 126 if(row->r == r){ 127 if(row->col->c > c){ 128 ncol->next = row->col; 129 row->col = ncol; 130 return; 131 } 132 else{ 133 for(col = row->col; col; col = col->next) 134 if(col->next == nil || col->next->c > c){ 135 ncol->next = col->next; 136 col->next = ncol; 137 return; 138 } 139 } 140 } 141 142 if(row->next == nil || row->next->r > r){ 143 if((nrow = malloc(sizeof(Row))) == nil) 144 sysfatal("no memory"); 145 nrow->col = ncol; 146 nrow->r = r; 147 nrow->next = row->next; 148 row->next = nrow; 149 return; 150 } 151 } 152 sysfatal("cannot happen error"); 153 } 154 155 struct Tm * 156 bifftime(double num) 157 { 158 long long t = num; 159 160 /* Beware - These epochs are wrong, this 161 * is due to Excel still remaining compatible 162 * with Lotus-123, which incorrectly believed 1900 163 * was a leap year 164 */ 165 if(Datemode) 166 t -= 24107; // epoch = 1/1/1904 167 else 168 t -= 25569; // epoch = 31/12/1899 169 t *= 60*60*24; 170 171 return localtime((long)t); 172 } 173 174 void 175 numfmt(int fmt, int min, int max, double num) 176 { 177 char buf[1024]; 178 struct Tm *tm; 179 180 if(fmt == 9) 181 snprint(buf, sizeof(buf),"%.0f%%", num); 182 else 183 if(fmt == 10) 184 snprint(buf, sizeof(buf),"%f%%", num); 185 else 186 if(fmt == 11 || fmt == 48) 187 snprint(buf, sizeof(buf),"%e", num); 188 else 189 if(fmt >= 14 && fmt <= 17){ 190 tm = bifftime(num); 191 snprint(buf, sizeof(buf),"%d-%s-%d", 192 tm->mday, Months[tm->mon], tm->year+1900); 193 } 194 else 195 if((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){ 196 tm = bifftime(num); 197 snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec); 198 199 } 200 else 201 if(fmt == 22){ 202 tm = bifftime(num); 203 snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d", 204 tm->hour, tm->min, tm->sec, 205 tm->mday, Months[tm->mon], tm->year+1900); 206 207 }else 208 snprint(buf, sizeof(buf),"%g", num); 209 210 Bprint(bo, "%-*.*q", min, max, buf); 211 } 212 213 void 214 dump(void) 215 { 216 Row *r; 217 Col *c; 218 int i, min, max; 219 220 for(r = Root; r; r = r->next){ 221 for(c = r->col; c; c = c->next){ 222 if(c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0) 223 min = Defwidth; 224 if((c->next && c->c == c->next->c) || Nopad) 225 min = 0; 226 max = -1; 227 if(Trunc && min > 2) 228 max = min -2; // FIXME: -2 because of bug %q format ? 229 230 switch(c->type){ 231 case Tnumber: 232 if(Xf[c->f] == 0) 233 Bprint(bo, "%-*.*g", min, max, c->number); 234 else 235 numfmt(Xf[c->f], min, max, c->number); 236 break; 237 case Tlabel: 238 Bprint(bo, "%-*.*q", min, max, c->label); 239 break; 240 case Tbool: 241 Bprint(bo, "%-*.*s", min, max, (c->bool)? "True": "False"); 242 break; 243 case Tindex: 244 if(c->index < 0 || c->index >= Nstrtab) 245 sysfatal("SST string out of range - corrupt file?"); 246 Bprint(bo, "%-*.*q", min, max, Strtab[c->index]); 247 break; 248 case Terror: 249 if(c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error]) 250 Bprint(bo, "#ERR=%d", c->index); 251 else 252 Bprint(bo, "%-*.*q", min, max, Errmsgs[c->error]); 253 break; 254 default: 255 sysfatal("cannot happen error"); 256 break; 257 } 258 259 if(c->next){ 260 if(c->next->c == c->c) // bar charts 261 Bprint(bo, "="); 262 else{ 263 Bprint(bo, "%s", Delim); 264 for(i = c->c; c->next && i < c->next->c -1; i++) 265 Bprint(bo, "%-*.*s%s", min, max, "", Delim); 266 } 267 } 268 } 269 if(r->next) 270 for(i = r->r; i < r->next->r; i++) 271 Bprint(bo, "\n"); 272 273 } 274 Bprint(bo, "\n"); 275 } 276 277 void 278 release(void) 279 { 280 Row *r, *or; 281 Col *c, *oc; 282 283 r = Root; 284 while(r){ 285 c = r->col; 286 while(c){ 287 if(c->type == Tlabel) 288 free(c->label); 289 oc = c; 290 c = c->next; 291 free(oc); 292 } 293 or = r; 294 r = r->next; 295 free(or); 296 } 297 Root = nil; 298 299 memset(Width, 0, sizeof(Width)); 300 Ncols = -1; 301 } 302 303 void 304 skip(Biff *b, int len) 305 { 306 assert(len <= b->len); 307 if(Bseek(b->bp, len, 1) == -1) 308 sysfatal("seek failed - %r"); 309 b->len -= len; 310 } 311 312 void 313 gmem(Biff *b, void *p, int n) 314 { 315 if(b->len < n) 316 sysfatal("short record %d < %d", b->len, n); 317 if(Bread(b->bp, p, n) != n) 318 sysfatal("unexpected EOF - %r"); 319 b->len -= n; 320 } 321 322 void 323 xd(Biff *b) 324 { 325 uvlong off; 326 uchar buf[16]; 327 int addr, got, n, i, j; 328 329 addr = 0; 330 off = Boffset(b->bp); 331 while(addr < b->len){ 332 n = (b->len >= sizeof(buf))? sizeof(buf): b->len; 333 got = Bread(b->bp, buf, n); 334 335 Bprint(bo, " %6d ", addr); 336 addr += n; 337 338 for(i = 0; i < got; i++) 339 Bprint(bo, "%02x ", buf[i]); 340 for(j = i; j < 16; j++) 341 Bprint(bo, " "); 342 Bprint(bo, " "); 343 for(i = 0; i < got; i++) 344 Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.'); 345 Bprint(bo, "\n"); 346 } 347 Bseek(b->bp, off, 0); 348 } 349 350 static int 351 getrec(Biff *b) 352 { 353 int c; 354 if((c = Bgetc(b->bp)) == -1) 355 return -1; // real EOF 356 b->op = c; 357 if((c = Bgetc(b->bp)) == -1) 358 sysfatal("unexpected EOF - %r"); 359 b->op |= c << 8; 360 if((c = Bgetc(b->bp)) == -1) 361 sysfatal("unexpected EOF - %r"); 362 b->len = c; 363 if((c = Bgetc(b->bp)) == -1) 364 sysfatal("unexpected EOF - %r"); 365 b->len |= c << 8; 366 if(b->op == 0 && b->len == 0) 367 return -1; 368 if(Debug){ 369 Bprint(bo, "op=0x%x len=%d\n", b->op, b->len); 370 xd(b); 371 } 372 return 0; 373 } 374 375 static uvlong 376 gint(Biff *b, int n) 377 { 378 int i, c; 379 uvlong vl, rc; 380 381 if(b->len < n) 382 return -1; 383 rc = 0; 384 for(i = 0; i < n; i++){ 385 if((c = Bgetc(b->bp)) == -1) 386 sysfatal("unexpected EOF - %r"); 387 b->len--; 388 vl = c; 389 rc |= vl << (8*i); 390 } 391 return rc; 392 } 393 394 double 395 grk(Biff *b) 396 { 397 int f; 398 uvlong n; 399 double d; 400 401 n = gint(b, 4); 402 f = n & 3; 403 n &= ~3LL; 404 if(f & 2){ 405 d = n / 4.0; 406 } 407 else{ 408 n <<= 32; 409 memcpy(&d, &n, sizeof(d)); 410 } 411 412 if(f & 1) 413 d /= 100.0; 414 return d; 415 } 416 417 double 418 gdoub(Biff *b) 419 { 420 double d; 421 uvlong n = gint(b, 8); 422 memcpy(&d, &n, sizeof(n)); 423 return d; 424 } 425 426 char * 427 gstr(Biff *b, int len_width) 428 { 429 Rune r; 430 char *buf, *p; 431 int nch, w, ap, ln, rt, opt; 432 enum { 433 Unicode = 1, 434 Asian_phonetic = 4, 435 Rich_text = 8, 436 }; 437 438 if(b->len < len_width){ 439 if(getrec(b) == -1) 440 sysfatal("starting STRING expected CONTINUE, got EOF"); 441 if(b->op != 0x03c) 442 sysfatal("starting STRING expected CONTINUE, got op=0x%x", b->op); 443 } 444 445 ln = gint(b, len_width); 446 if(Biffver != Ver8){ 447 if((buf = calloc(ln+1, sizeof(char))) == nil) 448 sysfatal("no memory"); 449 gmem(b, buf, ln); 450 return buf; 451 } 452 453 454 if((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil) 455 sysfatal("no memory"); 456 p = buf; 457 458 if(ln == 0) 459 return buf; 460 nch = 0; 461 *buf = 0; 462 opt = gint(b, 1); 463 if(opt & Rich_text) 464 rt = gint(b, 2); 465 else 466 rt = 0; 467 if(opt & Asian_phonetic) 468 ap = gint(b, 4); 469 else 470 ap = 0; 471 for(;;){ 472 w = (opt & Unicode)? sizeof(Rune): sizeof(char); 473 474 while(b->len > 0){ 475 r = gint(b, w); 476 p += runetochar(p, &r); 477 if(++nch >= ln){ 478 if(rt) 479 skip(b, rt*4); 480 if(ap) 481 skip(b, ap); 482 return buf; 483 } 484 } 485 if(getrec(b) == -1) 486 sysfatal("in STRING expected CONTINUE, got EOF"); 487 if(b->op != 0x03c) 488 sysfatal("in STRING expected CONTINUE, got op=0x%x", b->op); 489 opt = gint(b, 1); 490 } 491 } 492 493 void 494 sst(Biff *b) 495 { 496 int n; 497 498 skip(b, 4); // total # strings 499 Nstrtab = gint(b, 4); // # unique strings 500 if((Strtab = calloc(Nstrtab, sizeof(char *))) == nil) 501 sysfatal("no memory"); 502 for(n = 0; n < Nstrtab; n++) 503 Strtab[n] = gstr(b, 2); 504 505 } 506 507 void 508 boolerr(Biff *b) 509 { 510 int r = gint(b, 2); // row 511 int c = gint(b, 2); // col 512 int f = gint(b, 2); // formatting ref 513 int v = gint(b, 1); // bool value / err code 514 int t = gint(b, 1); // type 515 cell(r, c, f, (t)? Terror: Tbool, &v); 516 } 517 518 void 519 rk(Biff *b) 520 { 521 int r = gint(b, 2); // row 522 int c = gint(b, 2); // col 523 int f = gint(b, 2); // formatting ref 524 double v = grk(b); // value 525 cell(r, c, f, Tnumber, &v); 526 } 527 528 void 529 mulrk(Biff *b) 530 { 531 int r = gint(b, 2); // row 532 int c = gint(b, 2); // first col 533 while(b->len >= 6){ 534 int f = gint(b, 2); // formatting ref 535 double v = grk(b); // value 536 cell(r, c++, f, Tnumber, &v); 537 } 538 } 539 540 void 541 number(Biff *b) 542 { 543 int r = gint(b, 2); // row 544 int c = gint(b, 2); // col 545 int f = gint(b, 2); // formatting ref 546 double v = gdoub(b); // double 547 cell(r, c, f, Tnumber, &v); 548 } 549 550 void 551 label(Biff *b) 552 { 553 int r = gint(b, 2); // row 554 int c = gint(b, 2); // col 555 int f = gint(b, 2); // formatting ref 556 char *s = gstr(b, 2); // byte string 557 cell(r, c, f, Tlabel, s); 558 } 559 560 561 void 562 labelsst(Biff *b) 563 { 564 int r = gint(b, 2); // row 565 int c = gint(b, 2); // col 566 int f = gint(b, 2); // formatting ref 567 int i = gint(b, 2); // sst string ref 568 cell(r, c, f, Tindex, &i); 569 } 570 571 void 572 bof(Biff *b) 573 { 574 Biffver = gint(b, 2); 575 Content = gint(b, 2); 576 } 577 578 void 579 defcolwidth(Biff *b) 580 { 581 Defwidth = gint(b, 2); 582 } 583 584 void 585 datemode(Biff *b) 586 { 587 Datemode = gint(b, 2); 588 } 589 590 int 591 wanted(char *range, int sheet) 592 { 593 int i, j; 594 char *p; 595 596 if (! range) 597 return 1; 598 599 p = range; 600 while(*p){ 601 i = strtol(p, &p, 10); 602 switch(*p){ 603 case '\0': 604 case ',': 605 if (i == sheet) 606 return 1; 607 break; 608 case '-': 609 j = strtol(p+1, &p, 10); 610 if(sheet >= i && sheet <= j) 611 return 1; 612 613 break; 614 default: 615 sysfatal(" %s malformed range spec", range); 616 break; 617 } 618 if (*p == ',') 619 p++; 620 } 621 return 0; 622 } 623 624 void 625 eof(Biff *b) 626 { 627 int i; 628 struct { 629 int n; 630 char *s; 631 } names[] = { 632 0x005, "Workbook globals", 633 0x006, "Visual Basic module", 634 0x010, "Worksheet", 635 0x020, "Chart", 636 0x040, "Macro sheet", 637 0x100, "Workspace file", 638 }; 639 static int sheet = 0; 640 641 if(! wanted(Sheetrange, ++sheet)){ 642 release(); 643 return; 644 } 645 646 if(Ncols != -1){ 647 if(All){ 648 for(i = 0; i < nelem(names); i++) 649 if(names[i].n == Content){ 650 Bprint(bo, "\n# contents %s\n", names[i].s); 651 dump(); 652 } 653 } 654 else 655 if(Content == 0x10) // Worksheet 656 dump(); 657 } 658 release(); 659 USED(b); 660 } 661 662 void 663 colinfo(Biff *b) 664 { 665 int c; 666 int c1 = gint(b, 2); 667 int c2 = gint(b, 2); 668 int w = gint(b, 2); 669 670 if(c1 < 0) 671 sysfatal("negative column number (%d)", c1); 672 if(c2 >= Nwidths) 673 sysfatal("too many columns (%d > %d)", c2, Nwidths); 674 w /= 256; 675 676 if(w > 100) 677 w = 100; 678 if(w < 0) 679 w = 0; 680 681 for(c = c1; c <= c2; c++) 682 Width[c] = w; 683 } 684 685 void 686 xf(Biff *b) 687 { 688 int fmt; 689 static int nalloc = 0; 690 691 skip(b, 2); 692 fmt = gint(b, 2); 693 if(nalloc >= Nxf){ 694 nalloc += 20; 695 if((Xf = realloc(Xf, nalloc*sizeof(int))) == nil) 696 sysfatal("no memory"); 697 } 698 Xf[Nxf++] = fmt; 699 } 700 701 void 702 writeaccess(Biff *b) 703 { 704 Bprint(bo, "# author %s\n", gstr(b, 2)); 705 } 706 707 void 708 codepage(Biff *b) 709 { 710 int codepage = gint(b, 2); 711 if(codepage != 1200) // 1200 == UTF-16 712 Bprint(bo, "# codepage %d\n", codepage); 713 } 714 715 void 716 xls2csv(Biobuf *bp) 717 { 718 int i; 719 Biff biff, *b; 720 struct { 721 int op; 722 void (*func)(Biff *); 723 } dispatch[] = { 724 0x000a, eof, 725 0x0022, datemode, 726 0x0042, codepage, 727 0x0055, defcolwidth, 728 0x005c, writeaccess, 729 0x007d, colinfo, 730 0x00bd, mulrk, 731 0x00fc, sst, 732 0x00fd, labelsst, 733 0x0203, number, 734 0x0204, label, 735 0x0205, boolerr, 736 0x027e, rk, 737 0x0809, bof, 738 0x00e0, xf, 739 }; 740 741 b = &biff; 742 b->bp = bp; 743 while(getrec(b) != -1){ 744 for(i = 0; i < nelem(dispatch); i++) 745 if(b->op == dispatch[i].op) 746 (*dispatch[i].func)(b); 747 skip(b, b->len); 748 } 749 } 750 751 void 752 usage(void) 753 { 754 fprint(2, "usage: %s [-Dant] [-w worksheets] [-d delim] /mnt/doc/Workbook\n", argv0); 755 exits("usage"); 756 } 757 758 void 759 main(int argc, char *argv[]) 760 { 761 int i; 762 Biobuf bin, bout, *bp; 763 764 ARGBEGIN{ 765 case 'D': 766 Debug = 1; 767 break; 768 case 'a': 769 All = 1; 770 break; 771 case 'd': 772 Delim = EARGF(usage()); 773 break; 774 case 'n': 775 Nopad = 1; 776 break; 777 case 't': 778 Trunc = 1; 779 break; 780 case 'w': 781 Sheetrange = EARGF(usage()); 782 break; 783 default: 784 usage(); 785 break; 786 }ARGEND; 787 788 if(argc != 1) 789 usage(); 790 791 bo = &bout; 792 quotefmtinstall(); 793 Binit(bo, OWRITE, 1); 794 795 if(argc > 0) { 796 for(i = 0; i < argc; i++){ 797 if((bp = Bopen(argv[i], OREAD)) == nil) 798 sysfatal("%s cannot open - %r", argv[i]); 799 xls2csv(bp); 800 Bterm(bp); 801 } 802 } else { 803 Binit(&bin, 0, OREAD); 804 xls2csv(&bin); 805 } 806 exits(0); 807 } 808 809