1 /* msexceltables.c Steve Simon 5-Jan-2005 */
2 #include <u.h>
3 #include <libc.h>
4 #include <bio.h>
5 #include <ctype.h>
6
7 enum {
8 Tillegal = 0,
9 Tnumber, // cell types
10 Tlabel,
11 Tindex,
12 Tbool,
13 Terror,
14
15 Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
16
17 Nwidths = 4096,
18 };
19
20
21 typedef struct Biff Biff;
22 typedef struct Col Col;
23 typedef struct Row Row;
24
25 struct Row {
26 Row *next; // next row
27 int r; // row number
28 Col *col; // list of cols in row
29 };
30
31 struct Col {
32 Col *next; // next col in row
33 int c; // col number
34 int f; // index into formating table (Xf)
35 int type; // type of value for union below
36 union { // value
37 int index; // index into string table (Strtab)
38 int error;
39 int bool;
40 char *label;
41 double number;
42 };
43 };
44
45 struct Biff {
46 Biobuf *bp; // input file
47 int op; // current record type
48 int len; // length of current record
49 };
50
51 // options
52 static int Nopad = 0; // disable padding cells to colum width
53 static int Trunc = 0; // truncate cells to colum width
54 static int All = 0; // dump all sheet types, Worksheets only by default
55 static char *Delim = " "; // field delimiter
56 static char *Sheetrange = nil; // range of sheets wanted
57 static char *Columnrange = nil; // range of collums wanted
58 static int Debug = 0;
59
60 // file scope
61 static int Defwidth = 10; // default colum width if non given
62 static int Biffver; // file vesion
63 static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1
64 static char **Strtab = nil; // label contents heap
65 static int Nstrtab = 0; // # of above
66 static int *Xf; // array of extended format indices
67 static int Nxf = 0; // # of above
68 static Biobuf *bo; // stdout (sic)
69 static int Doquote = 1; // quote text fields if they are rc(1) unfriendly
70
71 // table scope
72 static int Width[Nwidths]; // array of colum widths
73 static int Ncols = -1; // max colums in table used
74 static int Content = 0; // type code for contents of sheet
75 static Row *Root = nil; // one worksheet's worth of cells
76
77 static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
78 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
79
80 static char *Errmsgs[] = {
81 [0x0] "#NULL!", // intersection of two cell ranges is empty
82 [0x7] "#DIV/0!", // division by zero
83 [0xf] "#VALUE!", // wrong type of operand
84 [0x17] "#REF!", // illegal or deleted cell reference
85 [0x1d] "#NAME?", // wrong function or range name
86 [0x24] "#NUM!", // value range overflow
87 [0x2a] "#N/A!", // argument of function not available
88 };
89
90 int
wanted(char * range,int here)91 wanted(char *range, int here)
92 {
93 int n, s;
94 char *p;
95
96 if (! range)
97 return 1;
98
99 s = -1;
100 p = range;
101 while(1){
102 n = strtol(p, &p, 10);
103 switch(*p){
104 case 0:
105 if(n == here)
106 return 1;
107 if(s != -1 && here > s && here < n)
108 return 1;
109 return 0;
110 case ',':
111 if(n == here)
112 return 1;
113 if(s != -1 && here > s && here < n)
114 return 1;
115 s = -1;
116 p++;
117 break;
118 case '-':
119 if(n == here)
120 return 1;
121 s = n;
122 p++;
123 break;
124 default:
125 sysfatal("%s malformed range spec", range);
126 break;
127 }
128 }
129 }
130
131
132 void
cell(int r,int c,int f,int type,void * val)133 cell(int r, int c, int f, int type, void *val)
134 {
135 Row *row, *nrow;
136 Col *col, *ncol;
137
138 if(c > Ncols)
139 Ncols = c;
140
141 if((ncol = malloc(sizeof(Col))) == nil)
142 sysfatal("no memory");
143 ncol->c = c;
144 ncol->f = f;
145 ncol->type = type;
146 ncol->next = nil;
147
148 switch(type){
149 case Tnumber: ncol->number = *(double *)val; break;
150 case Tlabel: ncol->label = (char *)val; break;
151 case Tindex: ncol->index = *(int *)val; break;
152 case Tbool: ncol->bool = *(int *)val; break;
153 case Terror: ncol->error = *(int *)val; break;
154 default: sysfatal("can't happen error");
155 }
156
157 if(Root == nil || Root->r > r){
158 if((nrow = malloc(sizeof(Row))) == nil)
159 sysfatal("no memory");
160 nrow->col = ncol;
161 ncol->next = nil;
162 nrow->r = r;
163 nrow->next = Root;
164 Root = nrow;
165 return;
166 }
167
168 for(row = Root; row; row = row->next){
169 if(row->r == r){
170 if(row->col->c > c){
171 ncol->next = row->col;
172 row->col = ncol;
173 return;
174 }
175 else{
176 for(col = row->col; col; col = col->next)
177 if(col->next == nil || col->next->c > c){
178 ncol->next = col->next;
179 col->next = ncol;
180 return;
181 }
182 }
183 }
184
185 if(row->next == nil || row->next->r > r){
186 if((nrow = malloc(sizeof(Row))) == nil)
187 sysfatal("no memory");
188 nrow->col = ncol;
189 nrow->r = r;
190 nrow->next = row->next;
191 row->next = nrow;
192 return;
193 }
194 }
195 sysfatal("cannot happen error");
196 }
197
198 struct Tm *
bifftime(double t)199 bifftime(double t)
200 {
201
202 /* Beware - These epochs are wrong, this
203 * is due to Excel still remaining compatible
204 * with Lotus-123, which incorrectly believed 1900
205 * was a leap year
206 */
207 if(Datemode)
208 t -= 24107; // epoch = 1/1/1904
209 else
210 t -= 25569; // epoch = 31/12/1899
211 t *= 60*60*24;
212
213 return localtime((long)t);
214 }
215
216 void
numfmt(int fmt,int min,int max,double num)217 numfmt(int fmt, int min, int max, double num)
218 {
219 char buf[1024];
220 struct Tm *tm;
221
222 if(fmt == 9)
223 snprint(buf, sizeof(buf),"%.0f%%", num);
224 else
225 if(fmt == 10)
226 snprint(buf, sizeof(buf),"%f%%", num);
227 else
228 if(fmt == 11 || fmt == 48)
229 snprint(buf, sizeof(buf),"%e", num);
230 else
231 if(fmt >= 14 && fmt <= 17){
232 tm = bifftime(num);
233 snprint(buf, sizeof(buf),"%d-%s-%d",
234 tm->mday, Months[tm->mon], tm->year+1900);
235 }
236 else
237 if((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){
238 tm = bifftime(num);
239 snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
240
241 }
242 else
243 if(fmt == 22){
244 tm = bifftime(num);
245 snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
246 tm->hour, tm->min, tm->sec,
247 tm->mday, Months[tm->mon], tm->year+1900);
248
249 }else
250 snprint(buf, sizeof(buf),"%g", num);
251
252 Bprint(bo, "%-*.*q", min, max, buf);
253 }
254
255 void
dump(void)256 dump(void)
257 {
258 Row *r;
259 Col *c, *c1;
260 char *strfmt;
261 int i, n, last, min, max;
262
263 if(Doquote)
264 strfmt = "%-*.*q";
265 else
266 strfmt = "%-*.*s";
267
268 for(r = Root; r; r = r->next){
269 n = 1;
270 for(c = r->col; c; c = c->next){
271 n++;
272 if(! wanted(Columnrange, n))
273 continue;
274
275 if(c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
276 min = Defwidth;
277 if((c->next && c->c == c->next->c) || Nopad)
278 min = 0;
279 max = -1;
280 if(Trunc && min > 2)
281 max = min -2; // FIXME: -2 because of bug %q format ?
282
283 switch(c->type){
284 case Tnumber:
285 if(Xf == nil || Xf[c->f] == 0)
286 Bprint(bo, "%-*.*g", min, max, c->number);
287 else
288 numfmt(Xf[c->f], min, max, c->number);
289 break;
290 case Tlabel:
291 Bprint(bo, strfmt, min, max, c->label);
292 break;
293 case Tbool:
294 Bprint(bo, strfmt, min, max, (c->bool)? "True": "False");
295 break;
296 case Tindex:
297 if(c->index < 0 || c->index >= Nstrtab)
298 sysfatal("SST string out of range - corrupt file?");
299 Bprint(bo, strfmt, min, max, Strtab[c->index]);
300 break;
301 case Terror:
302 if(c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
303 Bprint(bo, "#ERR=%d", c->index);
304 else
305 Bprint(bo, strfmt, min, max, Errmsgs[c->error]);
306 break;
307 default:
308 sysfatal("cannot happen error");
309 break;
310 }
311
312 last = 1;
313 for(i = n+1, c1 = c->next; c1; c1 = c1->next, i++)
314 if(wanted(Columnrange, i)){
315 last = 0;
316 break;
317 }
318
319 if(! last){
320 if(c->next->c == c->c) // bar charts
321 Bprint(bo, "=");
322 else{
323 Bprint(bo, "%s", Delim);
324 for(i = c->c; c->next && i < c->next->c -1; i++)
325 Bprint(bo, "%-*.*s%s", min, max, "", Delim);
326 }
327 }
328 }
329 if(r->next)
330 for(i = r->r; i < r->next->r; i++)
331 Bprint(bo, "\n");
332
333 }
334 Bprint(bo, "\n");
335 }
336
337 void
release(void)338 release(void)
339 {
340 Row *r, *or;
341 Col *c, *oc;
342
343 r = Root;
344 while(r){
345 c = r->col;
346 while(c){
347 if(c->type == Tlabel)
348 free(c->label);
349 oc = c;
350 c = c->next;
351 free(oc);
352 }
353 or = r;
354 r = r->next;
355 free(or);
356 }
357 Root = nil;
358
359 memset(Width, 0, sizeof(Width));
360 Ncols = -1;
361 }
362
363 void
skip(Biff * b,int len)364 skip(Biff *b, int len)
365 {
366 assert(len <= b->len);
367 if(Bseek(b->bp, len, 1) == -1)
368 sysfatal("seek failed - %r");
369 b->len -= len;
370 }
371
372 void
gmem(Biff * b,void * p,int n)373 gmem(Biff *b, void *p, int n)
374 {
375 if(b->len < n)
376 sysfatal("short record %d < %d", b->len, n);
377 if(Bread(b->bp, p, n) != n)
378 sysfatal("unexpected EOF - %r");
379 b->len -= n;
380 }
381
382 void
xd(Biff * b)383 xd(Biff *b)
384 {
385 uvlong off;
386 uchar buf[16];
387 int addr, got, n, i, j;
388
389 addr = 0;
390 off = Boffset(b->bp);
391 while(addr < b->len){
392 n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
393 got = Bread(b->bp, buf, n);
394
395 Bprint(bo, " %6d ", addr);
396 addr += n;
397
398 for(i = 0; i < got; i++)
399 Bprint(bo, "%02x ", buf[i]);
400 for(j = i; j < 16; j++)
401 Bprint(bo, " ");
402 Bprint(bo, " ");
403 for(i = 0; i < got; i++)
404 Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
405 Bprint(bo, "\n");
406 }
407 Bseek(b->bp, off, 0);
408 }
409
410 static int
getrec(Biff * b)411 getrec(Biff *b)
412 {
413 int c;
414 if((c = Bgetc(b->bp)) == -1)
415 return -1; // real EOF
416 b->op = c;
417 if((c = Bgetc(b->bp)) == -1)
418 sysfatal("unexpected EOF - %r");
419 b->op |= c << 8;
420 if((c = Bgetc(b->bp)) == -1)
421 sysfatal("unexpected EOF - %r");
422 b->len = c;
423 if((c = Bgetc(b->bp)) == -1)
424 sysfatal("unexpected EOF - %r");
425 b->len |= c << 8;
426 if(b->op == 0 && b->len == 0)
427 return -1;
428 if(Debug){
429 Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
430 xd(b);
431 }
432 return 0;
433 }
434
435 static uvlong
gint(Biff * b,int n)436 gint(Biff *b, int n)
437 {
438 int i, c;
439 uvlong vl, rc;
440
441 if(b->len < n)
442 return -1;
443 rc = 0;
444 for(i = 0; i < n; i++){
445 if((c = Bgetc(b->bp)) == -1)
446 sysfatal("unexpected EOF - %r");
447 b->len--;
448 vl = c;
449 rc |= vl << (8*i);
450 }
451 return rc;
452 }
453
454 double
grk(Biff * b)455 grk(Biff *b)
456 {
457 int f;
458 uvlong n;
459 double d;
460
461 n = gint(b, 4);
462 f = n & 3;
463 n &= ~3LL;
464 if(f & 2){
465 d = n / 4.0;
466 }
467 else{
468 n <<= 32;
469 memcpy(&d, &n, sizeof(d));
470 }
471
472 if(f & 1)
473 d /= 100.0;
474 return d;
475 }
476
477 double
gdoub(Biff * b)478 gdoub(Biff *b)
479 {
480 double d;
481 uvlong n = gint(b, 8);
482 memcpy(&d, &n, sizeof(n));
483 return d;
484 }
485
486 char *
gstr(Biff * b,int len_width)487 gstr(Biff *b, int len_width)
488 {
489 Rune r;
490 char *buf, *p;
491 int nch, w, ap, ln, rt, opt;
492 enum {
493 Unicode = 1,
494 Asian_phonetic = 4,
495 Rich_text = 8,
496 };
497
498 if(b->len < len_width){
499 if(getrec(b) == -1)
500 sysfatal("starting STRING expected CONTINUE, got EOF");
501 if(b->op != 0x03c)
502 sysfatal("starting STRING expected CONTINUE, got op=0x%x", b->op);
503 }
504
505 ln = gint(b, len_width);
506 if(Biffver != Ver8){
507 if((buf = calloc(ln+1, sizeof(char))) == nil)
508 sysfatal("no memory");
509 gmem(b, buf, ln);
510 return buf;
511 }
512
513
514 if((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
515 sysfatal("no memory");
516 p = buf;
517
518 if(ln == 0)
519 return buf;
520 nch = 0;
521 *buf = 0;
522 opt = gint(b, 1);
523 if(opt & Rich_text)
524 rt = gint(b, 2);
525 else
526 rt = 0;
527 if(opt & Asian_phonetic)
528 ap = gint(b, 4);
529 else
530 ap = 0;
531 for(;;){
532 w = (opt & Unicode)? sizeof(Rune): sizeof(char);
533
534 while(b->len > 0){
535 r = gint(b, w);
536 p += runetochar(p, &r);
537 if(++nch >= ln){
538 if(rt)
539 skip(b, rt*4);
540 if(ap)
541 skip(b, ap);
542 return buf;
543 }
544 }
545 if(getrec(b) == -1)
546 sysfatal("in STRING expected CONTINUE, got EOF");
547 if(b->op != 0x03c)
548 sysfatal("in STRING expected CONTINUE, got op=0x%x", b->op);
549 opt = gint(b, 1);
550 }
551 }
552
553 void
sst(Biff * b)554 sst(Biff *b)
555 {
556 int n;
557
558 skip(b, 4); // total # strings
559 Nstrtab = gint(b, 4); // # unique strings
560 if((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
561 sysfatal("no memory");
562 for(n = 0; n < Nstrtab; n++)
563 Strtab[n] = gstr(b, 2);
564
565 }
566
567 void
boolerr(Biff * b)568 boolerr(Biff *b)
569 {
570 int r = gint(b, 2); // row
571 int c = gint(b, 2); // col
572 int f = gint(b, 2); // formatting ref
573 int v = gint(b, 1); // bool value / err code
574 int t = gint(b, 1); // type
575 cell(r, c, f, (t)? Terror: Tbool, &v);
576 }
577
578 void
rk(Biff * b)579 rk(Biff *b)
580 {
581 int r = gint(b, 2); // row
582 int c = gint(b, 2); // col
583 int f = gint(b, 2); // formatting ref
584 double v = grk(b); // value
585 cell(r, c, f, Tnumber, &v);
586 }
587
588 void
mulrk(Biff * b)589 mulrk(Biff *b)
590 {
591 int r = gint(b, 2); // row
592 int c = gint(b, 2); // first col
593 while(b->len >= 6){
594 int f = gint(b, 2); // formatting ref
595 double v = grk(b); // value
596 cell(r, c++, f, Tnumber, &v);
597 }
598 }
599
600 void
number(Biff * b)601 number(Biff *b)
602 {
603 int r = gint(b, 2); // row
604 int c = gint(b, 2); // col
605 int f = gint(b, 2); // formatting ref
606 double v = gdoub(b); // double
607 cell(r, c, f, Tnumber, &v);
608 }
609
610 void
label(Biff * b)611 label(Biff *b)
612 {
613 int r = gint(b, 2); // row
614 int c = gint(b, 2); // col
615 int f = gint(b, 2); // formatting ref
616 char *s = gstr(b, 2); // byte string
617 cell(r, c, f, Tlabel, s);
618 }
619
620
621 void
labelsst(Biff * b)622 labelsst(Biff *b)
623 {
624 int r = gint(b, 2); // row
625 int c = gint(b, 2); // col
626 int f = gint(b, 2); // formatting ref
627 int i = gint(b, 2); // sst string ref
628 cell(r, c, f, Tindex, &i);
629 }
630
631 void
bof(Biff * b)632 bof(Biff *b)
633 {
634 Biffver = gint(b, 2);
635 Content = gint(b, 2);
636 }
637
638 void
defcolwidth(Biff * b)639 defcolwidth(Biff *b)
640 {
641 Defwidth = gint(b, 2);
642 }
643
644 void
datemode(Biff * b)645 datemode(Biff *b)
646 {
647 Datemode = gint(b, 2);
648 }
649
650 void
eof(Biff * b)651 eof(Biff *b)
652 {
653 int i;
654 struct {
655 int n;
656 char *s;
657 } names[] = {
658 0x005, "Workbook globals",
659 0x006, "Visual Basic module",
660 0x010, "Worksheet",
661 0x020, "Chart",
662 0x040, "Macro sheet",
663 0x100, "Workspace file",
664 };
665 static int sheet = 0;
666
667 if(! wanted(Sheetrange, ++sheet)){
668 release();
669 return;
670 }
671
672 if(Ncols != -1){
673 if(All){
674 for(i = 0; i < nelem(names); i++)
675 if(names[i].n == Content){
676 Bprint(bo, "\n# contents %s\n", names[i].s);
677 dump();
678 }
679 }
680 else
681 if(Content == 0x10) // Worksheet
682 dump();
683 }
684 release();
685 USED(b);
686 }
687
688 void
colinfo(Biff * b)689 colinfo(Biff *b)
690 {
691 int c;
692 int c1 = gint(b, 2);
693 int c2 = gint(b, 2);
694 int w = gint(b, 2);
695
696 if(c1 < 0)
697 sysfatal("negative column number (%d)", c1);
698 if(c2 >= Nwidths)
699 sysfatal("too many columns (%d > %d)", c2, Nwidths);
700 w /= 256;
701
702 if(w > 100)
703 w = 100;
704 if(w < 0)
705 w = 0;
706
707 for(c = c1; c <= c2; c++)
708 Width[c] = w;
709 }
710
711 void
xf(Biff * b)712 xf(Biff *b)
713 {
714 int fmt;
715 static int nalloc = 0;
716
717 skip(b, 2);
718 fmt = gint(b, 2);
719 if(nalloc >= Nxf){
720 nalloc += 20;
721 if((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
722 sysfatal("no memory");
723 }
724 Xf[Nxf++] = fmt;
725 }
726
727 void
writeaccess(Biff * b)728 writeaccess(Biff *b)
729 {
730 Bprint(bo, "# author %s\n", gstr(b, 2));
731 }
732
733 void
codepage(Biff * b)734 codepage(Biff *b)
735 {
736 int codepage = gint(b, 2);
737 if(codepage != 1200) // 1200 == UTF-16
738 Bprint(bo, "# codepage %d\n", codepage);
739 }
740
741 void
xls2csv(Biobuf * bp)742 xls2csv(Biobuf *bp)
743 {
744 int i;
745 Biff biff, *b;
746 struct {
747 int op;
748 void (*func)(Biff *);
749 } dispatch[] = {
750 0x000a, eof,
751 0x0022, datemode,
752 0x0042, codepage,
753 0x0055, defcolwidth,
754 0x005c, writeaccess,
755 0x007d, colinfo,
756 0x00bd, mulrk,
757 0x00fc, sst,
758 0x00fd, labelsst,
759 0x0203, number,
760 0x0204, label,
761 0x0205, boolerr,
762 0x027e, rk,
763 0x0809, bof,
764 0x00e0, xf,
765 };
766
767 b = &biff;
768 b->bp = bp;
769 while(getrec(b) != -1){
770 for(i = 0; i < nelem(dispatch); i++)
771 if(b->op == dispatch[i].op)
772 (*dispatch[i].func)(b);
773 skip(b, b->len);
774 }
775 }
776
777 void
usage(void)778 usage(void)
779 {
780 fprint(2, "usage: %s [-Danqt] [-w worksheets] [-c columns] [-d delim] /mnt/doc/Workbook\n", argv0);
781 exits("usage");
782 }
783
784 void
main(int argc,char * argv[])785 main(int argc, char *argv[])
786 {
787 int i;
788 Biobuf bin, bout, *bp;
789
790 ARGBEGIN{
791 case 'D':
792 Debug = 1;
793 break;
794 case 'a':
795 All = 1;
796 break;
797 case 'q':
798 Doquote = 0;
799 break;
800 case 'd':
801 Delim = EARGF(usage());
802 break;
803 case 'n':
804 Nopad = 1;
805 break;
806 case 't':
807 Trunc = 1;
808 break;
809 case 'c':
810 Columnrange = EARGF(usage());
811 break;
812 case 'w':
813 Sheetrange = EARGF(usage());
814 break;
815 default:
816 usage();
817 break;
818 }ARGEND;
819
820 if(argc != 1)
821 usage();
822
823 bo = &bout;
824 quotefmtinstall();
825 Binit(bo, OWRITE, 1);
826
827 if(argc > 0) {
828 for(i = 0; i < argc; i++){
829 if((bp = Bopen(argv[i], OREAD)) == nil)
830 sysfatal("%s cannot open - %r", argv[i]);
831 xls2csv(bp);
832 Bterm(bp);
833 }
834 } else {
835 Binit(&bin, 0, OREAD);
836 xls2csv(&bin);
837 }
838 exits(0);
839 }
840
841