1*7dd7cddfSDavid du Colombier #include <u.h>
2*7dd7cddfSDavid du Colombier #include <libc.h>
3*7dd7cddfSDavid du Colombier #include <bio.h>
4*7dd7cddfSDavid du Colombier
5*7dd7cddfSDavid du Colombier /* automatically generated; do not edit. */
6*7dd7cddfSDavid du Colombier typedef struct Fibhdr Fibhdr;
7*7dd7cddfSDavid du Colombier struct Fibhdr {
8*7dd7cddfSDavid du Colombier ushort wIdent;
9*7dd7cddfSDavid du Colombier ushort nFib;
10*7dd7cddfSDavid du Colombier ushort nProduct;
11*7dd7cddfSDavid du Colombier ushort lid;
12*7dd7cddfSDavid du Colombier short pnNext;
13*7dd7cddfSDavid du Colombier uchar fDot;
14*7dd7cddfSDavid du Colombier uchar fGlsy;
15*7dd7cddfSDavid du Colombier uchar fComplex;
16*7dd7cddfSDavid du Colombier uchar fHasPic;
17*7dd7cddfSDavid du Colombier uchar cQuickSaves;
18*7dd7cddfSDavid du Colombier uchar fEncrypted;
19*7dd7cddfSDavid du Colombier uchar fWhichTblStm;
20*7dd7cddfSDavid du Colombier uchar fReadOnlyRecommended;
21*7dd7cddfSDavid du Colombier uchar fWriteReservation;
22*7dd7cddfSDavid du Colombier uchar fExtChar;
23*7dd7cddfSDavid du Colombier uchar fLoadOverride;
24*7dd7cddfSDavid du Colombier uchar fFarEast;
25*7dd7cddfSDavid du Colombier uchar fCrypto;
26*7dd7cddfSDavid du Colombier ushort nFibBack;
27*7dd7cddfSDavid du Colombier ulong lKey;
28*7dd7cddfSDavid du Colombier uchar envr;
29*7dd7cddfSDavid du Colombier uchar fMac;
30*7dd7cddfSDavid du Colombier uchar fEmptySpecial;
31*7dd7cddfSDavid du Colombier uchar fLoadOverridePage;
32*7dd7cddfSDavid du Colombier uchar fFutureSavedUndo;
33*7dd7cddfSDavid du Colombier uchar fWord97Saved;
34*7dd7cddfSDavid du Colombier ushort chs;
35*7dd7cddfSDavid du Colombier ushort chsTables;
36*7dd7cddfSDavid du Colombier long fcMin;
37*7dd7cddfSDavid du Colombier long fcMac;
38*7dd7cddfSDavid du Colombier ushort csw;
39*7dd7cddfSDavid du Colombier };
enum {
	bcFibhdr = 0x22,	/* bytes of raw header that readFibhdr requires and decodes */
};
41*7dd7cddfSDavid du Colombier
42*7dd7cddfSDavid du Colombier /* automatically generated; do not edit. */
43*7dd7cddfSDavid du Colombier void
readFibhdr(Fibhdr * s,uchar * v,int nv)44*7dd7cddfSDavid du Colombier readFibhdr(Fibhdr *s, uchar *v, int nv)
45*7dd7cddfSDavid du Colombier {
46*7dd7cddfSDavid du Colombier if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr");
47*7dd7cddfSDavid du Colombier s->wIdent = v[0x0] | (v[0x0+1] << 8);
48*7dd7cddfSDavid du Colombier s->nFib = v[0x2] | (v[0x2+1] << 8);
49*7dd7cddfSDavid du Colombier s->nProduct = v[0x4] | (v[0x4+1] << 8);
50*7dd7cddfSDavid du Colombier s->lid = v[0x6] | (v[0x6+1] << 8);
51*7dd7cddfSDavid du Colombier s->pnNext = v[0x8] | (v[0x8+1] << 8);
52*7dd7cddfSDavid du Colombier s->fDot = ((v[0xA]) & 0x1) >> 0;
53*7dd7cddfSDavid du Colombier s->fGlsy = ((v[0xA]) & 0x2) >> 1;
54*7dd7cddfSDavid du Colombier s->fComplex = ((v[0xA]) & 0x4) >> 2;
55*7dd7cddfSDavid du Colombier s->fHasPic = ((v[0xA]) & 0x8) >> 3;
56*7dd7cddfSDavid du Colombier s->cQuickSaves = ((v[0xA]) & 0x240) >> 4;
57*7dd7cddfSDavid du Colombier s->fEncrypted = ((v[0xB]) & 0x1) >> 0;
58*7dd7cddfSDavid du Colombier s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1;
59*7dd7cddfSDavid du Colombier s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2;
60*7dd7cddfSDavid du Colombier s->fWriteReservation = ((v[0xB]) & 0x8) >> 3;
61*7dd7cddfSDavid du Colombier s->fExtChar = ((v[0xB]) & 0x16) >> 4;
62*7dd7cddfSDavid du Colombier s->fLoadOverride = ((v[0xB]) & 0x32) >> 5;
63*7dd7cddfSDavid du Colombier s->fFarEast = ((v[0xB]) & 0x64) >> 6;
64*7dd7cddfSDavid du Colombier s->fCrypto = ((v[0xB]) & 0x128) >> 7;
65*7dd7cddfSDavid du Colombier s->nFibBack = v[0xC] | (v[0xC+1] << 8);
66*7dd7cddfSDavid du Colombier s->lKey = v[0xE] | (v[0xE+1] << 8)| (v[0xE+2] << 16) | (v[0xE+3] << 24);
67*7dd7cddfSDavid du Colombier s->envr = v[0x12];
68*7dd7cddfSDavid du Colombier s->fMac = ((v[0x13]) & 0x1) >> 0;
69*7dd7cddfSDavid du Colombier s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1;
70*7dd7cddfSDavid du Colombier s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2;
71*7dd7cddfSDavid du Colombier s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3;
72*7dd7cddfSDavid du Colombier s->fWord97Saved = ((v[0x13]) & 0x16) >> 4;
73*7dd7cddfSDavid du Colombier s->chs = v[0x14] | (v[0x14+1] << 8);
74*7dd7cddfSDavid du Colombier s->chsTables = v[0x16] | (v[0x16+1] << 8);
75*7dd7cddfSDavid du Colombier s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24);
76*7dd7cddfSDavid du Colombier s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24);
77*7dd7cddfSDavid du Colombier s->csw = v[0x20] | (v[0x20+1] << 8);
78*7dd7cddfSDavid du Colombier }
79*7dd7cddfSDavid du Colombier
/*
 * Print the command synopsis on file descriptor 2 and
 * terminate the process with exit status "usage".
 */
void
usage(void)
{
	fprint(2, "usage: wordtext /mnt/doc/WordDocument\n");
	exits("usage");
}
86*7dd7cddfSDavid du Colombier
87*7dd7cddfSDavid du Colombier void
main(int argc,char ** argv)88*7dd7cddfSDavid du Colombier main(int argc, char **argv)
89*7dd7cddfSDavid du Colombier {
90*7dd7cddfSDavid du Colombier Biobuf *b;
91*7dd7cddfSDavid du Colombier Biobuf bout;
92*7dd7cddfSDavid du Colombier uchar buf[512];
93*7dd7cddfSDavid du Colombier Fibhdr f;
94*7dd7cddfSDavid du Colombier int i, c, n;
95*7dd7cddfSDavid du Colombier
96*7dd7cddfSDavid du Colombier ARGBEGIN{
97*7dd7cddfSDavid du Colombier default:
98*7dd7cddfSDavid du Colombier usage();
99*7dd7cddfSDavid du Colombier }ARGEND
100*7dd7cddfSDavid du Colombier
101*7dd7cddfSDavid du Colombier if(argc != 1)
102*7dd7cddfSDavid du Colombier usage();
103*7dd7cddfSDavid du Colombier
104*7dd7cddfSDavid du Colombier Binit(&bout, 1, OWRITE);
105*7dd7cddfSDavid du Colombier b = Bopen(argv[0], OREAD);
106*7dd7cddfSDavid du Colombier if(b == nil) {
107*7dd7cddfSDavid du Colombier fprint(2, "couldn't open file: %r\n");
108*7dd7cddfSDavid du Colombier exits("word");
109*7dd7cddfSDavid du Colombier }
110*7dd7cddfSDavid du Colombier
111*7dd7cddfSDavid du Colombier n = Bread(b, buf, sizeof buf);
112*7dd7cddfSDavid du Colombier if(n < sizeof buf) {
113*7dd7cddfSDavid du Colombier fprint(2, "short read: %r\n");
114*7dd7cddfSDavid du Colombier exits("read");
115*7dd7cddfSDavid du Colombier }
116*7dd7cddfSDavid du Colombier
117*7dd7cddfSDavid du Colombier readFibhdr(&f, buf, sizeof buf);
118*7dd7cddfSDavid du Colombier // printFibhdr(&f);
119*7dd7cddfSDavid du Colombier
120*7dd7cddfSDavid du Colombier Bseek(b, f.fcMin, 0);
121*7dd7cddfSDavid du Colombier
122*7dd7cddfSDavid du Colombier n = f.fcMac - f.fcMin;
123*7dd7cddfSDavid du Colombier for(i=0; i<n; i++) {
124*7dd7cddfSDavid du Colombier c = Bgetc(b);
125*7dd7cddfSDavid du Colombier if(c < 0)
126*7dd7cddfSDavid du Colombier break;
127*7dd7cddfSDavid du Colombier
128*7dd7cddfSDavid du Colombier switch(c) {
129*7dd7cddfSDavid du Colombier default:
130*7dd7cddfSDavid du Colombier Bputc(&bout, c);
131*7dd7cddfSDavid du Colombier break;
132*7dd7cddfSDavid du Colombier
133*7dd7cddfSDavid du Colombier case '\\': Bprint(&bout, "\\"); break; /* field escape */
134*7dd7cddfSDavid du Colombier case 7: Bprint(&bout, "\n"); break; /* cell, row mark */
135*7dd7cddfSDavid du Colombier case 9: Bprint(&bout, "\t"); break; /* tab */
136*7dd7cddfSDavid du Colombier case 11: Bprint(&bout, "\n"); break; /* hard line break */
137*7dd7cddfSDavid du Colombier case 12: Bprint(&bout, "\n\n\n\n"); break; /* page break */
138*7dd7cddfSDavid du Colombier case 13: Bprint(&bout, "\n\n"); break; /* paragraph end */
139*7dd7cddfSDavid du Colombier case 14: break; /* column break */
140*7dd7cddfSDavid du Colombier case 19: Bprint(&bout, "<"); break; /* field begin */
141*7dd7cddfSDavid du Colombier case 20: Bprint(&bout, ":"); break; /* field sep */
142*7dd7cddfSDavid du Colombier case 21: Bprint(&bout, ">"); break; /* field end */
143*7dd7cddfSDavid du Colombier case 30: Bprint(&bout, "-"); break; /* non-breaking hyphen */
144*7dd7cddfSDavid du Colombier case 31: break; /* non-required hyphen */
145*7dd7cddfSDavid du Colombier /* case 45: Bprint(&bout, "-"); break; /* breaking hyphen */
146*7dd7cddfSDavid du Colombier case 160: Bprint(&bout, " "); break; /* non-breaking space */
147*7dd7cddfSDavid du Colombier
148*7dd7cddfSDavid du Colombier /*
149*7dd7cddfSDavid du Colombier * these are only supposed to get used when special is set, but we
150*7dd7cddfSDavid du Colombier * never see these ascii values otherwise anyway.
151*7dd7cddfSDavid du Colombier */
152*7dd7cddfSDavid du Colombier
153*7dd7cddfSDavid du Colombier /*
154*7dd7cddfSDavid du Colombier * Empirically, some documents have sections of text where
155*7dd7cddfSDavid du Colombier * every character is followed by a zero byte. Some have sections
156*7dd7cddfSDavid du Colombier * of text where there are no zero bytes. Still others have both
157*7dd7cddfSDavid du Colombier * types and alternate between them. Until we parse which
158*7dd7cddfSDavid du Colombier * characters are ``special'', page numbers lose out.
159*7dd7cddfSDavid du Colombier */
160*7dd7cddfSDavid du Colombier case 0: /* Bprint(&bout, "<pageno>"); */ break;
161*7dd7cddfSDavid du Colombier case 1: Bprint(&bout, "<picture>"); break;
162*7dd7cddfSDavid du Colombier case 2: Bprint(&bout, "<footnote>"); break;
163*7dd7cddfSDavid du Colombier case 3: Bprint(&bout, "<footnote sep>"); break;
164*7dd7cddfSDavid du Colombier case 4: Bprint(&bout, "<footnote cont>"); break;
165*7dd7cddfSDavid du Colombier case 5: Bprint(&bout, "<animation>"); break;
166*7dd7cddfSDavid du Colombier case 6: Bprint(&bout, "<lineno>"); break;
167*7dd7cddfSDavid du Colombier /* case 7: Bprint(&bout, "<hand picture>"); break; */
168*7dd7cddfSDavid du Colombier case 8: Bprint(&bout, "<drawn object>"); break;
169*7dd7cddfSDavid du Colombier case 10: Bprint(&bout, "<abbrev date>"); break;
170*7dd7cddfSDavid du Colombier /* case 11: Bprint(&bout, "<hh:mm:ss>"); break; */
171*7dd7cddfSDavid du Colombier /* case 12: Bprint(&bout, "<section no>"); break; */
172*7dd7cddfSDavid du Colombier /* case 14: Bprint(&bout, "<Thu>"); break; */
173*7dd7cddfSDavid du Colombier case 15: Bprint(&bout, "<Thursday>"); break;
174*7dd7cddfSDavid du Colombier case 16: Bprint(&bout, "<day of month>"); break;
175*7dd7cddfSDavid du Colombier
176*7dd7cddfSDavid du Colombier case 22: Bprint(&bout, "<hour>"); break;
177*7dd7cddfSDavid du Colombier case 23: Bprint(&bout, "<hour hh>"); break;
178*7dd7cddfSDavid du Colombier case 24: Bprint(&bout, "<minute>"); break;
179*7dd7cddfSDavid du Colombier case 25: Bprint(&bout, "<minute mm>"); break;
180*7dd7cddfSDavid du Colombier case 26: Bprint(&bout, "<seconds>"); break;
181*7dd7cddfSDavid du Colombier case 27: Bprint(&bout, "<AM/PM>"); break;
182*7dd7cddfSDavid du Colombier case 28: Bprint(&bout, "<hh:mm:ss>"); break;
183*7dd7cddfSDavid du Colombier case 29: Bprint(&bout, "<date>"); break;
184*7dd7cddfSDavid du Colombier /* printable ascii begins hereish */
185*7dd7cddfSDavid du Colombier /*
186*7dd7cddfSDavid du Colombier case 30: Bprint(&bout, "<mm/dd/yy>"); break;
187*7dd7cddfSDavid du Colombier case 33: Bprint(&bout, "<mm>"); break;
188*7dd7cddfSDavid du Colombier case 34: Bprint(&bout, "<yyyy>"); break;
189*7dd7cddfSDavid du Colombier case 35: Bprint(&bout, "<yy>"); break;
190*7dd7cddfSDavid du Colombier case 36: Bprint(&bout, "<Feb>"); break;
191*7dd7cddfSDavid du Colombier case 37: Bprint(&bout, "<February>"); break;
192*7dd7cddfSDavid du Colombier case 38: Bprint(&bout, "<hh:mm>"); break;
193*7dd7cddfSDavid du Colombier case 39: Bprint(&bout, "<long date>"); break;
194*7dd7cddfSDavid du Colombier case 41: break; */
195*7dd7cddfSDavid du Colombier }
196*7dd7cddfSDavid du Colombier }
197*7dd7cddfSDavid du Colombier Bprint(&bout, "\n");
198*7dd7cddfSDavid du Colombier }
199