1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4
5 /* automatically generated; do not edit. */
6 typedef struct Fibhdr Fibhdr;
7 struct Fibhdr {
8 ushort wIdent;
9 ushort nFib;
10 ushort nProduct;
11 ushort lid;
12 short pnNext;
13 uchar fDot;
14 uchar fGlsy;
15 uchar fComplex;
16 uchar fHasPic;
17 uchar cQuickSaves;
18 uchar fEncrypted;
19 uchar fWhichTblStm;
20 uchar fReadOnlyRecommended;
21 uchar fWriteReservation;
22 uchar fExtChar;
23 uchar fLoadOverride;
24 uchar fFarEast;
25 uchar fCrypto;
26 ushort nFibBack;
27 ulong lKey;
28 uchar envr;
29 uchar fMac;
30 uchar fEmptySpecial;
31 uchar fLoadOverridePage;
32 uchar fFutureSavedUndo;
33 uchar fWord97Saved;
34 ushort chs;
35 ushort chsTables;
36 long fcMin;
37 long fcMac;
38 ushort csw;
39 };
40 enum { bcFibhdr = 0x22 };
41
42 /* automatically generated; do not edit. */
43 void
readFibhdr(Fibhdr * s,uchar * v,int nv)44 readFibhdr(Fibhdr *s, uchar *v, int nv)
45 {
46 if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr");
47 s->wIdent = v[0x0] | (v[0x0+1] << 8);
48 s->nFib = v[0x2] | (v[0x2+1] << 8);
49 s->nProduct = v[0x4] | (v[0x4+1] << 8);
50 s->lid = v[0x6] | (v[0x6+1] << 8);
51 s->pnNext = v[0x8] | (v[0x8+1] << 8);
52 s->fDot = ((v[0xA]) & 0x1) >> 0;
53 s->fGlsy = ((v[0xA]) & 0x2) >> 1;
54 s->fComplex = ((v[0xA]) & 0x4) >> 2;
55 s->fHasPic = ((v[0xA]) & 0x8) >> 3;
56 s->cQuickSaves = ((v[0xA]) & 0x240) >> 4;
57 s->fEncrypted = ((v[0xB]) & 0x1) >> 0;
58 s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1;
59 s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2;
60 s->fWriteReservation = ((v[0xB]) & 0x8) >> 3;
61 s->fExtChar = ((v[0xB]) & 0x16) >> 4;
62 s->fLoadOverride = ((v[0xB]) & 0x32) >> 5;
63 s->fFarEast = ((v[0xB]) & 0x64) >> 6;
64 s->fCrypto = ((v[0xB]) & 0x128) >> 7;
65 s->nFibBack = v[0xC] | (v[0xC+1] << 8);
66 s->lKey = v[0xE] | (v[0xE+1] << 8)| (v[0xE+2] << 16) | (v[0xE+3] << 24);
67 s->envr = v[0x12];
68 s->fMac = ((v[0x13]) & 0x1) >> 0;
69 s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1;
70 s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2;
71 s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3;
72 s->fWord97Saved = ((v[0x13]) & 0x16) >> 4;
73 s->chs = v[0x14] | (v[0x14+1] << 8);
74 s->chsTables = v[0x16] | (v[0x16+1] << 8);
75 s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24);
76 s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24);
77 s->csw = v[0x20] | (v[0x20+1] << 8);
78 }
79
/*
 * usage writes the command synopsis to file descriptor 2 and then
 * terminates the process with the exit status "usage".
 */
void
usage(void)
{
	fprint(2, "usage: wordtext /mnt/doc/WordDocument\n");
	exits("usage");
}
86
87 void
main(int argc,char ** argv)88 main(int argc, char **argv)
89 {
90 Biobuf *b;
91 Biobuf bout;
92 uchar buf[512];
93 Fibhdr f;
94 int i, c, n;
95
96 ARGBEGIN{
97 default:
98 usage();
99 }ARGEND
100
101 if(argc != 1)
102 usage();
103
104 Binit(&bout, 1, OWRITE);
105 b = Bopen(argv[0], OREAD);
106 if(b == nil) {
107 fprint(2, "couldn't open file: %r\n");
108 exits("word");
109 }
110
111 n = Bread(b, buf, sizeof buf);
112 if(n < sizeof buf) {
113 fprint(2, "short read: %r\n");
114 exits("read");
115 }
116
117 readFibhdr(&f, buf, sizeof buf);
118 // printFibhdr(&f);
119
120 Bseek(b, f.fcMin, 0);
121
122 n = f.fcMac - f.fcMin;
123 for(i=0; i<n; i++) {
124 c = Bgetc(b);
125 if(c < 0)
126 break;
127
128 switch(c) {
129 default:
130 Bputc(&bout, c);
131 break;
132
133 case '\\': Bprint(&bout, "\\"); break; /* field escape */
134 case 7: Bprint(&bout, "\n"); break; /* cell, row mark */
135 case 9: Bprint(&bout, "\t"); break; /* tab */
136 case 11: Bprint(&bout, "\n"); break; /* hard line break */
137 case 12: Bprint(&bout, "\n\n\n\n"); break; /* page break */
138 case 13: Bprint(&bout, "\n\n"); break; /* paragraph end */
139 case 14: break; /* column break */
140 case 19: Bprint(&bout, "<"); break; /* field begin */
141 case 20: Bprint(&bout, ":"); break; /* field sep */
142 case 21: Bprint(&bout, ">"); break; /* field end */
143 case 30: Bprint(&bout, "-"); break; /* non-breaking hyphen */
144 case 31: break; /* non-required hyphen */
145 /* case 45: Bprint(&bout, "-"); break; /* breaking hyphen */
146 case 160: Bprint(&bout, " "); break; /* non-breaking space */
147
148 /*
149 * these are only supposed to get used when special is set, but we
150 * never see these ascii values otherwise anyway.
151 */
152
153 /*
154 * Empirically, some documents have sections of text where
155 * every character is followed by a zero byte. Some have sections
156 * of text where there are no zero bytes. Still others have both
157 * types and alternate between them. Until we parse which
158 * characters are ``special'', page numbers lose out.
159 */
160 case 0: /* Bprint(&bout, "<pageno>"); */ break;
161 case 1: Bprint(&bout, "<picture>"); break;
162 case 2: Bprint(&bout, "<footnote>"); break;
163 case 3: Bprint(&bout, "<footnote sep>"); break;
164 case 4: Bprint(&bout, "<footnote cont>"); break;
165 case 5: Bprint(&bout, "<animation>"); break;
166 case 6: Bprint(&bout, "<lineno>"); break;
167 /* case 7: Bprint(&bout, "<hand picture>"); break; */
168 case 8: Bprint(&bout, "<drawn object>"); break;
169 case 10: Bprint(&bout, "<abbrev date>"); break;
170 /* case 11: Bprint(&bout, "<hh:mm:ss>"); break; */
171 /* case 12: Bprint(&bout, "<section no>"); break; */
172 /* case 14: Bprint(&bout, "<Thu>"); break; */
173 case 15: Bprint(&bout, "<Thursday>"); break;
174 case 16: Bprint(&bout, "<day of month>"); break;
175
176 case 22: Bprint(&bout, "<hour>"); break;
177 case 23: Bprint(&bout, "<hour hh>"); break;
178 case 24: Bprint(&bout, "<minute>"); break;
179 case 25: Bprint(&bout, "<minute mm>"); break;
180 case 26: Bprint(&bout, "<seconds>"); break;
181 case 27: Bprint(&bout, "<AM/PM>"); break;
182 case 28: Bprint(&bout, "<hh:mm:ss>"); break;
183 case 29: Bprint(&bout, "<date>"); break;
184 /* printable ascii begins hereish */
185 /*
186 case 30: Bprint(&bout, "<mm/dd/yy>"); break;
187 case 33: Bprint(&bout, "<mm>"); break;
188 case 34: Bprint(&bout, "<yyyy>"); break;
189 case 35: Bprint(&bout, "<yy>"); break;
190 case 36: Bprint(&bout, "<Feb>"); break;
191 case 37: Bprint(&bout, "<February>"); break;
192 case 38: Bprint(&bout, "<hh:mm>"); break;
193 case 39: Bprint(&bout, "<long date>"); break;
194 case 41: break; */
195 }
196 }
197 Bprint(&bout, "\n");
198 }
199