1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 5 /* automatically generated; do not edit. */ 6 typedef struct Fibhdr Fibhdr; 7 struct Fibhdr { 8 ushort wIdent; 9 ushort nFib; 10 ushort nProduct; 11 ushort lid; 12 short pnNext; 13 uchar fDot; 14 uchar fGlsy; 15 uchar fComplex; 16 uchar fHasPic; 17 uchar cQuickSaves; 18 uchar fEncrypted; 19 uchar fWhichTblStm; 20 uchar fReadOnlyRecommended; 21 uchar fWriteReservation; 22 uchar fExtChar; 23 uchar fLoadOverride; 24 uchar fFarEast; 25 uchar fCrypto; 26 ushort nFibBack; 27 ulong lKey; 28 uchar envr; 29 uchar fMac; 30 uchar fEmptySpecial; 31 uchar fLoadOverridePage; 32 uchar fFutureSavedUndo; 33 uchar fWord97Saved; 34 ushort chs; 35 ushort chsTables; 36 long fcMin; 37 long fcMac; 38 ushort csw; 39 }; 40 enum { bcFibhdr = 0x22 }; 41 42 /* automatically generated; do not edit. */ 43 void 44 readFibhdr(Fibhdr *s, uchar *v, int nv) 45 { 46 if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr"); 47 s->wIdent = v[0x0] | (v[0x0+1] << 8); 48 s->nFib = v[0x2] | (v[0x2+1] << 8); 49 s->nProduct = v[0x4] | (v[0x4+1] << 8); 50 s->lid = v[0x6] | (v[0x6+1] << 8); 51 s->pnNext = v[0x8] | (v[0x8+1] << 8); 52 s->fDot = ((v[0xA]) & 0x1) >> 0; 53 s->fGlsy = ((v[0xA]) & 0x2) >> 1; 54 s->fComplex = ((v[0xA]) & 0x4) >> 2; 55 s->fHasPic = ((v[0xA]) & 0x8) >> 3; 56 s->cQuickSaves = ((v[0xA]) & 0x240) >> 4; 57 s->fEncrypted = ((v[0xB]) & 0x1) >> 0; 58 s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1; 59 s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2; 60 s->fWriteReservation = ((v[0xB]) & 0x8) >> 3; 61 s->fExtChar = ((v[0xB]) & 0x16) >> 4; 62 s->fLoadOverride = ((v[0xB]) & 0x32) >> 5; 63 s->fFarEast = ((v[0xB]) & 0x64) >> 6; 64 s->fCrypto = ((v[0xB]) & 0x128) >> 7; 65 s->nFibBack = v[0xC] | (v[0xC+1] << 8); 66 s->lKey = v[0xE] | (v[0xE+1] << 8)| (v[0xE+2] << 16) | (v[0xE+3] << 24); 67 s->envr = v[0x12]; 68 s->fMac = ((v[0x13]) & 0x1) >> 0; 69 s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1; 70 s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2; 71 s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3; 72 s->fWord97Saved = ((v[0x13]) & 0x16) >> 4; 73 s->chs = v[0x14] | (v[0x14+1] << 8); 74 s->chsTables = v[0x16] | (v[0x16+1] << 8); 75 s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24); 76 s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24); 77 s->csw = v[0x20] | (v[0x20+1] << 8); 78 } 79 80 void 81 usage(void) 82 { 83 fprint(2, "usage: wordtext /mnt/doc/WordDocument\n"); 84 exits("usage"); 85 } 86 87 void 88 main(int argc, char **argv) 89 { 90 Biobuf *b; 91 Biobuf bout; 92 uchar buf[512]; 93 Fibhdr f; 94 int i, c, n; 95 96 ARGBEGIN{ 97 default: 98 usage(); 99 }ARGEND 100 101 if(argc != 1) 102 usage(); 103 104 Binit(&bout, 1, OWRITE); 105 b = Bopen(argv[0], OREAD); 106 if(b == nil) { 107 fprint(2, "couldn't open file: %r\n"); 108 exits("word"); 109 } 110 111 n = Bread(b, buf, sizeof buf); 112 if(n < sizeof buf) { 113 fprint(2, "short read: %r\n"); 114 exits("read"); 115 } 116 117 readFibhdr(&f, buf, sizeof buf); 118 // printFibhdr(&f); 119 120 Bseek(b, f.fcMin, 0); 121 122 n = f.fcMac - f.fcMin; 123 for(i=0; i<n; i++) { 124 c = Bgetc(b); 125 if(c < 0) 126 break; 127 128 switch(c) { 129 default: 130 Bputc(&bout, c); 131 break; 132 133 case '\\': Bprint(&bout, "\\"); break; /* field escape */ 134 case 7: Bprint(&bout, "\n"); break; /* cell, row mark */ 135 case 9: Bprint(&bout, "\t"); break; /* tab */ 136 case 11: Bprint(&bout, "\n"); break; /* hard line break */ 137 case 12: Bprint(&bout, "\n\n\n\n"); break; /* page break */ 138 case 13: Bprint(&bout, "\n\n"); break; /* paragraph end */ 139 case 14: break; /* column break */ 140 case 19: Bprint(&bout, "<"); break; /* field begin */ 141 case 20: Bprint(&bout, ":"); break; /* field sep */ 142 case 21: Bprint(&bout, ">"); break; /* field end */ 143 case 30: Bprint(&bout, "-"); break; /* non-breaking hyphen */ 144 case 31: break; /* non-required hyphen */ 145 /* case 45: Bprint(&bout, "-"); break; /* breaking hyphen */ 146 case 160: Bprint(&bout, " "); break; /* non-breaking space */ 147 148 /* 149 * these are only supposed to get used when special is set, but we 150 * never see these ascii values otherwise anyway. 151 */ 152 153 /* 154 * Empirically, some documents have sections of text where 155 * every character is followed by a zero byte. Some have sections 156 * of text where there are no zero bytes. Still others have both 157 * types and alternate between them. Until we parse which 158 * characters are ``special'', page numbers lose out. 159 */ 160 case 0: /* Bprint(&bout, "<pageno>"); */ break; 161 case 1: Bprint(&bout, "<picture>"); break; 162 case 2: Bprint(&bout, "<footnote>"); break; 163 case 3: Bprint(&bout, "<footnote sep>"); break; 164 case 4: Bprint(&bout, "<footnote cont>"); break; 165 case 5: Bprint(&bout, "<animation>"); break; 166 case 6: Bprint(&bout, "<lineno>"); break; 167 /* case 7: Bprint(&bout, "<hand picture>"); break; */ 168 case 8: Bprint(&bout, "<drawn object>"); break; 169 case 10: Bprint(&bout, "<abbrev date>"); break; 170 /* case 11: Bprint(&bout, "<hh:mm:ss>"); break; */ 171 /* case 12: Bprint(&bout, "<section no>"); break; */ 172 /* case 14: Bprint(&bout, "<Thu>"); break; */ 173 case 15: Bprint(&bout, "<Thursday>"); break; 174 case 16: Bprint(&bout, "<day of month>"); break; 175 176 case 22: Bprint(&bout, "<hour>"); break; 177 case 23: Bprint(&bout, "<hour hh>"); break; 178 case 24: Bprint(&bout, "<minute>"); break; 179 case 25: Bprint(&bout, "<minute mm>"); break; 180 case 26: Bprint(&bout, "<seconds>"); break; 181 case 27: Bprint(&bout, "<AM/PM>"); break; 182 case 28: Bprint(&bout, "<hh:mm:ss>"); break; 183 case 29: Bprint(&bout, "<date>"); break; 184 /* printable ascii begins hereish */ 185 /* 186 case 30: Bprint(&bout, "<mm/dd/yy>"); break; 187 case 33: Bprint(&bout, "<mm>"); break; 188 case 34: Bprint(&bout, "<yyyy>"); break; 189 case 35: Bprint(&bout, "<yy>"); break; 190 case 36: Bprint(&bout, "<Feb>"); break; 191 case 37: Bprint(&bout, "<February>"); break; 192 case 38: Bprint(&bout, "<hh:mm>"); break; 193 case 39: Bprint(&bout, "<long date>"); break; 194 case 41: break; */ 195 } 196 } 197 Bprint(&bout, "\n"); 198 } 199