xref: /plan9/sys/src/cmd/aux/mswordstrings.c (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 
5 /* automatically generated; do not edit. */
6 typedef struct Fibhdr Fibhdr;
7 struct Fibhdr {
8 	ushort wIdent;
9 	ushort nFib;
10 	ushort nProduct;
11 	ushort lid;
12 	short pnNext;
13 	uchar fDot;
14 	uchar fGlsy;
15 	uchar fComplex;
16 	uchar fHasPic;
17 	uchar cQuickSaves;
18 	uchar fEncrypted;
19 	uchar fWhichTblStm;
20 	uchar fReadOnlyRecommended;
21 	uchar fWriteReservation;
22 	uchar fExtChar;
23 	uchar fLoadOverride;
24 	uchar fFarEast;
25 	uchar fCrypto;
26 	ushort nFibBack;
27 	ulong lKey;
28 	uchar envr;
29 	uchar fMac;
30 	uchar fEmptySpecial;
31 	uchar fLoadOverridePage;
32 	uchar fFutureSavedUndo;
33 	uchar fWord97Saved;
34 	ushort chs;
35 	ushort chsTables;
36 	long fcMin;
37 	long fcMac;
38 	ushort csw;
39 };
40 enum { bcFibhdr = 0x22 };
41 
42 /* automatically generated; do not edit. */
43 void
readFibhdr(Fibhdr * s,uchar * v,int nv)44 readFibhdr(Fibhdr *s, uchar *v, int nv)
45 {
46 	if(nv < bcFibhdr) sysfatal("not enough data for Fibhdr");
47 	s->wIdent = v[0x0] | (v[0x0+1] << 8);
48 	s->nFib = v[0x2] | (v[0x2+1] << 8);
49 	s->nProduct = v[0x4] | (v[0x4+1] << 8);
50 	s->lid = v[0x6] | (v[0x6+1] << 8);
51 	s->pnNext = v[0x8] | (v[0x8+1] << 8);
52 	s->fDot = ((v[0xA]) & 0x1) >> 0;
53 	s->fGlsy = ((v[0xA]) & 0x2) >> 1;
54 	s->fComplex = ((v[0xA]) & 0x4) >> 2;
55 	s->fHasPic = ((v[0xA]) & 0x8) >> 3;
56 	s->cQuickSaves = ((v[0xA]) & 0x240) >> 4;
57 	s->fEncrypted = ((v[0xB]) & 0x1) >> 0;
58 	s->fWhichTblStm = ((v[0xB]) & 0x2) >> 1;
59 	s->fReadOnlyRecommended = ((v[0xB]) & 0x4) >> 2;
60 	s->fWriteReservation = ((v[0xB]) & 0x8) >> 3;
61 	s->fExtChar = ((v[0xB]) & 0x16) >> 4;
62 	s->fLoadOverride = ((v[0xB]) & 0x32) >> 5;
63 	s->fFarEast = ((v[0xB]) & 0x64) >> 6;
64 	s->fCrypto = ((v[0xB]) & 0x128) >> 7;
65 	s->nFibBack = v[0xC] | (v[0xC+1] << 8);
66 	s->lKey = v[0xE] | (v[0xE+1] << 8)| (v[0xE+2] << 16) | (v[0xE+3] << 24);
67 	s->envr = v[0x12];
68 	s->fMac = ((v[0x13]) & 0x1) >> 0;
69 	s->fEmptySpecial = ((v[0x13]) & 0x2) >> 1;
70 	s->fLoadOverridePage = ((v[0x13]) & 0x4) >> 2;
71 	s->fFutureSavedUndo = ((v[0x13]) & 0x8) >> 3;
72 	s->fWord97Saved = ((v[0x13]) & 0x16) >> 4;
73 	s->chs = v[0x14] | (v[0x14+1] << 8);
74 	s->chsTables = v[0x16] | (v[0x16+1] << 8);
75 	s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24);
76 	s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24);
77 	s->csw = v[0x20] | (v[0x20+1] << 8);
78 }
79 
/*
 * Write the one-line synopsis to file descriptor 2 and terminate
 * the process with exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: wordtext /mnt/doc/WordDocument\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
86 
87 void
main(int argc,char ** argv)88 main(int argc, char **argv)
89 {
90 	Biobuf *b;
91 	Biobuf bout;
92 	uchar buf[512];
93 	Fibhdr f;
94 	int i, c, n;
95 
96 	ARGBEGIN{
97 	default:
98 		usage();
99 	}ARGEND
100 
101 	if(argc != 1)
102 		usage();
103 
104 	Binit(&bout, 1, OWRITE);
105 	b = Bopen(argv[0], OREAD);
106 	if(b == nil) {
107 		fprint(2, "couldn't open file: %r\n");
108 		exits("word");
109 	}
110 
111 	n = Bread(b, buf, sizeof buf);
112 	if(n < sizeof buf) {
113 		fprint(2, "short read: %r\n");
114 		exits("read");
115 	}
116 
117 	readFibhdr(&f, buf, sizeof buf);
118 	// printFibhdr(&f);
119 
120 	Bseek(b, f.fcMin, 0);
121 
122 	n = f.fcMac - f.fcMin;
123 	for(i=0; i<n; i++) {
124 		c = Bgetc(b);
125 		if(c < 0)
126 			break;
127 
128 		switch(c) {
129 		default:
130 			Bputc(&bout, c);
131 			break;
132 
133 		case '\\':	Bprint(&bout, "\\");	break;	/* field escape */
134 		case 7:	Bprint(&bout, "\n");		break;	/* cell, row mark */
135 		case 9:	Bprint(&bout, "\t");		break;	/* tab */
136 		case 11:	Bprint(&bout, "\n");		break;	/* hard line break */
137 		case 12:	Bprint(&bout, "\n\n\n\n");	break;	/* page break */
138 		case 13:	Bprint(&bout, "\n\n");	break;	/* paragraph end */
139 		case 14:				break;	/* column break */
140 		case 19:	Bprint(&bout, "<");		break;	/* field begin */
141 		case 20:	Bprint(&bout, ":");		break;	/* field sep */
142 		case 21:	Bprint(&bout, ">");		break;	/* field end */
143 		case 30:	Bprint(&bout, "-");		break;	/* non-breaking hyphen */
144 		case 31:				break;	/* non-required hyphen */
145 	/*	case 45:	Bprint(&bout, "-");		break;	/* breaking hyphen */
146 		case 160:	Bprint(&bout, " ");		break;	/* non-breaking space */
147 
148 		/*
149 		 *  these are only supposed to get used when special is set, but we
150 		 * never see these ascii values otherwise anyway.
151 		 */
152 
153 		/*
154 		 * Empirically, some documents have sections of text where
155 		 * every character is followed by a zero byte.  Some have sections
156 		 * of text where there are no zero bytes.  Still others have both
157 		 * types and alternate between them.  Until we parse which
158 		 * characters are ``special'', page numbers lose out.
159 		 */
160 		case 0:	/* Bprint(&bout, "<pageno>"); */	break;
161 		case 1:	Bprint(&bout, "<picture>");	break;
162 		case 2:	Bprint(&bout, "<footnote>");	break;
163 		case 3:	Bprint(&bout, "<footnote sep>");	break;
164 		case 4:	Bprint(&bout, "<footnote cont>");	break;
165 		case 5:	Bprint(&bout, "<animation>");	break;
166 		case 6:	Bprint(&bout, "<lineno>");	break;
167 		/* case 7:	Bprint(&bout, "<hand picture>");	break; */
168 		case 8:	Bprint(&bout, "<drawn object>");	break;
169 		case 10:	Bprint(&bout, "<abbrev date>");	break;
170 		/* case 11:	Bprint(&bout, "<hh:mm:ss>");	break; */
171 		/* case 12:	Bprint(&bout, "<section no>");	break; */
172 		/* case 14:	Bprint(&bout, "<Thu>");	break; */
173 		case 15:	Bprint(&bout, "<Thursday>");	break;
174 		case 16:	Bprint(&bout, "<day of month>");	break;
175 
176 		case 22:	Bprint(&bout, "<hour>");	break;
177 		case 23:	Bprint(&bout, "<hour hh>");	break;
178 		case 24:	Bprint(&bout, "<minute>");	break;
179 		case 25:	Bprint(&bout, "<minute mm>");	break;
180 		case 26:	Bprint(&bout, "<seconds>");	break;
181 		case 27:	Bprint(&bout, "<AM/PM>");	break;
182 		case 28:	Bprint(&bout, "<hh:mm:ss>");	break;
183 		case 29:	Bprint(&bout, "<date>");	break;
184 	/* printable ascii begins hereish */
185 	/*
186 		case 30:	Bprint(&bout, "<mm/dd/yy>");	break;
187 		case 33:	Bprint(&bout, "<mm>");	break;
188 		case 34:	Bprint(&bout, "<yyyy>");	break;
189 		case 35:	Bprint(&bout, "<yy>");	break;
190 		case 36:	Bprint(&bout, "<Feb>");	break;
191 		case 37:	Bprint(&bout, "<February>");	break;
192 		case 38:	Bprint(&bout, "<hh:mm>");	break;
193 		case 39:	Bprint(&bout, "<long date>");	break;
194 		case 41:				break; */
195 		}
196 	}
197 	Bprint(&bout, "\n");
198 }
199