/*
 * wordole.c
 * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the OLE internals of a MS Word file
 *
 * Source: /plan9/sys/src/cmd/aux/antiword/wordole.c
 * (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
 */

#include <limits.h>
#include <string.h>
#include "antiword.h"

12 /* Private type for Property Set Storage entries */
13 typedef struct pps_entry_tag {
14 	ULONG	ulNext;
15 	ULONG	ulPrevious;
16 	ULONG	ulDir;
17 	ULONG	ulSB;
18 	ULONG	ulSize;
19 	int	iLevel;
20 	char	szName[32];
21 	UCHAR	ucType;
22 } pps_entry_type;
23 
/* Marks a PPS number or index that must not be followed */
#define PPS_NUMBER_INVALID	0xffffffffUL


/*
 * Single clean-up sequence, so that all exits of iInitDocumentOLE release
 * exactly the same things. The macro uses the locals of that function by
 * name; every pointer is overwritten with the value returned by xfree().
 */
#define FREE_ALL()					\
	do {						\
		vDestroySmallBlockList();		\
		aulRootList = xfree(aulRootList);	\
		aulSbdList = xfree(aulSbdList);		\
		aulBbdList = xfree(aulBbdList);		\
		aulSBD = xfree(aulSBD);			\
		aulBBD = xfree(aulBBD);			\
	} while(0)


40 /*
41  * ulReadLong - read four bytes from the given file and offset
42  */
43 static ULONG
ulReadLong(FILE * pFile,ULONG ulOffset)44 ulReadLong(FILE *pFile, ULONG ulOffset)
45 {
46 	UCHAR	aucBytes[4];
47 
48 	fail(pFile == NULL);
49 
50 	if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
51 		werr(1, "Read long 0x%lx not possible", ulOffset);
52 	}
53 	return ulGetLong(0, aucBytes);
54 } /* end of ulReadLong */
55 
56 /*
57  * vName2String - turn the name into a proper string.
58  */
59 static void
vName2String(char * szName,const UCHAR * aucBytes,size_t tNameSize)60 vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
61 {
62 	char	*pcChar;
63 	size_t	tIndex;
64 
65 	fail(aucBytes == NULL || szName == NULL);
66 
67 	if (tNameSize < 2) {
68 		szName[0] = '\0';
69 		return;
70 	}
71 	for (tIndex = 0, pcChar = szName;
72 	     tIndex < 2 * tNameSize;
73 	     tIndex += 2, pcChar++) {
74 		*pcChar = (char)aucBytes[tIndex];
75 	}
76 	szName[tNameSize - 1] = '\0';
77 } /* end of vName2String */
78 
79 /*
80  * tReadBlockIndices - read the Big/Small Block Depot indices
81  *
82  * Returns the number of indices read
83  */
84 static size_t
tReadBlockIndices(FILE * pFile,ULONG * aulBlockDepot,size_t tMaxRec,ULONG ulOffset)85 tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
86 	size_t tMaxRec, ULONG ulOffset)
87 {
88 	size_t	tDone;
89 	int	iIndex;
90 	UCHAR	aucBytes[BIG_BLOCK_SIZE];
91 
92 	fail(pFile == NULL || aulBlockDepot == NULL);
93 	fail(tMaxRec == 0);
94 
95 	/* Read a big block with BBD or SBD indices */
96 	if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
97 		werr(0, "Reading big block from 0x%lx is not possible",
98 			ulOffset);
99 		return 0;
100 	}
101 	/* Split the big block into indices, an index is four bytes */
102 	tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
103 	for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
104 		aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
105 		NO_DBG_DEC(aulBlockDepot[iIndex]);
106 	}
107 	return tDone;
108 } /* end of tReadBlockIndices */
109 
110 /*
111  * bGetBBD - get the Big Block Depot indices from the index-blocks
112  */
113 static BOOL
bGetBBD(FILE * pFile,const ULONG * aulDepot,size_t tDepotLen,ULONG * aulBBD,size_t tBBDLen)114 bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
115 	ULONG *aulBBD, size_t tBBDLen)
116 {
117 	ULONG	ulBegin;
118 	size_t	tToGo, tDone;
119 	int	iIndex;
120 
121 	fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
122 
123 	DBG_MSG("bGetBBD");
124 
125 	tToGo = tBBDLen;
126 	for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
127 		ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
128 		NO_DBG_HEX(ulBegin);
129 		tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
130 		fail(tDone > tToGo);
131 		if (tDone == 0) {
132 			return FALSE;
133 		}
134 		aulBBD += tDone;
135 		tToGo -= tDone;
136 	}
137 	return tToGo == 0;
138 } /* end of bGetBBD */
139 
140 /*
141  * bGetSBD - get the Small Block Depot indices from the index-blocks
142  */
143 static BOOL
bGetSBD(FILE * pFile,const ULONG * aulDepot,size_t tDepotLen,ULONG * aulSBD,size_t tSBDLen)144 bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
145 	ULONG *aulSBD, size_t tSBDLen)
146 {
147 	ULONG	ulBegin;
148 	size_t	tToGo, tDone;
149 	int	iIndex;
150 
151 	fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
152 
153 	DBG_MSG("bGetSBD");
154 
155 	tToGo = tSBDLen;
156 	for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
157 		fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
158 		ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
159 		NO_DBG_HEX(ulBegin);
160 		tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
161 		fail(tDone > tToGo);
162 		if (tDone == 0) {
163 			return FALSE;
164 		}
165 		aulSBD += tDone;
166 		tToGo -= tDone;
167 	}
168 	return tToGo == 0;
169 } /* end of bGetSBD */
170 
171 /*
172  * vComputePPSlevels - compute the levels of the Property Set Storage entries
173  */
174 static void
vComputePPSlevels(pps_entry_type * atPPSlist,pps_entry_type * pNode,int iLevel,int iRecursionLevel)175 vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
176 			int iLevel, int iRecursionLevel)
177 {
178 	fail(atPPSlist == NULL || pNode == NULL);
179 	fail(iLevel < 0 || iRecursionLevel < 0);
180 
181 	if (iRecursionLevel > 25) {
182 		/* This removes the possibility of an infinite recursion */
183 		DBG_DEC(iRecursionLevel);
184 		return;
185 	}
186 	if (pNode->iLevel <= iLevel) {
187 		/* Avoid entering a loop */
188 		DBG_DEC(iLevel);
189 		DBG_DEC(pNode->iLevel);
190 		return;
191 	}
192 
193 	pNode->iLevel = iLevel;
194 
195 	if (pNode->ulDir != PPS_NUMBER_INVALID) {
196 		vComputePPSlevels(atPPSlist,
197 				&atPPSlist[pNode->ulDir],
198 				iLevel + 1,
199 				iRecursionLevel + 1);
200 	}
201 	if (pNode->ulNext != PPS_NUMBER_INVALID) {
202 		vComputePPSlevels(atPPSlist,
203 				&atPPSlist[pNode->ulNext],
204 				iLevel,
205 				iRecursionLevel + 1);
206 	}
207 	if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
208 		vComputePPSlevels(atPPSlist,
209 				&atPPSlist[pNode->ulPrevious],
210 				iLevel,
211 				iRecursionLevel + 1);
212 	}
213 } /* end of vComputePPSlevels */
214 
215 /*
216  * bGetPPS - search the Property Set Storage for three sets
217  *
218  * Return TRUE if the WordDocument PPS is found
219  */
220 static BOOL
bGetPPS(FILE * pFile,const ULONG * aulRootList,size_t tRootListLen,pps_info_type * pPPS)221 bGetPPS(FILE *pFile,
222 	const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
223 {
224 	pps_entry_type	*atPPSlist;
225 	ULONG	ulBegin, ulOffset, ulTmp;
226 	size_t	tNbrOfPPS, tNameSize;
227 	int	iIndex, iStartBlock, iRootIndex;
228 	BOOL	bWord, bExcel;
229 	UCHAR	aucBytes[PROPERTY_SET_STORAGE_SIZE];
230 
231 	fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
232 
233 	DBG_MSG("bGetPPS");
234 
235 	NO_DBG_DEC(tRootListLen);
236 
237 	bWord = FALSE;
238 	bExcel = FALSE;
239 	(void)memset(pPPS, 0, sizeof(*pPPS));
240 
241 	/* Read and store all the Property Set Storage entries */
242 
243 	tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
244 	atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
245 	iRootIndex = 0;
246 
247 	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
248 		ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
249 		iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
250 		ulOffset = ulTmp % BIG_BLOCK_SIZE;
251 		ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
252 				ulOffset;
253 		NO_DBG_HEX(ulBegin);
254 		if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
255 							ulBegin, pFile)) {
256 			werr(0, "Reading PPS %d is not possible", iIndex);
257 			atPPSlist = xfree(atPPSlist);
258 			return FALSE;
259 		}
260 		tNameSize = (size_t)usGetWord(0x40, aucBytes);
261 		tNameSize = (tNameSize + 1) / 2;
262 		vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
263 		atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
264 		if (atPPSlist[iIndex].ucType == 5) {
265 			iRootIndex = iIndex;
266 		}
267 		atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
268 		atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
269 		atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
270 		atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
271 		atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
272 		atPPSlist[iIndex].iLevel = INT_MAX;
273 		if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
274 		     atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
275 		    (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
276 		     atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
277 		    (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
278 		     atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
279 			DBG_DEC(iIndex);
280 			DBG_DEC(atPPSlist[iIndex].ulPrevious);
281 			DBG_DEC(atPPSlist[iIndex].ulNext);
282 			DBG_DEC(atPPSlist[iIndex].ulDir);
283 			DBG_DEC(tNbrOfPPS);
284 			werr(0, "The Property Set Storage is damaged");
285 			atPPSlist = xfree(atPPSlist);
286 			return FALSE;
287 		}
288 	}
289 
290 #if 0 /* defined(DEBUG) */
291 	DBG_MSG("Before");
292 	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
293 		DBG_MSG(atPPSlist[iIndex].szName);
294 		DBG_HEX(atPPSlist[iIndex].ulDir);
295 		DBG_HEX(atPPSlist[iIndex].ulPrevious);
296 		DBG_HEX(atPPSlist[iIndex].ulNext);
297 		DBG_DEC(atPPSlist[iIndex].ulSB);
298 		DBG_HEX(atPPSlist[iIndex].ulSize);
299 		DBG_DEC(atPPSlist[iIndex].iLevel);
300 	}
301 #endif /* DEBUG */
302 
303 	/* Add level information to each entry */
304 	vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
305 
306 	/* Check the entries on level 1 for the required information */
307 	NO_DBG_MSG("After");
308 	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
309 #if 0 /* defined(DEBUG) */
310 		DBG_MSG(atPPSlist[iIndex].szName);
311 		DBG_HEX(atPPSlist[iIndex].ulDir);
312 		DBG_HEX(atPPSlist[iIndex].ulPrevious);
313 		DBG_HEX(atPPSlist[iIndex].ulNext);
314 		DBG_DEC(atPPSlist[iIndex].ulSB);
315 		DBG_HEX(atPPSlist[iIndex].ulSize);
316 		DBG_DEC(atPPSlist[iIndex].iLevel);
317 #endif /* DEBUG */
318 		if (atPPSlist[iIndex].iLevel != 1 ||
319 		    atPPSlist[iIndex].ucType != 2 ||
320 		    atPPSlist[iIndex].szName[0] == '\0' ||
321 		    atPPSlist[iIndex].ulSize == 0) {
322 			/* This entry can be ignored */
323 			continue;
324 		}
325 		if (pPPS->tWordDocument.ulSize == 0 &&
326 		    STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
327 			pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
328 			pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
329 			bWord = TRUE;
330 		} else if (pPPS->tData.ulSize == 0 &&
331 			   STREQ(atPPSlist[iIndex].szName, "Data")) {
332 			pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
333 			pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
334 		} else if (pPPS->t0Table.ulSize == 0 &&
335 			   STREQ(atPPSlist[iIndex].szName, "0Table")) {
336 			pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
337 			pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
338 		} else if (pPPS->t1Table.ulSize == 0 &&
339 			   STREQ(atPPSlist[iIndex].szName, "1Table")) {
340 			pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
341 			pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
342 		} else if (pPPS->tSummaryInfo.ulSize == 0 &&
343 			   STREQ(atPPSlist[iIndex].szName,
344 						"\005SummaryInformation")) {
345 			pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
346 			pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
347 		} else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
348 			   STREQ(atPPSlist[iIndex].szName,
349 					"\005DocumentSummaryInformation")) {
350 			pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
351 			pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
352 		} else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
353 			   STREQ(atPPSlist[iIndex].szName, "Workbook")) {
354 			bExcel = TRUE;
355 		}
356 	}
357 
358 	/* Free the space for the Property Set Storage entries */
359 	atPPSlist = xfree(atPPSlist);
360 
361 	/* Draw your conclusions */
362 	if (bWord) {
363 		return TRUE;
364 	}
365 
366 	if (bExcel) {
367 		werr(0, "Sorry, but this is an Excel spreadsheet");
368 	} else {
369 		werr(0, "This OLE file does not contain a Word document");
370 	}
371 	return FALSE;
372 } /* end of bGetPPS */
373 
374 /*
375  * vGetBbdList - make a list of the places to find big blocks
376  */
377 static void
vGetBbdList(FILE * pFile,int iNbr,ULONG * aulBbdList,ULONG ulOffset)378 vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
379 {
380 	int	iIndex;
381 
382 	fail(pFile == NULL);
383 	fail(iNbr > 127);
384 	fail(aulBbdList == NULL);
385 
386 	NO_DBG_DEC(iNbr);
387 	for (iIndex = 0; iIndex < iNbr; iIndex++) {
388                 aulBbdList[iIndex] =
389                         ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
390 		NO_DBG_DEC(iIndex);
391                 NO_DBG_HEX(aulBbdList[iIndex]);
392         }
393 } /* end of vGetBbdList */
394 
395 /*
396  * bGetDocumentText - make a list of the text blocks of a Word document
397  *
398  * Return TRUE when succesful, otherwise FALSE
399  */
400 static BOOL
bGetDocumentText(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const ULONG * aulSBD,size_t tSBDLen,const UCHAR * aucHeader,int iWordVersion)401 bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
402 	const ULONG *aulBBD, size_t tBBDLen,
403 	const ULONG *aulSBD, size_t tSBDLen,
404 	const UCHAR *aucHeader, int iWordVersion)
405 {
406 	ULONG	ulBeginOfText;
407 	ULONG	ulTextLen, ulFootnoteLen, ulEndnoteLen;
408 	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
409 	ULONG	ulTextBoxLen, ulHdrTextBoxLen;
410 	UINT	uiQuickSaves;
411 	BOOL	bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
412 	USHORT	usIdent, usDocStatus;
413 
414 	fail(pFile == NULL || pPPS == NULL);
415 	fail(aulBBD == NULL);
416 	fail(aulSBD == NULL);
417 
418 	DBG_MSG("bGetDocumentText");
419 
420 	/* Get the "magic number" from the header */
421 	usIdent = usGetWord(0x00, aucHeader);
422 	DBG_HEX(usIdent);
423 	bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
424 			usIdent == 0xa697 || usIdent == 0xa699;
425 	/* Get the status flags from the header */
426 	usDocStatus = usGetWord(0x0a, aucHeader);
427 	DBG_HEX(usDocStatus);
428 	bTemplate = (usDocStatus & BIT(0)) != 0;
429 	DBG_MSG_C(bTemplate, "This document is a Template");
430 	bFastSaved = (usDocStatus & BIT(2)) != 0;
431 	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
432 	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
433 	DBG_DEC_C(bFastSaved, uiQuickSaves);
434 	bEncrypted = (usDocStatus & BIT(8)) != 0;
435 	if (bEncrypted) {
436 		werr(0, "Encrypted documents are not supported");
437 		return FALSE;
438 	}
439 
440 	/* Get length information */
441 	ulBeginOfText = ulGetLong(0x18, aucHeader);
442 	DBG_HEX(ulBeginOfText);
443 	switch (iWordVersion) {
444 	case 6:
445 	case 7:
446 		ulTextLen = ulGetLong(0x34, aucHeader);
447 		ulFootnoteLen = ulGetLong(0x38, aucHeader);
448 		ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
449 		ulMacroLen = ulGetLong(0x40, aucHeader);
450 		ulAnnotationLen = ulGetLong(0x44, aucHeader);
451 		ulEndnoteLen = ulGetLong(0x48, aucHeader);
452 		ulTextBoxLen = ulGetLong(0x4c, aucHeader);
453 		ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
454 		break;
455 	case 8:
456 		ulTextLen = ulGetLong(0x4c, aucHeader);
457 		ulFootnoteLen = ulGetLong(0x50, aucHeader);
458 		ulHdrFtrLen = ulGetLong(0x54, aucHeader);
459 		ulMacroLen = ulGetLong(0x58, aucHeader);
460 		ulAnnotationLen = ulGetLong(0x5c, aucHeader);
461 		ulEndnoteLen = ulGetLong(0x60, aucHeader);
462 		ulTextBoxLen = ulGetLong(0x64, aucHeader);
463 		ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
464 		break;
465 	default:
466 		werr(0, "This version of Word is not supported");
467 		return FALSE;
468 	}
469 	DBG_DEC(ulTextLen);
470 	DBG_DEC(ulFootnoteLen);
471 	DBG_DEC(ulHdrFtrLen);
472 	DBG_DEC(ulMacroLen);
473 	DBG_DEC(ulAnnotationLen);
474 	DBG_DEC(ulEndnoteLen);
475 	DBG_DEC(ulTextBoxLen);
476 	DBG_DEC(ulHdrTextBoxLen);
477 
478 	/* Make a list of the text blocks */
479 	switch (iWordVersion) {
480 	case 6:
481 	case 7:
482 		if (bFastSaved) {
483 			bSuccess = bGet6DocumentText(pFile,
484 					bFarEastWord,
485 					pPPS->tWordDocument.ulSB,
486 					aulBBD, tBBDLen,
487 					aucHeader);
488 		} else {
489 		  	bSuccess = bAddTextBlocks(ulBeginOfText,
490 				ulTextLen +
491 				ulFootnoteLen +
492 				ulHdrFtrLen +
493 				ulMacroLen + ulAnnotationLen +
494 				ulEndnoteLen +
495 				ulTextBoxLen + ulHdrTextBoxLen,
496 				bFarEastWord,
497 				IGNORE_PROPMOD,
498 				pPPS->tWordDocument.ulSB,
499 				aulBBD, tBBDLen);
500 		}
501 		break;
502 	case 8:
503 		bSuccess = bGet8DocumentText(pFile,
504 				pPPS,
505 				aulBBD, tBBDLen, aulSBD, tSBDLen,
506 				aucHeader);
507 		break;
508 	default:
509 		werr(0, "This version of Word is not supported");
510 		bSuccess = FALSE;
511 		break;
512 	}
513 
514 	if (bSuccess) {
515 		vSplitBlockList(pFile,
516 				ulTextLen,
517 				ulFootnoteLen,
518 				ulHdrFtrLen,
519 				ulMacroLen,
520 				ulAnnotationLen,
521 				ulEndnoteLen,
522 				ulTextBoxLen,
523 				ulHdrTextBoxLen,
524 				!bFastSaved && iWordVersion == 8);
525 	} else {
526 		vDestroyTextBlockList();
527 		werr(0, "I can't find the text of this document");
528 	}
529 	return bSuccess;
530 } /* end of bGetDocumentText */
531 
532 /*
533  * vGetDocumentData - make a list of the data blocks of a Word document
534  */
535 static void
vGetDocumentData(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const UCHAR * aucHeader,int iWordVersion)536 vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
537 	const ULONG *aulBBD, size_t tBBDLen,
538 	const UCHAR *aucHeader, int iWordVersion)
539 {
540 	options_type	tOptions;
541 	ULONG	ulBeginOfText;
542 	BOOL	bFastSaved, bHasImages, bSuccess;
543 	USHORT	usDocStatus;
544 
545 	fail(pFile == NULL);
546 	fail(pPPS == NULL);
547 	fail(aulBBD == NULL);
548 
549 	/* Get the options */
550 	vGetOptions(&tOptions);
551 
552 	/* Get the status flags from the header */
553 	usDocStatus = usGetWord(0x0a, aucHeader);
554 	DBG_HEX(usDocStatus);
555 	bFastSaved = (usDocStatus & BIT(2)) != 0;
556 	bHasImages = (usDocStatus & BIT(3)) != 0;
557 
558 	if (!bHasImages ||
559 	    tOptions.eConversionType == conversion_text ||
560 	    tOptions.eConversionType == conversion_fmt_text ||
561 	    tOptions.eConversionType == conversion_xml ||
562 	    tOptions.eImageLevel == level_no_images) {
563 		/*
564 		 * No images in the document or text-only output or
565 		 * no images wanted, so no data blocks will be needed
566 		 */
567 		vDestroyDataBlockList();
568 		return;
569 	}
570 
571 	/* Get length information */
572 	ulBeginOfText = ulGetLong(0x18, aucHeader);
573 	DBG_HEX(ulBeginOfText);
574 
575 	/* Make a list of the data blocks */
576 	switch (iWordVersion) {
577 	case 6:
578 	case 7:
579 		/*
580 		 * The data blocks are in the text stream. The text stream
581 		 * is in "fast saved" format or "normal saved" format
582 		 */
583 		if (bFastSaved) {
584 			bSuccess = bGet6DocumentData(pFile,
585 					pPPS->tWordDocument.ulSB,
586 					aulBBD, tBBDLen,
587 					aucHeader);
588 		} else {
589 		  	bSuccess = bAddDataBlocks(ulBeginOfText,
590 					(ULONG)LONG_MAX,
591 					pPPS->tWordDocument.ulSB,
592 					aulBBD, tBBDLen);
593 		}
594 		break;
595 	case 8:
596 		/*
597 		 * The data blocks are in the data stream. The data stream
598 		 * is always in "normal saved" format
599 		 */
600 		bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
601 				pPPS->tData.ulSB, aulBBD, tBBDLen);
602 		break;
603 	default:
604 		werr(0, "This version of Word is not supported");
605 		bSuccess = FALSE;
606 		break;
607 	}
608 
609 	if (!bSuccess) {
610 		vDestroyDataBlockList();
611 		werr(0, "I can't find the data of this document");
612 	}
613 } /* end of vGetDocumentData */
614 
615 /*
616  * iInitDocumentOLE - initialize an OLE document
617  *
618  * Returns the version of Word that made the document or -1
619  */
620 int
iInitDocumentOLE(FILE * pFile,long lFilesize)621 iInitDocumentOLE(FILE *pFile, long lFilesize)
622 {
623 	pps_info_type	PPS_info;
624 	ULONG	*aulBBD, *aulSBD;
625 	ULONG	*aulRootList, *aulBbdList, *aulSbdList;
626 	ULONG	ulBdbListStart, ulAdditionalBBDlist;
627 	ULONG	ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
628 	ULONG	ulStart, ulTmp;
629 	long	lMaxBlock;
630 	size_t	tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
631 	int	iWordVersion, iIndex, iToGo;
632 	BOOL	bSuccess;
633 	USHORT	usIdent, usDocStatus;
634 	UCHAR	aucHeader[HEADER_SIZE];
635 
636 	fail(pFile == NULL);
637 
638 	lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
639 	DBG_DEC(lMaxBlock);
640 	if (lMaxBlock < 1) {
641 		return -1;
642 	}
643 	tBBDLen = (size_t)(lMaxBlock + 1);
644 	tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
645 	DBG_DEC(tNumBbdBlocks);
646 	ulRootStartblock = ulReadLong(pFile, 0x30);
647 	DBG_DEC(ulRootStartblock);
648 	ulSbdStartblock = ulReadLong(pFile, 0x3c);
649 	DBG_DEC(ulSbdStartblock);
650 	ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
651 	DBG_HEX(ulAdditionalBBDlist);
652 	ulSBLstartblock = ulReadLong(pFile,
653 			(ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
654 	DBG_DEC(ulSBLstartblock);
655 	tSBDLen = (size_t)(ulReadLong(pFile,
656 			(ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
657 			SMALL_BLOCK_SIZE);
658 	/* All to be xcalloc-ed pointers to NULL */
659 	aulRootList = NULL;
660 	aulSbdList = NULL;
661 	aulBbdList = NULL;
662 	aulSBD = NULL;
663 	aulBBD = NULL;
664 /* Big Block Depot */
665 	aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
666 	aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
667 	iToGo = (int)tNumBbdBlocks;
668 	vGetBbdList(pFile, min(iToGo, 109),  aulBbdList, 0x4c);
669 	ulStart = 109;
670 	iToGo -= 109;
671 	while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
672 		ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
673 		vGetBbdList(pFile, min(iToGo, 127),
674 					aulBbdList + ulStart, ulBdbListStart);
675 		ulAdditionalBBDlist = ulReadLong(pFile,
676 					ulBdbListStart + 4 * 127);
677 		DBG_DEC(ulAdditionalBBDlist);
678 		DBG_HEX(ulAdditionalBBDlist);
679 		ulStart += 127;
680 		iToGo -= 127;
681 	}
682 	if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
683 		FREE_ALL();
684 		return -1;
685 	}
686 	aulBbdList = xfree(aulBbdList);
687 /* Small Block Depot */
688 	aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
689 	aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
690 	for (iIndex = 0, ulTmp = ulSbdStartblock;
691 	     iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
692 	     iIndex++, ulTmp = aulBBD[ulTmp]) {
693 		if (ulTmp >= (ULONG)tBBDLen) {
694 			DBG_DEC(ulTmp);
695 			DBG_DEC(tBBDLen);
696 			werr(1, "The Big Block Depot is damaged");
697 		}
698 		aulSbdList[iIndex] = ulTmp;
699 		NO_DBG_HEX(aulSbdList[iIndex]);
700 	}
701 	if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
702 		FREE_ALL();
703 		return -1;
704 	}
705 	aulSbdList = xfree(aulSbdList);
706 /* Root list */
707 	for (tRootListLen = 0, ulTmp = ulRootStartblock;
708 	     tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
709 	     tRootListLen++, ulTmp = aulBBD[ulTmp]) {
710 		if (ulTmp >= (ULONG)tBBDLen) {
711 			DBG_DEC(ulTmp);
712 			DBG_DEC(tBBDLen);
713 			werr(1, "The Big Block Depot is damaged");
714 		}
715 	}
716 	if (tRootListLen == 0) {
717 		werr(0, "No Rootlist found");
718 		FREE_ALL();
719 		return -1;
720 	}
721 	aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
722 	for (iIndex = 0, ulTmp = ulRootStartblock;
723 	     iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
724 	     iIndex++, ulTmp = aulBBD[ulTmp]) {
725 		if (ulTmp >= (ULONG)tBBDLen) {
726 			DBG_DEC(ulTmp);
727 			DBG_DEC(tBBDLen);
728 			werr(1, "The Big Block Depot is damaged");
729 		}
730 		aulRootList[iIndex] = ulTmp;
731 		NO_DBG_DEC(aulRootList[iIndex]);
732 	}
733 	fail(tRootListLen != (size_t)iIndex);
734 	bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
735 	aulRootList = xfree(aulRootList);
736 	if (!bSuccess) {
737 		FREE_ALL();
738 		return -1;
739 	}
740 /* Small block list */
741 	if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
742 		FREE_ALL();
743 		return -1;
744 	}
745 
746 	if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
747 		DBG_DEC(PPS_info.tWordDocument.ulSize);
748 		FREE_ALL();
749 		werr(0, "I'm afraid the text stream of this file "
750 			"is too small to handle.");
751 		return -1;
752 	}
753 	/* Read the headerblock */
754 	if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
755 			aulBBD, tBBDLen, BIG_BLOCK_SIZE,
756 			aucHeader, 0, HEADER_SIZE)) {
757 		FREE_ALL();
758 		return -1;
759 	}
760 	usIdent = usGetWord(0x00, aucHeader);
761 	DBG_HEX(usIdent);
762 	fail(usIdent != 0x8098 &&	/* Word 7 for oriental languages */
763 	     usIdent != 0x8099 &&	/* Word 7 for oriental languages */
764 	     usIdent != 0xa5dc &&	/* Word 6 & 7 */
765 	     usIdent != 0xa5ec &&	/* Word 7 & 97 & 98 */
766 	     usIdent != 0xa697 &&	/* Word 7 for oriental languages */
767 	     usIdent != 0xa699);	/* Word 7 for oriental languages */
768 	iWordVersion = iGetVersionNumber(aucHeader);
769 	if (iWordVersion < 6) {
770 		FREE_ALL();
771 		werr(0, "This file is from a version of Word before Word 6.");
772 		return -1;
773 	}
774 
775 	/* Get the status flags from the header */
776 	usDocStatus = usGetWord(0x0a, aucHeader);
777         if (usDocStatus & BIT(9)) {
778 		PPS_info.tTable = PPS_info.t1Table;
779 	} else {
780 		PPS_info.tTable = PPS_info.t0Table;
781 	}
782 	/* Clean the entries that should not be used */
783 	memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
784 	memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));
785 
786 	bSuccess = bGetDocumentText(pFile, &PPS_info,
787 			aulBBD, tBBDLen, aulSBD, tSBDLen,
788 			aucHeader, iWordVersion);
789 	if (bSuccess) {
790 		vGetDocumentData(pFile, &PPS_info,
791 			aulBBD, tBBDLen, aucHeader, iWordVersion);
792 		vGetPropertyInfo(pFile, &PPS_info,
793 			aulBBD, tBBDLen, aulSBD, tSBDLen,
794 			aucHeader, iWordVersion);
795 		vSetDefaultTabWidth(pFile, &PPS_info,
796 			aulBBD, tBBDLen, aulSBD, tSBDLen,
797 			aucHeader, iWordVersion);
798 		vGetNotesInfo(pFile, &PPS_info,
799 			aulBBD, tBBDLen, aulSBD, tSBDLen,
800 			aucHeader, iWordVersion);
801 	}
802 	FREE_ALL();
803 	return bSuccess ? iWordVersion : -1;
804 } /* end of iInitDocumentOLE */
805