xref: /plan9/sys/src/cmd/aux/antiword/blocklist.c (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * blocklist.c
3  * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * Build, read and destroy the lists of Word "text" blocks
7  */
8 
9 #include <stdlib.h>
10 #include "antiword.h"
11 
12 
13 /*
14  * Private structure to hide the way the information
15  * is stored from the rest of the program
16  */
17 typedef struct list_mem_tag {
18 	text_block_type		tInfo;
19 	struct list_mem_tag	*pNext;
20 } list_mem_type;
21 
22 typedef struct readinfo_tag {
23 	list_mem_type		*pBlockCurrent;
24 	ULONG			ulBlockOffset;
25 	size_t			tByteNext;
26 	UCHAR			aucBlock[BIG_BLOCK_SIZE];
27 } readinfo_type;
28 
29 /* Variables to describe the start of the block lists */
30 static list_mem_type	*pTextAnchor = NULL;
31 static list_mem_type	*pFootnoteAnchor = NULL;
32 static list_mem_type	*pHdrFtrAnchor = NULL;
33 static list_mem_type	*pMacroAnchor = NULL;
34 static list_mem_type	*pAnnotationAnchor = NULL;
35 static list_mem_type	*pEndnoteAnchor = NULL;
36 static list_mem_type	*pTextBoxAnchor = NULL;
37 static list_mem_type	*pHdrTextBoxAnchor = NULL;
38 /* Variable needed to build the block list */
39 static list_mem_type	*pBlockLast = NULL;
40 /* Variable needed to read the block lists */
41 static readinfo_type	tOthers = { NULL, 0, 0, };
42 static readinfo_type	tHdrFtr = { NULL, 0, 0, };
43 static readinfo_type	tFootnote = { NULL, 0, 0, };
44 
45 
46 /*
47  * pFreeOneList - free a text block list
48  *
49  * Will always return NULL
50  */
51 static list_mem_type *
pFreeOneList(list_mem_type * pAnchor)52 pFreeOneList(list_mem_type *pAnchor)
53 {
54 	list_mem_type	*pCurr, *pNext;
55 
56 	pCurr = pAnchor;
57 	while (pCurr != NULL) {
58 		pNext = pCurr->pNext;
59 		pCurr = xfree(pCurr);
60 		pCurr = pNext;
61 	}
62 	return NULL;
63 } /* end of pFreeOneList */
64 
65 /*
66  * vDestroyTextBlockList - destroy the text block lists
67  */
68 void
vDestroyTextBlockList(void)69 vDestroyTextBlockList(void)
70 {
71 	DBG_MSG("vDestroyTextBlockList");
72 
73 	/* Free the lists one by one */
74 	pTextAnchor = pFreeOneList(pTextAnchor);
75 	pFootnoteAnchor = pFreeOneList(pFootnoteAnchor);
76 	pHdrFtrAnchor = pFreeOneList(pHdrFtrAnchor);
77 	pMacroAnchor = pFreeOneList(pMacroAnchor);
78 	pAnnotationAnchor = pFreeOneList(pAnnotationAnchor);
79 	pEndnoteAnchor = pFreeOneList(pEndnoteAnchor);
80 	pTextBoxAnchor = pFreeOneList(pTextBoxAnchor);
81 	pHdrTextBoxAnchor = pFreeOneList(pHdrTextBoxAnchor);
82 	/* Reset all the controle variables */
83 	pBlockLast = NULL;
84 	tOthers.pBlockCurrent = NULL;
85 	tHdrFtr.pBlockCurrent = NULL;
86 	tFootnote.pBlockCurrent = NULL;
87 } /* end of vDestroyTextBlockList */
88 
89 /*
90  * bAdd2TextBlockList - add an element to the text block list
91  *
92  * returns: TRUE when successful, otherwise FALSE
93  */
94 BOOL
bAdd2TextBlockList(const text_block_type * pTextBlock)95 bAdd2TextBlockList(const text_block_type *pTextBlock)
96 {
97 	list_mem_type	*pListMember;
98 
99 	fail(pTextBlock == NULL);
100 	fail(pTextBlock->ulFileOffset == FC_INVALID);
101 	fail(pTextBlock->ulCharPos == CP_INVALID);
102 	fail(pTextBlock->ulLength == 0);
103 	fail(pTextBlock->bUsesUnicode && odd(pTextBlock->ulLength));
104 
105 	NO_DBG_MSG("bAdd2TextBlockList");
106 	NO_DBG_HEX(pTextBlock->ulFileOffset);
107 	NO_DBG_HEX(pTextBlock->ulCharPos);
108 	NO_DBG_HEX(pTextBlock->ulLength);
109 	NO_DBG_DEC(pTextBlock->bUsesUnicode);
110 	NO_DBG_DEC(pTextBlock->usPropMod);
111 
112 	if (pTextBlock->ulFileOffset == FC_INVALID ||
113 	    pTextBlock->ulCharPos == CP_INVALID ||
114 	    pTextBlock->ulLength == 0 ||
115 	    (pTextBlock->bUsesUnicode && odd(pTextBlock->ulLength))) {
116 		werr(0, "Software (textblock) error");
117 		return FALSE;
118 	}
119 	/*
120 	 * Check for continuous blocks of the same character size and
121 	 * the same properties modifier
122 	 */
123 	if (pBlockLast != NULL &&
124 	    pBlockLast->tInfo.ulFileOffset +
125 	     pBlockLast->tInfo.ulLength == pTextBlock->ulFileOffset &&
126 	    pBlockLast->tInfo.ulCharPos +
127 	     pBlockLast->tInfo.ulLength == pTextBlock->ulCharPos &&
128 	    pBlockLast->tInfo.bUsesUnicode == pTextBlock->bUsesUnicode &&
129 	    pBlockLast->tInfo.usPropMod == pTextBlock->usPropMod) {
130 		/* These are continous blocks */
131 		pBlockLast->tInfo.ulLength += pTextBlock->ulLength;
132 		return TRUE;
133 	}
134 	/* Make a new block */
135 	pListMember = xmalloc(sizeof(list_mem_type));
136 	/* Add the block to the list */
137 	pListMember->tInfo = *pTextBlock;
138 	pListMember->pNext = NULL;
139 	if (pTextAnchor == NULL) {
140 		pTextAnchor = pListMember;
141 	} else {
142 		fail(pBlockLast == NULL);
143 		pBlockLast->pNext = pListMember;
144 	}
145 	pBlockLast = pListMember;
146 	return TRUE;
147 } /* end of bAdd2TextBlockList */
148 
149 /*
150  * vSpitList - Split the list in two
151  */
152 static void
vSpitList(list_mem_type ** ppAnchorCurr,list_mem_type ** ppAnchorNext,ULONG ulListLen)153 vSpitList(list_mem_type **ppAnchorCurr, list_mem_type **ppAnchorNext,
154 	ULONG ulListLen)
155 {
156 	list_mem_type	*pCurr;
157 	long		lCharsToGo, lBytesTooFar;
158 
159 	fail(ppAnchorCurr == NULL);
160 	fail(ppAnchorNext == NULL);
161 	fail(ulListLen > (ULONG)LONG_MAX);
162 
163 	pCurr = NULL;
164 	lCharsToGo = (long)ulListLen;
165 	lBytesTooFar = -1;
166 	if (ulListLen != 0) {
167 		DBG_DEC(ulListLen);
168 		for (pCurr = *ppAnchorCurr;
169 		     pCurr != NULL;
170 		     pCurr = pCurr->pNext) {
171 			NO_DBG_DEC(pCurr->tInfo.ulLength);
172 			fail(pCurr->tInfo.ulLength == 0);
173 			fail(pCurr->tInfo.ulLength > (ULONG)LONG_MAX);
174 			if (pCurr->tInfo.bUsesUnicode) {
175 				fail(odd(pCurr->tInfo.ulLength));
176 				lCharsToGo -= (long)(pCurr->tInfo.ulLength / 2);
177 				if (lCharsToGo < 0) {
178 					lBytesTooFar = -2 * lCharsToGo;
179 				}
180 			} else {
181 				lCharsToGo -= (long)pCurr->tInfo.ulLength;
182 				if (lCharsToGo < 0) {
183 					lBytesTooFar = -lCharsToGo;
184 				}
185 			}
186 			if (lCharsToGo <= 0) {
187 				break;
188 			}
189 		}
190 	}
191 /* Split the list */
192 	if (ulListLen == 0) {
193 		/* Current blocklist is empty */
194 		*ppAnchorNext = *ppAnchorCurr;
195 		*ppAnchorCurr = NULL;
196 	} else if (pCurr == NULL) {
197 		/* No blocks for the next list */
198 		*ppAnchorNext = NULL;
199 	} else if (lCharsToGo == 0) {
200 		/* Move the integral number of blocks to the next list */
201 		*ppAnchorNext = pCurr->pNext;
202 		pCurr->pNext = NULL;
203 	} else {
204 		/* Split the part current block list, part next block list */
205 		DBG_DEC(lBytesTooFar);
206 		fail(lBytesTooFar <= 0);
207 		*ppAnchorNext = xmalloc(sizeof(list_mem_type));
208 		DBG_HEX(pCurr->tInfo.ulFileOffset);
209 		(*ppAnchorNext)->tInfo.ulFileOffset =
210 				pCurr->tInfo.ulFileOffset +
211 				pCurr->tInfo.ulLength -
212 				lBytesTooFar;
213 		DBG_HEX((*ppAnchorNext)->tInfo.ulFileOffset);
214 		DBG_HEX(pCurr->tInfo.ulCharPos);
215 		(*ppAnchorNext)->tInfo.ulCharPos =
216 				pCurr->tInfo.ulCharPos +
217 				pCurr->tInfo.ulLength -
218 				lBytesTooFar;
219 		DBG_HEX((*ppAnchorNext)->tInfo.ulCharPos);
220 		(*ppAnchorNext)->tInfo.ulLength = (ULONG)lBytesTooFar;
221 		pCurr->tInfo.ulLength -= (ULONG)lBytesTooFar;
222 		(*ppAnchorNext)->tInfo.bUsesUnicode = pCurr->tInfo.bUsesUnicode;
223 		(*ppAnchorNext)->tInfo.usPropMod = pCurr->tInfo.usPropMod;
224 		/* Move the integral number of blocks to the next list */
225 		(*ppAnchorNext)->pNext = pCurr->pNext;
226 		pCurr->pNext = NULL;
227 	}
228 } /* end of vSpitList */
229 
230 #if defined(DEBUG) || defined(__riscos)
231 /*
232  * ulComputeListLength - compute the length of a list
233  *
234  * returns the list length in characters
235  */
236 static ULONG
ulComputeListLength(const list_mem_type * pAnchor)237 ulComputeListLength(const list_mem_type *pAnchor)
238 {
239 	const list_mem_type	*pCurr;
240 	ULONG		ulTotal;
241 
242 	ulTotal = 0;
243 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
244 		fail(pCurr->tInfo.ulLength == 0);
245 		if (pCurr->tInfo.bUsesUnicode) {
246 			fail(odd(pCurr->tInfo.ulLength));
247 			ulTotal += pCurr->tInfo.ulLength / 2;
248 		} else {
249 			ulTotal += pCurr->tInfo.ulLength;
250 		}
251 	}
252 	return ulTotal;
253 } /* end of ulComputeListLength */
254 #endif /* DEBUG || __riscos */
255 
256 #if defined(DEBUG)
257 /*
258  * vCheckList - check the number of bytes in a block list
259  */
260 static void
vCheckList(const list_mem_type * pAnchor,ULONG ulListLen,char * szMsg)261 vCheckList(const list_mem_type *pAnchor, ULONG ulListLen, char *szMsg)
262 {
263 	ULONG		ulTotal;
264 
265 	ulTotal = ulComputeListLength(pAnchor);
266 	DBG_DEC(ulTotal);
267 	if (ulTotal != ulListLen) {
268 		DBG_DEC(ulListLen);
269 		werr(1, szMsg);
270 	}
271 } /* end of vCheckList */
272 #endif /* DEBUG */
273 
274 /*
275  * bIsEmptyBox - check to see if the given text box is empty
276  */
277 static BOOL
bIsEmptyBox(FILE * pFile,const list_mem_type * pAnchor)278 bIsEmptyBox(FILE *pFile, const list_mem_type *pAnchor)
279 {
280 	const list_mem_type	*pCurr;
281 	size_t	tIndex, tSize;
282 	UCHAR	*aucBuffer;
283 	char	cChar;
284 
285 	fail(pFile == NULL);
286 
287 	if (pAnchor == NULL) {
288 		return TRUE;
289 	}
290 
291 	aucBuffer = NULL;
292 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
293 		fail(pCurr->tInfo.ulLength == 0);
294 		tSize = (size_t)pCurr->tInfo.ulLength;
295 #if defined(__dos) && !defined(__DJGPP__)
296 		if (pCurr->tInfo.ulLength > 0xffffUL) {
297 			tSize = 0xffff;
298 		}
299 #endif /* __dos && !__DJGPP__ */
300 		fail(aucBuffer != NULL);
301 		aucBuffer = xmalloc(tSize);
302 		if (!bReadBytes(aucBuffer, tSize,
303 				pCurr->tInfo.ulFileOffset, pFile)) {
304 			aucBuffer = xfree(aucBuffer);
305 			return FALSE;
306 		}
307 		for (tIndex = 0; tIndex < tSize; tIndex++) {
308 			cChar = (char)aucBuffer[tIndex];
309 			switch (cChar) {
310 			case '\0': case '\r': case '\n':
311 			case '\f': case '\t': case '\v':
312 			case ' ':
313 				break;
314 			default:
315 				aucBuffer = xfree(aucBuffer);
316 				return FALSE;
317 			}
318 		}
319 		aucBuffer = xfree(aucBuffer);
320 	}
321 	fail(aucBuffer != NULL);
322 	return TRUE;
323 } /* end of bIsEmptyBox */
324 
325 /*
326  * vSplitBlockList - split the block list in the various parts
327  *
328  * Split the blocklist in a Text block list, a Footnote block list, a
329  * HeaderFooter block list, a Macro block list, an Annotation block list,
330  * an Endnote block list, a TextBox list and a HeaderTextBox list.
331  *
332  * NOTE:
333  * The various ul*Len input parameters are given in characters, but the
334  * length of the blocks are in bytes.
335  */
336 void
vSplitBlockList(FILE * pFile,ULONG ulTextLen,ULONG ulFootnoteLen,ULONG ulHdrFtrLen,ULONG ulMacroLen,ULONG ulAnnotationLen,ULONG ulEndnoteLen,ULONG ulTextBoxLen,ULONG ulHdrTextBoxLen,BOOL bMustExtend)337 vSplitBlockList(FILE *pFile, ULONG ulTextLen, ULONG ulFootnoteLen,
338 	ULONG ulHdrFtrLen, ULONG ulMacroLen, ULONG ulAnnotationLen,
339 	ULONG ulEndnoteLen, ULONG ulTextBoxLen, ULONG ulHdrTextBoxLen,
340 	BOOL bMustExtend)
341 {
342 	list_mem_type	*apAnchors[8];
343 	list_mem_type	*pGarbageAnchor, *pCurr;
344 	size_t		tIndex;
345 
346 	DBG_MSG("vSplitBlockList");
347 
348 	pGarbageAnchor = NULL;
349 
350 	DBG_MSG_C(ulTextLen != 0, "Text block list");
351 	vSpitList(&pTextAnchor, &pFootnoteAnchor, ulTextLen);
352 	DBG_MSG_C(ulFootnoteLen != 0, "Footnote block list");
353 	vSpitList(&pFootnoteAnchor, &pHdrFtrAnchor, ulFootnoteLen);
354 	DBG_MSG_C(ulHdrFtrLen != 0, "Header/Footer block list");
355 	vSpitList(&pHdrFtrAnchor, &pMacroAnchor, ulHdrFtrLen);
356 	DBG_MSG_C(ulMacroLen != 0, "Macro block list");
357 	vSpitList(&pMacroAnchor, &pAnnotationAnchor, ulMacroLen);
358 	DBG_MSG_C(ulAnnotationLen != 0, "Annotation block list");
359 	vSpitList(&pAnnotationAnchor, &pEndnoteAnchor, ulAnnotationLen);
360 	DBG_MSG_C(ulEndnoteLen != 0, "Endnote block list");
361 	vSpitList(&pEndnoteAnchor, &pTextBoxAnchor, ulEndnoteLen);
362 	DBG_MSG_C(ulTextBoxLen != 0, "Textbox block list");
363 	vSpitList(&pTextBoxAnchor, &pHdrTextBoxAnchor, ulTextBoxLen);
364 	DBG_MSG_C(ulHdrTextBoxLen != 0, "HeaderTextbox block list");
365 	vSpitList(&pHdrTextBoxAnchor, &pGarbageAnchor, ulHdrTextBoxLen);
366 
367 	/* Free the garbage block list, this should not be needed */
368 	DBG_DEC_C(pGarbageAnchor != NULL, pGarbageAnchor->tInfo.ulLength);
369 	pGarbageAnchor = pFreeOneList(pGarbageAnchor);
370 
371 #if defined(DEBUG)
372 	vCheckList(pTextAnchor, ulTextLen, "Software error (Text)");
373 	vCheckList(pFootnoteAnchor, ulFootnoteLen, "Software error (Footnote)");
374 	vCheckList(pHdrFtrAnchor, ulHdrFtrLen, "Software error (Hdr/Ftr)");
375 	vCheckList(pMacroAnchor, ulMacroLen, "Software error (Macro)");
376 	vCheckList(pAnnotationAnchor, ulAnnotationLen,
377 						"Software error (Annotation)");
378 	vCheckList(pEndnoteAnchor, ulEndnoteLen, "Software error (Endnote)");
379 	vCheckList(pTextBoxAnchor, ulTextBoxLen, "Software error (TextBox)");
380 	vCheckList(pHdrTextBoxAnchor, ulHdrTextBoxLen,
381 						"Software error (HdrTextBox)");
382 #endif /* DEBUG */
383 
384 	/* Remove the list if the text box is empty */
385 	if (bIsEmptyBox(pFile, pTextBoxAnchor)) {
386 		pTextBoxAnchor = pFreeOneList(pTextBoxAnchor);
387 	}
388 	if (bIsEmptyBox(pFile, pHdrTextBoxAnchor)) {
389 		pHdrTextBoxAnchor = pFreeOneList(pHdrTextBoxAnchor);
390 	}
391 
392 	if (!bMustExtend) {
393 		return;
394 	}
395 	/*
396 	 * All blocks (except the last one) must have a length that
397 	 * is a multiple of the Big Block Size
398 	 */
399 
400 	apAnchors[0] = pTextAnchor;
401 	apAnchors[1] = pFootnoteAnchor;
402 	apAnchors[2] = pHdrFtrAnchor;
403 	apAnchors[3] = pMacroAnchor;
404 	apAnchors[4] = pAnnotationAnchor;
405 	apAnchors[5] = pEndnoteAnchor;
406 	apAnchors[6] = pTextBoxAnchor;
407 	apAnchors[7] = pHdrTextBoxAnchor;
408 
409 	for (tIndex = 0; tIndex < elementsof(apAnchors); tIndex++) {
410 		for (pCurr = apAnchors[tIndex];
411 		     pCurr != NULL;
412 		     pCurr = pCurr->pNext) {
413 			if (pCurr->pNext != NULL &&
414 			    pCurr->tInfo.ulLength % BIG_BLOCK_SIZE != 0) {
415 				DBG_DEC(tIndex);
416 				DBG_HEX(pCurr->tInfo.ulFileOffset);
417 				DBG_HEX(pCurr->tInfo.ulCharPos);
418 				DBG_DEC(pCurr->tInfo.ulLength);
419 				pCurr->tInfo.ulLength /= BIG_BLOCK_SIZE;
420 				pCurr->tInfo.ulLength++;
421 				pCurr->tInfo.ulLength *= BIG_BLOCK_SIZE;
422 				DBG_DEC(pCurr->tInfo.ulLength);
423 			}
424 		}
425 	}
426 } /* end of vSplitBlockList */
427 
428 #if defined(__riscos)
429 /*
430  * ulGetDocumentLength - get the total character length of the printable lists
431  *
432  * returns: The total number of characters
433  */
434 ULONG
ulGetDocumentLength(void)435 ulGetDocumentLength(void)
436 {
437 	long		ulTotal;
438 
439 	DBG_MSG("ulGetDocumentLength");
440 
441 	ulTotal = ulComputeListLength(pTextAnchor);
442 	ulTotal += ulComputeListLength(pFootnoteAnchor);
443 	ulTotal += ulComputeListLength(pEndnoteAnchor);
444 	ulTotal += ulComputeListLength(pTextBoxAnchor);
445 	ulTotal += ulComputeListLength(pHdrTextBoxAnchor);
446 	DBG_DEC(ulTotal);
447 	return ulTotal;
448 } /* end of ulGetDocumentLength */
449 #endif /* __riscos */
450 
451 #if 0
452 /*
453  * bExistsHdrFtr - are there headers and/or footers?
454  */
455 BOOL
456 bExistsHdrFtr(void)
457 {
458 	return pHdrFtrAnchor != NULL &&
459 		pHdrFtrAnchor->tInfo.ulLength != 0;
460 } /* end of bExistsHdrFtr */
461 #endif
462 
463 /*
464  * bExistsTextBox - is there a text box?
465  */
466 BOOL
bExistsTextBox(void)467 bExistsTextBox(void)
468 {
469 	return pTextBoxAnchor != NULL &&
470 		pTextBoxAnchor->tInfo.ulLength != 0;
471 } /* end of bExistsTextBox */
472 
473 /*
474  * bExistsHdrTextBox - is there a header text box?
475  */
476 BOOL
bExistsHdrTextBox(void)477 bExistsHdrTextBox(void)
478 {
479 	return pHdrTextBoxAnchor != NULL &&
480 		pHdrTextBoxAnchor->tInfo.ulLength != 0;
481 } /* end of bExistsHdrTextBox */
482 
483 /*
484  * usGetNextByte - get the next byte from the specified block list
485  */
486 static USHORT
usGetNextByte(FILE * pFile,readinfo_type * pInfoCurrent,list_mem_type * pAnchor,ULONG * pulFileOffset,ULONG * pulCharPos,USHORT * pusPropMod)487 usGetNextByte(FILE *pFile, readinfo_type *pInfoCurrent, list_mem_type *pAnchor,
488 	ULONG *pulFileOffset, ULONG *pulCharPos, USHORT *pusPropMod)
489 {
490 	ULONG	ulReadOff;
491 	size_t	tReadLen;
492 
493 	fail(pInfoCurrent == NULL);
494 
495 	if (pInfoCurrent->pBlockCurrent == NULL ||
496 	    pInfoCurrent->tByteNext >= sizeof(pInfoCurrent->aucBlock) ||
497 	    pInfoCurrent->ulBlockOffset + pInfoCurrent->tByteNext >=
498 				pInfoCurrent->pBlockCurrent->tInfo.ulLength) {
499 		if (pInfoCurrent->pBlockCurrent == NULL) {
500 			/* First block, first part */
501 			pInfoCurrent->pBlockCurrent = pAnchor;
502 			pInfoCurrent->ulBlockOffset = 0;
503 		} else if (pInfoCurrent->ulBlockOffset +
504 				sizeof(pInfoCurrent->aucBlock) <
505 				pInfoCurrent->pBlockCurrent->tInfo.ulLength) {
506 			/* Same block, next part */
507 			pInfoCurrent->ulBlockOffset +=
508 					sizeof(pInfoCurrent->aucBlock);
509 		} else {
510 			/* Next block, first part */
511 			pInfoCurrent->pBlockCurrent =
512 					pInfoCurrent->pBlockCurrent->pNext;
513 			pInfoCurrent->ulBlockOffset = 0;
514 		}
515 		if (pInfoCurrent->pBlockCurrent == NULL) {
516 			/* Past the last part of the last block */
517 			return (USHORT)EOF;
518 		}
519 		tReadLen = (size_t)
520 			(pInfoCurrent->pBlockCurrent->tInfo.ulLength -
521 			 pInfoCurrent->ulBlockOffset);
522 		if (tReadLen > sizeof(pInfoCurrent->aucBlock)) {
523 			tReadLen = sizeof(pInfoCurrent->aucBlock);
524 		}
525 		ulReadOff = pInfoCurrent->pBlockCurrent->tInfo.ulFileOffset +
526 				pInfoCurrent->ulBlockOffset;
527 		if (!bReadBytes(pInfoCurrent->aucBlock,
528 						tReadLen, ulReadOff, pFile)) {
529 			/* Don't read from this list any longer */
530 			pInfoCurrent->pBlockCurrent = NULL;
531 			return (USHORT)EOF;
532 		}
533 		pInfoCurrent->tByteNext = 0;
534 	}
535 	if (pulFileOffset != NULL) {
536 		*pulFileOffset =
537 			pInfoCurrent->pBlockCurrent->tInfo.ulFileOffset +
538 			pInfoCurrent->ulBlockOffset +
539 			pInfoCurrent->tByteNext;
540 	}
541 	if (pulCharPos != NULL) {
542 		*pulCharPos =
543 			pInfoCurrent->pBlockCurrent->tInfo.ulCharPos +
544 			pInfoCurrent->ulBlockOffset +
545 			pInfoCurrent->tByteNext;
546 	}
547 	if (pusPropMod != NULL) {
548 		*pusPropMod = pInfoCurrent->pBlockCurrent->tInfo.usPropMod;
549 	}
550 	return (USHORT)pInfoCurrent->aucBlock[pInfoCurrent->tByteNext++];
551 } /* end of usGetNextByte */
552 
553 
554 /*
555  * usGetNextChar - get the next character from the specified block list
556  */
557 static USHORT
usGetNextChar(FILE * pFile,list_id_enum eListID,ULONG * pulFileOffset,ULONG * pulCharPos,USHORT * pusPropMod)558 usGetNextChar(FILE *pFile, list_id_enum eListID,
559 	ULONG *pulFileOffset, ULONG *pulCharPos, USHORT *pusPropMod)
560 {
561 	readinfo_type	*pReadinfo;
562 	list_mem_type	*pAnchor;
563 	USHORT	usLSB, usMSB;
564 
565 	switch (eListID) {
566 	case text_list:
567 		pReadinfo = &tOthers;
568 		pAnchor = pTextAnchor;
569 		break;
570 	case footnote_list:
571 		pReadinfo = &tFootnote;
572 		pAnchor = pFootnoteAnchor;
573 		break;
574 	case hdrftr_list:
575 		pReadinfo = &tHdrFtr;
576 		pAnchor = pHdrFtrAnchor;
577 		break;
578 	case endnote_list:
579 		pReadinfo = &tOthers;
580 		pAnchor = pEndnoteAnchor;
581 		break;
582 	case textbox_list:
583 		pReadinfo = &tOthers;
584 		pAnchor = pTextBoxAnchor;
585 		break;
586 	case hdrtextbox_list:
587 		pReadinfo = &tOthers;
588 		pAnchor = pHdrTextBoxAnchor;
589 		break;
590 	default:
591 		DBG_DEC(eListID);
592 		return (USHORT)EOF;
593 	}
594 
595 	usLSB = usGetNextByte(pFile, pReadinfo, pAnchor,
596 				pulFileOffset, pulCharPos, pusPropMod);
597 	if (usLSB == (USHORT)EOF) {
598 		return (USHORT)EOF;
599 	}
600 	fail(pReadinfo->pBlockCurrent == NULL);
601 
602 	if (pReadinfo->pBlockCurrent->tInfo.bUsesUnicode) {
603 		usMSB = usGetNextByte(pFile,
604 				pReadinfo, pAnchor, NULL, NULL, NULL);
605 	} else {
606 		usMSB = 0x00;
607 	}
608 	if (usMSB == (USHORT)EOF) {
609 		DBG_MSG("usGetNextChar: Unexpected EOF");
610 		DBG_HEX_C(pulFileOffset != NULL, *pulFileOffset);
611 		DBG_HEX_C(pulCharPos != NULL, *pulCharPos);
612 		return (USHORT)EOF;
613 	}
614 	return (usMSB << 8) | usLSB;
615 } /* end of usGetNextChar */
616 
617 /*
618  * usNextChar - get the next character from the given block list
619  */
620 USHORT
usNextChar(FILE * pFile,list_id_enum eListID,ULONG * pulFileOffset,ULONG * pulCharPos,USHORT * pusPropMod)621 usNextChar(FILE *pFile, list_id_enum eListID,
622 	ULONG *pulFileOffset, ULONG *pulCharPos, USHORT *pusPropMod)
623 {
624 	USHORT	usRetVal;
625 
626 	fail(pFile == NULL);
627 
628 	usRetVal = usGetNextChar(pFile, eListID,
629 				pulFileOffset, pulCharPos, pusPropMod);
630 	if (usRetVal == (USHORT)EOF) {
631 		if (pulFileOffset != NULL) {
632 			*pulFileOffset = FC_INVALID;
633 		}
634 		if (pulCharPos != NULL) {
635 			*pulCharPos = CP_INVALID;
636 		}
637 		if (pusPropMod != NULL) {
638 			*pusPropMod = IGNORE_PROPMOD;
639 		}
640 	}
641 	return usRetVal;
642 } /* end of usNextChar */
643 
644 /*
645  * usToHdrFtrPosition - Go to a character position in header/foorter list
646  *
647  * Returns the character found on the specified character position
648  */
649 USHORT
usToHdrFtrPosition(FILE * pFile,ULONG ulCharPos)650 usToHdrFtrPosition(FILE *pFile, ULONG ulCharPos)
651 {
652 	ULONG	ulCharPosCurr;
653 	USHORT	usChar;
654 
655 	tHdrFtr.pBlockCurrent = NULL;	/* To reset the header/footer list */
656 	do {
657 		usChar = usNextChar(pFile,
658 				hdrftr_list, NULL, &ulCharPosCurr, NULL);
659 	} while (usChar != (USHORT)EOF && ulCharPosCurr != ulCharPos);
660 	return usChar;
661 } /* end of usToHdrFtrPosition */
662 
663 /*
664  * usToFootnotePosition - Go to a character position in footnote list
665  *
666  * Returns the character found on the specified character position
667  */
668 USHORT
usToFootnotePosition(FILE * pFile,ULONG ulCharPos)669 usToFootnotePosition(FILE *pFile, ULONG ulCharPos)
670 {
671 	ULONG	ulCharPosCurr;
672 	USHORT	usChar;
673 
674 	tFootnote.pBlockCurrent = NULL;	/* To reset the footnote list */
675 	do {
676 		usChar = usNextChar(pFile,
677 				footnote_list, NULL, &ulCharPosCurr, NULL);
678 	} while (usChar != (USHORT)EOF && ulCharPosCurr != ulCharPos);
679 	return usChar;
680 } /* end of usToFootnotePosition */
681 
682 /*
683  * Convert a character position to an offset in the file.
684  * Logical to physical offset.
685  *
686  * Returns:	FC_INVALID: in case of error
687  *		otherwise: the computed file offset
688  */
689 ULONG
ulCharPos2FileOffsetX(ULONG ulCharPos,list_id_enum * peListID)690 ulCharPos2FileOffsetX(ULONG ulCharPos, list_id_enum *peListID)
691 {
692 	static list_id_enum	eListIDs[8] = {
693 		text_list,	footnote_list,		hdrftr_list,
694 		macro_list,	annotation_list,	endnote_list,
695 		textbox_list,	hdrtextbox_list,
696 	};
697 	list_mem_type	*apAnchors[8];
698 	list_mem_type	*pCurr;
699 	list_id_enum	eListGuess;
700 	ULONG		ulBestGuess;
701 	size_t		tIndex;
702 
703 	fail(peListID == NULL);
704 
705 	if (ulCharPos == CP_INVALID) {
706 		*peListID = no_list;
707 		return FC_INVALID;
708 	}
709 
710 	apAnchors[0] = pTextAnchor;
711 	apAnchors[1] = pFootnoteAnchor;
712 	apAnchors[2] = pHdrFtrAnchor;
713 	apAnchors[3] = pMacroAnchor;
714 	apAnchors[4] = pAnnotationAnchor;
715 	apAnchors[5] = pEndnoteAnchor;
716 	apAnchors[6] = pTextBoxAnchor;
717 	apAnchors[7] = pHdrTextBoxAnchor;
718 
719 	eListGuess = no_list;	  /* Best guess is no list */
720 	ulBestGuess = FC_INVALID; /* Best guess is "file offset not found" */
721 
722 	for (tIndex = 0; tIndex < elementsof(apAnchors); tIndex++) {
723 		for (pCurr = apAnchors[tIndex];
724 		     pCurr != NULL;
725 		     pCurr = pCurr->pNext) {
726 			if (ulCharPos == pCurr->tInfo.ulCharPos +
727 			     pCurr->tInfo.ulLength &&
728 			    pCurr->pNext != NULL) {
729 				/*
730 				 * The character position is one beyond this
731 				 * block, so we guess it's the first byte of
732 				 * the next block (if there is a next block)
733 				 */
734 				eListGuess= eListIDs[tIndex];
735 				ulBestGuess = pCurr->pNext->tInfo.ulFileOffset;
736 			}
737 
738 			if (ulCharPos < pCurr->tInfo.ulCharPos ||
739 			    ulCharPos >= pCurr->tInfo.ulCharPos +
740 			     pCurr->tInfo.ulLength) {
741 				/* Character position is not in this block */
742 				continue;
743 			}
744 
745 			/* The character position is in the current block */
746 			*peListID = eListIDs[tIndex];
747 			return pCurr->tInfo.ulFileOffset +
748 				ulCharPos - pCurr->tInfo.ulCharPos;
749 		}
750 	}
751 	/* Passed beyond the end of the last list */
752 	NO_DBG_HEX(ulCharPos);
753 	NO_DBG_HEX(ulBestGuess);
754 	*peListID = eListGuess;
755 	return ulBestGuess;
756 } /* end of ulCharPos2FileOffsetX */
757 
758 /*
759  * Convert a character position to an offset in the file.
760  * Logical to physical offset.
761  *
762  * Returns:	FC_INVALID: in case of error
763  *		otherwise: the computed file offset
764  */
765 ULONG
ulCharPos2FileOffset(ULONG ulCharPos)766 ulCharPos2FileOffset(ULONG ulCharPos)
767 {
768 	list_id_enum	eListID;
769 
770 	return ulCharPos2FileOffsetX(ulCharPos, &eListID);
771 } /* end of ulCharPos2FileOffset */
772 
773 /*
774  * Convert an offset in the header/footer list to a character position.
775  *
776  * Returns:	CP_INVALID: in case of error
777  *		otherwise: the computed character position
778  */
779 ULONG
ulHdrFtrOffset2CharPos(ULONG ulHdrFtrOffset)780 ulHdrFtrOffset2CharPos(ULONG ulHdrFtrOffset)
781 {
782 	list_mem_type	*pCurr;
783 	ULONG		ulOffset;
784 
785 	ulOffset = ulHdrFtrOffset;
786 	for (pCurr = pHdrFtrAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
787 		if (ulOffset >= pCurr->tInfo.ulLength) {
788 			/* The offset is not in this block */
789 			ulOffset -= pCurr->tInfo.ulLength;
790 			continue;
791 		}
792 		return pCurr->tInfo.ulCharPos + ulOffset;
793 	}
794 	return CP_INVALID;
795 } /* end of ulHdrFtrOffset2CharPos */
796 
797 /*
798  * Get the sequence number beloning to the given file offset
799  *
800  * Returns the sequence number
801  */
802 ULONG
ulGetSeqNumber(ULONG ulFileOffset)803 ulGetSeqNumber(ULONG ulFileOffset)
804 {
805 	list_mem_type	*pCurr;
806 	ULONG		ulSeq;
807 
808 	if (ulFileOffset == FC_INVALID) {
809 		return FC_INVALID;
810 	}
811 
812 	ulSeq = 0;
813 	for (pCurr = pTextAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
814 		if (ulFileOffset >= pCurr->tInfo.ulFileOffset &&
815 		    ulFileOffset < pCurr->tInfo.ulFileOffset +
816 		     pCurr->tInfo.ulLength) {
817 			/* The file offset is within the current textblock */
818 			return ulSeq + ulFileOffset - pCurr->tInfo.ulFileOffset;
819 		}
820 		ulSeq += pCurr->tInfo.ulLength;
821 	}
822 	return FC_INVALID;
823 } /* end of ulGetSeqNumber */
824