xref: /plan9/sys/src/cmd/aux/antiword/word2text.c (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * word2text.c
3  * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * MS Word to "text" functions
7  */
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #if defined(__riscos)
14 #include "DeskLib:Hourglass.h"
15 #include "drawfile.h"
16 #endif /* __riscos */
17 #include "antiword.h"
18 
19 
/* Storage size, in bytes, given to every freshly created output record */
#define INITIAL_SIZE		40
/* Number of bytes vStoreByte adds each time a storage has to grow */
#define EXTENTION_SIZE		20


/*
 * Line handling macros; using them guarantees that every call site runs
 * exactly the same statements.
 */

/*
 * Throw the current output list away and continue with one empty record.
 * Expects pAnchor and pOutput to be in scope at the call site.
 */
#define RESET_LINE()		\
	do {\
		pAnchor = pStartNewOutput(pAnchor, NULL);\
		pOutput = pAnchor;\
	} while(0)

/*
 * Pass the current output list to vAlign2Window and then start afresh.
 * Expects pDiag, pAnchor, pOutput, lWidthMax and ucAlignment to be in
 * scope at the call site.
 */
#define OUTPUT_LINE()		\
	do {\
		vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\
		TRACE_MSG("after vAlign2Window");\
		RESET_LINE();\
	} while(0)
38 
39 #if defined(__riscos)
40 /* Length of the document in characters */
41 static ULONG	ulDocumentLength;
42 /* Number of characters processed so far */
43 static ULONG	ulCharCounter;
44 static int	iCurrPct, iPrevPct;
45 #endif /* __riscos */
46 /* The document is in the format belonging to this version of Word */
47 static int	iWordVersion = -1;
48 /* Special treatment for files from Word 4/5/6 on an Apple Macintosh */
49 static BOOL	bOldMacFile = FALSE;
50 /* Section Information */
51 static const section_block_type	*pSection = NULL;
52 static const section_block_type	*pSectionNext = NULL;
53 /* All the (command line) options */
54 static options_type	tOptions;
55 /* Needed for reading a complete table row */
56 static const row_block_type	*pRowInfo = NULL;
57 static BOOL	bStartRow = FALSE;
58 static BOOL	bEndRowNorm = FALSE;
59 static BOOL	bEndRowFast = FALSE;
60 static BOOL	bIsTableRow = FALSE;
61 /* Index of the next style and font information */
62 static USHORT	usIstdNext = ISTD_NORMAL;
63 /* Needed for finding the start of a style */
64 static const style_block_type	*pStyleInfo = NULL;
65 static style_block_type		tStyleNext;
66 static BOOL	bStartStyle = FALSE;
67 static BOOL	bStartStyleNext = FALSE;
68 /* Needed for finding the start of a font */
69 static const font_block_type	*pFontInfo = NULL;
70 static font_block_type		tFontNext;
71 static BOOL	bStartFont = FALSE;
72 static BOOL	bStartFontNext = FALSE;
73 /* Needed for finding an image */
74 static ULONG	ulFileOffsetImage = FC_INVALID;
75 
76 
/*
 * vUpdateCounters - Update the counters for the hourglass
 *
 * Counts one more processed character and, whenever the resulting
 * percentage differs from the one shown last, passes the new percentage to
 * Hourglass_Percentage.
 *
 * Does nothing at all unless compiled for RISC OS.
 */
static void
vUpdateCounters(void)
{
#if defined(__riscos)
	ulCharCounter++;
	if (ulDocumentLength == 0) {
		/* Without a document length there is no percentage to show */
		return;
	}
	iCurrPct = (int)((ulCharCounter * 100) / ulDocumentLength);
	if (iCurrPct != iPrevPct) {
		Hourglass_Percentage(iCurrPct);
		iPrevPct = iCurrPct;
	}
#endif /* __riscos */
} /* end of vUpdateCounters */
92 
93 /*
94  * bOutputContainsText - see if the output contains more than white space
95  */
96 BOOL
bOutputContainsText(const output_type * pAnchor)97 bOutputContainsText(const output_type *pAnchor)
98 {
99 	const output_type	*pCurr;
100 	size_t	tIndex;
101 
102 	fail(pAnchor == NULL);
103 
104 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
105 		fail(pCurr->lStringWidth < 0);
106 		for (tIndex = 0; tIndex < pCurr->tNextFree; tIndex++) {
107 			if (isspace((int)(UCHAR)pCurr->szStorage[tIndex])) {
108 				continue;
109 			}
110 #if defined(DEBUG)
111 			if (pCurr->szStorage[tIndex] == FILLER_CHAR) {
112 				continue;
113 			}
114 #endif /* DEBUG */
115 			return TRUE;
116 		}
117 	}
118 	return FALSE;
119 } /* end of bOutputContainsText */
120 
121 /*
122  * lTotalStringWidth - compute the total width of the output string
123  */
124 static long
lTotalStringWidth(const output_type * pAnchor)125 lTotalStringWidth(const output_type *pAnchor)
126 {
127 	const output_type	*pCurr;
128 	long		lTotal;
129 
130 	lTotal = 0;
131 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
132 		DBG_DEC_C(pCurr->lStringWidth < 0, pCurr->lStringWidth);
133 		fail(pCurr->lStringWidth < 0);
134 		lTotal += pCurr->lStringWidth;
135 	}
136 	return lTotal;
137 } /* end of lTotalStringWidth */
138 
139 /*
140  * vStoreByte - store one byte
141  */
142 static void
vStoreByte(UCHAR ucChar,output_type * pOutput)143 vStoreByte(UCHAR ucChar, output_type *pOutput)
144 {
145 	fail(pOutput == NULL);
146 
147 	if (ucChar == 0) {
148 		pOutput->szStorage[pOutput->tNextFree] = '\0';
149 		return;
150 	}
151 
152 	while (pOutput->tNextFree + 2 > pOutput->tStorageSize) {
153 		pOutput->tStorageSize += EXTENTION_SIZE;
154 		pOutput->szStorage = xrealloc(pOutput->szStorage,
155 					pOutput->tStorageSize);
156 	}
157 	pOutput->szStorage[pOutput->tNextFree] = (char)ucChar;
158 	pOutput->szStorage[pOutput->tNextFree + 1] = '\0';
159 	pOutput->tNextFree++;
160 } /* end of vStoreByte */
161 
162 /*
163  * vStoreChar - store a character as one or more bytes
164  */
165 static void
vStoreChar(ULONG ulChar,BOOL bChangeAllowed,output_type * pOutput)166 vStoreChar(ULONG ulChar, BOOL bChangeAllowed, output_type *pOutput)
167 {
168 	char	szResult[4];
169 	size_t	tIndex, tLen;
170 
171 	fail(pOutput == NULL);
172 
173 	if (tOptions.eEncoding == encoding_utf_8 && bChangeAllowed) {
174 		DBG_HEX_C(ulChar > 0xffff, ulChar);
175 		fail(ulChar > 0xffff);
176 		tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
177 		for (tIndex = 0; tIndex < tLen; tIndex++) {
178 			vStoreByte((UCHAR)szResult[tIndex], pOutput);
179 		}
180 	} else {
181 		DBG_HEX_C(ulChar > 0xff, ulChar);
182 		fail(ulChar > 0xff);
183 		vStoreByte((UCHAR)ulChar, pOutput);
184 		tLen = 1;
185 	}
186 	pOutput->lStringWidth += lComputeStringWidth(
187 				pOutput->szStorage + pOutput->tNextFree - tLen,
188 				tLen,
189 				pOutput->tFontRef,
190 				pOutput->usFontSize);
191 } /* end of vStoreChar */
192 
193 /*
194  * vStoreCharacter - store one character
195  */
196 static void
vStoreCharacter(ULONG ulChar,output_type * pOutput)197 vStoreCharacter(ULONG ulChar, output_type *pOutput)
198 {
199 	vStoreChar(ulChar, TRUE, pOutput);
200 } /* end of vStoreCharacter */
201 
202 /*
203  * vStoreString - store a string
204  */
205 static void
vStoreString(const char * szString,size_t tStringLength,output_type * pOutput)206 vStoreString(const char *szString, size_t tStringLength, output_type *pOutput)
207 {
208 	size_t	tIndex;
209 
210 	fail(szString == NULL || pOutput == NULL);
211 
212 	for (tIndex = 0; tIndex < tStringLength; tIndex++) {
213 		vStoreCharacter((ULONG)(UCHAR)szString[tIndex], pOutput);
214 	}
215 } /* end of vStoreString */
216 
217 /*
218  * vStoreNumberAsDecimal - store a number as a decimal number
219  */
220 static void
vStoreNumberAsDecimal(UINT uiNumber,output_type * pOutput)221 vStoreNumberAsDecimal(UINT uiNumber, output_type *pOutput)
222 {
223 	size_t	tLen;
224 	char	szString[3 * sizeof(UINT) + 1];
225 
226 	fail(uiNumber == 0);
227 	fail(pOutput == NULL);
228 
229 	tLen = (size_t)sprintf(szString, "%u", uiNumber);
230 	vStoreString(szString, tLen, pOutput);
231 } /* end of vStoreNumberAsDecimal */
232 
233 /*
234  * vStoreNumberAsRoman - store a number as a roman numerical
235  */
236 static void
vStoreNumberAsRoman(UINT uiNumber,output_type * pOutput)237 vStoreNumberAsRoman(UINT uiNumber, output_type *pOutput)
238 {
239 	size_t	tLen;
240 	char	szString[15];
241 
242 	fail(uiNumber == 0);
243 	fail(pOutput == NULL);
244 
245 	tLen = tNumber2Roman(uiNumber, FALSE, szString);
246 	vStoreString(szString, tLen, pOutput);
247 } /* end of vStoreNumberAsRoman */
248 
249 /*
250  * vStoreStyle - store a style
251  */
252 static void
vStoreStyle(diagram_type * pDiag,output_type * pOutput,const style_block_type * pStyle)253 vStoreStyle(diagram_type *pDiag, output_type *pOutput,
254 	const style_block_type *pStyle)
255 {
256 	size_t	tLen;
257 	char	szString[120];
258 
259 	fail(pDiag == NULL);
260 	fail(pOutput == NULL);
261 	fail(pStyle == NULL);
262 
263 	if (tOptions.eConversionType == conversion_xml) {
264 		vSetHeaders(pDiag, pStyle->usIstd);
265 	} else {
266 		tLen = tStyle2Window(szString, sizeof(szString),
267 					pStyle, pSection);
268 		vStoreString(szString, tLen, pOutput);
269 	}
270 } /* end of vStoreStyle */
271 
272 /*
273  * vPutIndentation - output the specified amount of indentation
274  */
275 static void
vPutIndentation(diagram_type * pDiag,output_type * pOutput,BOOL bNoMarks,BOOL bFirstLine,UINT uiListNumber,UCHAR ucNFC,const char * szListChar,long lLeftIndentation,long lLeftIndentation1)276 vPutIndentation(diagram_type *pDiag, output_type *pOutput,
277 	BOOL bNoMarks, BOOL bFirstLine,
278 	UINT uiListNumber, UCHAR ucNFC, const char *szListChar,
279 	long lLeftIndentation, long lLeftIndentation1)
280 {
281 	long	lWidth;
282 	size_t	tIndex, tNextFree;
283 	char	szLine[30];
284 
285 	fail(pDiag == NULL);
286 	fail(pOutput == NULL);
287 	fail(szListChar == NULL);
288 	fail(lLeftIndentation < 0);
289 
290 	if (tOptions.eConversionType == conversion_xml) {
291 		/* XML does its own indentation at rendering time */
292 		return;
293 	}
294 
295 	if (bNoMarks) {
296 		if (bFirstLine) {
297 			lLeftIndentation += lLeftIndentation1;
298 		}
299 		if (lLeftIndentation < 0) {
300 			lLeftIndentation = 0;
301 		}
302 		vSetLeftIndentation(pDiag, lLeftIndentation);
303 		return;
304 	}
305 	if (lLeftIndentation <= 0) {
306 		DBG_HEX_C(ucNFC != 0x00, ucNFC);
307 		vSetLeftIndentation(pDiag, 0);
308 		return;
309 	}
310 
311 #if defined(DEBUG)
312 	if (tOptions.eEncoding == encoding_utf_8) {
313 		fail(strlen(szListChar) > 3);
314 	} else {
315 		DBG_HEX_C(iscntrl((int)szListChar[0]), szListChar[0]);
316 		fail(iscntrl((int)szListChar[0]));
317 		fail(szListChar[1] != '\0');
318 	}
319 #endif /* DEBUG */
320 
321 	switch (ucNFC) {
322 	case LIST_ARABIC_NUM:
323 	case LIST_NUMBER_TXT:
324 		tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
325 		break;
326 	case LIST_UPPER_ROMAN:
327 	case LIST_LOWER_ROMAN:
328 		tNextFree = tNumber2Roman(uiListNumber,
329 				ucNFC == LIST_UPPER_ROMAN, szLine);
330 		break;
331 	case LIST_UPPER_ALPHA:
332 	case LIST_LOWER_ALPHA:
333 		tNextFree = tNumber2Alpha(uiListNumber,
334 				ucNFC == LIST_UPPER_ALPHA, szLine);
335 		break;
336 	case LIST_ORDINAL_NUM:
337 	case LIST_ORDINAL_TXT:
338 		if (uiListNumber % 10 == 1 && uiListNumber != 11) {
339 			tNextFree =
340 				(size_t)sprintf(szLine, "%ust", uiListNumber);
341 		} else if (uiListNumber % 10 == 2 && uiListNumber != 12) {
342 			tNextFree =
343 				(size_t)sprintf(szLine, "%und", uiListNumber);
344 		} else if (uiListNumber % 10 == 3 && uiListNumber != 13) {
345 			tNextFree =
346 				(size_t)sprintf(szLine, "%urd", uiListNumber);
347 		} else {
348 			tNextFree =
349 				(size_t)sprintf(szLine, "%uth", uiListNumber);
350 		}
351 		break;
352 	case LIST_OUTLINE_NUM:
353 		tNextFree = (size_t)sprintf(szLine, "%02u", uiListNumber);
354 		break;
355 	case LIST_SPECIAL:
356 	case LIST_SPECIAL2:
357 	case LIST_BULLETS:
358 		tNextFree = 0;
359 		break;
360 	default:
361 		DBG_HEX(ucNFC);
362 		DBG_FIXME();
363 		tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);
364 		break;
365 	}
366 	tNextFree += (size_t)sprintf(szLine + tNextFree, "%.3s", szListChar);
367 	szLine[tNextFree++] = ' ';
368 	szLine[tNextFree] = '\0';
369 	lWidth = lComputeStringWidth(szLine, tNextFree,
370 				pOutput->tFontRef, pOutput->usFontSize);
371 	lLeftIndentation -= lWidth;
372 	if (lLeftIndentation < 0) {
373 		lLeftIndentation = 0;
374 	}
375 	vSetLeftIndentation(pDiag, lLeftIndentation);
376 	for (tIndex = 0; tIndex < tNextFree; tIndex++) {
377 		vStoreChar((ULONG)(UCHAR)szLine[tIndex], FALSE, pOutput);
378 	}
379 } /* end of vPutIndentation */
380 
381 /*
382  * vPutSeparatorLine - output a separator line
383  *
384  * A separator line is a horizontal line two inches long.
385  * Two inches equals 144000 millipoints.
386  */
387 static void
vPutSeparatorLine(output_type * pOutput)388 vPutSeparatorLine(output_type *pOutput)
389 {
390 	long	lCharWidth;
391 	int	iCounter, iChars;
392 	char	szOne[2];
393 
394 	fail(pOutput == NULL);
395 
396 	szOne[0] = OUR_EM_DASH;
397 	szOne[1] = '\0';
398 	lCharWidth = lComputeStringWidth(szOne, 1,
399 				pOutput->tFontRef, pOutput->usFontSize);
400 	NO_DBG_DEC(lCharWidth);
401 	iChars = (int)((144000 + lCharWidth / 2) / lCharWidth);
402 	NO_DBG_DEC(iChars);
403 	for (iCounter = 0; iCounter < iChars; iCounter++) {
404 		vStoreCharacter((ULONG)(UCHAR)OUR_EM_DASH, pOutput);
405 	}
406 } /* end of vPutSeparatorLine */
407 
408 /*
409  * pStartNextOutput - start the next output record
410  *
411  * returns a pointer to the next record
412  */
413 static output_type *
pStartNextOutput(output_type * pCurrent)414 pStartNextOutput(output_type *pCurrent)
415 {
416 	output_type	*pNew;
417 
418 	TRACE_MSG("pStartNextOutput");
419 
420 	if (pCurrent->tNextFree == 0) {
421 		/* The current record is empty, re-use */
422 		fail(pCurrent->szStorage[0] != '\0');
423 		fail(pCurrent->lStringWidth != 0);
424 		return pCurrent;
425 	}
426 	/* The current record is in use, make a new one */
427 	pNew = xmalloc(sizeof(*pNew));
428 	pCurrent->pNext = pNew;
429 	pNew->tStorageSize = INITIAL_SIZE;
430 	pNew->szStorage = xmalloc(pNew->tStorageSize);
431 	pNew->szStorage[0] = '\0';
432 	pNew->tNextFree = 0;
433 	pNew->lStringWidth = 0;
434 	pNew->ucFontColor = FONT_COLOR_DEFAULT;
435 	pNew->usFontStyle = FONT_REGULAR;
436 	pNew->tFontRef = (drawfile_fontref)0;
437 	pNew->usFontSize = DEFAULT_FONT_SIZE;
438 	pNew->pPrev = pCurrent;
439 	pNew->pNext = NULL;
440 	return pNew;
441 } /* end of pStartNextOutput */
442 
443 /*
444  * pStartNewOutput
445  */
446 static output_type *
pStartNewOutput(output_type * pAnchor,output_type * pLeftOver)447 pStartNewOutput(output_type *pAnchor, output_type *pLeftOver)
448 {
449 	output_type	*pCurr, *pNext;
450 	USHORT		usFontStyle, usFontSize;
451 	drawfile_fontref	tFontRef;
452 	UCHAR		ucFontColor;
453 
454 	TRACE_MSG("pStartNewOutput");
455 
456 	ucFontColor = FONT_COLOR_DEFAULT;
457 	usFontStyle = FONT_REGULAR;
458 	tFontRef = (drawfile_fontref)0;
459 	usFontSize = DEFAULT_FONT_SIZE;
460 	/* Free the old output space */
461 	pCurr = pAnchor;
462 	while (pCurr != NULL) {
463 		TRACE_MSG("Free the old output space");
464 		pNext = pCurr->pNext;
465 		pCurr->szStorage = xfree(pCurr->szStorage);
466 		if (pCurr->pNext == NULL) {
467 			ucFontColor = pCurr->ucFontColor;
468 			usFontStyle = pCurr->usFontStyle;
469 			tFontRef = pCurr->tFontRef;
470 			usFontSize = pCurr->usFontSize;
471 		}
472 		pCurr = xfree(pCurr);
473 		pCurr = pNext;
474 	}
475 	if (pLeftOver == NULL) {
476 		/* Create new output space */
477 		TRACE_MSG("Create new output space");
478 		pLeftOver = xmalloc(sizeof(*pLeftOver));
479 		pLeftOver->tStorageSize = INITIAL_SIZE;
480 		NO_DBG_DEC(pLeftOver->tStorageSize);
481 		TRACE_MSG("before 2nd xmalloc");
482 		pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);
483 		TRACE_MSG("after 2nd xmalloc");
484 		pLeftOver->szStorage[0] = '\0';
485 		pLeftOver->tNextFree = 0;
486 		pLeftOver->lStringWidth = 0;
487 		pLeftOver->ucFontColor = ucFontColor;
488 		pLeftOver->usFontStyle = usFontStyle;
489 		pLeftOver->tFontRef = tFontRef;
490 		pLeftOver->usFontSize = usFontSize;
491 		pLeftOver->pPrev = NULL;
492 		pLeftOver->pNext = NULL;
493 	}
494 	fail(!bCheckDoubleLinkedList(pLeftOver));
495 	return pLeftOver;
496 } /* end of pStartNewOutput */
497 
498 /*
499  * ulGetChar - get the next character from the specified list
500  *
501  * returns the next character of EOF
502  */
503 static ULONG
ulGetChar(FILE * pFile,list_id_enum eListID)504 ulGetChar(FILE *pFile, list_id_enum eListID)
505 {
506 	const font_block_type	*pCurr;
507 	ULONG		ulChar, ulFileOffset, ulCharPos;
508 	row_info_enum	eRowInfo;
509 	USHORT		usChar, usPropMod;
510 	BOOL		bSkip;
511 
512 	fail(pFile == NULL);
513 
514 	pCurr = pFontInfo;
515 	bSkip = FALSE;
516 	for (;;) {
517 		usChar = usNextChar(pFile, eListID,
518 				&ulFileOffset, &ulCharPos, &usPropMod);
519 		if (usChar == (USHORT)EOF) {
520 			return (ULONG)EOF;
521 		}
522 
523 		vUpdateCounters();
524 
525 		eRowInfo = ePropMod2RowInfo(usPropMod, iWordVersion);
526 		if (!bStartRow) {
527 #if 0
528 			bStartRow = eRowInfo == found_a_cell ||
529 				(pRowInfo != NULL &&
530 				 ulFileOffset == pRowInfo->ulFileOffsetStart &&
531 				 eRowInfo != found_not_a_cell);
532 #else
533 			bStartRow = pRowInfo != NULL &&
534 				ulFileOffset == pRowInfo->ulFileOffsetStart;
535 #endif
536 			NO_DBG_HEX_C(bStartRow, pRowInfo->ulFileOffsetStart);
537 		}
538 		if (!bEndRowNorm) {
539 #if 0
540 			bEndRow = eRowInfo == found_end_of_row ||
541 				(pRowInfo != NULL &&
542 				 ulFileOffset == pRowInfo->ulFileOffsetEnd &&
543 				 eRowInfo != found_not_end_of_row);
544 #else
545 			bEndRowNorm = pRowInfo != NULL &&
546 				ulFileOffset == pRowInfo->ulFileOffsetEnd;
547 #endif
548 			NO_DBG_HEX_C(bEndRowNorm, pRowInfo->ulFileOffsetEnd);
549 		}
550 		if (!bEndRowFast) {
551 			bEndRowFast = eRowInfo == found_end_of_row;
552 			NO_DBG_HEX_C(bEndRowFast, pRowInfo->ulFileOffsetEnd);
553 		}
554 
555 		if (!bStartStyle) {
556 			bStartStyle = pStyleInfo != NULL &&
557 				ulFileOffset == pStyleInfo->ulFileOffset;
558 			NO_DBG_HEX_C(bStartStyle, ulFileOffset);
559 		}
560 		if (pCurr != NULL && ulFileOffset == pCurr->ulFileOffset) {
561 			bStartFont = TRUE;
562 			NO_DBG_HEX(ulFileOffset);
563 			pFontInfo = pCurr;
564 			pCurr = pGetNextFontInfoListItem(pCurr);
565 		}
566 
567 		/* Skip embedded characters */
568 		if (usChar == START_EMBEDDED) {
569 			bSkip = TRUE;
570 			continue;
571 		}
572 		if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
573 			bSkip = FALSE;
574 			continue;
575 		}
576 		if (bSkip) {
577 			continue;
578 		}
579 		ulChar = ulTranslateCharacters(usChar,
580 					ulFileOffset,
581 					iWordVersion,
582 					tOptions.eConversionType,
583 					tOptions.eEncoding,
584 					bOldMacFile);
585 		if (ulChar == IGNORE_CHARACTER) {
586 			continue;
587 		}
588 		if (ulChar == PICTURE) {
589 			ulFileOffsetImage = ulGetPictInfoListItem(ulFileOffset);
590 		} else {
591 			ulFileOffsetImage = FC_INVALID;
592 		}
593 		if (ulChar == PAR_END) {
594 			/* End of paragraph seen, prepare for the next */
595 			vFillStyleFromStylesheet(usIstdNext, &tStyleNext);
596 			vCorrectStyleValues(&tStyleNext);
597 			bStartStyleNext = TRUE;
598 			vFillFontFromStylesheet(usIstdNext, &tFontNext);
599 			vCorrectFontValues(&tFontNext);
600 			bStartFontNext = TRUE;
601 		}
602 		if (ulChar == PAGE_BREAK) {
603 			/* Might be the start of a new section */
604 			pSectionNext = pGetSectionInfo(pSection, ulCharPos);
605 		}
606 		return ulChar;
607 	}
608 } /* end of ulGetChar */
609 
610 /*
611  * lGetWidthMax - get the maximum line width from the paragraph break value
612  *
613  * Returns the maximum line width in millipoints
614  */
615 static long
lGetWidthMax(int iParagraphBreak)616 lGetWidthMax(int iParagraphBreak)
617 {
618 	fail(iParagraphBreak < 0);
619 
620 	if (iParagraphBreak == 0) {
621 		return LONG_MAX;
622 	}
623 	if (iParagraphBreak < MIN_SCREEN_WIDTH) {
624 		return lChar2MilliPoints(MIN_SCREEN_WIDTH);
625 	}
626 	if (iParagraphBreak > MAX_SCREEN_WIDTH) {
627 		return lChar2MilliPoints(MAX_SCREEN_WIDTH);
628 	}
629 	return lChar2MilliPoints(iParagraphBreak);
630 } /* end of lGetWidthMax */
631 
632 /*
633  * bWordDecryptor - turn Word to something more useful
634  *
635  * returns TRUE when successful, otherwise FALSE
636  */
637 BOOL
bWordDecryptor(FILE * pFile,long lFilesize,diagram_type * pDiag)638 bWordDecryptor(FILE *pFile, long lFilesize, diagram_type *pDiag)
639 {
640 	imagedata_type	tImage;
641 	const style_block_type	*pStyleTmp;
642 	const font_block_type	*pFontTmp;
643 	const char	*szListChar;
644 	output_type	*pAnchor, *pOutput, *pLeftOver;
645 	ULONG	ulChar;
646 	long	lBeforeIndentation, lAfterIndentation;
647 	long	lLeftIndentation, lLeftIndentation1, lRightIndentation;
648 	long	lWidthCurr, lWidthMax, lDefaultTabWidth, lHalfSpaceWidth, lTmp;
649 	list_id_enum 	eListID;
650 	image_info_enum	eRes;
651 	UINT	uiFootnoteNumber, uiEndnoteNumber, uiTmp;
652 	int	iListSeqNumber;
653 	BOOL	bWasTableRow, bTableFontClosed, bWasEndOfParagraph;
654 	BOOL	bInList, bWasInList, bNoMarks, bFirstLine;
655 	BOOL	bAllCapitals, bHiddenText, bMarkDelText, bSuccess;
656 	USHORT	usListNumber;
657 	USHORT	usFontStyle, usFontStyleMinimal, usFontSize, usTmp;
658 	UCHAR	ucFontNumber, ucFontColor;
659 	UCHAR	ucNFC, ucAlignment;
660 
661 	fail(pFile == NULL || lFilesize <= 0 || pDiag == NULL);
662 
663 	TRACE_MSG("bWordDecryptor");
664 
665 	iWordVersion = iInitDocument(pFile, lFilesize);
666 	if (iWordVersion < 0) {
667 		DBG_DEC(iWordVersion);
668 		return FALSE;
669 	}
670 
671 	vGetOptions(&tOptions);
672 	bOldMacFile = bIsOldMacFile();
673 	vPrepareHdrFtrText(pFile);
674 	vPrepareFootnoteText(pFile);
675 
676 	vPrologue2(pDiag, iWordVersion);
677 
678 	/* Initialisation */
679 #if defined(__riscos)
680 	ulCharCounter = 0;
681 	iCurrPct = 0;
682 	iPrevPct = -1;
683 	ulDocumentLength = ulGetDocumentLength();
684 #endif /* __riscos */
685 	pSection = pGetSectionInfo(NULL, 0);
686 	pSectionNext = pSection;
687 	lDefaultTabWidth = lGetDefaultTabWidth();
688 	DBG_DEC_C(lDefaultTabWidth != 36000, lDefaultTabWidth);
689 	pRowInfo = pGetNextRowInfoListItem();
690 	DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart);
691 	DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd);
692 	DBG_MSG_C(pRowInfo == NULL, "No rows at all");
693 	bStartRow = FALSE;
694 	bEndRowNorm = FALSE;
695 	bEndRowFast = FALSE;
696 	bIsTableRow = FALSE;
697 	bWasTableRow = FALSE;
698 	vResetStyles();
699 	pStyleInfo = pGetNextTextStyle(NULL);
700 	bStartStyle = FALSE;
701 	bInList = FALSE;
702 	bWasInList = FALSE;
703 	iListSeqNumber = 0;
704 	usIstdNext = ISTD_NORMAL;
705 	pAnchor = NULL;
706 	pFontInfo = pGetNextFontInfoListItem(NULL);
707 	DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset);
708 	DBG_MSG_C(pFontInfo == NULL, "No fonts at all");
709 	bStartFont = FALSE;
710 	ucFontNumber = 0;
711 	usFontStyleMinimal = FONT_REGULAR;
712 	usFontStyle = FONT_REGULAR;
713 	usFontSize = DEFAULT_FONT_SIZE;
714 	ucFontColor = FONT_COLOR_DEFAULT;
715 	pAnchor = pStartNewOutput(pAnchor, NULL);
716 	pOutput = pAnchor;
717 	pOutput->ucFontColor = ucFontColor;
718 	pOutput->usFontStyle = usFontStyle;
719 	pOutput->tFontRef = tOpenFont(ucFontNumber, usFontStyle, usFontSize);
720 	pOutput->usFontSize = usFontSize;
721 	bTableFontClosed = TRUE;
722 	lBeforeIndentation = 0;
723 	lAfterIndentation = 0;
724 	lLeftIndentation = 0;
725 	lLeftIndentation1 = 0;
726 	lRightIndentation = 0;
727 	bWasEndOfParagraph = TRUE;
728 	bNoMarks = TRUE;
729 	bFirstLine = TRUE;
730 	ucNFC = LIST_BULLETS;
731 	if (pStyleInfo != NULL) {
732 		szListChar = pStyleInfo->szListChar;
733 		pStyleTmp = pStyleInfo;
734 	} else {
735 		if (tStyleNext.szListChar[0] == '\0') {
736 			vGetBulletValue(tOptions.eConversionType,
737 				tOptions.eEncoding, tStyleNext.szListChar, 4);
738 		}
739 		szListChar = tStyleNext.szListChar;
740 		pStyleTmp = &tStyleNext;
741 	}
742 	usListNumber = 0;
743 	ucAlignment = ALIGNMENT_LEFT;
744 	bAllCapitals = FALSE;
745 	bHiddenText = FALSE;
746 	bMarkDelText = FALSE;
747 	lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);
748 	NO_DBG_DEC(lWidthMax);
749 
750 	Hourglass_On();
751 
752 	uiFootnoteNumber = 0;
753 	uiEndnoteNumber = 0;
754 	eListID = text_list;
755 	for(;;) {
756 		ulChar = ulGetChar(pFile, eListID);
757 		if (ulChar == (ULONG)EOF) {
758 			if (bOutputContainsText(pAnchor)) {
759 				OUTPUT_LINE();
760 			} else {
761 				RESET_LINE();
762 			}
763 			switch (eListID) {
764 			case text_list:
765 				if (tOptions.eConversionType !=
766 							conversion_xml) {
767 					eListID = footnote_list;
768 					if (uiFootnoteNumber != 0) {
769 						vPutSeparatorLine(pAnchor);
770 						OUTPUT_LINE();
771 						uiFootnoteNumber = 0;
772 					}
773 					break;
774 				}
775 				/* No break or return */
776 			case footnote_list:
777 				eListID = endnote_list;
778 				if (uiEndnoteNumber != 0) {
779 					vPutSeparatorLine(pAnchor);
780 					OUTPUT_LINE();
781 					uiEndnoteNumber = 0;
782 				}
783 				break;
784 			case endnote_list:
785 				eListID = textbox_list;
786 				if (bExistsTextBox()) {
787 					vPutSeparatorLine(pAnchor);
788 					OUTPUT_LINE();
789 				}
790 				break;
791 			case textbox_list:
792 				eListID = hdrtextbox_list;
793 				if (bExistsHdrTextBox()) {
794 					vPutSeparatorLine(pAnchor);
795 					OUTPUT_LINE();
796 				}
797 				break;
798 			case hdrtextbox_list:
799 			default:
800 				eListID = end_of_lists;
801 				break;
802 			}
803 			if (eListID == end_of_lists) {
804 				break;
805 			}
806 			continue;
807 		}
808 
809 		if (ulChar == UNKNOWN_NOTE_CHAR) {
810 			switch (eListID) {
811 			case footnote_list:
812 				ulChar = FOOTNOTE_CHAR;
813 				break;
814 			case endnote_list:
815 				ulChar = ENDNOTE_CHAR;
816 				break;
817 			default:
818 				break;
819 			}
820 		}
821 
822 		if (bStartRow) {
823 			/* Begin of a tablerow found */
824 			if (bOutputContainsText(pAnchor)) {
825 				OUTPUT_LINE();
826 			} else {
827 				RESET_LINE();
828 			}
829 			fail(pAnchor != pOutput);
830 			if (bTableFontClosed) {
831 				/* Start special table font */
832 				vCloseFont();
833 				/*
834 				 * Compensate for the fact that Word uses
835 				 * proportional fonts for its tables and we
836 				 * only one fixed-width font
837 				 */
838 				uiTmp = ((UINT)usFontSize * 5 + 3) / 6;
839 				if (uiTmp < MIN_TABLEFONT_SIZE) {
840 					uiTmp = MIN_TABLEFONT_SIZE;
841 				} else if (uiTmp > MAX_TABLEFONT_SIZE) {
842 					uiTmp = MAX_TABLEFONT_SIZE;
843 				}
844 				pOutput->usFontSize = (USHORT)uiTmp;
845 				pOutput->tFontRef =
846 					tOpenTableFont(pOutput->usFontSize);
847 				pOutput->usFontStyle = FONT_REGULAR;
848 				pOutput->ucFontColor = FONT_COLOR_BLACK;
849 				bTableFontClosed = FALSE;
850 			}
851 			bIsTableRow = TRUE;
852 			bStartRow = FALSE;
853 		}
854 
855 		if (bWasTableRow &&
856 		    !bIsTableRow &&
857 		    ulChar != PAR_END &&
858 		    ulChar != HARD_RETURN &&
859 		    ulChar != PAGE_BREAK &&
860 		    ulChar != COLUMN_FEED) {
861 			/*
862 			 * The end of a table should be followed by an
863 			 * empty line, like the end of a paragraph
864 			 */
865 			OUTPUT_LINE();
866 			vEndOfParagraph(pDiag,
867 					pOutput->tFontRef,
868 					pOutput->usFontSize,
869 					(long)pOutput->usFontSize * 600);
870 		}
871 
872 		switch (ulChar) {
873 		case PAGE_BREAK:
874 		case COLUMN_FEED:
875 			if (bIsTableRow) {
876 				/* Ignore when in a table */
877 				break;
878 			}
879 			if (bOutputContainsText(pAnchor)) {
880 				OUTPUT_LINE();
881 			} else {
882 				RESET_LINE();
883 			}
884 			if (ulChar == PAGE_BREAK) {
885 				vEndOfPage(pDiag, lAfterIndentation,
886 						pSection != pSectionNext);
887 			} else {
888 				vEndOfParagraph(pDiag,
889 					pOutput->tFontRef,
890 					pOutput->usFontSize,
891 					lAfterIndentation);
892 			}
893 			break;
894 		default:
895 			break;
896 		}
897 
898 		if (bStartFont || (bStartFontNext && ulChar != PAR_END)) {
899 			/* Begin of a font found */
900 			if (bStartFont) {
901 				/* bStartFont takes priority */
902 				fail(pFontInfo == NULL);
903 				pFontTmp = pFontInfo;
904 			} else {
905 				pFontTmp = &tFontNext;
906 			}
907 			bAllCapitals = bIsCapitals(pFontTmp->usFontStyle);
908 			bHiddenText = bIsHidden(pFontTmp->usFontStyle);
909 			bMarkDelText = bIsMarkDel(pFontTmp->usFontStyle);
910 			usTmp = pFontTmp->usFontStyle &
911 				(FONT_BOLD|FONT_ITALIC|FONT_UNDERLINE|
912 				 FONT_STRIKE|FONT_MARKDEL|
913 				 FONT_SUPERSCRIPT|FONT_SUBSCRIPT);
914 			if (!bIsTableRow &&
915 			    (usFontSize != pFontTmp->usFontSize ||
916 			     ucFontNumber != pFontTmp->ucFontNumber ||
917 			     usFontStyleMinimal != usTmp ||
918 			     ucFontColor != pFontTmp->ucFontColor)) {
919 				pOutput = pStartNextOutput(pOutput);
920 				vCloseFont();
921 				pOutput->ucFontColor = pFontTmp->ucFontColor;
922 				pOutput->usFontStyle = pFontTmp->usFontStyle;
923 				pOutput->usFontSize = pFontTmp->usFontSize;
924 				pOutput->tFontRef = tOpenFont(
925 						pFontTmp->ucFontNumber,
926 						pFontTmp->usFontStyle,
927 						pFontTmp->usFontSize);
928 				fail(!bCheckDoubleLinkedList(pAnchor));
929 			}
930 			ucFontNumber = pFontTmp->ucFontNumber;
931 			usFontSize = pFontTmp->usFontSize;
932 			ucFontColor = pFontTmp->ucFontColor;
933 			usFontStyle = pFontTmp->usFontStyle;
934 			usFontStyleMinimal = usTmp;
935 			if (bStartFont) {
936 				/* Get the next font info */
937 				pFontInfo = pGetNextFontInfoListItem(pFontInfo);
938 				NO_DBG_HEX_C(pFontInfo != NULL,
939 						pFontInfo->ulFileOffset);
940 				DBG_MSG_C(pFontInfo == NULL, "No more fonts");
941 			}
942 			bStartFont = FALSE;
943 			bStartFontNext = FALSE;
944 		}
945 
946 		if (bStartStyle || (bStartStyleNext && ulChar != PAR_END)) {
947 			bFirstLine = TRUE;
948 			/* Begin of a style found */
949 			if (bStartStyle) {
950 				/* bStartStyle takes priority */
951 				fail(pStyleInfo == NULL);
952 				pStyleTmp = pStyleInfo;
953 			} else {
954 				pStyleTmp = &tStyleNext;
955 			}
956 			if (!bIsTableRow) {
957 				vStoreStyle(pDiag, pOutput, pStyleTmp);
958 			}
959 			usIstdNext = pStyleTmp->usIstdNext;
960 			lBeforeIndentation =
961 				lTwips2MilliPoints(pStyleTmp->usBeforeIndent);
962 			lAfterIndentation =
963 				lTwips2MilliPoints(pStyleTmp->usAfterIndent);
964 			lLeftIndentation =
965 				lTwips2MilliPoints(pStyleTmp->sLeftIndent);
966 			lLeftIndentation1 =
967 				lTwips2MilliPoints(pStyleTmp->sLeftIndent1);
968 			lRightIndentation =
969 				lTwips2MilliPoints(pStyleTmp->sRightIndent);
970 			bInList = bStyleImpliesList(pStyleTmp, iWordVersion);
971 			bNoMarks = !bInList || pStyleTmp->bNumPause;
972 			ucNFC = pStyleTmp->ucNFC;
973 			szListChar = pStyleTmp->szListChar;
974 			ucAlignment = pStyleTmp->ucAlignment;
975 			if (bInList && !bWasInList) {
976 				/* Start of a list */
977 				iListSeqNumber++;
978 				vStartOfList(pDiag, ucNFC,
979 						bWasTableRow && !bIsTableRow);
980 			}
981 			if (!bInList && bWasInList) {
982 				/* End of a list */
983 				vEndOfList(pDiag);
984 			}
985 			bWasInList = bInList;
986 			if (bStartStyle) {
987 				pStyleInfo = pGetNextTextStyle(pStyleInfo);
988 				NO_DBG_HEX_C(pStyleInfo != NULL,
989 						pStyleInfo->ulFileOffset);
990 				DBG_MSG_C(pStyleInfo == NULL,
991 						"No more styles");
992 			}
993 			bStartStyle = FALSE;
994 			bStartStyleNext = FALSE;
995 		}
996 
997 		if (bWasEndOfParagraph) {
998 			vStartOfParagraph1(pDiag, lBeforeIndentation);
999 		}
1000 
1001 		if (!bIsTableRow &&
1002 		    lTotalStringWidth(pAnchor) == 0) {
1003 			if (!bNoMarks) {
1004 				usListNumber = usGetListValue(iListSeqNumber,
1005 							iWordVersion,
1006 							pStyleTmp);
1007 			}
1008 			if (bInList && bFirstLine) {
1009 				vStartOfListItem(pDiag, bNoMarks);
1010 			}
1011 			vPutIndentation(pDiag, pAnchor, bNoMarks, bFirstLine,
1012 					usListNumber, ucNFC, szListChar,
1013 					lLeftIndentation, lLeftIndentation1);
1014 			bFirstLine = FALSE;
1015 			/* One number or mark per paragraph will do */
1016 			bNoMarks = TRUE;
1017 		}
1018 
1019 		if (bWasEndOfParagraph) {
1020 			vStartOfParagraph2(pDiag);
1021 			bWasEndOfParagraph = FALSE;
1022 		}
1023 
1024 		switch (ulChar) {
1025 		case PICTURE:
1026 			(void)memset(&tImage, 0, sizeof(tImage));
1027 			eRes = eExamineImage(pFile, ulFileOffsetImage, &tImage);
1028 			switch (eRes) {
1029 			case image_no_information:
1030 				bSuccess = FALSE;
1031 				break;
1032 			case image_minimal_information:
1033 			case image_full_information:
1034 #if 0
1035 				if (bOutputContainsText(pAnchor)) {
1036 					OUTPUT_LINE();
1037 				} else {
1038 					RESET_LINE();
1039 				}
1040 #endif
1041 				bSuccess = bTranslateImage(pDiag, pFile,
1042 					eRes == image_minimal_information,
1043 					ulFileOffsetImage, &tImage);
1044 				break;
1045 			default:
1046 				DBG_DEC(eRes);
1047 				bSuccess = FALSE;
1048 				break;
1049 			}
1050 			if (!bSuccess) {
1051 				vStoreString("[pic]", 5, pOutput);
1052 			}
1053 			break;
1054 		case FOOTNOTE_CHAR:
1055 			uiFootnoteNumber++;
1056 			if (tOptions.eConversionType == conversion_xml) {
1057 				vStoreCharacter((ULONG)FOOTNOTE_OR_ENDNOTE,
1058 								pOutput);
1059 				break;
1060 			}
1061 			vStoreCharacter((ULONG)'[', pOutput);
1062 			vStoreNumberAsDecimal(uiFootnoteNumber, pOutput);
1063 			vStoreCharacter((ULONG)']', pOutput);
1064 			break;
1065 		case ENDNOTE_CHAR:
1066 			uiEndnoteNumber++;
1067 			vStoreCharacter((ULONG)'[', pOutput);
1068 			vStoreNumberAsRoman(uiEndnoteNumber, pOutput);
1069 			vStoreCharacter((ULONG)']', pOutput);
1070 			break;
1071 		case UNKNOWN_NOTE_CHAR:
1072 			vStoreString("[?]", 3, pOutput);
1073 			break;
1074 		case PAR_END:
1075 			if (bIsTableRow) {
1076 				vStoreCharacter((ULONG)'\n', pOutput);
1077 				break;
1078 			}
1079 			if (bOutputContainsText(pAnchor)) {
1080 				OUTPUT_LINE();
1081 			} else {
1082 				vMove2NextLine(pDiag,
1083 					pOutput->tFontRef, pOutput->usFontSize);
1084 				RESET_LINE();
1085 			}
1086 			vEndOfParagraph(pDiag,
1087 					pOutput->tFontRef,
1088 					pOutput->usFontSize,
1089 					lAfterIndentation);
1090 			bWasEndOfParagraph = TRUE;
1091 			break;
1092 		case HARD_RETURN:
1093 			if (bIsTableRow) {
1094 				vStoreCharacter((ULONG)'\n', pOutput);
1095 				break;
1096 			}
1097 			if (bOutputContainsText(pAnchor)) {
1098 				OUTPUT_LINE();
1099 			} else {
1100 				vMove2NextLine(pDiag,
1101 					pOutput->tFontRef, pOutput->usFontSize);
1102 				RESET_LINE();
1103 			}
1104 			break;
1105 		case PAGE_BREAK:
1106 		case COLUMN_FEED:
1107 			pSection = pSectionNext;
1108 			break;
1109 		case TABLE_SEPARATOR:
1110 			if (bIsTableRow) {
1111 				vStoreCharacter(ulChar, pOutput);
1112 				break;
1113 			}
1114 			vStoreCharacter((ULONG)' ', pOutput);
1115 			vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR, pOutput);
1116 			break;
1117 		case TAB:
1118 			if (bIsTableRow ||
1119 			    tOptions.eConversionType == conversion_xml) {
1120 				vStoreCharacter((ULONG)' ', pOutput);
1121 				break;
1122 			}
1123 			if (tOptions.iParagraphBreak == 0 &&
1124 			    (tOptions.eConversionType == conversion_text ||
1125 			     tOptions.eConversionType == conversion_fmt_text)) {
1126 				/* No logical lines, so no tab expansion */
1127 				vStoreCharacter(TAB, pOutput);
1128 				break;
1129 			}
1130 			lHalfSpaceWidth = (lComputeSpaceWidth(
1131 					pOutput->tFontRef,
1132 					pOutput->usFontSize) + 1) / 2;
1133 			lTmp = lTotalStringWidth(pAnchor);
1134 			lTmp += lDrawUnits2MilliPoints(pDiag->lXleft);
1135 			lTmp /= lDefaultTabWidth;
1136 			do {
1137 				vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
1138 				lWidthCurr = lTotalStringWidth(pAnchor);
1139 				lWidthCurr +=
1140 					lDrawUnits2MilliPoints(pDiag->lXleft);
1141 			} while (lTmp == lWidthCurr / lDefaultTabWidth &&
1142 				 lWidthCurr < lWidthMax + lRightIndentation);
1143 			break;
1144 		default:
1145 			if (bHiddenText && tOptions.bHideHiddenText) {
1146 				continue;
1147 			}
1148 			if (bMarkDelText && tOptions.bRemoveRemovedText) {
1149 				continue;
1150 			}
1151 			if (ulChar == UNICODE_ELLIPSIS &&
1152 			    tOptions.eEncoding != encoding_utf_8) {
1153 				vStoreString("...", 3, pOutput);
1154 			} else {
1155 				if (bAllCapitals) {
1156 					ulChar = ulToUpper(ulChar);
1157 				}
1158 				vStoreCharacter(ulChar, pOutput);
1159 			}
1160 			break;
1161 		}
1162 
1163 		if (bWasTableRow && !bIsTableRow) {
1164 			/* End of a table */
1165 			vEndOfTable(pDiag);
1166 			/* Resume normal font */
1167 			NO_DBG_MSG("End of table font");
1168 			vCloseFont();
1169 			bTableFontClosed = TRUE;
1170 			pOutput->ucFontColor = ucFontColor;
1171 			pOutput->usFontStyle = usFontStyle;
1172 			pOutput->usFontSize = usFontSize;
1173 			pOutput->tFontRef = tOpenFont(
1174 					ucFontNumber, usFontStyle, usFontSize);
1175 		}
1176 		bWasTableRow = bIsTableRow;
1177 
1178 		if (bIsTableRow) {
1179 			fail(pAnchor != pOutput);
1180 			if (!bEndRowNorm && !bEndRowFast) {
1181 				continue;
1182 			}
1183 			/* End of a table row */
1184 			if (bEndRowNorm) {
1185 				fail(pRowInfo == NULL);
1186 				vTableRow2Window(pDiag, pAnchor, pRowInfo,
1187 						tOptions.eConversionType,
1188 						tOptions.iParagraphBreak);
1189 			} else {
1190 				fail(!bEndRowFast);
1191 			}
1192 			/* Reset */
1193 			pAnchor = pStartNewOutput(pAnchor, NULL);
1194 			pOutput = pAnchor;
1195 			if (bEndRowNorm) {
1196 				pRowInfo = pGetNextRowInfoListItem();
1197 			}
1198 			bIsTableRow = FALSE;
1199 			bEndRowNorm = FALSE;
1200 			bEndRowFast = FALSE;
1201 			NO_DBG_HEX_C(pRowInfo != NULL,
1202 						pRowInfo->ulFileOffsetStart);
1203 			NO_DBG_HEX_C(pRowInfo != NULL,
1204 						pRowInfo->ulFileOffsetEnd);
1205 			continue;
1206 		}
1207 		lWidthCurr = lTotalStringWidth(pAnchor);
1208 		lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft);
1209 		if (lWidthCurr < lWidthMax + lRightIndentation) {
1210 			continue;
1211 		}
1212 		pLeftOver = pSplitList(pAnchor);
1213 		vJustify2Window(pDiag, pAnchor,
1214 				lWidthMax, lRightIndentation, ucAlignment);
1215 		pAnchor = pStartNewOutput(pAnchor, pLeftOver);
1216 		for (pOutput = pAnchor;
1217 		     pOutput->pNext != NULL;
1218 		     pOutput = pOutput->pNext)
1219 			;	/* EMPTY */
1220 		fail(pOutput == NULL);
1221 		if (lTotalStringWidth(pAnchor) > 0) {
1222 			vSetLeftIndentation(pDiag, lLeftIndentation);
1223 		}
1224 	}
1225 
1226 	pAnchor = pStartNewOutput(pAnchor, NULL);
1227 	pAnchor->szStorage = xfree(pAnchor->szStorage);
1228 	pAnchor = xfree(pAnchor);
1229 	vCloseFont();
1230 	vFreeDocument();
1231 	Hourglass_Off();
1232 	return TRUE;
1233 } /* end of bWordDecryptor */
1234 
1235 /*
1236  * lLastStringWidth - compute the width of the last part of the output string
1237  */
1238 static long
lLastStringWidth(const output_type * pAnchor)1239 lLastStringWidth(const output_type *pAnchor)
1240 {
1241 	const output_type	*pCurr, *pStart;
1242 
1243 	pStart = NULL;
1244 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
1245 		if (pCurr->tNextFree == 1 &&
1246 		    (pCurr->szStorage[0] == PAR_END ||
1247 		     pCurr->szStorage[0] == HARD_RETURN)) {
1248 			/* Found a separator. Start after the separator */
1249 			pStart = pCurr->pNext;
1250 		}
1251 	}
1252 	if (pStart == NULL) {
1253 		/* No separators. Use the whole output string */
1254 		pStart = pAnchor;
1255 	}
1256 	return lTotalStringWidth(pStart);
1257 } /* end of lLastStringWidth */
1258 
1259 /*
1260  * pHdrFtrDecryptor - turn a header/footer list element to something useful
1261  */
1262 output_type *
pHdrFtrDecryptor(FILE * pFile,ULONG ulCharPosStart,ULONG ulCharPosNext)1263 pHdrFtrDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
1264 {
1265 	output_type	*pAnchor, *pOutput, *pLeftOver;
1266 	ULONG	ulChar, ulFileOffset, ulCharPos;
1267 	long	lWidthCurr, lWidthMax;
1268 	long	lRightIndentation;
1269 	USHORT	usChar;
1270 	UCHAR	ucAlignment;
1271 	BOOL	bSkip;
1272 
1273 	fail(iWordVersion < 0);
1274 	fail(tOptions.eConversionType == conversion_unknown);
1275 	fail(tOptions.eEncoding == 0);
1276 
1277 	if (ulCharPosStart == ulCharPosNext) {
1278 		/* There are no bytes to decrypt */
1279 		return NULL;
1280 	}
1281 
1282 	lRightIndentation = 0;
1283 	ucAlignment = ALIGNMENT_LEFT;
1284 	bSkip = FALSE;
1285 	lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);
1286 	pAnchor = pStartNewOutput(NULL, NULL);
1287 	pOutput = pAnchor;
1288 	pOutput->tFontRef = tOpenFont(0, FONT_REGULAR, DEFAULT_FONT_SIZE);
1289 	usChar = usToHdrFtrPosition(pFile, ulCharPosStart);
1290 	ulCharPos = ulCharPosStart;
1291 	ulFileOffset = ulCharPos2FileOffset(ulCharPos);
1292 	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
1293 		/* Skip embedded characters */
1294 		if (usChar == START_EMBEDDED) {
1295 			bSkip = TRUE;
1296 		} else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
1297 			bSkip = FALSE;
1298 		}
1299 		/* Translate character */
1300 		if (bSkip || usChar == END_IGNORE || usChar == END_EMBEDDED) {
1301 			ulChar = IGNORE_CHARACTER;
1302 		} else {
1303 			ulChar = ulTranslateCharacters(usChar,
1304 					ulFileOffset,
1305 					iWordVersion,
1306 					tOptions.eConversionType,
1307 					tOptions.eEncoding,
1308 					bOldMacFile);
1309 		}
1310 		/* Process character */
1311 		if (ulChar != IGNORE_CHARACTER) {
1312 			switch (ulChar) {
1313 			case PICTURE:
1314 				vStoreString("[pic]", 5, pOutput);
1315 				break;
1316 			case PAR_END:
1317 			case HARD_RETURN:
1318 			case PAGE_BREAK:
1319 			case COLUMN_FEED:
1320 				/* To the next substring */
1321 				pOutput = pStartNextOutput(pOutput);
1322 				vCloseFont();
1323 				pOutput->tFontRef = tOpenFont(0,
1324 					FONT_REGULAR, DEFAULT_FONT_SIZE);
1325 				/* A substring with just one character */
1326 				if (ulChar == HARD_RETURN) {
1327 					vStoreCharacter(HARD_RETURN, pOutput);
1328 				} else {
1329 					vStoreCharacter(PAR_END, pOutput);
1330 				}
1331 				/* To the next substring */
1332 				pOutput = pStartNextOutput(pOutput);
1333 				vCloseFont();
1334 				pOutput->tFontRef = tOpenFont(0,
1335 					FONT_REGULAR, DEFAULT_FONT_SIZE);
1336 				fail(!bCheckDoubleLinkedList(pAnchor));
1337 				break;
1338 			case TABLE_SEPARATOR:
1339 				vStoreCharacter((ULONG)' ', pOutput);
1340 				vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR,
1341 							pOutput);
1342 				break;
1343 			case TAB:
1344 				vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
1345 				break;
1346 			default:
1347 				vStoreCharacter(ulChar, pOutput);
1348 				break;
1349 			}
1350 		}
1351 		lWidthCurr = lLastStringWidth(pAnchor);
1352 		if (lWidthCurr >= lWidthMax + lRightIndentation) {
1353 			pLeftOver = pSplitList(pAnchor);
1354 			for (pOutput = pAnchor;
1355 			     pOutput->pNext != NULL;
1356 			     pOutput = pOutput->pNext)
1357 				;	/* EMPTY */
1358 			fail(pOutput == NULL);
1359 			/* To the next substring */
1360 			pOutput = pStartNextOutput(pOutput);
1361 			/* A substring with just one HARD_RETURN */
1362 			vStoreCharacter(HARD_RETURN, pOutput);
1363 			/* Put the leftover piece(s) at the end */
1364 			pOutput->pNext = pLeftOver;
1365 			if (pLeftOver != NULL) {
1366 				pLeftOver->pPrev = pOutput;
1367 			}
1368 			fail(!bCheckDoubleLinkedList(pAnchor));
1369 			for (pOutput = pAnchor;
1370 			     pOutput->pNext != NULL;
1371 			     pOutput = pOutput->pNext)
1372 				;	/* EMPTY */
1373 			fail(pOutput == NULL);
1374 		}
1375 		usChar = usNextChar(pFile, hdrftr_list,
1376 					&ulFileOffset, &ulCharPos, NULL);
1377 	}
1378 	vCloseFont();
1379 	if (bOutputContainsText(pAnchor)) {
1380 		return pAnchor;
1381 	}
1382 	pAnchor = pStartNewOutput(pAnchor, NULL);
1383 	pAnchor->szStorage = xfree(pAnchor->szStorage);
1384 	pAnchor = xfree(pAnchor);
1385 	return NULL;
1386 } /* end of pHdrFtrDecryptor */
1387 
1388 /*
1389  * pFootnoteDecryptor - turn a footnote text list element into text
1390  */
1391 char *
szFootnoteDecryptor(FILE * pFile,ULONG ulCharPosStart,ULONG ulCharPosNext)1392 szFootnoteDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
1393 {
1394 	char	*szText;
1395 	ULONG	ulChar, ulFileOffset, ulCharPos;
1396 	USHORT	usChar;
1397 	size_t	tLen, tIndex, tNextFree, tStorageSize;
1398 	char	szResult[6];
1399 	BOOL	bSkip;
1400 
1401 	fail(iWordVersion < 0);
1402 	fail(tOptions.eConversionType == conversion_unknown);
1403 	fail(tOptions.eEncoding == 0);
1404 
1405 	if (ulCharPosStart == ulCharPosNext) {
1406 		/* There are no bytes to decrypt */
1407 		return NULL;
1408 	}
1409 
1410 	if (tOptions.eConversionType != conversion_xml) {
1411 		/* Only implemented for XML output */
1412 		return NULL;
1413 	}
1414 
1415 	bSkip = FALSE;
1416 
1417 	/* Initialise the text buffer */
1418 	tStorageSize = INITIAL_SIZE;
1419 	szText = xmalloc(tStorageSize);
1420 	tNextFree = 0;
1421 	szText[tNextFree] = '\0';
1422 
1423 	/* Goto the start */
1424 	usChar = usToFootnotePosition(pFile, ulCharPosStart);
1425 	ulCharPos = ulCharPosStart;
1426 	ulFileOffset = ulCharPos2FileOffset(ulCharPos);
1427 	/* Skip the unwanted starting characters */
1428 	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext &&
1429 	       (usChar == FOOTNOTE_OR_ENDNOTE ||
1430 		usChar == PAR_END ||
1431 		usChar == TAB ||
1432 		usChar == (USHORT)' ')) {
1433 		usChar = usNextChar(pFile, footnote_list,
1434 					&ulFileOffset, &ulCharPos, NULL);
1435 	}
1436 	/* Process the footnote text */
1437 	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
1438 		/* Skip embedded characters */
1439 		if (usChar == START_EMBEDDED) {
1440 			bSkip = TRUE;
1441 		} else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
1442 			bSkip = FALSE;
1443 		}
1444 		/* Translate character */
1445 		if (bSkip ||
1446 		    usChar == END_IGNORE ||
1447 		    usChar == END_EMBEDDED ||
1448 		    usChar == FOOTNOTE_OR_ENDNOTE) {
1449 			ulChar = IGNORE_CHARACTER;
1450 		} else {
1451 			ulChar = ulTranslateCharacters(usChar,
1452 					ulFileOffset,
1453 					iWordVersion,
1454 					tOptions.eConversionType,
1455 					tOptions.eEncoding,
1456 					bOldMacFile);
1457 		}
1458 		/* Process character */
1459 		if (ulChar == PICTURE) {
1460 			tLen = 5;
1461 			strcpy(szResult, "[pic]");
1462 		} else if (ulChar == IGNORE_CHARACTER) {
1463 			tLen = 0;
1464 			szResult[0] = '\0';
1465 		} else {
1466 			switch (ulChar) {
1467 			case PAR_END:
1468 			case HARD_RETURN:
1469 			case PAGE_BREAK:
1470 			case COLUMN_FEED:
1471 				ulChar = (ULONG)PAR_END;
1472 				break;
1473 			case TAB:
1474 				ulChar = (ULONG)' ';
1475 				break;
1476 			default:
1477 				break;
1478 			}
1479 			tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
1480 		}
1481 		/* Add the results to the text */
1482 		if (tNextFree + tLen + 1 > tStorageSize) {
1483 			tStorageSize += EXTENTION_SIZE;
1484 			szText = xrealloc(szText, tStorageSize);
1485 		}
1486 		for (tIndex = 0; tIndex < tLen; tIndex++) {
1487 			szText[tNextFree++] = szResult[tIndex];
1488 		}
1489 		szText[tNextFree] = '\0';
1490 		/* Next character */
1491 		usChar = usNextChar(pFile, footnote_list,
1492 					&ulFileOffset, &ulCharPos, NULL);
1493 	}
1494 	/* Remove redundant spaces */
1495 	while (tNextFree != 0 && szText[tNextFree - 1] == ' ') {
1496 		szText[tNextFree - 1] = '\0';
1497 		tNextFree--;
1498 	}
1499 	if (tNextFree == 0) {
1500 		/* No text */
1501 		szText = xfree(szText);
1502 		return NULL;
1503 	}
1504 	return szText;
1505 } /* end of szFootnoteDecryptor */
1506