xref: /plan9/sys/src/cmd/aux/antiword/stylelist.c (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * stylelist.c
3  * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * Build, read and destroy a list of Word style information
7  */
8 
9 #include <stdlib.h>
10 #include <stddef.h>
11 #include <ctype.h>
12 #include "antiword.h"
13 
14 
15 /*
16  * Private structure to hide the way the information
17  * is stored from the rest of the program
18  */
19 typedef struct style_mem_tag {
20 	style_block_type	tInfo;
21 	ULONG			ulSequenceNumber;
22 	struct style_mem_tag	*pNext;
23 } style_mem_type;
24 
25 /* Variables needed to write the Style Information List */
26 static style_mem_type	*pAnchor = NULL;
27 static style_mem_type	*pStyleLast = NULL;
28 /* The type of conversion */
29 static conversion_type	eConversionType = conversion_unknown;
30 /* The character set encoding */
31 static encoding_type	eEncoding = encoding_neutral;
32 /* Values for efficiency reasons */
33 static const style_mem_type	*pMidPtr = NULL;
34 static BOOL		bMoveMidPtr = FALSE;
35 static BOOL		bInSequence = TRUE;
36 
37 
38 /*
39  * vDestroyStyleInfoList - destroy the Style Information List
40  */
41 void
vDestroyStyleInfoList(void)42 vDestroyStyleInfoList(void)
43 {
44 	style_mem_type	*pCurr, *pNext;
45 
46 	DBG_MSG("vDestroyStyleInfoList");
47 
48 	/* Free the Style Information List */
49 	pCurr = pAnchor;
50 	while (pCurr != NULL) {
51 		pNext = pCurr->pNext;
52 		pCurr = xfree(pCurr);
53 		pCurr = pNext;
54 	}
55 	pAnchor = NULL;
56 	/* Reset all control variables */
57 	pStyleLast = NULL;
58 	pMidPtr = NULL;
59 	bMoveMidPtr = FALSE;
60 	bInSequence = TRUE;
61 } /* end of vDestroyStyleInfoList */
62 
63 /*
64  * vConvertListCharacter - convert the list character
65  */
66 static void
vConvertListCharacter(UCHAR ucNFC,USHORT usListChar,char * szListChar)67 vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
68 {
69 	options_type	tOptions;
70 	size_t	tLen;
71 
72 	fail(szListChar == NULL);
73 	fail(szListChar[0] != '\0');
74 
75 	if (usListChar < 0x80 && isprint((int)usListChar)) {
76 		DBG_CHR_C(isalnum((int)usListChar), usListChar);
77 		szListChar[0] = (char)usListChar;
78 		szListChar[1] = '\0';
79 		return;
80 	}
81 
82 	if (ucNFC != LIST_SPECIAL &&
83 	    ucNFC != LIST_SPECIAL2 &&
84 	    ucNFC != LIST_BULLETS) {
85 		szListChar[0] = '.';
86 		szListChar[1] = '\0';
87 		return;
88 	}
89 
90 	if (eConversionType == conversion_unknown ||
91 	    eEncoding == encoding_neutral) {
92 		vGetOptions(&tOptions);
93 		eConversionType = tOptions.eConversionType;
94 		eEncoding = tOptions.eEncoding;
95 	}
96 
97 	switch (usListChar) {
98 	case 0x0000: case 0x00b7: case 0x00fe: case  0xf021: case 0xf043:
99 	case 0xf06c: case 0xf093: case 0xf0b7:
100 		usListChar = 0x2022;	/* BULLET */
101 		break;
102 	case 0x0096: case 0xf02d:
103 		usListChar = 0x2013;	/* EN DASH */
104 		break;
105 	case 0x00a8:
106 		usListChar = 0x2666;	/* BLACK DIAMOND SUIT */
107 		break;
108 	case 0x00de:
109 		usListChar = 0x21d2;	/* RIGHTWARDS DOUBLE ARROW */
110 		break;
111 	case 0x00e0: case 0xf074:
112 		usListChar = 0x25ca;	/* LOZENGE */
113 		break;
114 	case 0x00e1:
115 		usListChar = 0x2329;	/* LEFT ANGLE BRACKET */
116 		break;
117 	case 0xf020:
118 		usListChar = 0x0020;	/* SPACE */
119 		break;
120 	case 0xf041:
121 		usListChar = 0x270c;	/* VICTORY HAND */
122 		break;
123 	case 0xf066:
124 		usListChar = 0x03d5;	/* GREEK PHI SYMBOL */
125 		break;
126 	case 0xf06e:
127 		usListChar = 0x25a0;	/* BLACK SQUARE */
128 		break;
129 	case 0xf06f: case 0xf070: case 0xf0a8:
130 		usListChar = 0x25a1;	/* WHITE SQUARE */
131 		break;
132 	case 0xf071:
133 		usListChar = 0x2751;	/* LOWER RIGHT SHADOWED WHITE SQUARE */
134 		break;
135 	case 0xf075: case 0xf077:
136 		usListChar = 0x25c6;	/* BLACK DIAMOND */
137 		break;
138 	case 0xf076:
139 		usListChar = 0x2756;	/* BLACK DIAMOND MINUS WHITE X */
140 		break;
141 	case 0xf0a7:
142 		usListChar = 0x25aa;	/* BLACK SMALL SQUARE */
143 		break;
144 	case 0xf0d8:
145 		usListChar = 0x27a2;	/* RIGHTWARDS ARROWHEAD */
146 		break;
147 	case 0xf0e5:
148 		usListChar = 0x2199;	/* SOUTH WEST ARROW */
149 		break;
150 	case 0xf0f0:
151 		usListChar = 0x21e8;	/* RIGHTWARDS WHITE ARROW */
152 		break;
153 	case 0xf0fc:
154 		usListChar = 0x2713;	/* CHECK MARK */
155 		break;
156 	default:
157 		if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
158 		    (usListChar < 0x80 && !isprint((int)usListChar))) {
159 			/*
160 			 * All remaining private area characters and all
161 			 * remaining non-printable ASCII characters to their
162 			 * default bullet character
163 			 */
164 			DBG_HEX(usListChar);
165 			DBG_FIXME();
166 			if (ucNFC == LIST_SPECIAL || ucNFC == LIST_SPECIAL2) {
167 				usListChar = 0x2190;	/* LEFTWARDS ARROW */
168 			} else {
169 				usListChar = 0x2022;	/* BULLET */
170 			}
171 		}
172 		break;
173 	}
174 
175 	if (eEncoding == encoding_utf_8) {
176 		tLen = tUcs2Utf8(usListChar, szListChar, 4);
177 		szListChar[tLen] = '\0';
178 	} else {
179 		switch (usListChar) {
180 		case 0x03d5: case 0x25a1: case 0x25c6: case 0x25ca:
181 		case 0x2751:
182 			szListChar[0] = 'o';
183 			break;
184 		case 0x2013: case 0x2500:
185 			szListChar[0] = '-';
186 			break;
187 		case 0x2190: case 0x2199: case 0x2329:
188 			szListChar[0] = '<';
189 			break;
190 		case 0x21d2:
191 			szListChar[0] = '=';
192 			break;
193 		case 0x21e8: case 0x27a2:
194 			szListChar[0] = '>';
195 			break;
196 		case 0x25a0: case 0x25aa:
197 			szListChar[0] = '.';
198 			break;
199 		case 0x2666:
200 			szListChar[0] = OUR_DIAMOND;
201 			break;
202 		case 0x270c:
203 			szListChar[0] = 'x';
204 			break;
205 		case 0x2713:
206 			szListChar[0] = 'V';
207 			break;
208 		case 0x2756:
209 			szListChar[0] = '*';
210 			break;
211 		case 0x2022:
212 		default:
213 			vGetBulletValue(eConversionType, eEncoding,
214 					szListChar, 2);
215 			break;
216 		}
217 		tLen = 1;
218 	}
219 	szListChar[tLen] = '\0';
220 } /* end of vConvertListCharacter */
221 
222 /*
223  * eGetNumType - get the level type from the given level number
224  *
225  * Returns the level type
226  */
227 level_type_enum
eGetNumType(UCHAR ucNumLevel)228 eGetNumType(UCHAR ucNumLevel)
229 {
230 	switch (ucNumLevel) {
231 	case  1: case  2: case  3: case  4: case  5:
232 	case  6: case  7: case  8: case  9:
233 		return level_type_outline;
234 	case 10:
235 		return level_type_numbering;
236 	case 11:
237 		return level_type_sequence;
238 	case 12:
239 		return level_type_pause;
240 	default:
241 		return level_type_none;
242 	}
243 } /* end of eGetNumType */
244 
245 /*
246  * vCorrectStyleValues - correct style values that Antiword can't use
247  */
248 void
vCorrectStyleValues(style_block_type * pStyleBlock)249 vCorrectStyleValues(style_block_type *pStyleBlock)
250 {
251 	if (pStyleBlock->usBeforeIndent > 0x7fff) {
252 		pStyleBlock->usBeforeIndent = 0;
253 	} else if (pStyleBlock->usBeforeIndent > 2160) {
254 		/* 2160 twips = 1.5 inches or 38.1 mm */
255 		DBG_DEC(pStyleBlock->usBeforeIndent);
256 		pStyleBlock->usBeforeIndent = 2160;
257 	}
258 	if (pStyleBlock->usIstd >= 1 &&
259 	    pStyleBlock->usIstd <= 9 &&
260 	    pStyleBlock->usBeforeIndent < HEADING_GAP) {
261 		NO_DBG_DEC(pStyleBlock->usBeforeIndent);
262 		pStyleBlock->usBeforeIndent = HEADING_GAP;
263 	}
264 
265 	if (pStyleBlock->usAfterIndent > 0x7fff) {
266 		pStyleBlock->usAfterIndent = 0;
267 	} else if (pStyleBlock->usAfterIndent > 2160) {
268 		/* 2160 twips = 1.5 inches or 38.1 mm */
269 		DBG_DEC(pStyleBlock->usAfterIndent);
270 		pStyleBlock->usAfterIndent = 2160;
271 	}
272 	if (pStyleBlock->usIstd >= 1 &&
273 	    pStyleBlock->usIstd <= 9 &&
274 	    pStyleBlock->usAfterIndent < HEADING_GAP) {
275 		NO_DBG_DEC(pStyleBlock->usAfterIndent);
276 		pStyleBlock->usAfterIndent = HEADING_GAP;
277 	}
278 
279 	if (pStyleBlock->sLeftIndent < 0) {
280 		pStyleBlock->sLeftIndent = 0;
281 	}
282 	if (pStyleBlock->sRightIndent > 0) {
283 		pStyleBlock->sRightIndent = 0;
284 	}
285 	vConvertListCharacter(pStyleBlock->ucNFC,
286 			pStyleBlock->usListChar,
287 			pStyleBlock->szListChar);
288 } /* end of vCorrectStyleValues */
289 
290 /*
291  * vAdd2StyleInfoList - Add an element to the Style Information List
292  */
293 void
vAdd2StyleInfoList(const style_block_type * pStyleBlock)294 vAdd2StyleInfoList(const style_block_type *pStyleBlock)
295 {
296 	style_mem_type	*pListMember;
297 
298 	fail(pStyleBlock == NULL);
299 
300 	NO_DBG_MSG("bAdd2StyleInfoList");
301 
302 	if (pStyleBlock->ulFileOffset == FC_INVALID) {
303 		NO_DBG_DEC(pStyleBlock->usIstd);
304 		return;
305 	}
306 
307 	NO_DBG_HEX(pStyleBlock->ulFileOffset);
308 	NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
309 					pStyleBlock->sLeftIndent);
310 	NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
311 					pStyleBlock->sRightIndent);
312 	NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
313 	NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
314 	NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
315 	NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
316 					pStyleBlock->usAfterIndent);
317 	NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
318 	NO_DBG_DEC(pStyleBlock->ucNFC);
319 	NO_DBG_HEX(pStyleBlock->usListChar);
320 
321 	if (pStyleLast != NULL &&
322 	    pStyleLast->tInfo.ulFileOffset == pStyleBlock->ulFileOffset) {
323 		/*
324 		 * If two consecutive styles share the same
325 		 * offset, remember only the last style
326 		 */
327 		fail(pStyleLast->pNext != NULL);
328 		pStyleLast->tInfo = *pStyleBlock;
329 		/* Correct the values where needed */
330 		vCorrectStyleValues(&pStyleLast->tInfo);
331 		return;
332 	}
333 
334 	/* Create list member */
335 	pListMember = xmalloc(sizeof(style_mem_type));
336 	/* Fill the list member */
337 	pListMember->tInfo = *pStyleBlock;
338 	pListMember->pNext = NULL;
339 	/* Add the sequence number */
340 	pListMember->ulSequenceNumber =
341 			ulGetSeqNumber(pListMember->tInfo.ulFileOffset);
342 	/* Correct the values where needed */
343 	vCorrectStyleValues(&pListMember->tInfo);
344 	/* Add the new member to the list */
345 	if (pAnchor == NULL) {
346 		pAnchor = pListMember;
347 		/* For efficiency */
348 		pMidPtr = pAnchor;
349 		bMoveMidPtr = FALSE;
350 		bInSequence = TRUE;
351 	} else {
352 		fail(pStyleLast == NULL);
353 		pStyleLast->pNext = pListMember;
354 		/* For efficiency */
355 		if (bMoveMidPtr) {
356 			pMidPtr = pMidPtr->pNext;
357 			bMoveMidPtr = FALSE;
358 		} else {
359 			bMoveMidPtr = TRUE;
360 		}
361 		if (bInSequence) {
362 			bInSequence = pListMember->ulSequenceNumber >
363 					pStyleLast->ulSequenceNumber;
364 		}
365 	}
366 	pStyleLast = pListMember;
367 } /* end of vAdd2StyleInfoList */
368 
369 /*
370  * Get the record that follows the given recored in the Style Information List
371  */
372 const style_block_type *
pGetNextStyleInfoListItem(const style_block_type * pCurr)373 pGetNextStyleInfoListItem(const style_block_type *pCurr)
374 {
375 	const style_mem_type	*pRecord;
376 	size_t	tOffset;
377 
378 	if (pCurr == NULL) {
379 		if (pAnchor == NULL) {
380 			/* There are no records */
381 			return NULL;
382 		}
383 		/* The first record is the only one without a predecessor */
384 		return &pAnchor->tInfo;
385 	}
386 	tOffset = offsetof(style_mem_type, tInfo);
387 	/* Many casts to prevent alignment warnings */
388 	pRecord = (style_mem_type *)(void *)((char *)pCurr - tOffset);
389 	fail(pCurr != &pRecord->tInfo);
390 	if (pRecord->pNext == NULL) {
391 		/* The last record has no successor */
392 		return NULL;
393 	}
394 	return &pRecord->pNext->tInfo;
395 } /* end of pGetNextStyleInfoListItem */
396 
397 /*
398  * Get the next text style
399  */
400 const style_block_type *
pGetNextTextStyle(const style_block_type * pCurr)401 pGetNextTextStyle(const style_block_type *pCurr)
402 {
403 	const style_block_type	*pRecord;
404 
405 	pRecord = pCurr;
406 	do {
407 		pRecord = pGetNextStyleInfoListItem(pRecord);
408 	} while (pRecord != NULL &&
409 		 (pRecord->eListID == hdrftr_list ||
410 		  pRecord->eListID == macro_list ||
411 		  pRecord->eListID == annotation_list));
412 	return pRecord;
413 } /* end of pGetNextTextStyle */
414 
415 /*
416  * usGetIstd - get the istd that belongs to the given file offset
417  */
418 USHORT
usGetIstd(ULONG ulFileOffset)419 usGetIstd(ULONG ulFileOffset)
420 {
421 	const style_mem_type	*pCurr, *pBest, *pStart;
422 	ULONG	ulSeq, ulBest;
423 
424 	ulSeq = ulGetSeqNumber(ulFileOffset);
425 	if (ulSeq == FC_INVALID) {
426 		return ISTD_NORMAL;
427 	}
428 	NO_DBG_HEX(ulFileOffset);
429 	NO_DBG_DEC(ulSeq);
430 
431 	if (bInSequence &&
432 	    pMidPtr != NULL &&
433 	    ulSeq > pMidPtr->ulSequenceNumber) {
434 		/* The istd is in the second half of the chained list */
435 		pStart = pMidPtr;
436 	} else {
437 		pStart = pAnchor;
438 	}
439 
440 	pBest = NULL;
441 	ulBest = 0;
442 	for (pCurr = pStart; pCurr != NULL; pCurr = pCurr->pNext) {
443 		if (pCurr->ulSequenceNumber != FC_INVALID &&
444 		    (pBest == NULL || pCurr->ulSequenceNumber > ulBest) &&
445 		    pCurr->ulSequenceNumber <= ulSeq) {
446 			pBest = pCurr;
447 			ulBest = pCurr->ulSequenceNumber;
448 		}
449 		if (bInSequence && pCurr->ulSequenceNumber > ulSeq) {
450 			break;
451 		}
452 	}
453 	NO_DBG_DEC(ulBest);
454 
455 	if (pBest == NULL) {
456 		return ISTD_NORMAL;
457 	}
458 
459 	NO_DBG_DEC(pBest->tInfo.usIstd);
460 	return pBest->tInfo.usIstd;
461 } /* end of usGetIstd */
462 
463 /*
464  * bStyleImpliesList - does style info implies being part of a list
465  *
466  * Decide whether the style information implies that the given paragraph is
467  * part of a list
468  *
469  * Returns TRUE when the paragraph is part of a list, otherwise FALSE
470  */
471 BOOL
bStyleImpliesList(const style_block_type * pStyle,int iWordVersion)472 bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
473 {
474 	fail(pStyle == NULL);
475 	fail(iWordVersion < 0);
476 
477 	if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
478 		/* These are heading levels */
479 		return FALSE;
480 	}
481 	if (iWordVersion < 8) {
482 		/* Check for old style lists */
483 		return pStyle->ucNumLevel != 0;
484 	}
485 	/* Check for new style lists */
486 	return pStyle->usListIndex != 0;
487 } /* end of bStyleImpliesList */
488