xref: /plan9/sys/src/cmd/aux/antiword/notes.c (revision a84536681645e23c630ce4ef2e5c3b284d4c590b)
1 /*
2  * notes.c
3  * Copyright (C) 1998-2003 A.J. van Os; Released under GPL
4  *
5  * Description:
6  * Functions to tell the difference between footnotes and endnotes
7  */
8 
9 #include "antiword.h"
10 
11 /* Variables needed to write the Footnote and Endnote Lists */
12 static ULONG	*aulFootnoteList = NULL;
13 static size_t	tFootnoteListLength = 0;
14 static ULONG	*aulEndnoteList = NULL;
15 static size_t	tEndnoteListLength = 0;
16 
17 
18 /*
19  * Destroy the lists with footnote and endnote information
20  */
21 void
22 vDestroyNotesInfoLists(void)
23 {
24 	DBG_MSG("vDestroyNotesInfoLists");
25 
26 	/* Free the lists and reset all control variables */
27 	aulEndnoteList = xfree(aulEndnoteList);
28 	aulFootnoteList = xfree(aulFootnoteList);
29 	tEndnoteListLength = 0;
30 	tFootnoteListLength = 0;
31 } /* end of vDestroyNotesInfoLists */
32 
33 /*
34  * Build the list with footnote information for Word 6/7 files
35  */
36 static void
37 vGet6FootnotesInfo(FILE *pFile, ULONG ulStartBlock,
38 	const ULONG *aulBBD, size_t tBBDLen,
39 	const UCHAR *aucHeader)
40 {
41 	UCHAR	*aucBuffer;
42 	ULONG	ulFileOffset, ulBeginOfText, ulOffset, ulBeginFootnoteInfo;
43 	size_t	tFootnoteInfoLen;
44 	int	iIndex;
45 
46 	fail(pFile == NULL || aucHeader == NULL);
47 	fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);
48 	fail(aulBBD == NULL);
49 
50 	ulBeginOfText = ulGetLong(0x18, aucHeader);
51 	NO_DBG_HEX(ulBeginOfText);
52 	ulBeginFootnoteInfo = ulGetLong(0x68, aucHeader);
53 	NO_DBG_HEX(ulBeginFootnoteInfo);
54 	tFootnoteInfoLen = (size_t)ulGetLong(0x6c, aucHeader);
55 	NO_DBG_DEC(tFootnoteInfoLen);
56 
57 	if (tFootnoteInfoLen < 10) {
58 		DBG_MSG("No Footnotes in this document");
59 		return;
60 	}
61 
62 	aucBuffer = xmalloc(tFootnoteInfoLen);
63 	if (!bReadBuffer(pFile, ulStartBlock,
64 			aulBBD, tBBDLen, BIG_BLOCK_SIZE,
65 			aucBuffer, ulBeginFootnoteInfo, tFootnoteInfoLen)) {
66 		aucBuffer = xfree(aucBuffer);
67 		return;
68 	}
69 	NO_DBG_PRINT_BLOCK(aucBuffer, tFootnoteInfoLen);
70 
71 	fail(tFootnoteListLength != 0);
72 	tFootnoteListLength = (tFootnoteInfoLen - 4) / 6;
73 	fail(tFootnoteListLength == 0);
74 
75 	fail(aulFootnoteList != NULL);
76 	aulFootnoteList = xcalloc(tFootnoteListLength, sizeof(ULONG));
77 
78 	for (iIndex = 0; iIndex < (int)tFootnoteListLength; iIndex++) {
79 		ulOffset = ulGetLong(iIndex * 4, aucBuffer);
80 		NO_DBG_HEX(ulOffset);
81 		ulFileOffset = ulCharPos2FileOffset(ulBeginOfText + ulOffset);
82 		NO_DBG_HEX(ulFileOffset);
83 		aulFootnoteList[iIndex] = ulFileOffset;
84 	}
85 	aucBuffer = xfree(aucBuffer);
86 } /* end of vGet6FootnotesInfo */
87 
88 /*
89  * Build the list with endnote information for Word 6/7 files
90  */
91 static void
92 vGet6EndnotesInfo(FILE *pFile, ULONG ulStartBlock,
93 	const ULONG *aulBBD, size_t tBBDLen,
94 	const UCHAR *aucHeader)
95 {
96 	UCHAR	*aucBuffer;
97 	ULONG	ulFileOffset, ulBeginOfText, ulOffset, ulBeginEndnoteInfo;
98 	size_t	tEndnoteInfoLen;
99 	int	iIndex;
100 
101 	fail(pFile == NULL || aucHeader == NULL);
102 	fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);
103 	fail(aulBBD == NULL);
104 
105 	ulBeginOfText = ulGetLong(0x18, aucHeader);
106 	NO_DBG_HEX(ulBeginOfText);
107 	ulBeginEndnoteInfo = ulGetLong(0x1d2, aucHeader);
108 	NO_DBG_HEX(ulBeginEndnoteInfo);
109 	tEndnoteInfoLen = (size_t)ulGetLong(0x1d6, aucHeader);
110 	NO_DBG_DEC(tEndnoteInfoLen);
111 
112 	if (tEndnoteInfoLen < 10) {
113 		DBG_MSG("No Endnotes in this document");
114 		return;
115 	}
116 
117 	aucBuffer = xmalloc(tEndnoteInfoLen);
118 	if (!bReadBuffer(pFile, ulStartBlock,
119 			aulBBD, tBBDLen, BIG_BLOCK_SIZE,
120 			aucBuffer, ulBeginEndnoteInfo, tEndnoteInfoLen)) {
121 		aucBuffer = xfree(aucBuffer);
122 		return;
123 	}
124 	NO_DBG_PRINT_BLOCK(aucBuffer, tEndnoteInfoLen);
125 
126 	fail(tEndnoteListLength != 0);
127 	tEndnoteListLength = (tEndnoteInfoLen - 4) / 6;
128 	fail(tEndnoteListLength == 0);
129 
130 	fail(aulEndnoteList != NULL);
131 	aulEndnoteList = xcalloc(tEndnoteListLength, sizeof(ULONG));
132 
133 	for (iIndex = 0; iIndex < (int)tEndnoteListLength; iIndex++) {
134 		ulOffset = ulGetLong(iIndex * 4, aucBuffer);
135 		NO_DBG_HEX(ulOffset);
136 		ulFileOffset = ulCharPos2FileOffset(ulBeginOfText + ulOffset);
137 		NO_DBG_HEX(ulFileOffset);
138 		aulEndnoteList[iIndex] = ulFileOffset;
139 	}
140 	aucBuffer = xfree(aucBuffer);
141 } /* end of vGet6EndnotesInfo */
142 
143 /*
144  * Build the lists note information for Word 6/7 files
145  */
146 static void
147 vGet6NotesInfo(FILE *pFile, ULONG ulStartBlock,
148 	const ULONG *aulBBD, size_t tBBDLen,
149 	const UCHAR *aucHeader)
150 {
151 	vGet6FootnotesInfo(pFile, ulStartBlock,
152 			aulBBD, tBBDLen, aucHeader);
153 	vGet6EndnotesInfo(pFile, ulStartBlock,
154 			aulBBD, tBBDLen, aucHeader);
155 } /* end of vGet6NotesInfo */
156 
157 /*
158  * Build the list with footnote information for Word 8/9/10 files
159  */
160 static void
161 vGet8FootnotesInfo(FILE *pFile, const pps_info_type *pPPS,
162 	const ULONG *aulBBD, size_t tBBDLen,
163 	const ULONG *aulSBD, size_t tSBDLen,
164 	const UCHAR *aucHeader)
165 {
166 	const ULONG	*aulBlockDepot;
167 	UCHAR	*aucBuffer;
168 	ULONG	ulFileOffset, ulBeginOfText, ulOffset, ulBeginFootnoteInfo;
169 	ULONG	ulTableSize, ulTableStartBlock;
170 	size_t	tFootnoteInfoLen, tBlockDepotLen, tBlockSize;
171 	int	iIndex;
172 	USHORT	usDocStatus;
173 
174 	ulBeginOfText = ulGetLong(0x18, aucHeader);
175 	NO_DBG_HEX(ulBeginOfText);
176 	ulBeginFootnoteInfo = ulGetLong(0xaa, aucHeader);
177 	NO_DBG_HEX(ulBeginFootnoteInfo);
178 	tFootnoteInfoLen = (size_t)ulGetLong(0xae, aucHeader);
179 	NO_DBG_DEC(tFootnoteInfoLen);
180 
181 	if (tFootnoteInfoLen < 10) {
182 		DBG_MSG("No Footnotes in this document");
183 		return;
184 	}
185 
186 	/* Use 0Table or 1Table? */
187 	usDocStatus = usGetWord(0x0a, aucHeader);
188 	if (usDocStatus & BIT(9)) {
189 		ulTableStartBlock = pPPS->t1Table.ulSB;
190 		ulTableSize = pPPS->t1Table.ulSize;
191 	} else {
192 		ulTableStartBlock = pPPS->t0Table.ulSB;
193 		ulTableSize = pPPS->t0Table.ulSize;
194 	}
195 	NO_DBG_DEC(ulTableStartBlock);
196 	if (ulTableStartBlock == 0) {
197 		DBG_MSG("No notes information");
198 		return;
199 	}
200 	NO_DBG_HEX(ulTableSize);
201 	if (ulTableSize < MIN_SIZE_FOR_BBD_USE) {
202 	  	/* Use the Small Block Depot */
203 		aulBlockDepot = aulSBD;
204 		tBlockDepotLen = tSBDLen;
205 		tBlockSize = SMALL_BLOCK_SIZE;
206 	} else {
207 	  	/* Use the Big Block Depot */
208 		aulBlockDepot = aulBBD;
209 		tBlockDepotLen = tBBDLen;
210 		tBlockSize = BIG_BLOCK_SIZE;
211 	}
212 	aucBuffer = xmalloc(tFootnoteInfoLen);
213 	if (!bReadBuffer(pFile, ulTableStartBlock,
214 			aulBlockDepot, tBlockDepotLen, tBlockSize,
215 			aucBuffer, ulBeginFootnoteInfo, tFootnoteInfoLen)) {
216 		aucBuffer = xfree(aucBuffer);
217 		return;
218 	}
219 	NO_DBG_PRINT_BLOCK(aucBuffer, tFootnoteInfoLen);
220 
221 	fail(tFootnoteListLength != 0);
222 	tFootnoteListLength = (tFootnoteInfoLen - 4) / 6;
223 	fail(tFootnoteListLength == 0);
224 
225 	fail(aulFootnoteList != NULL);
226 	aulFootnoteList = xcalloc(tFootnoteListLength, sizeof(ULONG));
227 
228 	for (iIndex = 0; iIndex < (int)tFootnoteListLength; iIndex++) {
229 		ulOffset = ulGetLong(iIndex * 4, aucBuffer);
230 		NO_DBG_HEX(ulOffset);
231 		ulFileOffset = ulCharPos2FileOffset(ulBeginOfText + ulOffset);
232 		NO_DBG_HEX(ulFileOffset);
233 		aulFootnoteList[iIndex] = ulFileOffset;
234 	}
235 	aucBuffer = xfree(aucBuffer);
236 } /* end of vGet8FootnotesInfo */
237 
238 /*
239  * Build the list with endnote information for Word 8/9/10 files
240  */
241 static void
242 vGet8EndnotesInfo(FILE *pFile, const pps_info_type *pPPS,
243 	const ULONG *aulBBD, size_t tBBDLen,
244 	const ULONG *aulSBD, size_t tSBDLen,
245 	const UCHAR *aucHeader)
246 {
247 	const ULONG	*aulBlockDepot;
248 	UCHAR	*aucBuffer;
249 	ULONG	ulFileOffset, ulBeginOfText, ulOffset, ulBeginEndnoteInfo;
250 	ULONG	ulTableSize, ulTableStartBlock;
251 	size_t	tEndnoteInfoLen, tBlockDepotLen, tBlockSize;
252 	int	iIndex;
253 	USHORT	usDocStatus;
254 
255 	ulBeginOfText = ulGetLong(0x18, aucHeader);
256 	NO_DBG_HEX(ulBeginOfText);
257 	ulBeginEndnoteInfo = ulGetLong(0x20a, aucHeader);
258 	NO_DBG_HEX(ulBeginEndnoteInfo);
259 	tEndnoteInfoLen = (size_t)ulGetLong(0x20e, aucHeader);
260 	NO_DBG_DEC(tEndnoteInfoLen);
261 
262 	if (tEndnoteInfoLen < 10) {
263 		DBG_MSG("No Endnotes in this document");
264 		return;
265 	}
266 
267 	/* Use 0Table or 1Table? */
268 	usDocStatus = usGetWord(0x0a, aucHeader);
269 	if (usDocStatus & BIT(9)) {
270 		ulTableStartBlock = pPPS->t1Table.ulSB;
271 		ulTableSize = pPPS->t1Table.ulSize;
272 	} else {
273 		ulTableStartBlock = pPPS->t0Table.ulSB;
274 		ulTableSize = pPPS->t0Table.ulSize;
275 	}
276 	NO_DBG_DEC(ulTableStartBlock);
277 	if (ulTableStartBlock == 0) {
278 		DBG_MSG("No notes information");
279 		return;
280 	}
281 	NO_DBG_HEX(ulTableSize);
282 	if (ulTableSize < MIN_SIZE_FOR_BBD_USE) {
283 	  	/* Use the Small Block Depot */
284 		aulBlockDepot = aulSBD;
285 		tBlockDepotLen = tSBDLen;
286 		tBlockSize = SMALL_BLOCK_SIZE;
287 	} else {
288 	  	/* Use the Big Block Depot */
289 		aulBlockDepot = aulBBD;
290 		tBlockDepotLen = tBBDLen;
291 		tBlockSize = BIG_BLOCK_SIZE;
292 	}
293 	aucBuffer = xmalloc(tEndnoteInfoLen);
294 	if (!bReadBuffer(pFile, ulTableStartBlock,
295 			aulBlockDepot, tBlockDepotLen, tBlockSize,
296 			aucBuffer, ulBeginEndnoteInfo, tEndnoteInfoLen)) {
297 		aucBuffer = xfree(aucBuffer);
298 		return;
299 	}
300 	NO_DBG_PRINT_BLOCK(aucBuffer, tEndnoteInfoLen);
301 
302 	fail(tEndnoteListLength != 0);
303 	tEndnoteListLength = (tEndnoteInfoLen - 4) / 6;
304 	fail(tEndnoteListLength == 0);
305 
306 	fail(aulEndnoteList != NULL);
307 	aulEndnoteList = xcalloc(tEndnoteListLength, sizeof(ULONG));
308 
309 	for (iIndex = 0; iIndex < (int)tEndnoteListLength; iIndex++) {
310 		ulOffset = ulGetLong(iIndex * 4, aucBuffer);
311 		NO_DBG_HEX(ulOffset);
312 		ulFileOffset = ulCharPos2FileOffset(ulBeginOfText + ulOffset);
313 		NO_DBG_HEX(ulFileOffset);
314 		aulEndnoteList[iIndex] = ulFileOffset;
315 	}
316 	aucBuffer = xfree(aucBuffer);
317 } /* end of vGet8EndnotesInfo */
318 
319 /*
320  * Build the lists with footnote and endnote information for Word 8/9/10 files
321  */
322 static void
323 vGet8NotesInfo(FILE *pFile, const pps_info_type *pPPS,
324 	const ULONG *aulBBD, size_t tBBDLen,
325 	const ULONG *aulSBD, size_t tSBDLen,
326 	const UCHAR *aucHeader)
327 {
328 	vGet8FootnotesInfo(pFile, pPPS,
329 			aulBBD, tBBDLen, aulSBD, tSBDLen, aucHeader);
330 	vGet8EndnotesInfo(pFile, pPPS,
331 			aulBBD, tBBDLen, aulSBD, tSBDLen, aucHeader);
332 } /* end of vGet8NotesInfo */
333 
334 /*
335  * Build the lists with footnote and endnote information
336  */
337 void
338 vGetNotesInfo(FILE *pFile, const pps_info_type *pPPS,
339 	const ULONG *aulBBD, size_t tBBDLen,
340 	const ULONG *aulSBD, size_t tSBDLen,
341 	const UCHAR *aucHeader, int iWordVersion)
342 {
343 	fail(pFile == NULL || pPPS == NULL || aucHeader == NULL);
344 	fail(iWordVersion < 6 || iWordVersion > 8);
345 	fail(aulBBD == NULL || aulSBD == NULL);
346 
347 	switch (iWordVersion) {
348 	case 6:
349 	case 7:
350 		vGet6NotesInfo(pFile, pPPS->tWordDocument.ulSB,
351 			aulBBD, tBBDLen, aucHeader);
352 		break;
353 	case 8:
354 		vGet8NotesInfo(pFile, pPPS,
355 			aulBBD, tBBDLen, aulSBD, tSBDLen, aucHeader);
356 		break;
357 	default:
358 		werr(0, "Sorry, no notes information");
359 		break;
360 	}
361 } /* end of vGetNotesInfo */
362 
363 /*
364  * Get the notetype of the note at the given fileoffset
365  */
366 notetype_enum
367 eGetNotetype(ULONG ulFileOffset)
368 {
369 	size_t	tIndex;
370 
371 	fail(aulFootnoteList == NULL && tFootnoteListLength != 0);
372 	fail(aulEndnoteList == NULL && tEndnoteListLength != 0);
373 
374 	/* Go for the easy answers first */
375 	if (tFootnoteListLength == 0 && tEndnoteListLength == 0) {
376 		return notetype_is_unknown;
377 	}
378 	if (tEndnoteListLength == 0) {
379 		return notetype_is_footnote;
380 	}
381 	if (tFootnoteListLength == 0) {
382 		return notetype_is_endnote;
383 	}
384 	/* No easy answer, so we search */
385 	for (tIndex = 0; tIndex < tFootnoteListLength; tIndex++) {
386 		if (aulFootnoteList[tIndex] == ulFileOffset) {
387 			return notetype_is_footnote;
388 		}
389 	}
390 	for (tIndex = 0; tIndex < tEndnoteListLength; tIndex++) {
391 		if (aulEndnoteList[tIndex] == ulFileOffset) {
392 			return notetype_is_endnote;
393 		}
394 	}
395 	/* Not found */
396 	return notetype_is_unknown;
397 } /* end of eGetNotetype */
398