xref: /plan9/sys/src/cmd/aux/antiword/datalist.c (revision f5736e95f14e1485b3a0291fa82d86cca323ab61)
1 /*
2  * datalist.c
3  * Copyright (C) 2000-2002 A.J. van Os; Released under GPL
4  *
5  * Description:
6  * Build, read and destroy a list of Word data blocks
7  */
8 
9 #include <stdlib.h>
10 #include <errno.h>
11 #include "antiword.h"
12 
13 #if defined(__riscos)
14 #define EIO		42
15 #endif /* __riscos */
16 
17 
/*
 * Private structure to hide the way the information
 * is stored from the rest of the program
 *
 * Each instance is one member of a singly linked list of data blocks.
 */
typedef struct data_mem_tag {
	data_block_type		tInfo;	/* Description of the data block */
	struct data_mem_tag	*pNext;	/* Next list member, NULL at the end */
} data_mem_type;
26 
/* First member of the data block list, NULL when the list is empty */
static data_mem_type	*pAnchor = NULL;
/* Last member of the data block list; blocks are appended or merged here */
static data_mem_type	*pBlockLast = NULL;
/* List member that is currently being read, NULL when no position is set */
static data_mem_type	*pBlockCurrent = NULL;
/* Offset within the current block of the first byte held in aucBlock */
static ULONG	ulBlockOffset = 0;
/* Index in aucBlock of the next byte to hand out */
static size_t	tByteNext = 0;
/* Buffer holding the part of the current block that was read last */
static UCHAR	aucBlock[BIG_BLOCK_SIZE];
37 
38 
39 /*
40  * vDestroyDataBlockList - destroy the data block list
41  */
42 void
vDestroyDataBlockList(void)43 vDestroyDataBlockList(void)
44 {
45 	data_mem_type	*pCurr, *pNext;
46 
47 	DBG_MSG("vDestroyDataBlockList");
48 
49 	pCurr = pAnchor;
50 	while (pCurr != NULL) {
51 		pNext = pCurr->pNext;
52 		pCurr = xfree(pCurr);
53 		pCurr = pNext;
54 	}
55 	pAnchor = NULL;
56 	/* Reset all the control variables */
57 	pBlockLast = NULL;
58 	pBlockCurrent = NULL;
59 	ulBlockOffset = 0;
60 	tByteNext = 0;
61 } /* end of vDestroyDataBlockList */
62 
63 /*
64  * bAdd2DataBlockList - add an element to the data block list
65  *
66  * Returns TRUE when successful, otherwise FALSE
67  */
68 BOOL
bAdd2DataBlockList(const data_block_type * pDataBlock)69 bAdd2DataBlockList(const data_block_type *pDataBlock)
70 {
71 	data_mem_type	*pListMember;
72 
73 	fail(pDataBlock == NULL);
74 	fail(pDataBlock->ulFileOffset == FC_INVALID);
75 	fail(pDataBlock->ulDataPos == CP_INVALID);
76 	fail(pDataBlock->ulLength == 0);
77 
78 	NO_DBG_MSG("bAdd2DataBlockList");
79 	NO_DBG_HEX(pDataBlock->ulFileOffset);
80 	NO_DBG_HEX(pDataBlock->ulDataPos);
81 	NO_DBG_HEX(pDataBlock->ulLength);
82 
83 	if (pDataBlock->ulFileOffset == FC_INVALID ||
84 	    pDataBlock->ulDataPos == CP_INVALID ||
85 	    pDataBlock->ulLength == 0) {
86 		werr(0, "Software (datablock) error");
87 		return FALSE;
88 	}
89 	/* Check for continuous blocks */
90 	if (pBlockLast != NULL &&
91 	    pBlockLast->tInfo.ulFileOffset +
92 	     pBlockLast->tInfo.ulLength == pDataBlock->ulFileOffset &&
93 	    pBlockLast->tInfo.ulDataPos +
94 	     pBlockLast->tInfo.ulLength == pDataBlock->ulDataPos) {
95 		/* These are continous blocks */
96 		pBlockLast->tInfo.ulLength += pDataBlock->ulLength;
97 		return TRUE;
98 	}
99 	/* Make a new block */
100 	pListMember = xmalloc(sizeof(data_mem_type));
101 	/* Add the block to the data list */
102 	pListMember->tInfo = *pDataBlock;
103 	pListMember->pNext = NULL;
104 	if (pAnchor == NULL) {
105 		pAnchor = pListMember;
106 	} else {
107 		fail(pBlockLast == NULL);
108 		pBlockLast->pNext = pListMember;
109 	}
110 	pBlockLast = pListMember;
111 	return TRUE;
112 } /* end of bAdd2DataBlockList */
113 
114 /*
115  * ulGetDataOffset - get the offset in the data block list
116  *
117  * Get the fileoffset the current position in the data block list
118  */
119 ULONG
ulGetDataOffset(FILE * pFile)120 ulGetDataOffset(FILE *pFile)
121 {
122 	return pBlockCurrent->tInfo.ulFileOffset + ulBlockOffset + tByteNext;
123 } /* end of ulGetDataOffset */
124 
125 /*
126  * bSetDataOffset - set the offset in the data block list
127  *
128  * Make the given fileoffset the current position in the data block list
129  */
130 BOOL
bSetDataOffset(FILE * pFile,ULONG ulFileOffset)131 bSetDataOffset(FILE *pFile, ULONG ulFileOffset)
132 {
133 	data_mem_type	*pCurr;
134 	size_t	tReadLen;
135 
136 	DBG_HEX(ulFileOffset);
137 
138 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
139 		if (ulFileOffset < pCurr->tInfo.ulFileOffset ||
140 		    ulFileOffset >= pCurr->tInfo.ulFileOffset +
141 		     pCurr->tInfo.ulLength) {
142 			/* The file offset is not in this block */
143 			continue;
144 		}
145 		/* Compute the maximum number of bytes to read */
146 		tReadLen = (size_t)(pCurr->tInfo.ulFileOffset +
147 				pCurr->tInfo.ulLength -
148 				ulFileOffset);
149 		/* Compute the real number of bytes to read */
150 		if (tReadLen > sizeof(aucBlock)) {
151 			tReadLen = sizeof(aucBlock);
152 		}
153 		/* Read the bytes */
154 		if (!bReadBytes(aucBlock, tReadLen, ulFileOffset, pFile)) {
155 			return FALSE;
156 		}
157 		/* Set the control variables */
158 		pBlockCurrent = pCurr;
159 		ulBlockOffset = ulFileOffset - pCurr->tInfo.ulFileOffset;
160 		tByteNext = 0;
161 		return TRUE;
162 	}
163 	return FALSE;
164 } /* end of bSetDataOffset */
165 
166 /*
167  * iNextByte - get the next byte from the data block list
168  */
169 int
iNextByte(FILE * pFile)170 iNextByte(FILE *pFile)
171 {
172 	ULONG	ulReadOff;
173 	size_t	tReadLen;
174 
175 	fail(pBlockCurrent == NULL);
176 
177 	if (tByteNext >= sizeof(aucBlock) ||
178 	    ulBlockOffset + tByteNext >= pBlockCurrent->tInfo.ulLength) {
179 		if (ulBlockOffset + sizeof(aucBlock) <
180 					pBlockCurrent->tInfo.ulLength) {
181 			/* Same block, next part */
182 			ulBlockOffset += sizeof(aucBlock);
183 		} else {
184 			/* Next block, first part */
185 			pBlockCurrent = pBlockCurrent->pNext;
186 			ulBlockOffset = 0;
187 		}
188 		if (pBlockCurrent == NULL) {
189 			/* Past the last part of the last block */
190 			errno = EIO;
191 			return EOF;
192 		}
193 		tReadLen = (size_t)
194 				(pBlockCurrent->tInfo.ulLength - ulBlockOffset);
195 		if (tReadLen > sizeof(aucBlock)) {
196 			tReadLen = sizeof(aucBlock);
197 		}
198 		ulReadOff = pBlockCurrent->tInfo.ulFileOffset + ulBlockOffset;
199 		if (!bReadBytes(aucBlock, tReadLen, ulReadOff, pFile)) {
200 			errno = EIO;
201 			return EOF;
202 		}
203 		tByteNext = 0;
204 	}
205 	return (int)aucBlock[tByteNext++];
206 } /* end of iNextByte */
207 
208 /*
209  * usNextWord - get the next word from the data block list
210  *
211  * Read a two byte value in Little Endian order, that means MSB last
212  *
213  * All return values can be valid so errno is set in case of error
214  */
215 USHORT
usNextWord(FILE * pFile)216 usNextWord(FILE *pFile)
217 {
218 	USHORT	usLSB, usMSB;
219 
220 	usLSB = (USHORT)iNextByte(pFile);
221 	if (usLSB == (USHORT)EOF) {
222 		errno = EIO;
223 		return (USHORT)EOF;
224 	}
225 	usMSB = (USHORT)iNextByte(pFile);
226 	if (usMSB == (USHORT)EOF) {
227 		DBG_MSG("usNextWord: Unexpected EOF");
228 		errno = EIO;
229 		return (USHORT)EOF;
230 	}
231 	return (usMSB << 8) | usLSB;
232 } /* end of usNextWord */
233 
234 /*
235  * ulNextLong - get the next long from the data block list
236  *
237  * Read a four byte value in Little Endian order, that means MSW last
238  *
239  * All return values can be valid so errno is set in case of error
240  */
241 ULONG
ulNextLong(FILE * pFile)242 ulNextLong(FILE *pFile)
243 {
244 	ULONG	ulLSW, ulMSW;
245 
246 	ulLSW = (ULONG)usNextWord(pFile);
247 	if (ulLSW == (ULONG)EOF) {
248 		errno = EIO;
249 		return (ULONG)EOF;
250 	}
251 	ulMSW = (ULONG)usNextWord(pFile);
252 	if (ulMSW == (ULONG)EOF) {
253 		DBG_MSG("ulNextLong: Unexpected EOF");
254 		errno = EIO;
255 		return (ULONG)EOF;
256 	}
257 	return (ulMSW << 16) | ulLSW;
258 } /* end of ulNextLong */
259 
260 /*
261  * usNextWordBE - get the next two byte value
262  *
263  * Read a two byte value in Big Endian order, that means MSB first
264  *
265  * All return values can be valid so errno is set in case of error
266  */
267 USHORT
usNextWordBE(FILE * pFile)268 usNextWordBE(FILE *pFile)
269 {
270 	USHORT usLSB, usMSB;
271 
272 	usMSB = (USHORT)iNextByte(pFile);
273 	if (usMSB == (USHORT)EOF) {
274 		errno = EIO;
275 		return (USHORT)EOF;
276 	}
277 	usLSB = (USHORT)iNextByte(pFile);
278 	if (usLSB == (USHORT)EOF) {
279 		DBG_MSG("usNextWordBE: Unexpected EOF");
280 		errno = EIO;
281 		return (USHORT)EOF;
282 	}
283 	return (usMSB << 8) | usLSB;
284 } /* end of usNextWordBE */
285 
286 /*
287  * ulNextLongBE - get the next four byte value
288  *
289  * Read a four byte value in Big Endian order, that means MSW first
290  *
291  * All return values can be valid so errno is set in case of error
292  */
293 ULONG
ulNextLongBE(FILE * pFile)294 ulNextLongBE(FILE *pFile)
295 {
296 	ULONG	ulLSW, ulMSW;
297 
298 	ulMSW = (ULONG)usNextWordBE(pFile);
299 	if (ulMSW == (ULONG)EOF) {
300 		errno = EIO;
301 		return (ULONG)EOF;
302 	}
303 	ulLSW = (ULONG)usNextWordBE(pFile);
304 	if (ulLSW == (ULONG)EOF) {
305 		DBG_MSG("ulNextLongBE: Unexpected EOF");
306 		errno = EIO;
307 		return (ULONG)EOF;
308 	}
309 	return (ulMSW << 16) | ulLSW;
310 } /* end of ulNextLongBE */
311 
312 /*
313  * tSkipBytes - skip over the given number of bytes
314  *
315  * Returns the number of skipped bytes
316  */
317 size_t
tSkipBytes(FILE * pFile,size_t tToSkip)318 tSkipBytes(FILE *pFile, size_t tToSkip)
319 {
320 	size_t	tToGo, tMaxMove, tMove;
321 
322 	fail(pFile == NULL);
323 	fail(pBlockCurrent == NULL);
324 
325 	tToGo = tToSkip;
326 	while (tToGo != 0) {
327 		/* Goto the end of the current block */
328 		tMaxMove = min(sizeof(aucBlock) - tByteNext,
329 				(size_t)(pBlockCurrent->tInfo.ulLength -
330 				ulBlockOffset - tByteNext));
331 		tMove = min(tMaxMove, tToGo);
332 		tByteNext += tMove;
333 		tToGo -= tMove;
334 		if (tToGo != 0) {
335 			/* Goto the next block */
336 			if (iNextByte(pFile) == EOF) {
337 				return tToSkip - tToGo;
338 			}
339 			tToGo--;
340 		}
341 	}
342 	return tToSkip;
343 } /* end of tSkipBytes */
344 
345 /*
346  * Translate  a data position to an offset in the file.
347  * Logical to physical offset.
348  *
349  * Returns:	FC_INVALID: in case of error
350  *		otherwise: the computed file offset
351  */
352 ULONG
ulDataPos2FileOffset(ULONG ulDataPos)353 ulDataPos2FileOffset(ULONG ulDataPos)
354 {
355 	data_mem_type	*pCurr;
356 
357 	fail(ulDataPos == CP_INVALID);
358 
359 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
360 		if (ulDataPos < pCurr->tInfo.ulDataPos ||
361 		    ulDataPos >= pCurr->tInfo.ulDataPos +
362 		     pCurr->tInfo.ulLength) {
363 			/* The data offset is not in this block, try the next */
364 			continue;
365 		}
366 		/* The data offset is in the current block */
367 		return pCurr->tInfo.ulFileOffset +
368 				ulDataPos -
369 				pCurr->tInfo.ulDataPos;
370 	}
371 	/* Passed beyond the end of the list */
372 	DBG_HEX_C(ulDataPos != 0, ulDataPos);
373 	return FC_INVALID;
374 } /* end of ulDataPos2FileOffset */
375