xref: /plan9/sys/src/cmd/aux/antiword/summary.c (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * summary.c
3  * Copyright (C) 2002-2005 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * Read the summary information of a Word document
7  */
8 
9 #include <time.h>
10 #include <string.h>
11 #include "antiword.h"
12 
13 #define P_HEADER_SZ		28
14 #define P_SECTIONLIST_SZ	20
15 #define P_LENGTH_SZ		 4
16 #define P_SECTION_MAX_SZ	(2 * P_SECTIONLIST_SZ + P_LENGTH_SZ)
17 #define P_SECTION_SZ(x)		((x) * P_SECTIONLIST_SZ + P_LENGTH_SZ)
18 
19 #define PID_TITLE		 2
20 #define PID_SUBJECT		 3
21 #define PID_AUTHOR		 4
22 #define PID_CREATE_DTM		12
23 #define PID_LASTSAVE_DTM	13
24 #define PID_APPNAME		18
25 
26 #define PIDD_MANAGER		14
27 #define PIDD_COMPANY		15
28 
29 #define VT_LPSTR		30
30 #define VT_FILETIME		64
31 
32 #define TIME_OFFSET_HI		0x019db1de
33 #define TIME_OFFSET_LO		0xd53e8000
34 
35 static char	*szTitle = NULL;
36 static char	*szSubject = NULL;
37 static char	*szAuthor = NULL;
38 static time_t	tCreateDtm = (time_t)-1;
39 static time_t	tLastSaveDtm= (time_t)-1;
40 static char	*szAppName = NULL;
41 static char	*szManager = NULL;
42 static char	*szCompany = NULL;
43 static USHORT	usLid = (USHORT)-1;
44 
45 
46 /*
47  * vDestroySummaryInfo - destroy the summary information
48  */
49 void
vDestroySummaryInfo(void)50 vDestroySummaryInfo(void)
51 {
52 	TRACE_MSG("vDestroySummaryInfo");
53 
54 	szTitle = xfree(szTitle);
55 	szSubject = xfree(szSubject);
56 	szAuthor = xfree(szAuthor);
57 	tCreateDtm = (time_t)-1;
58 	tLastSaveDtm = (time_t)-1;
59 	szAppName = xfree(szAppName);
60 	szManager = xfree(szManager);
61 	szCompany = xfree(szCompany);
62 	usLid = (USHORT)-1;
63 } /* end of vDestroySummaryInfo */
64 
/*
 * iGetDosDateNumber - read a number of one or two decimal digits
 *
 * On success *ppcPos is moved past the digits that were used; on failure it
 * is left untouched.
 *
 * returns the value (0-99) or -1 when there is no digit at the position
 */
static int
iGetDosDateNumber(const char **ppcPos)
{
	const char	*pcTmp;
	int		iValue;

	pcTmp = *ppcPos;
	/* The cast keeps bytes above 0x7f from becoming negative arguments */
	if (!isdigit((unsigned char)*pcTmp)) {
		return -1;
	}
	iValue = (int)(*pcTmp - '0');
	pcTmp++;
	if (isdigit((unsigned char)*pcTmp)) {
		iValue = iValue * 10 + (int)(*pcTmp - '0');
		pcTmp++;
	}
	*ppcPos = pcTmp;
	return iValue;
} /* end of iGetDosDateNumber */

/*
 * tConvertDosDate - convert DOS date format
 *
 * The expected layout is month, separator, day, separator, year. Each number
 * has one or two digits and a separator is any single character that is
 * neither a letter nor a digit. The end of the string is never accepted as a
 * separator, so parsing cannot run past the terminating NUL.
 * Years below 80 are taken to be in the 2000s, the others in the 1900s.
 *
 * returns Unix time_t or -1
 */
static time_t
tConvertDosDate(const char *szDosDate)
{
	struct tm	tTime;
	const char	*pcTmp;
	time_t		tResult;
	int		iMonth, iDay, iYear;

	if (szDosDate == NULL) {
		return (time_t)-1;
	}
	pcTmp = szDosDate;

	/* Get the month and the first separator */
	iMonth = iGetDosDateNumber(&pcTmp);
	if (iMonth < 0 ||
	    *pcTmp == '\0' || isalnum((unsigned char)*pcTmp)) {
		return (time_t)-1;
	}
	pcTmp++;

	/* Get the day and the second separator */
	iDay = iGetDosDateNumber(&pcTmp);
	if (iDay < 0 ||
	    *pcTmp == '\0' || isalnum((unsigned char)*pcTmp)) {
		return (time_t)-1;
	}
	pcTmp++;

	/* Get the year */
	iYear = iGetDosDateNumber(&pcTmp);
	if (iYear < 0) {
		return (time_t)-1;
	}

	/* Check the values; mktime would silently normalise a month > 12 */
	if (iMonth < 1 || iMonth > 12 || iDay < 1 || iDay > 31) {
		return (time_t)-1;
	}

	memset(&tTime, 0, sizeof(tTime));
	tTime.tm_mon = iMonth - 1;	/* From 01-12 to 00-11 */
	tTime.tm_mday = iDay;
	/* tm_year counts from 1900: 00 means 2000 is 100 */
	tTime.tm_year = iYear < 80 ? iYear + 100 : iYear;
	tTime.tm_isdst = -1;
	tResult = mktime(&tTime);
	NO_DBG_MSG(ctime(&tResult));
	return tResult;
} /* end of tConvertDosDate */
136 
137 /*
138  * szLpstr - get a zero terminate string property
139  */
140 static char *
szLpstr(ULONG ulOffset,const UCHAR * aucBuffer)141 szLpstr(ULONG ulOffset, const UCHAR *aucBuffer)
142 {
143 	char	*szStart, *szResult, *szTmp;
144 	size_t	tSize;
145 
146 	tSize = (size_t)ulGetLong(ulOffset + 4, aucBuffer);
147 	NO_DBG_DEC(tSize);
148 	if (tSize == 0) {
149 		return NULL;
150 	}
151 	/* Remove white space from the start of the string */
152 	szStart = (char *)aucBuffer + ulOffset + 8;
153 	NO_DBG_MSG(szStart);
154 	fail(strlen(szStart) >= tSize);
155 	while (isspace(*szStart)) {
156 		szStart++;
157 	}
158 	if (szStart[0] == '\0') {
159 		return NULL;
160 	}
161 	szResult = xstrdup(szStart);
162 	/* Remove white space from the end of the string */
163 	szTmp = szResult + strlen(szResult) - 1;
164 	while (isspace(*szTmp)) {
165 		*szTmp = '\0';
166 		szTmp--;
167 	}
168 	NO_DBG_MSG(szResult);
169 	return szResult;
170 } /* end of szLpstr */
171 
172 /*
173  * tFiletime - get a filetime property
174  */
175 static time_t
tFiletime(ULONG ulOffset,const UCHAR * aucBuffer)176 tFiletime(ULONG ulOffset, const UCHAR *aucBuffer)
177 {
178 	double	dHi, dLo, dTmp;
179 	ULONG	ulHi, ulLo;
180 	time_t	tResult;
181 
182 	ulLo = ulGetLong(ulOffset + 4, aucBuffer);
183 	ulHi = ulGetLong(ulOffset + 8, aucBuffer);
184 	NO_DBG_HEX(ulHi);
185 	NO_DBG_HEX(ulLo);
186 
187 	/* Move the starting point from 01 Jan 1601 to 01 Jan 1970 */
188 	dHi = (double)ulHi - (double)TIME_OFFSET_HI;
189 	dLo = (double)ulLo - (double)TIME_OFFSET_LO;
190 	NO_DBG_FLT(dHi);
191 	NO_DBG_FLT(dLo);
192 
193 	/* Combine the values and divide by 10^7 to get seconds */
194 	dTmp  = dLo / 10000000.0;	/* 10^7 */
195 	dTmp += dHi * 429.4967926;	/* 2^32 / 10^7 */
196 	NO_DBG_FLT(dTmp);
197 
198 	/* Make a time_t */
199 	if (dTmp - 0.5 < TIME_T_MIN || dTmp + 0.5 > TIME_T_MAX) {
200 		return (time_t)-1;
201 	}
202 	tResult = dTmp < 0.0 ? (time_t)(dTmp - 0.5) : (time_t)(dTmp + 0.5);
203 	NO_DBG_MSG(ctime(&tResult));
204 	return tResult;
205 } /* end of tFiletime */
206 
207 /*
208  * vAnalyseSummaryInfo - analyse the summary information
209  */
210 static void
vAnalyseSummaryInfo(const UCHAR * aucBuffer)211 vAnalyseSummaryInfo(const UCHAR *aucBuffer)
212 {
213 	ULONG	ulOffset;
214 	size_t	tIndex, tCount, tPropID, tPropType;
215 
216 	tCount = (size_t)ulGetLong(4, aucBuffer);
217 	DBG_DEC(tCount);
218 	for (tIndex = 0; tIndex < tCount; tIndex++) {
219 		tPropID = (size_t)ulGetLong(8 + tIndex * 8, aucBuffer);
220 		ulOffset = ulGetLong(12 + tIndex * 8, aucBuffer);
221 		NO_DBG_DEC(tPropID);
222 		NO_DBG_HEX(ulOffset);
223 		tPropType = (size_t)ulGetLong(ulOffset, aucBuffer);
224 		NO_DBG_DEC(tPropType);
225 		switch (tPropID) {
226 		case PID_TITLE:
227 			if (tPropType == VT_LPSTR && szTitle == NULL) {
228 				szTitle = szLpstr(ulOffset, aucBuffer);
229 			}
230 			break;
231 		case PID_SUBJECT:
232 			if (tPropType == VT_LPSTR && szSubject == NULL) {
233 				szSubject = szLpstr(ulOffset, aucBuffer);
234 			}
235 			break;
236 		case PID_AUTHOR:
237 			if (tPropType == VT_LPSTR && szAuthor == NULL) {
238 				szAuthor = szLpstr(ulOffset, aucBuffer);
239 			}
240 			break;
241 		case PID_CREATE_DTM:
242 			if (tPropType == VT_FILETIME &&
243 			    tCreateDtm == (time_t)-1) {
244 				tCreateDtm = tFiletime(ulOffset, aucBuffer);
245 			}
246 			break;
247 		case PID_LASTSAVE_DTM:
248 			if (tPropType == VT_FILETIME &&
249 			    tLastSaveDtm == (time_t)-1) {
250 				tLastSaveDtm = tFiletime(ulOffset, aucBuffer);
251 			}
252 			break;
253 		case PID_APPNAME:
254 			if (tPropType == VT_LPSTR && szAppName == NULL) {
255 				szAppName = szLpstr(ulOffset, aucBuffer);
256 			}
257 			break;
258 		default:
259 			break;
260 		}
261 	}
262 } /* end of vAnalyseSummaryInfo */
263 
264 /*
265  * vAnalyseDocumentSummaryInfo - analyse the document summary information
266  */
267 static void
vAnalyseDocumentSummaryInfo(const UCHAR * aucBuffer)268 vAnalyseDocumentSummaryInfo(const UCHAR *aucBuffer)
269 {
270 	ULONG	ulOffset;
271 	size_t	tIndex, tCount, tPropID, tPropType;
272 
273 	tCount = (size_t)ulGetLong(4, aucBuffer);
274 	DBG_DEC(tCount);
275 	for (tIndex = 0; tIndex < tCount; tIndex++) {
276 		tPropID = (size_t)ulGetLong(8 + tIndex * 8, aucBuffer);
277 		ulOffset = ulGetLong(12 + tIndex * 8, aucBuffer);
278 		NO_DBG_DEC(tPropID);
279 		NO_DBG_HEX(ulOffset);
280 		tPropType = (size_t)ulGetLong(ulOffset, aucBuffer);
281 		NO_DBG_DEC(tPropType);
282 		switch (tPropID) {
283 		case PIDD_MANAGER:
284 			if (tPropType == VT_LPSTR && szManager == NULL) {
285 				szManager = szLpstr(ulOffset, aucBuffer);
286 			}
287 			break;
288 		case PIDD_COMPANY:
289 			if (tPropType == VT_LPSTR && szCompany == NULL) {
290 				szCompany = szLpstr(ulOffset, aucBuffer);
291 			}
292 			break;
293 		default:
294 			break;
295 		}
296 	}
297 } /* end of vAnalyseDocumentSummaryInfo */
298 
299 /*
300  * pucAnalyseSummaryInfoHeader-
301  */
302 static UCHAR *
pucAnalyseSummaryInfoHeader(FILE * pFile,ULONG ulStartBlock,ULONG ulSize,const ULONG * aulBBD,size_t tBBDLen,const ULONG * aulSBD,size_t tSBDLen)303 pucAnalyseSummaryInfoHeader(FILE *pFile,
304 	ULONG ulStartBlock, ULONG ulSize,
305 	const ULONG *aulBBD, size_t tBBDLen,
306 	const ULONG *aulSBD, size_t tSBDLen)
307 {
308 	const ULONG	*aulBlockDepot;
309 	UCHAR	*aucBuffer;
310 	size_t	tBlockDepotLen, tBlockSize, tSectionCount, tLength;
311 	ULONG	ulTmp, ulOffset;
312 	USHORT	usLittleEndian, usEmpty, usOS, usVersion;
313 	UCHAR	aucHdr[P_HEADER_SZ], aucSecLst[P_SECTION_MAX_SZ];
314 
315 	if (ulSize < MIN_SIZE_FOR_BBD_USE) {
316 		/* Use the Small Block Depot */
317 		aulBlockDepot = aulSBD;
318 		tBlockDepotLen = tSBDLen;
319 		tBlockSize = SMALL_BLOCK_SIZE;
320 	} else {
321 		/* Use the Big Block Depot */
322 		aulBlockDepot = aulBBD;
323 		tBlockDepotLen = tBBDLen;
324 		tBlockSize = BIG_BLOCK_SIZE;
325 	}
326 
327 	if (tBlockDepotLen == 0) {
328 		DBG_MSG("The Block Depot length is zero");
329 		return NULL;
330 	}
331 
332 	/* Read the Summery Information header */
333 	if (!bReadBuffer(pFile, ulStartBlock,
334 			aulBlockDepot, tBlockDepotLen, tBlockSize,
335 			aucHdr, 0, P_HEADER_SZ)) {
336 		return NULL;
337 	}
338 	NO_DBG_PRINT_BLOCK(aucHdr, P_HEADER_SZ);
339 
340 	/* Analyse the Summery Information header */
341 	usLittleEndian =  usGetWord(0, aucHdr);
342 	if (usLittleEndian != 0xfffe) {
343 		DBG_HEX(usLittleEndian);
344 		DBG_MSG_C(usLittleEndian == 0xfeff, "Big endian");
345 		return NULL;
346 	}
347 	usEmpty =  usGetWord(2, aucHdr);
348 	if (usEmpty != 0x0000) {
349 		DBG_DEC(usEmpty);
350 		return NULL;
351 	}
352 	ulTmp = ulGetLong(4, aucHdr);
353 	DBG_HEX(ulTmp);
354 	usOS = (USHORT)(ulTmp >> 16);
355 	usVersion = (USHORT)(ulTmp & 0xffff);
356 	switch (usOS) {
357 	case 0:
358 		DBG_MSG("Win16");
359 		DBG_HEX(usVersion);
360 		break;
361 	case 1:
362 		DBG_MSG("MacOS");
363 		DBG_HEX(usVersion);
364 		break;
365 	case 2:
366 		DBG_MSG("Win32");
367 		DBG_HEX(usVersion);
368 		break;
369 	default:
370 		DBG_DEC(usOS);
371 		DBG_HEX(usVersion);
372 		break;
373 	}
374 	tSectionCount = (size_t)ulGetLong(24, aucHdr);
375 	DBG_DEC_C(tSectionCount != 1 && tSectionCount != 2, tSectionCount);
376 	if (tSectionCount != 1 && tSectionCount != 2) {
377 		return NULL;
378 	}
379 
380 	/* Read the Summery Information Section Lists */
381 	if (!bReadBuffer(pFile, ulStartBlock,
382 			aulBlockDepot, tBlockDepotLen, tBlockSize,
383 			aucSecLst, P_HEADER_SZ, P_SECTION_SZ(tSectionCount))) {
384 		return NULL;
385 	}
386 	NO_DBG_PRINT_BLOCK(aucSecLst, P_SECTION_SZ(tSectionCount));
387 
388 	ulTmp = ulGetLong(0, aucSecLst);
389 	DBG_HEX(ulTmp);
390 	ulTmp = ulGetLong(4, aucSecLst);
391 	DBG_HEX(ulTmp);
392 	ulTmp = ulGetLong(8, aucSecLst);
393 	DBG_HEX(ulTmp);
394 	ulTmp = ulGetLong(12, aucSecLst);
395 	DBG_HEX(ulTmp);
396 	ulOffset = ulGetLong(16, aucSecLst);
397 	DBG_DEC_C(ulOffset != P_HEADER_SZ + P_SECTIONLIST_SZ &&
398 		ulOffset != P_HEADER_SZ + 2 * P_SECTIONLIST_SZ,
399 		ulOffset);
400 	fail(ulOffset != P_HEADER_SZ + P_SECTIONLIST_SZ &&
401 		ulOffset != P_HEADER_SZ + 2 * P_SECTIONLIST_SZ);
402 	tLength =
403 		(size_t)ulGetLong(tSectionCount * P_SECTIONLIST_SZ, aucSecLst);
404 	NO_DBG_HEX(tLength);
405 	fail(ulOffset + tLength > ulSize);
406 
407 	/* Read the Summery Information */
408 	aucBuffer = xmalloc(tLength);
409 	if (!bReadBuffer(pFile, ulStartBlock,
410 			aulBlockDepot, tBlockDepotLen, tBlockSize,
411 			aucBuffer, ulOffset, tLength)) {
412 		aucBuffer = xfree(aucBuffer);
413 		return NULL;
414 	}
415 	NO_DBG_PRINT_BLOCK(aucBuffer, tLength);
416 	return aucBuffer;
417 } /* end of pucAnalyseSummaryInfoHeader */
418 
419 /*
420  * vSet0SummaryInfo - set summary information from a Word for DOS file
421  */
422 void
vSet0SummaryInfo(FILE * pFile,const UCHAR * aucHeader)423 vSet0SummaryInfo(FILE *pFile, const UCHAR *aucHeader)
424 {
425 	UCHAR	*aucBuffer;
426 	ULONG	ulBeginSumdInfo, ulBeginNextBlock;
427 	size_t	tLen;
428 	USHORT	usCodepage, usOffset;
429 
430 	TRACE_MSG("vSet0SummaryInfo");
431 
432 	fail(pFile == NULL || aucHeader == NULL);
433 
434 	/* First check the header */
435 	usCodepage = usGetWord(0x7e, aucHeader);
436 	DBG_DEC(usCodepage);
437 	switch (usCodepage) {
438 	case 850: usLid = 0x0809; break; /* Latin1 -> British English */
439 	case 862: usLid = 0x040d; break; /* Hebrew */
440 	case 866: usLid = 0x0419; break; /* Russian */
441 	case 0:
442 	case 437:
443 	default: usLid = 0x0409; break; /* ASCII -> American English */
444 	}
445 
446 	/* Second check the summary information block */
447 	ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
448 	DBG_HEX(ulBeginSumdInfo);
449 	ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
450 	DBG_HEX(ulBeginNextBlock);
451 
452 	if (ulBeginSumdInfo >= ulBeginNextBlock || ulBeginNextBlock == 0) {
453 		/* There is no summary information block */
454 		return;
455 	}
456 	tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
457 	aucBuffer = xmalloc(tLen);
458 	/* Read the summary information block */
459 	if (!bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
460 		return;
461 	}
462 	usOffset = usGetWord(0, aucBuffer);
463 	if (aucBuffer[usOffset] != 0) {
464 		NO_DBG_MSG(aucBuffer + usOffset);
465 		szTitle = xstrdup((char *)aucBuffer + usOffset);
466 	}
467 	usOffset = usGetWord(2, aucBuffer);
468 	if (aucBuffer[usOffset] != 0) {
469 		NO_DBG_MSG(aucBuffer + usOffset);
470 		szAuthor = xstrdup((char *)aucBuffer + usOffset);
471 	}
472 	usOffset = usGetWord(12, aucBuffer);
473 	if (aucBuffer[usOffset] != 0) {
474 		NO_DBG_STRN(aucBuffer + usOffset, 8);
475 		tLastSaveDtm = tConvertDosDate((char *)aucBuffer + usOffset);
476 	}
477 	usOffset = usGetWord(14, aucBuffer);
478 	if (aucBuffer[usOffset] != 0) {
479 		NO_DBG_STRN(aucBuffer + usOffset, 8);
480 		tCreateDtm = tConvertDosDate((char *)aucBuffer + usOffset);
481 	}
482 	aucBuffer = xfree(aucBuffer);
483 } /* end of vSet0SummaryInfo */
484 
485 /*
486  * vSet2SummaryInfo - set summary information from a WinWord 1/2 file
487  */
488 void
vSet2SummaryInfo(FILE * pFile,int iWordVersion,const UCHAR * aucHeader)489 vSet2SummaryInfo(FILE *pFile, int iWordVersion, const UCHAR *aucHeader)
490 {
491 	UCHAR	*aucBuffer;
492 	ULONG	ulBeginSumdInfo, ulBeginDocpInfo, ulTmp;
493 	size_t	tSumdInfoLen, tDocpInfoLen, tLen, tCounter, tStart;
494 
495 	TRACE_MSG("vSet2SummaryInfo");
496 
497 	fail(pFile == NULL || aucHeader == NULL);
498 	fail(iWordVersion != 1 && iWordVersion != 2);
499 
500 	/* First check the header */
501 	usLid = usGetWord(0x06, aucHeader); /* Language IDentification */
502 	DBG_HEX(usLid);
503 	if (usLid < 999 && iWordVersion == 1) {
504 		switch (usLid) {
505 		case   1: usLid = 0x0409; break;	/* American English */
506 		case   2: usLid = 0x0c0c; break;	/* Canadian French */
507 		case  31: usLid = 0x0413; break;	/* Dutch */
508 		case  33: usLid = 0x040c; break;	/* French */
509 		case  34: usLid = 0x040a; break;	/* Spanish */
510 		case  36: usLid = 0x040e; break;	/* Hungarian */
511 		case  39: usLid = 0x0410; break;	/* Italian */
512 		case  44: usLid = 0x0809; break;	/* British English */
513 		case  45: usLid = 0x0406; break;	/* Danish */
514 		case  46: usLid = 0x041f; break;	/* Swedish */
515 		case  47: usLid = 0x0414; break;	/* Norwegian */
516 		case  48: usLid = 0x0415; break;	/* Polish */
517 		case  49: usLid = 0x0407; break;	/* German */
518 		case 351: usLid = 0x0816; break;	/* Portuguese */
519 		case 358: usLid = 0x040b; break;	/* Finnish */
520 		default:
521 			DBG_DEC(usLid);
522 			DBG_FIXME();
523 			usLid = 0x0409;		/* American English */
524 			break;
525 		}
526 	}
527 
528 	if (iWordVersion != 2) {
529 		/* Unknown where to find the associated strings */
530 		return;
531 	}
532 
533 	/* Second check the associated strings */
534 	ulBeginSumdInfo = ulGetLong(0x118, aucHeader); /* fcSttbfAssoc */
535 	DBG_HEX(ulBeginSumdInfo);
536 	tSumdInfoLen = (size_t)usGetWord(0x11c, aucHeader); /* cbSttbfAssoc */
537 	DBG_DEC(tSumdInfoLen);
538 
539 	if (tSumdInfoLen == 0) {
540 		/* There is no summary information */
541 		return;
542 	}
543 
544 	aucBuffer = xmalloc(tSumdInfoLen);
545 	if (!bReadBytes(aucBuffer, tSumdInfoLen, ulBeginSumdInfo, pFile)) {
546 		aucBuffer = xfree(aucBuffer);
547 		return;
548 	}
549 	NO_DBG_PRINT_BLOCK(aucBuffer, tSumdInfoLen);
550 	tLen = (size_t)ucGetByte(0, aucBuffer);
551 	DBG_DEC_C(tSumdInfoLen != tLen, tSumdInfoLen);
552 	DBG_DEC_C(tSumdInfoLen != tLen, tLen);
553 	tStart = 1;
554 	for (tCounter = 0; tCounter < 17; tCounter++) {
555 		if (tStart >= tSumdInfoLen) {
556 			break;
557 		}
558 		tLen = (size_t)ucGetByte(tStart, aucBuffer);
559 		if (tLen != 0) {
560 			NO_DBG_DEC(tCounter);
561 			NO_DBG_STRN(aucBuffer + tStart + 1, tLen);
562 			switch (tCounter) {
563 			case 3:
564 				szTitle = xmalloc(tLen + 1);
565 				strncpy(szTitle,
566 					(char *)aucBuffer + tStart + 1, tLen);
567 				szTitle[tLen] = '\0';
568 				break;
569 			case 4:
570 				szSubject = xmalloc(tLen + 1);
571 				strncpy(szSubject,
572 					(char *)aucBuffer + tStart + 1, tLen);
573 				szSubject[tLen] = '\0';
574 				break;
575 			case 7:
576 				szAuthor = xmalloc(tLen + 1);
577 				strncpy(szAuthor,
578 					(char *)aucBuffer + tStart + 1, tLen);
579 				szAuthor[tLen] = '\0';
580 				break;
581 			default:
582 				break;
583 			}
584 		}
585 		tStart += tLen + 1;
586 	}
587 	aucBuffer = xfree(aucBuffer);
588 
589 	/* Third check the document properties */
590 	ulBeginDocpInfo = ulGetLong(0x112, aucHeader); /* fcDop */
591 	DBG_HEX(ulBeginDocpInfo);
592 	tDocpInfoLen = (size_t)usGetWord(0x116, aucHeader); /* cbDop */
593 	DBG_DEC(tDocpInfoLen);
594 	if (tDocpInfoLen < 12) {
595 		return;
596 	}
597 
598 	aucBuffer = xmalloc(tDocpInfoLen);
599 	if (!bReadBytes(aucBuffer, tDocpInfoLen, ulBeginDocpInfo, pFile)) {
600 		aucBuffer = xfree(aucBuffer);
601 		return;
602 	}
603         ulTmp = ulGetLong(0x14, aucBuffer); /* dttmCreated */
604 	tCreateDtm = tConvertDTTM(ulTmp);
605         ulTmp = ulGetLong(0x18, aucBuffer); /* dttmRevised */
606 	tLastSaveDtm = tConvertDTTM(ulTmp);
607 	aucBuffer = xfree(aucBuffer);
608 } /* end of vSet2SummaryInfo */
609 
610 /*
611  * vSetSummaryInfoOLE - set summary information from a Word 6+ file
612  */
613 static void
vSetSummaryInfoOLE(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const ULONG * aulSBD,size_t tSBDLen)614 vSetSummaryInfoOLE(FILE *pFile, const pps_info_type *pPPS,
615 	const ULONG *aulBBD, size_t tBBDLen,
616 	const ULONG *aulSBD, size_t tSBDLen)
617 {
618 	UCHAR	*pucBuffer;
619 
620 	fail(pFile == NULL || pPPS == NULL);
621 	fail(aulBBD == NULL || aulSBD == NULL);
622 
623 	/* Summary Information */
624 	pucBuffer = pucAnalyseSummaryInfoHeader(pFile,
625 		pPPS->tSummaryInfo.ulSB, pPPS->tSummaryInfo.ulSize,
626 		aulBBD, tBBDLen, aulSBD, tSBDLen);
627 	if (pucBuffer != NULL) {
628 		vAnalyseSummaryInfo(pucBuffer);
629 		pucBuffer = xfree(pucBuffer);
630 	}
631 
632 	/* Document Summary Information */
633 	pucBuffer = pucAnalyseSummaryInfoHeader(pFile,
634 		pPPS->tDocSummaryInfo.ulSB, pPPS->tDocSummaryInfo.ulSize,
635 		aulBBD, tBBDLen, aulSBD, tSBDLen);
636 	if (pucBuffer != NULL) {
637 		vAnalyseDocumentSummaryInfo(pucBuffer);
638 		pucBuffer = xfree(pucBuffer);
639 	}
640 } /* end of vSetSummaryInfoOLE */
641 
642 /*
643  * vSet6SummaryInfo - set summary information from a Word 6/7 file
644  */
645 void
vSet6SummaryInfo(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const ULONG * aulSBD,size_t tSBDLen,const UCHAR * aucHeader)646 vSet6SummaryInfo(FILE *pFile, const pps_info_type *pPPS,
647 	const ULONG *aulBBD, size_t tBBDLen,
648 	const ULONG *aulSBD, size_t tSBDLen,
649 	const UCHAR *aucHeader)
650 {
651 	TRACE_MSG("vSet6SummaryInfo");
652 
653 	/* Header Information */
654 	usLid = usGetWord(0x06, aucHeader); /* Language IDentification */
655 	DBG_HEX(usLid);
656 
657 	/* Summery Information */
658 	vSetSummaryInfoOLE(pFile, pPPS, aulBBD, tBBDLen, aulSBD, tSBDLen);
659 } /* end of vSet6SummaryInfo */
660 
661 /*
662  * vSet8SummaryInfo - set summary information a Word 8/9/10 file
663  */
664 void
vSet8SummaryInfo(FILE * pFile,const pps_info_type * pPPS,const ULONG * aulBBD,size_t tBBDLen,const ULONG * aulSBD,size_t tSBDLen,const UCHAR * aucHeader)665 vSet8SummaryInfo(FILE *pFile, const pps_info_type *pPPS,
666 	const ULONG *aulBBD, size_t tBBDLen,
667 	const ULONG *aulSBD, size_t tSBDLen,
668 	const UCHAR *aucHeader)
669 {
670 	USHORT	usTmp;
671 
672 	TRACE_MSG("vSet8SummaryInfo");
673 
674 	/* Header Information */
675 	usTmp = usGetWord(0x0a, aucHeader);
676 	if (usTmp & BIT(14)) {
677 		/* Language IDentification Far East */
678 		usLid = usGetWord(0x3c, aucHeader);
679 	} else {
680 		/* Language IDentification */
681 		usLid = usGetWord(0x06, aucHeader);
682 	}
683 	DBG_HEX(usLid);
684 
685 	/* Summery Information */
686 	vSetSummaryInfoOLE(pFile, pPPS, aulBBD, tBBDLen, aulSBD, tSBDLen);
687 } /* end of vSet8SummaryInfo */
688 
689 /*
690  * szGetTitle - get the title field
691  */
692 const char *
szGetTitle(void)693 szGetTitle(void)
694 {
695 	return szTitle;
696 } /* end of szGetTitle */
697 
698 /*
699  * szGetSubject - get the subject field
700  */
701 const char *
szGetSubject(void)702 szGetSubject(void)
703 {
704 	return szSubject;
705 } /* end of szGetSubject */
706 
707 /*
708  * szGetAuthor - get the author field
709  */
710 const char *
szGetAuthor(void)711 szGetAuthor(void)
712 {
713 	return szAuthor;
714 } /* end of szGetAuthor */
715 
716 /*
717  * szGetLastSaveDtm - get the last save date field
718  */
719 const char *
szGetLastSaveDtm(void)720 szGetLastSaveDtm(void)
721 {
722 	static char	szTime[12];
723 	struct tm	*pTime;
724 
725 	if (tLastSaveDtm == (time_t)-1) {
726 		return NULL;
727 	}
728 	pTime = localtime(&tLastSaveDtm);
729 	if (pTime == NULL) {
730 		return NULL;
731 	}
732 	sprintf(szTime, "%04d-%02d-%02d",
733 		pTime->tm_year + 1900, pTime->tm_mon + 1, pTime->tm_mday);
734 	return szTime;
735 } /* end of szGetLastSaveDtm */
736 
737 /*
738  * szGetModDate - get the last save date field
739  */
740 const char *
szGetModDate(void)741 szGetModDate(void)
742 {
743 	static char	szTime[20];
744 	struct tm	*pTime;
745 
746 	if (tLastSaveDtm == (time_t)-1) {
747 		return NULL;
748 	}
749 	pTime = localtime(&tLastSaveDtm);
750 	if (pTime == NULL) {
751 		return NULL;
752 	}
753 	sprintf(szTime, "D:%04d%02d%02d%02d%02d",
754 		pTime->tm_year + 1900, pTime->tm_mon + 1, pTime->tm_mday,
755 		pTime->tm_hour, pTime->tm_min);
756 	return szTime;
757 } /* end of szGetModDate */
758 
759 /*
760  * szGetCreationDate - get the last save date field
761  */
762 const char *
szGetCreationDate(void)763 szGetCreationDate(void)
764 {
765 	static char	szTime[20];
766 	struct tm	*pTime;
767 
768 	if (tCreateDtm == (time_t)-1) {
769 		return NULL;
770 	}
771 	pTime = localtime(&tCreateDtm);
772 	if (pTime == NULL) {
773 		return NULL;
774 	}
775 	sprintf(szTime, "D:%04d%02d%02d%02d%02d",
776 		pTime->tm_year + 1900, pTime->tm_mon + 1, pTime->tm_mday,
777 		pTime->tm_hour, pTime->tm_min);
778 	return szTime;
779 } /* end of szGetCreationDate */
780 
781 /*
782  * szGetCompany - get the company field
783  */
784 const char *
szGetCompany(void)785 szGetCompany(void)
786 {
787 	return szCompany;
788 } /* end of szGetCompany */
789 
790 /*
791  * szGetLanguage - get de language field
792  */
793 const char *
szGetLanguage(void)794 szGetLanguage(void)
795 {
796 	if (usLid == (USHORT)-1) {
797 		/* No Language IDentification */
798 		return NULL;
799 	}
800 	if (usLid < 999) {
801 		/* This is a Locale, not a Language IDentification */
802 		DBG_DEC(usLid);
803 		return NULL;
804 	}
805 
806 	/* Exceptions to the general rule */
807 	switch (usLid) {
808 	case 0x0404: return "zh_TW"; /* Traditional Chinese */
809 	case 0x0804: return "zh_CN"; /* Simplified Chinese */
810 	case 0x0c04: return "zh_HK"; /* Hong Kong Chinese */
811 	case 0x1004: return "zh_SG"; /* Singapore Chinese */
812 	case 0x0807: return "de_CH"; /* Swiss German */
813 	case 0x0409: return "en_US"; /* American English */
814 	case 0x0809: return "en_GB"; /* British English */
815 	case 0x0c09: return "en_AU"; /* Australian English */
816 	case 0x080a: return "es_MX"; /* Mexican Spanish */
817 	case 0x080c: return "fr_BE"; /* Belgian French */
818 	case 0x0c0c: return "fr_CA"; /* Canadian French */
819 	case 0x100c: return "fr_CH"; /* Swiss French */
820 	case 0x0810: return "it_CH"; /* Swiss Italian */
821 	case 0x0813: return "nl_BE"; /* Belgian Dutch */
822 	case 0x0416: return "pt_BR"; /* Brazilian Portuguese */
823 	case 0x081a:
824 	case 0x0c1a: return "sr";    /* Serbian */
825 	case 0x081d: return "sv_FI"; /* Finland Swedish */
826 	default:
827 		break;
828 	}
829 
830 	/* The general rule */
831 	switch (usLid & 0x00ff) {
832 	case 0x01: return "ar";	/* Arabic */
833 	case 0x02: return "bg";	/* Bulgarian */
834 	case 0x03: return "ca";	/* Catalan */
835 	case 0x04: return "zh";	/* Chinese */
836 	case 0x05: return "cs";	/* Czech */
837 	case 0x06: return "da";	/* Danish */
838 	case 0x07: return "de";	/* German */
839 	case 0x08: return "el";	/* Greek */
840 	case 0x09: return "en";	/* English */
841 	case 0x0a: return "es";	/* Spanish */
842 	case 0x0b: return "fi";	/* Finnish */
843 	case 0x0c: return "fr";	/* French */
844 	case 0x0d: return "he";	/* Hebrew */
845 	case 0x0e: return "hu";	/* Hungarian */
846 	case 0x0f: return "is";	/* Icelandic */
847 	case 0x10: return "it";	/* Italian */
848 	case 0x11: return "ja";	/* Japanese */
849 	case 0x12: return "ko";	/* Korean */
850 	case 0x13: return "nl";	/* Dutch */
851 	case 0x14: return "no";	/* Norwegian */
852 	case 0x15: return "pl";	/* Polish */
853 	case 0x16: return "pt";	/* Portuguese */
854 	case 0x17: return "rm";	/* Rhaeto-Romance */
855 	case 0x18: return "ro";	/* Romanian */
856 	case 0x19: return "ru";	/* Russian */
857 	case 0x1a: return "hr";	/* Croatian */
858 	case 0x1b: return "sk";	/* Slovak */
859 	case 0x1c: return "sq";	/* Albanian */
860 	case 0x1d: return "sv";	/* Swedish */
861 	case 0x1e: return "th";	/* Thai */
862 	case 0x1f: return "tr";	/* Turkish */
863 	case 0x20: return "ur";	/* Urdu */
864 	case 0x21: return "id";	/* Indonesian */
865 	case 0x22: return "uk";	/* Ukrainian */
866 	case 0x23: return "be";	/* Belarusian */
867 	case 0x24: return "sl";	/* Slovenian */
868 	case 0x25: return "et";	/* Estonian */
869 	case 0x26: return "lv";	/* Latvian */
870 	case 0x27: return "lt";	/* Lithuanian */
871 	case 0x29: return "fa";	/* Farsi */
872 	case 0x2a: return "vi";	/* Viet Nam */
873 	case 0x2b: return "hy";	/* Armenian */
874 	case 0x2c: return "az";	/* Azeri */
875 	case 0x2d: return "eu";	/* Basque */
876 	case 0x2f: return "mk";	/* Macedonian */
877 	case 0x36: return "af";	/* Afrikaans */
878 	case 0x37: return "ka";	/* Georgian */
879 	case 0x38: return "fo";	/* Faeroese */
880 	case 0x39: return "hi";	/* Hindi */
881 	case 0x3e: return "ms";	/* Malay */
882 	case 0x3f: return "kk";	/* Kazakh */
883 	default:
884 		DBG_HEX(usLid);
885 		DBG_FIXME();
886 		return NULL;
887 	}
888 } /* end of szGetLanguage */
889