xref: /plan9/sys/src/cmd/aux/antiword/worddos.c (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * worddos.c
3  * Copyright (C) 2002-2005 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * Deal with the DOS internals of a MS Word file
7  */
8 
9 #include "antiword.h"
10 
11 
12 /*
13  * bGetDocumentText - make a list of the text blocks of a Word document
14  *
15  * Return TRUE when succesful, otherwise FALSE
16  */
17 static BOOL
bGetDocumentText(FILE * pFile,long lFilesize,const UCHAR * aucHeader)18 bGetDocumentText(FILE *pFile, long lFilesize, const UCHAR *aucHeader)
19 {
20 	text_block_type	tTextBlock;
21 	ULONG	ulTextLen;
22 	BOOL	bFastSaved;
23 	UCHAR	ucDocStatus, ucVersion;
24 
25 	fail(pFile == NULL);
26 	fail(lFilesize < 128);
27 	fail(aucHeader == NULL);
28 
29 	/* Get the status flags from the header */
30 	ucDocStatus = ucGetByte(0x75, aucHeader);
31 	DBG_HEX(ucDocStatus);
32 	bFastSaved = (ucDocStatus & BIT(1)) != 0;
33 	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
34 	ucVersion = ucGetByte(0x74, aucHeader);
35 	DBG_DEC(ucVersion);
36 	DBG_MSG_C(ucVersion == 0, "Written by Word 4.0 or earlier");
37 	DBG_MSG_C(ucVersion == 3, "Word 5.0 format, but not written by Word");
38 	DBG_MSG_C(ucVersion == 4, "Written by Word 5.x");
39 	if (bFastSaved) {
40 		werr(0, "Word for DOS: autosave documents are not supported");
41 		return FALSE;
42 	}
43 
44 	/* Get length information */
45 	ulTextLen = ulGetLong(0x0e, aucHeader);
46 	DBG_HEX(ulTextLen);
47 	ulTextLen -= 128;
48 	DBG_DEC(ulTextLen);
49 	tTextBlock.ulFileOffset = 128;
50 	tTextBlock.ulCharPos = 128;
51 	tTextBlock.ulLength = ulTextLen;
52 	tTextBlock.bUsesUnicode = FALSE;
53 	tTextBlock.usPropMod = IGNORE_PROPMOD;
54 	if (!bAdd2TextBlockList(&tTextBlock)) {
55 		DBG_HEX(tTextBlock.ulFileOffset);
56 		DBG_HEX(tTextBlock.ulCharPos);
57 		DBG_DEC(tTextBlock.ulLength);
58 		DBG_DEC(tTextBlock.bUsesUnicode);
59 		DBG_DEC(tTextBlock.usPropMod);
60 		return FALSE;
61 	}
62 	return TRUE;
63 } /* end of bGetDocumentText */
64 
65 /*
66  * iInitDocumentDOS - initialize an DOS document
67  *
68  * Returns the version of Word that made the document or -1
69  */
70 int
iInitDocumentDOS(FILE * pFile,long lFilesize)71 iInitDocumentDOS(FILE *pFile, long lFilesize)
72 {
73 	int	iWordVersion;
74 	BOOL	bSuccess;
75 	USHORT	usIdent;
76 	UCHAR	aucHeader[128];
77 
78 	fail(pFile == NULL);
79 
80 	if (lFilesize < 128) {
81 		return -1;
82 	}
83 
84 	/* Read the headerblock */
85 	if (!bReadBytes(aucHeader, 128, 0x00, pFile)) {
86 		return -1;
87 	}
88 	/* Get the "magic number" from the header */
89 	usIdent = usGetWord(0x00, aucHeader);
90 	DBG_HEX(usIdent);
91 	fail(usIdent != 0xbe31);	/* Word for DOS */
92 	iWordVersion = iGetVersionNumber(aucHeader);
93 	if (iWordVersion != 0) {
94 		werr(0, "This file is not from 'Word for DOS'.");
95 		return -1;
96 	}
97 	bSuccess = bGetDocumentText(pFile, lFilesize, aucHeader);
98 	if (bSuccess) {
99 		vGetPropertyInfo(pFile, NULL,
100 				NULL, 0, NULL, 0,
101 				aucHeader, iWordVersion);
102 		vSetDefaultTabWidth(pFile, NULL,
103 				NULL, 0, NULL, 0,
104 				aucHeader, iWordVersion);
105 		vGetNotesInfo(pFile, NULL,
106 				NULL, 0, NULL, 0,
107 				aucHeader, iWordVersion);
108 	}
109 	return bSuccess ? iWordVersion : -1;
110 } /* end of iInitDocumentDOS */
111