xref: /plan9/sys/src/cmd/aux/antiword/main_u.c (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * main_u.c
3  *
4  * Released under GPL
5  *
6  * Copyright (C) 1998-2004 A.J. van Os
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version 2
11  * of the License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21  *
22  * Description:
23  * The main program of 'antiword' (Unix version)
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #if defined(__dos)
29 #include <fcntl.h>
30 #include <io.h>
31 #endif /* __dos */
32 #if defined(__CYGWIN__) || defined(__CYGMING__)
33 #  ifdef X_LOCALE
34 #    include <X11/Xlocale.h>
35 #  else
36 #    include <locale.h>
37 #  endif
38 #else
39 #include <locale.h>
40 #endif /* __CYGWIN__ || __CYGMING__ */
41 #if defined(N_PLAT_NLM)
42 #if !defined(_VA_LIST)
43 #include "NW-only/nw_os.h"
44 #endif /* !_VA_LIST */
45 #include "getopt.h"
46 #endif /* N_PLAT_NLM */
47 #include "version.h"
48 #include "antiword.h"
49 
50 /* The name of this program; main() sets it to szBasename(argv[0]) */
51 static const char	*szTask = NULL;
52 
53 
54 static void
vUsage(void)55 vUsage(void)
56 {
57 	fprintf(stderr, "\tName: %s\n", szTask);
58 	fprintf(stderr, "\tPurpose: "PURPOSESTRING"\n");
59 	fprintf(stderr, "\tAuthor: "AUTHORSTRING"\n");
60 	fprintf(stderr, "\tVersion: "VERSIONSTRING);
61 #if defined(__dos)
62 	fprintf(stderr, VERSIONSTRING2);
63 #endif /* __dos */
64 	fprintf(stderr, "\n");
65 	fprintf(stderr, "\tStatus: "STATUSSTRING"\n");
66 	fprintf(stderr,
67 		"\tUsage: %s [switches] wordfile1 [wordfile2 ...]\n", szTask);
68 	fprintf(stderr,
69 		"\tSwitches: [-f|-t|-a papersize|-p papersize|-x dtd]"
70 		"[-m mapping][-w #][-i #][-Ls]\n");
71 	fprintf(stderr, "\t\t-f formatted text output\n");
72 	fprintf(stderr, "\t\t-t text output (default)\n");
73 	fprintf(stderr, "\t\t-a <paper size name> Adobe PDF output\n");
74 	fprintf(stderr, "\t\t-p <paper size name> PostScript output\n");
75 	fprintf(stderr, "\t\t   paper size like: a4, letter or legal\n");
76 	fprintf(stderr, "\t\t-x <dtd> XML output\n");
77 	fprintf(stderr, "\t\t   like: db (DocBook)\n");
78 	fprintf(stderr, "\t\t-m <mapping> character mapping file\n");
79 	fprintf(stderr, "\t\t-w <width> in characters of text output\n");
80 	fprintf(stderr, "\t\t-i <level> image level (PostScript only)\n");
81 	fprintf(stderr, "\t\t-L use landscape mode (PostScript only)\n");
82 	fprintf(stderr, "\t\t-r Show removed text\n");
83 	fprintf(stderr, "\t\t-s Show hidden (by Word) text\n");
84 } /* end of vUsage */
85 
86 /*
87  * pStdin2TmpFile - save stdin in a temporary file
88  *
89  * returns: the pointer to the temporary file or NULL
90  */
91 static FILE *
pStdin2TmpFile(long * lFilesize)92 pStdin2TmpFile(long *lFilesize)
93 {
94 	FILE	*pTmpFile;
95 	size_t	tSize;
96 	BOOL	bFailure;
97 	UCHAR	aucBytes[BUFSIZ];
98 
99 	DBG_MSG("pStdin2TmpFile");
100 
101 	fail(lFilesize == NULL);
102 
103 	/* Open the temporary file */
104 	pTmpFile = tmpfile();
105 	if (pTmpFile == NULL) {
106 		return NULL;
107 	}
108 
109 #if defined(__dos)
110 	/* Stdin must be read as a binary stream */
111 	setmode(fileno(stdin), O_BINARY);
112 #endif /* __dos */
113 
114 	/* Copy stdin to the temporary file */
115 	*lFilesize = 0;
116 	bFailure = TRUE;
117 	for (;;) {
118 		tSize = fread(aucBytes, 1, sizeof(aucBytes), stdin);
119 		if (tSize == 0) {
120 			bFailure = feof(stdin) == 0;
121 			break;
122 		}
123 		if (fwrite(aucBytes, 1, tSize, pTmpFile) != tSize) {
124 			bFailure = TRUE;
125 			break;
126 		}
127 		*lFilesize += (long)tSize;
128 	}
129 
130 #if defined(__dos)
131 	/* Switch stdin back to a text stream */
132 	setmode(fileno(stdin), O_TEXT);
133 #endif /* __dos */
134 
135 	/* Deal with the result of the copy action */
136 	if (bFailure) {
137 		*lFilesize = 0;
138 		(void)fclose(pTmpFile);
139 		return NULL;
140 	}
141 	rewind(pTmpFile);
142 	return pTmpFile;
143 } /* end of pStdin2TmpFile */
144 
145 /*
146  * bProcessFile - process a single file
147  *
148  * returns: TRUE when the given file is a supported Word file, otherwise FALSE
149  */
150 static BOOL
bProcessFile(const char * szFilename)151 bProcessFile(const char *szFilename)
152 {
153 	FILE		*pFile;
154 	diagram_type	*pDiag;
155 	long		lFilesize;
156 	int		iWordVersion;
157 	BOOL		bResult;
158 
159 	fail(szFilename == NULL || szFilename[0] == '\0');
160 
161 	DBG_MSG(szFilename);
162 
163 	if (szFilename[0] == '-' && szFilename[1] == '\0') {
164 		pFile = pStdin2TmpFile(&lFilesize);
165 		if (pFile == NULL) {
166 			werr(0, "I can't save the standard input to a file");
167 			return FALSE;
168 		}
169 	} else {
170 		pFile = fopen(szFilename, "rb");
171 		if (pFile == NULL) {
172 			werr(0, "I can't open '%s' for reading", szFilename);
173 			return FALSE;
174 		}
175 
176 		lFilesize = lGetFilesize(szFilename);
177 		if (lFilesize < 0) {
178 			(void)fclose(pFile);
179 			werr(0, "I can't get the size of '%s'", szFilename);
180 			return FALSE;
181 		}
182 	}
183 
184 	iWordVersion = iGuessVersionNumber(pFile, lFilesize);
185 	if (iWordVersion < 0 || iWordVersion == 3) {
186 		if (bIsRtfFile(pFile)) {
187 			werr(0, "%s is not a Word Document."
188 				" It is probably a Rich Text Format file",
189 				szFilename);
190 		} if (bIsWordPerfectFile(pFile)) {
191 			werr(0, "%s is not a Word Document."
192 				" It is probably a Word Perfect file",
193 				szFilename);
194 		} else {
195 #if defined(__dos)
196 			werr(0, "%s is not a Word Document or the filename"
197 				" is not in the 8+3 format.", szFilename);
198 #else
199 			werr(0, "%s is not a Word Document.", szFilename);
200 #endif /* __dos */
201 		}
202 		(void)fclose(pFile);
203 		return FALSE;
204 	}
205 	/* Reset any reading done during file testing */
206 	rewind(pFile);
207 
208 	pDiag = pCreateDiagram(szTask, szFilename);
209 	if (pDiag == NULL) {
210 		(void)fclose(pFile);
211 		return FALSE;
212 	}
213 
214 	bResult = bWordDecryptor(pFile, lFilesize, pDiag);
215 	vDestroyDiagram(pDiag);
216 
217 	(void)fclose(pFile);
218 	return bResult;
219 } /* end of bProcessFile */
220 
221 int
main(int argc,char ** argv)222 main(int argc, char **argv)
223 {
224 	options_type	tOptions;
225 	const char	*szWordfile;
226 	int	iFirst, iIndex, iGoodCount;
227 	BOOL	bUsage, bMultiple, bUseTXT, bUseXML;
228 
229 	if (argc <= 0) {
230 		return EXIT_FAILURE;
231 	}
232 
233 	szTask = szBasename(argv[0]);
234 
235 	if (argc <= 1) {
236 		iFirst = 1;
237 		bUsage = TRUE;
238 	} else {
239 		iFirst = iReadOptions(argc, argv);
240 		bUsage = iFirst <= 0;
241 	}
242 	if (bUsage) {
243 		vUsage();
244 		return iFirst < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
245 	}
246 
247 #if defined(N_PLAT_NLM) && !defined(_VA_LIST)
248 	nwinit();
249 #endif /* N_PLAT_NLM && !_VA_LIST */
250 
251 	vGetOptions(&tOptions);
252 
253 #if !defined(__dos)
254 	if (is_locale_utf8()) {
255 #if defined(__STDC_ISO_10646__)
256 		/*
257 		 * If the user wants UTF-8 and the envirionment variables
258 		 * support UTF-8, than set the locale accordingly
259 		 */
260 		if (tOptions.eEncoding == encoding_utf_8) {
261 			if (setlocale(LC_CTYPE, "") == NULL) {
262 				werr(1, "Can't set the UTF-8 locale! "
263 					"Check LANG, LC_CTYPE, LC_ALL.");
264 			}
265 			DBG_MSG("The UTF-8 locale has been set");
266 		} else {
267 			(void)setlocale(LC_CTYPE, "C");
268 		}
269 #endif /* __STDC_ISO_10646__ */
270 	} else {
271 		if (setlocale(LC_CTYPE, "") == NULL) {
272 			werr(0, "Can't set the locale! Will use defaults");
273 			(void)setlocale(LC_CTYPE, "C");
274 		}
275 		DBG_MSG("The locale has been set");
276 	}
277 #endif /* !__dos */
278 
279 	bMultiple = argc - iFirst > 1;
280 	bUseTXT = tOptions.eConversionType == conversion_text ||
281 		tOptions.eConversionType == conversion_fmt_text;
282 	bUseXML = tOptions.eConversionType == conversion_xml;
283 	iGoodCount = 0;
284 
285 #if defined(__dos)
286 	if (tOptions.eConversionType == conversion_pdf) {
287 		/* PDF must be written as a binary stream */
288 		setmode(fileno(stdout), O_BINARY);
289 	}
290 #endif /* __dos */
291 
292 	if (bUseXML) {
293 		fprintf(stdout,
294 	"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
295 	"<!DOCTYPE %s PUBLIC \"-//OASIS//DTD DocBook XML V4.1.2//EN\"\n"
296 	"\t\"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd\">\n",
297 		bMultiple ? "set" : "book");
298 		if (bMultiple) {
299 			fprintf(stdout, "<set>\n");
300 		}
301 	}
302 
303 	for (iIndex = iFirst; iIndex < argc; iIndex++) {
304 		if (bMultiple && bUseTXT) {
305 			szWordfile = szBasename(argv[iIndex]);
306 			fprintf(stdout, "::::::::::::::\n");
307 			fprintf(stdout, "%s\n", szWordfile);
308 			fprintf(stdout, "::::::::::::::\n");
309 		}
310 		if (bProcessFile(argv[iIndex])) {
311 			iGoodCount++;
312 		}
313 	}
314 
315 	if (bMultiple && bUseXML) {
316 		fprintf(stdout, "</set>\n");
317 	}
318 
319 	DBG_DEC(iGoodCount);
320 	return iGoodCount <= 0 ? EXIT_FAILURE : EXIT_SUCCESS;
321 } /* end of main */
322