xref: /netbsd-src/external/bsd/file/dist/src/readcdf.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: readcdf.c,v 1.10 2014/06/13 02:08:06 christos Exp $	*/
2 /*-
3  * Copyright (c) 2008 Christos Zoulas
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  * POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include "file.h"
28 
29 #ifndef lint
30 #if 0
31 FILE_RCSID("@(#)$File: readcdf.c,v 1.44 2014/05/14 23:22:48 christos Exp $")
32 #else
33 __RCSID("$NetBSD: readcdf.c,v 1.10 2014/06/13 02:08:06 christos Exp $");
34 #endif
35 #endif
36 
37 #include <assert.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <string.h>
41 #include <time.h>
42 #include <ctype.h>
43 #if defined(HAVE_LOCALE_H)
44 #include <locale.h>
45 #endif
46 
47 #include "cdf.h"
48 #include "magic.h"
49 
/*
 * Fallback for systems whose headers do not provide __arraycount():
 * number of elements in a true array (not a pointer).  The argument is
 * parenthesized so that an expression argument indexes as intended.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif
53 
54 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
55 
/*
 * Pattern -> value lookup entry.  Each table below ends with an entry
 * whose pattern is NULL; cdf_app_to_mime() searches them with a
 * case-insensitive substring match (strcasestr) and returns the value of
 * the first entry that matches.
 */
struct nv {
	const char *pattern;	/* substring to look for */
	const char *mime;	/* value returned on a match */
};

/* Application-name property -> MIME subtype. */
static const struct nv app2mime[] = {
	{ "Word", "msword" },
	{ "Excel", "vnd.ms-excel" },
	{ "Powerpoint", "vnd.ms-powerpoint" },
	{ "Crystal Reports", "x-rpt" },
	{ "Advanced Installer", "vnd.ms-msi" },
	{ "InstallShield", "vnd.ms-msi" },
	{ "Microsoft Patch Compiler", "vnd.ms-msi" },
	{ "NAnt", "vnd.ms-msi" },
	{ "Windows Installer", "vnd.ms-msi" },
	{ NULL, NULL },
};

/* Directory-entry name -> MIME subtype. */
static const struct nv name2mime[] = {
	{ "WordDocument", "msword" },
	{ "PowerPoint", "vnd.ms-powerpoint" },
	{ "DigitalSignature", "vnd.ms-msi" },
	{ NULL, NULL },
};

/* Directory-entry name -> human-readable description. */
static const struct nv name2desc[] = {
	{ "WordDocument", "Microsoft Office Word" },
	{ "PowerPoint", "Microsoft PowerPoint" },
	{ "DigitalSignature", "Microsoft Installer" },
	{ NULL, NULL },
};
81 
/*
 * CLSID -> value lookup entry.  The 128-bit class id is held as two
 * 64-bit words.  Each table below ends with an entry whose value is
 * NULL, which is what cdf_clsid_to_mime() uses as its stop condition.
 */
struct cv {
	uint64_t clsid[2];	/* class id, two 64-bit halves */
	const char *mime;	/* value returned on a match */
};

/* Root-storage CLSID -> MIME subtype. */
static const struct cv clsid2mime[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "x-msi" },
	{ { 0, 0 }, NULL },
};

/* Root-storage CLSID -> human-readable description. */
static const struct cv clsid2desc[] = {
	{ { 0x00000000000c1084ULL, 0x46000000000000c0ULL }, "MSI Installer" },
	{ { 0, 0 }, NULL },
};
102 
103 private const char *
104 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
105 {
106 	size_t i;
107 	for (i = 0; cv[i].mime != NULL; i++) {
108 		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
109 			return cv[i].mime;
110 	}
111 	return NULL;
112 }
113 
114 private const char *
115 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
116 {
117 	size_t i;
118 	const char *rv = NULL;
119 	char *old_lc_ctype;
120 
121 	old_lc_ctype = setlocale(LC_CTYPE, NULL);
122 	assert(old_lc_ctype != NULL);
123 	old_lc_ctype = strdup(old_lc_ctype);
124 	assert(old_lc_ctype != NULL);
125 	(void)setlocale(LC_CTYPE, "C");
126 	for (i = 0; nv[i].pattern != NULL; i++)
127 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
128 			rv = nv[i].mime;
129 			break;
130 		}
131 	(void)setlocale(LC_CTYPE, old_lc_ctype);
132 	free(old_lc_ctype);
133 	return rv;
134 }
135 
136 private int
137 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
138     size_t count, const cdf_directory_t *root_storage)
139 {
140         size_t i;
141         cdf_timestamp_t tp;
142         struct timespec ts;
143         char buf[64];
144         const char *str = NULL;
145         const char *s;
146         int len;
147 
148         if (!NOTMIME(ms) && root_storage)
149 		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
150 		    clsid2mime);
151 
152         for (i = 0; i < count; i++) {
153                 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
154                 switch (info[i].pi_type) {
155                 case CDF_NULL:
156                         break;
157                 case CDF_SIGNED16:
158                         if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
159                             info[i].pi_s16) == -1)
160                                 return -1;
161                         break;
162                 case CDF_SIGNED32:
163                         if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
164                             info[i].pi_s32) == -1)
165                                 return -1;
166                         break;
167                 case CDF_UNSIGNED32:
168                         if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
169                             info[i].pi_u32) == -1)
170                                 return -1;
171                         break;
172                 case CDF_FLOAT:
173                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
174                             info[i].pi_f) == -1)
175                                 return -1;
176                         break;
177                 case CDF_DOUBLE:
178                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
179                             info[i].pi_d) == -1)
180                                 return -1;
181                         break;
182                 case CDF_LENGTH32_STRING:
183                 case CDF_LENGTH32_WSTRING:
184                         len = info[i].pi_str.s_len;
185                         if (len > 1) {
186                                 char vbuf[1024];
187                                 size_t j, k = 1;
188 
189                                 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
190                                     k++;
191                                 s = info[i].pi_str.s_buf;
192                                 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
193                                         if (*s == '\0')
194                                                 break;
195                                         if (isprint((unsigned char)*s))
196                                                 vbuf[j++] = *s;
197                                 }
198                                 if (j == sizeof(vbuf))
199                                         --j;
200                                 vbuf[j] = '\0';
201                                 if (NOTMIME(ms)) {
202                                         if (vbuf[0]) {
203                                                 if (file_printf(ms, ", %s: %s",
204                                                     buf, vbuf) == -1)
205                                                         return -1;
206                                         }
207                                 } else if (str == NULL && info[i].pi_id ==
208 				    CDF_PROPERTY_NAME_OF_APPLICATION) {
209 					str = cdf_app_to_mime(vbuf, app2mime);
210 				}
211 			}
212                         break;
213                 case CDF_FILETIME:
214                         tp = info[i].pi_tp;
215                         if (tp != 0) {
216 				char tbuf[64];
217                                 if (tp < 1000000000000000LL) {
218                                         cdf_print_elapsed_time(tbuf,
219                                             sizeof(tbuf), tp);
220                                         if (NOTMIME(ms) && file_printf(ms,
221                                             ", %s: %s", buf, tbuf) == -1)
222                                                 return -1;
223                                 } else {
224                                         char *c, *ec;
225                                         cdf_timestamp_to_timespec(&ts, tp);
226                                         c = cdf_ctime(&ts.tv_sec, tbuf);
227                                         if (c != NULL &&
228 					    (ec = strchr(c, '\n')) != NULL)
229 						*ec = '\0';
230 
231                                         if (NOTMIME(ms) && file_printf(ms,
232                                             ", %s: %s", buf, c) == -1)
233                                                 return -1;
234                                 }
235                         }
236                         break;
237                 case CDF_CLIPBOARD:
238                         break;
239                 default:
240                         return -1;
241                 }
242         }
243         if (!NOTMIME(ms)) {
244 		if (str == NULL)
245 			return 0;
246                 if (file_printf(ms, "application/%s", str) == -1)
247                         return -1;
248         }
249         return 1;
250 }
251 
252 private int
253 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
254     const cdf_stream_t *sst, const cdf_directory_t *root_storage)
255 {
256         cdf_summary_info_header_t si;
257         cdf_property_info_t *info;
258         size_t count;
259         int m;
260 
261         if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
262                 return -1;
263 
264         if (NOTMIME(ms)) {
265 		const char *str;
266 
267                 if (file_printf(ms, "Composite Document File V2 Document")
268 		    == -1)
269                         return -1;
270 
271                 if (file_printf(ms, ", %s Endian",
272                     si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
273                         return -2;
274                 switch (si.si_os) {
275                 case 2:
276                         if (file_printf(ms, ", Os: Windows, Version %d.%d",
277                             si.si_os_version & 0xff,
278                             (uint32_t)si.si_os_version >> 8) == -1)
279                                 return -2;
280                         break;
281                 case 1:
282                         if (file_printf(ms, ", Os: MacOS, Version %d.%d",
283                             (uint32_t)si.si_os_version >> 8,
284                             si.si_os_version & 0xff) == -1)
285                                 return -2;
286                         break;
287                 default:
288                         if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
289                             si.si_os_version & 0xff,
290                             (uint32_t)si.si_os_version >> 8) == -1)
291                                 return -2;
292                         break;
293                 }
294 		if (root_storage) {
295 			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
296 			    clsid2desc);
297 			if (str)
298 				if (file_printf(ms, ", %s", str) == -1)
299 					return -2;
300 			}
301 		}
302 
303         m = cdf_file_property_info(ms, info, count, root_storage);
304         free(info);
305 
306         return m == -1 ? -2 : m;
307 }
308 
#ifdef notdef
/*
 * Format a two-word class id into buf (at most len bytes, always
 * NUL-terminated by snprintf when len > 0) as five dash-separated
 * lowercase hex fields of 8, 4, 4, 4 and 12 digits, and return buf.
 *
 * The fields are taken, in order, from bits 63..32, 31..16 and 15..0 of
 * uuid[0] and bits 63..48 and 47..0 of uuid[1].  The last mask is
 * exactly 48 bits so that the final field never exceeds its 12 digits
 * or repeats bits already printed in the fourth field.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	const uint64_t mask16 = (uint64_t)0xffffU;
	const uint64_t mask32 = (uint64_t)0xffffffffU;
	const uint64_t mask48 = ((uint64_t)1 << 48) - 1;

	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
	    PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & mask32,
	    (uuid[0] >> 16) & mask16,
	    (uuid[0] >>  0) & mask16,
	    (uuid[1] >> 48) & mask16,
	    (uuid[1] >>  0) & mask48);
	return buf;
}
#endif
322 
323 protected int
324 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
325     size_t nbytes)
326 {
327         cdf_info_t info;
328         cdf_header_t h;
329         cdf_sat_t sat, ssat;
330         cdf_stream_t sst, scn;
331         cdf_dir_t dir;
332         int i;
333         const char *expn = "";
334         const char *corrupt = "corrupt: ";
335 
336         info.i_fd = fd;
337         info.i_buf = buf;
338         info.i_len = nbytes;
339         if (ms->flags & MAGIC_APPLE)
340                 return 0;
341         if (cdf_read_header(&info, &h) == -1)
342                 return 0;
343 #ifdef CDF_DEBUG
344         cdf_dump_header(&h);
345 #endif
346 
347         if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
348                 expn = "Can't read SAT";
349                 goto out0;
350         }
351 #ifdef CDF_DEBUG
352         cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
353 #endif
354 
355         if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
356                 expn = "Can't read SSAT";
357                 goto out1;
358         }
359 #ifdef CDF_DEBUG
360         cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
361 #endif
362 
363         if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
364                 expn = "Can't read directory";
365                 goto out2;
366         }
367 
368         const cdf_directory_t *root_storage;
369         if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
370 	    &root_storage)) == -1) {
371                 expn = "Cannot read short stream";
372                 goto out3;
373         }
374 #ifdef CDF_DEBUG
375         cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
376 #endif
377 #ifdef notdef
378 	if (root_storage) {
379 		if (NOTMIME(ms)) {
380 			char clsbuf[128];
381 			if (file_printf(ms, "CLSID %s, ",
382 			    format_clsid(clsbuf, sizeof(clsbuf),
383 			    root_storage->d_storage_uuid)) == -1)
384 				return -1;
385 		}
386 	}
387 #endif
388 
389 	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
390 	    "FileHeader", &scn)) != -1) {
391 #define HWP5_SIGNATURE "HWP Document File"
392 		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
393 		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
394 		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
395 		    if (NOTMIME(ms)) {
396 			if (file_printf(ms,
397 			    "Hangul (Korean) Word Processor File 5.x") == -1)
398 			    return -1;
399 		    } else {
400 			if (file_printf(ms, "application/x-hwp") == -1)
401 			    return -1;
402 		    }
403 		    i = 1;
404 		    goto out5;
405 		} else {
406 		    free(scn.sst_tab);
407 		    scn.sst_tab = NULL;
408 		    scn.sst_len = 0;
409 		    scn.sst_dirlen = 0;
410 		}
411 	}
412 
413         if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
414             &scn)) == -1) {
415                 if (errno == ESRCH) {
416                         corrupt = expn;
417                         expn = "No summary info";
418                 } else {
419                         expn = "Cannot read summary info";
420                 }
421                 goto out4;
422         }
423 #ifdef CDF_DEBUG
424         cdf_dump_summary_info(&h, &scn);
425 #endif
426         if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
427             expn = "Can't expand summary_info";
428 
429 	if (i == 0) {
430 		const char *str = NULL;
431 		cdf_directory_t *d;
432 		char name[__arraycount(d->d_name)];
433 		size_t j, k;
434 
435 		for (j = 0; str == NULL && j < dir.dir_len; j++) {
436 			d = &dir.dir_tab[j];
437 			for (k = 0; k < sizeof(name); k++)
438 				name[k] = (char)cdf_tole2(d->d_name[k]);
439 			str = cdf_app_to_mime(name,
440 			    NOTMIME(ms) ? name2desc : name2mime);
441 		}
442 		if (NOTMIME(ms)) {
443 			if (str != NULL) {
444 				if (file_printf(ms, "%s", str) == -1)
445 					return -1;
446 				i = 1;
447 			}
448 		} else {
449 			if (str == NULL)
450 				str = "vnd.ms-office";
451 			if (file_printf(ms, "application/%s", str) == -1)
452 				return -1;
453 			i = 1;
454 		}
455 	}
456 out5:
457         free(scn.sst_tab);
458 out4:
459         free(sst.sst_tab);
460 out3:
461         free(dir.dir_tab);
462 out2:
463         free(ssat.sat_tab);
464 out1:
465         free(sat.sat_tab);
466 out0:
467 	if (i == -1) {
468 	    if (NOTMIME(ms)) {
469 		if (file_printf(ms,
470 		    "Composite Document File V2 Document") == -1)
471 		    return -1;
472 		if (*expn)
473 		    if (file_printf(ms, ", %s%s", corrupt, expn) == -1)
474 			return -1;
475 	    } else {
476 		if (file_printf(ms, "application/CDFV2-corrupt") == -1)
477 		    return -1;
478 	    }
479 	    i = 1;
480 	}
481         return i;
482 }
483