xref: /netbsd-src/external/bsd/file/dist/src/file.h (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: file.h,v 1.11 2014/06/13 02:08:06 christos Exp $	*/
2 /*
3  * Copyright (c) Ian F. Darwin 1986-1995.
4  * Software written by Ian F. Darwin and others;
5  * maintained 1995-present by Christos Zoulas and others.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * file.h - definitions for file(1) program
31  * @(#)$File: file.h,v 1.152 2014/06/03 19:01:34 christos Exp $
32  */
33 
34 #ifndef __file_h__
35 #define __file_h__
36 
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
40 
/*
 * printf length modifiers for size_t and 64-bit integers.
 * The Windows branches use "I64" (or nothing for a 32-bit size_t);
 * everything else uses the C99 "z" and "ll" modifiers.
 */
#if !defined(WIN32)
# define SIZE_T_FORMAT "z"
# define INT64_T_FORMAT "ll"
#elif defined(_WIN64)
# define SIZE_T_FORMAT "I64"
# define INT64_T_FORMAT "I64"
#else
# define SIZE_T_FORMAT ""
# define INT64_T_FORMAT "I64"
#endif
52 
53 #include <stdio.h>	/* Include that here, to make sure __P gets defined */
54 #include <errno.h>
55 #include <fcntl.h>	/* For open and flags */
56 #ifdef HAVE_STDINT_H
57 #ifndef __STDC_LIMIT_MACROS
58 #define __STDC_LIMIT_MACROS
59 #endif
60 #include <stdint.h>
61 #endif
62 #ifdef HAVE_INTTYPES_H
63 #include <inttypes.h>
64 #endif
65 #include <regex.h>
66 #include <time.h>
67 #include <sys/types.h>
68 #include <sys/param.h>
69 /* Do this here and now, because struct stat gets re-defined on solaris */
70 #include <sys/stat.h>
71 #include <stdarg.h>
72 
/* Compile in the conditional members of struct magic and struct level_info. */
#define ENABLE_CONDITIONALS

/* Magic file path used when the build does not supply one. */
#if !defined(MAGIC)
# define MAGIC "/etc/magic"
#endif

/* Path list separator: ';' on EMX and Windows builds, ':' otherwise. */
#if !defined(__EMX__) && !defined(WIN32)
# define PATHSEP	':'
#else
# define PATHSEP	';'
#endif

/* "private" marks a definition as having internal linkage. */
#define private static
86 
/*
 * Linkage markers.  When symbol visibility is available (and this is not
 * a Windows build) "public" exports a symbol and "protected" hides it;
 * otherwise both expand to nothing.  An existing definition of
 * "protected" is left untouched.
 */
#if HAVE_VISIBILITY && !defined(WIN32)
# define public  __attribute__ ((__visibility__("default")))
#else
# define public
#endif

#if !defined(protected)
# if HAVE_VISIBILITY && !defined(WIN32)
#  define protected __attribute__ ((__visibility__("hidden")))
# else
#  define protected
# endif
#endif
98 
/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized before subscripting so that any array-valued expression
 * can be passed safely.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif
102 
/*
 * __GNUC_PREREQ__(x, y) is true when compiling with a GNU-compatible
 * compiler of version x.y or newer, and 0 for any other compiler.
 */
#if !defined(__GNUC_PREREQ__)
# if defined(__GNUC__)
#  define __GNUC_PREREQ__(x, y)						\
	((__GNUC__ > (x)) ||						\
	 (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
# else
#  define __GNUC_PREREQ__(x, y)	0
# endif
#endif
112 
/* Compilers without GNU extensions: make __attribute__(...) vanish. */
#if !defined(__GNUC__) && !defined(__attribute__)
# define __attribute__(a)
#endif
118 
/*
 * Smaller / larger of two values.  Both are plain macros: each argument
 * is evaluated twice, so do not pass expressions with side effects.
 */
#if !defined(MIN)
# define MIN(a,b)	(((b) > (a)) ? (a) : (b))
#endif

#if !defined(MAX)
# define MAX(a,b)	(((b) < (a)) ? (a) : (b))
#endif
126 
/* How much of the file to look at; may be overridden by the build. */
#if !defined(HOWMANY)
# define HOWMANY (256 * 1024)
#endif

/* Size limits. */
#define MAXMAGIS 8192	/* max entries in any one magic file or directory */
#define MAXDESC	64	/* max len of text description */
#define MAXMIME	80	/* max len of text MIME type */
#define MAXstring 64	/* max len of "string" types */

/* Magic number, format version and per-entry size (in bytes). */
#define MAGICNO		0xF11E041C
#define VERSIONNO	12
#define FILE_MAGICSIZE	248

/* Action codes. */
#define	FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
#define FILE_LIST	3
144 
145 union VALUETYPE {
146 	uint8_t b;
147 	uint16_t h;
148 	uint32_t l;
149 	uint64_t q;
150 	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
151 	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
152 	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
153 	char s[MAXstring];	/* the search string or regex pattern */
154 	unsigned char us[MAXstring];
155 	float f;
156 	double d;
157 };
158 
159 struct magic {
160 	/* Word 1 */
161 	uint16_t cont_level;	/* level of ">" */
162 	uint8_t flag;
163 #define INDIR		0x01	/* if '(...)' appears */
164 #define OFFADD		0x02	/* if '>&' or '>...(&' appears */
165 #define INDIROFFADD	0x04	/* if '>&(' appears */
166 #define UNSIGNED	0x08	/* comparison is unsigned */
167 #define NOSPACE		0x10	/* suppress space character before output */
168 #define BINTEST		0x20	/* test is for a binary type (set only
169 				   for top-level tests) */
170 #define TEXTTEST	0x40	/* for passing to file_softmagic */
171 
172 	uint8_t factor;
173 
174 	/* Word 2 */
175 	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
176 	uint8_t vallen;		/* length of string value, if any */
177 	uint8_t type;		/* comparison type (FILE_*) */
178 	uint8_t in_type;	/* type of indirection */
179 #define 			FILE_INVALID	0
180 #define 			FILE_BYTE	1
181 #define				FILE_SHORT	2
182 #define				FILE_DEFAULT	3
183 #define				FILE_LONG	4
184 #define				FILE_STRING	5
185 #define				FILE_DATE	6
186 #define				FILE_BESHORT	7
187 #define				FILE_BELONG	8
188 #define				FILE_BEDATE	9
189 #define				FILE_LESHORT	10
190 #define				FILE_LELONG	11
191 #define				FILE_LEDATE	12
192 #define				FILE_PSTRING	13
193 #define				FILE_LDATE	14
194 #define				FILE_BELDATE	15
195 #define				FILE_LELDATE	16
196 #define				FILE_REGEX	17
197 #define				FILE_BESTRING16	18
198 #define				FILE_LESTRING16	19
199 #define				FILE_SEARCH	20
200 #define				FILE_MEDATE	21
201 #define				FILE_MELDATE	22
202 #define				FILE_MELONG	23
203 #define				FILE_QUAD	24
204 #define				FILE_LEQUAD	25
205 #define				FILE_BEQUAD	26
206 #define				FILE_QDATE	27
207 #define				FILE_LEQDATE	28
208 #define				FILE_BEQDATE	29
209 #define				FILE_QLDATE	30
210 #define				FILE_LEQLDATE	31
211 #define				FILE_BEQLDATE	32
212 #define				FILE_FLOAT	33
213 #define				FILE_BEFLOAT	34
214 #define				FILE_LEFLOAT	35
215 #define				FILE_DOUBLE	36
216 #define				FILE_BEDOUBLE	37
217 #define				FILE_LEDOUBLE	38
218 #define				FILE_BEID3	39
219 #define				FILE_LEID3	40
220 #define				FILE_INDIRECT	41
221 #define				FILE_QWDATE	42
222 #define				FILE_LEQWDATE	43
223 #define				FILE_BEQWDATE	44
224 #define				FILE_NAME	45
225 #define				FILE_USE	46
226 #define				FILE_CLEAR	47
227 #define				FILE_NAMES_SIZE	48 /* size of array to contain all names */
228 
229 #define IS_STRING(t) \
230 	((t) == FILE_STRING || \
231 	 (t) == FILE_PSTRING || \
232 	 (t) == FILE_BESTRING16 || \
233 	 (t) == FILE_LESTRING16 || \
234 	 (t) == FILE_REGEX || \
235 	 (t) == FILE_SEARCH || \
236 	 (t) == FILE_NAME || \
237 	 (t) == FILE_USE)
238 
239 #define FILE_FMT_NONE 0
240 #define FILE_FMT_NUM  1 /* "cduxXi" */
241 #define FILE_FMT_STR  2 /* "s" */
242 #define FILE_FMT_QUAD 3 /* "ll" */
243 #define FILE_FMT_FLOAT 4 /* "eEfFgG" */
244 #define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
245 
246 	/* Word 3 */
247 	uint8_t in_op;		/* operator for indirection */
248 	uint8_t mask_op;	/* operator for mask */
249 #ifdef ENABLE_CONDITIONALS
250 	uint8_t cond;		/* conditional type */
251 #else
252 	uint8_t dummy;
253 #endif
254 	uint8_t factor_op;
255 #define		FILE_FACTOR_OP_PLUS	'+'
256 #define		FILE_FACTOR_OP_MINUS	'-'
257 #define		FILE_FACTOR_OP_TIMES	'*'
258 #define		FILE_FACTOR_OP_DIV	'/'
259 #define		FILE_FACTOR_OP_NONE	'\0'
260 
261 #define				FILE_OPS	"&|^+-*/%"
262 #define				FILE_OPAND	0
263 #define				FILE_OPOR	1
264 #define				FILE_OPXOR	2
265 #define				FILE_OPADD	3
266 #define				FILE_OPMINUS	4
267 #define				FILE_OPMULTIPLY	5
268 #define				FILE_OPDIVIDE	6
269 #define				FILE_OPMODULO	7
270 #define				FILE_OPS_MASK	0x07 /* mask for above ops */
271 #define				FILE_UNUSED_1	0x08
272 #define				FILE_UNUSED_2	0x10
273 #define				FILE_UNUSED_3	0x20
274 #define				FILE_OPINVERSE	0x40
275 #define				FILE_OPINDIRECT	0x80
276 
277 #ifdef ENABLE_CONDITIONALS
278 #define				COND_NONE	0
279 #define				COND_IF		1
280 #define				COND_ELIF	2
281 #define				COND_ELSE	3
282 #endif /* ENABLE_CONDITIONALS */
283 
284 	/* Word 4 */
285 	uint32_t offset;	/* offset to magic number */
286 	/* Word 5 */
287 	int32_t in_offset;	/* offset from indirection */
288 	/* Word 6 */
289 	uint32_t lineno;	/* line number in magic file */
290 	/* Word 7,8 */
291 	union {
292 		uint64_t _mask;	/* for use with numeric and date types */
293 		struct {
294 			uint32_t _count;	/* repeat/line count */
295 			uint32_t _flags;	/* modifier flags */
296 		} _s;		/* for use with string types */
297 	} _u;
298 #define num_mask _u._mask
299 #define str_range _u._s._count
300 #define str_flags _u._s._flags
301 	/* Words 9-16 */
302 	union VALUETYPE value;	/* either number or string */
303 	/* Words 17-32 */
304 	char desc[MAXDESC];	/* description */
305 	/* Words 33-52 */
306 	char mimetype[MAXMIME]; /* MIME type */
307 	/* Words 53-54 */
308 	char apple[8];
309 };
310 
/* Single-bit mask with bit number A set. */
#define BIT(A)   (1 << (A))

/* Modifier bits for string tests */
#define STRING_COMPACT_WHITESPACE		BIT(0)
#define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
#define STRING_IGNORE_LOWERCASE			BIT(2)
#define STRING_IGNORE_UPPERCASE			BIT(3)
#define STRING_TEXTTEST				BIT(5)
#define STRING_BINTEST				BIT(6)
#define	STRING_TRIM				BIT(13)
#define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE		100

/* Modifier bits for regex tests (REGEX_LINE_COUNT shares bit 11 with PSTRING_4_LE) */
#define REGEX_OFFSET_START			BIT(4)
#define REGEX_LINE_COUNT			BIT(11)

/* Modifier bits for pstring tests: width and byte order of the length prefix */
#define PSTRING_1_BE				BIT(7)
#define PSTRING_1_LE				BIT(7)	/* same bit as PSTRING_1_BE */
#define PSTRING_2_BE				BIT(8)
#define PSTRING_2_LE				BIT(9)
#define PSTRING_4_BE				BIT(10)
#define PSTRING_4_LE				BIT(11)
#define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
/* All of the length-prefix selector bits together */
#define PSTRING_LEN	\
    (PSTRING_1_BE|PSTRING_2_BE|PSTRING_2_LE|PSTRING_4_BE|PSTRING_4_LE)

/* Modifier letters corresponding to the bits above */
#define CHAR_COMPACT_WHITESPACE			'W'
#define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
#define CHAR_IGNORE_LOWERCASE			'c'
#define CHAR_IGNORE_UPPERCASE			'C'
#define CHAR_REGEX_OFFSET_START			's'
#define CHAR_TEXTTEST				't'
#define CHAR_BINTEST				'b'
#define	CHAR_TRIM				'T'
#define CHAR_PSTRING_1_BE			'B'
#define CHAR_PSTRING_1_LE			'B'
#define CHAR_PSTRING_2_BE			'H'
#define CHAR_PSTRING_2_LE			'h'
#define CHAR_PSTRING_4_BE			'L'
#define CHAR_PSTRING_4_LE			'l'
#define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'
347 
348 
/*
 * Node of a doubly linked list of magic entries; each node refers to
 * one array of struct magic.
 */
struct mlist {
	struct magic *magic;	/* array of magic entries */
	uint32_t nmagic;	/* number of entries in array */
	void *map;		/* internal resources used by entry */
	struct mlist *next;	/* following node */
	struct mlist *prev;	/* preceding node */
};
356 
/*
 * Cast helpers that compile as both C and C++.
 *   CAST(T, b)  - value conversion of b to type T
 *   RCAST(T, b) - reinterpreting conversion of b to type T
 * The C forms are fully parenthesized so the result behaves as a single
 * primary expression, like the C++ forms do: CAST(T, p)[i] subscripts
 * the converted pointer rather than converting (p)[i].
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#define RCAST(T, b)	reinterpret_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#define RCAST(T, b)	((T)(b))
#endif
364 
/*
 * State kept for one level; struct magic_set refers to an array of
 * these through its "c" member.
 */
struct level_info {
	int32_t off;
	int got_match;
#if defined(ENABLE_CONDITIONALS)
	int last_match;
	int last_cond;		/* used for error checking by parse() */
#endif
};
373 
374 #define MAGIC_SETS	2
375 
376 struct magic_set {
377 	struct mlist *mlist[MAGIC_SETS];	/* list of regular entries */
378 	struct cont {
379 		size_t len;
380 		struct level_info *li;
381 	} c;
382 	struct out {
383 		char *buf;		/* Accumulation buffer */
384 		char *pbuf;		/* Printable buffer */
385 	} o;
386 	uint32_t offset;
387 	int error;
388 	int flags;			/* Control magic tests. */
389 	int event_flags;		/* Note things that happened. */
390 #define 		EVENT_HAD_ERR		0x01
391 	const char *file;
392 	size_t line;			/* current magic line number */
393 
394 	/* data for searches */
395 	struct {
396 		const char *s;		/* start of search in original source */
397 		size_t s_len;		/* length of search region */
398 		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
399 		size_t rm_len;		/* match length */
400 	} search;
401 
402 	/* FIXME: Make the string dynamically allocated so that e.g.
403 	   strings matched in files can be longer than MAXstring */
404 	union VALUETYPE ms_value;	/* either number or string */
405 };
406 
/* Integer type used to hold one Unicode character. */
typedef unsigned long int unichar;
409 
410 struct stat;
411 #define FILE_T_LOCAL	1
412 #define FILE_T_WINDOWS	2
413 protected const char *file_fmttime(uint64_t, int, char *);
414 protected struct magic_set *file_ms_alloc(int);
415 protected void file_ms_free(struct magic_set *);
416 protected int file_buffer(struct magic_set *, int, const char *, const void *,
417     size_t);
418 protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
419 protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
420 protected int file_vprintf(struct magic_set *, const char *, va_list)
421     __attribute__((__format__(__printf__, 2, 0)));
422 protected size_t file_printedlen(const struct magic_set *);
423 protected int file_replace(struct magic_set *, const char *, const char *);
424 protected int file_printf(struct magic_set *, const char *, ...)
425     __attribute__((__format__(__printf__, 2, 3)));
426 protected int file_reset(struct magic_set *);
427 protected int file_tryelf(struct magic_set *, int, const unsigned char *,
428     size_t);
429 protected int file_trycdf(struct magic_set *, int, const unsigned char *,
430     size_t);
431 #if HAVE_FORK
432 protected int file_zmagic(struct magic_set *, int, const char *,
433     const unsigned char *, size_t);
434 #endif
435 protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
436     int);
437 protected int file_ascmagic_with_encoding(struct magic_set *,
438     const unsigned char *, size_t, unichar *, size_t, const char *,
439     const char *, int);
440 protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
441     unichar **, size_t *, const char **, const char **, const char **);
442 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
443 protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
444     size_t, int, int);
445 protected int file_apprentice(struct magic_set *, const char *, int);
446 protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
447 protected uint64_t file_signextend(struct magic_set *, struct magic *,
448     uint64_t);
449 protected void file_badread(struct magic_set *);
450 protected void file_badseek(struct magic_set *);
451 protected void file_oomem(struct magic_set *, size_t);
452 protected void file_error(struct magic_set *, int, const char *, ...)
453     __attribute__((__format__(__printf__, 3, 4)));
454 protected void file_magerror(struct magic_set *, const char *, ...)
455     __attribute__((__format__(__printf__, 2, 3)));
456 protected void file_magwarn(struct magic_set *, const char *, ...)
457     __attribute__((__format__(__printf__, 2, 3)));
458 protected void file_mdump(struct magic *);
459 protected void file_showstr(FILE *, const char *, size_t);
460 protected size_t file_mbswidth(const char *);
461 protected const char *file_getbuffer(struct magic_set *);
462 protected ssize_t sread(int, void *, size_t, int);
463 protected int file_check_mem(struct magic_set *, unsigned int);
464 protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
465     size_t *);
466 protected size_t file_pstring_length_size(const struct magic *);
467 protected size_t file_pstring_get_length(const struct magic *, const char *);
468 #ifdef __EMX__
469 protected int file_os2_apptype(struct magic_set *, const char *, const void *,
470     size_t);
471 #endif /* __EMX__ */
472 
/*
 * A regex_t bundled with the extra values the file_reg*() wrappers
 * keep next to it.
 */
typedef struct {
	const char *pat;	/* pattern string */
	char *old_lc_ctype;	/* NOTE(review): presumably a saved LC_CTYPE name - confirm */
	int rc;			/* NOTE(review): presumably the regcomp() result - confirm */
	regex_t rx;
} file_regex_t;
479 
480 protected int file_regcomp(file_regex_t *, const char *, int);
481 protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
482     int);
483 protected void file_regfree(file_regex_t *);
484 protected void file_regerror(file_regex_t *, int, struct magic_set *);
485 
/* Name table and its element count; not declared for COMPILE_ONLY builds. */
#if !defined(COMPILE_ONLY)
extern const char *file_names[];
extern const size_t file_nnames;
#endif
490 
/*
 * Substitute for strerror() on systems that lack it: look the error
 * number up in sys_errlist[], yielding "Unknown error" when it lies
 * outside 0 .. sys_nerr-1.
 */
#if !defined(HAVE_STRERROR)
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) < 0 || (e) >= sys_nerr) ? "Unknown error" : sys_errlist[(e)])
#endif
497 
/* Systems without strtoul(): route calls to strtol() instead. */
#if !defined(HAVE_STRTOUL)
# define strtoul(a, b, c)	strtol(a, b, c)
#endif
501 
/*
 * Prototypes for replacement functions.  Each one is declared only when
 * the matching HAVE_* macro is not defined.
 */
#if !defined(HAVE_PREAD)
ssize_t pread(int, void *, size_t, off_t);
#endif

#if !defined(HAVE_VASPRINTF)
int vasprintf(char **, const char *, va_list);
#endif

#if !defined(HAVE_ASPRINTF)
int asprintf(char **, const char *, ...);
#endif

#if !defined(HAVE_STRLCPY)
size_t strlcpy(char *, const char *, size_t);
#endif

#if !defined(HAVE_STRLCAT)
size_t strlcat(char *, const char *, size_t);
#endif

#if !defined(HAVE_STRCASESTR)
char *strcasestr(const char *, const char *);
#endif

#if !defined(HAVE_GETLINE)
ssize_t getline(char **, size_t *, FILE *);
ssize_t getdelim(char **, size_t *, int, FILE *);
#endif

#if !defined(HAVE_CTIME_R)
char *ctime_r(const time_t *, char *);
#endif

#if !defined(HAVE_ASCTIME_R)
char *asctime_r(const struct tm *, char *);
#endif

#if !defined(HAVE_FMTCHECK)
const char *fmtcheck(const char *, const char *)
     __attribute__((__format_arg__(2)));
#endif
535 
/* Turn QUICK on when mmap() and <sys/mman.h> are both available. */
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H)
# ifndef QUICK
#  define QUICK
# endif
#endif

/* Let O_BINARY be used in open() flags on systems that do not define it. */
#if !defined(O_BINARY)
# define O_BINARY	0
#endif
543 
/*
 * FILE_RCSID(id) embeds an identification string in the object file.
 *   C++:             expands to nothing.
 *   GNU C >= 3:      a static array marked __used__ so it is not dropped.
 *   other compilers: a static function that refers to itself, so the
 *                    string counts as referenced.
 * The expansion already ends the declaration; do not add a semicolon.
 */
#if defined(__cplusplus)
#define FILE_RCSID(id)
#elif defined(__GNUC__) && (__GNUC__ >= 3)
#define FILE_RCSID(id) \
static const char rcsid[] __attribute__((__used__)) = id;
#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#endif

/* __RCSID() expands to nothing unless something else already defines it. */
#if !defined(__RCSID)
#define __RCSID(a)
#endif
560 
561 #endif /* __file_h__ */
562