xref: /freebsd-src/contrib/file/python/magic.py (revision 3e41d09d08f5bfa2fc1386241f334b865d6da085)
1*3e41d09dSXin LI# coding: utf-8
2*3e41d09dSXin LI
3b6cee71dSXin LI'''
4b6cee71dSXin LIPython bindings for libmagic
5b6cee71dSXin LI'''
6b6cee71dSXin LI
7b6cee71dSXin LIimport ctypes
8b6cee71dSXin LI
9*3e41d09dSXin LIfrom collections import namedtuple
10*3e41d09dSXin LI
11b6cee71dSXin LIfrom ctypes import *
12b6cee71dSXin LIfrom ctypes.util import find_library
13b6cee71dSXin LI
14b6cee71dSXin LI
def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance

    Raises ImportError if no libmagic shared library can be located.
    """
    path = find_library('magic')
    if path is None:
        # find_library() reports "not found" by returning None, and on
        # POSIX LoadLibrary(None) hands back a handle to the running
        # process instead of failing.  Without this check the problem
        # would only surface later as a confusing missing-symbol
        # AttributeError.
        raise ImportError('failed to find the libmagic shared library')
    return ctypes.cdll.LoadLibrary(path)
21b6cee71dSXin LI
# Registry of loaded shared libraries, keyed by short library name.
_libraries = {'magic': _init()}
24b6cee71dSXin LI
# Flag constants for open and setflags.
# Every flag is bound to two names: one with the MAGIC_ prefix and one
# without it.
MAGIC_NONE = NONE = 0x000000
MAGIC_DEBUG = DEBUG = 0x000001
MAGIC_SYMLINK = SYMLINK = 0x000002
MAGIC_COMPRESS = COMPRESS = 0x000004
MAGIC_DEVICES = DEVICES = 0x000008
MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
MAGIC_CONTINUE = CONTINUE = 0x000020
MAGIC_CHECK = CHECK = 0x000040
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
MAGIC_RAW = RAW = 0x000100
MAGIC_ERROR = ERROR = 0x000200
MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
# MIME_TYPE + MIME_ENCODING (numerically 1040)
MAGIC_MIME = MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING
MAGIC_APPLE = APPLE = 0x000800

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000

# Composite mask, kept at its literal value (decimal 4173824).
MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000
52b6cee71dSXin LI
# Result record produced by the `detect_from_` functions.
FileMagic = namedtuple('FileMagic', ['mime_type', 'encoding', 'name'])
54*3e41d09dSXin LI
55b6cee71dSXin LI
class magic_set(Structure):
    """
    Field-less ctypes structure.  It is only ever used through the
    pointer type C{magic_t}; presumably it stands in for libmagic's
    opaque handle struct.
    """
    _fields_ = []


# Pointer-to-magic_set type used as the handle in the ctypes prototypes.
magic_t = POINTER(magic_set)
60b6cee71dSXin LI
def _bind(symbol, restype, argtypes):
    """
    Looks up C{symbol} in the loaded magic library, attaches the given
    ctypes return type and argument types to it and returns the function.
    """
    func = getattr(_libraries['magic'], symbol)
    func.restype = restype
    func.argtypes = argtypes
    return func


# ctypes prototypes for the libmagic entry points used by this module.
_open = _bind('magic_open', magic_t, [c_int])
_close = _bind('magic_close', None, [magic_t])
_file = _bind('magic_file', c_char_p, [magic_t, c_char_p])
_descriptor = _bind('magic_descriptor', c_char_p, [magic_t, c_int])
_buffer = _bind('magic_buffer', c_char_p, [magic_t, c_void_p, c_size_t])
_error = _bind('magic_error', c_char_p, [magic_t])
_setflags = _bind('magic_setflags', c_int, [magic_t, c_int])
_load = _bind('magic_load', c_int, [magic_t, c_char_p])
_compile = _bind('magic_compile', c_int, [magic_t, c_char_p])
_check = _bind('magic_check', c_int, [magic_t, c_char_p])
_list = _bind('magic_list', c_int, [magic_t, c_char_p])
_errno = _bind('magic_errno', c_int, [magic_t])
108b6cee71dSXin LI
109b6cee71dSXin LI
class Magic(object):
    """
    Object wrapper around a libmagic handle.

    Textual results are returned as native strings; a C{None} result from
    the library is passed through as C{None}.
    """

    def __init__(self, ms):
        """
        Wraps C{ms}, the handle that the library calls are made with.
        """
        self._magic_t = ms

    @staticmethod
    def _tostr(s):
        """
        Converts a result of the library to a native string.  C{None} is
        passed through unchanged.
        """
        if s is None:
            return None
        if isinstance(s, str):  # Python 2: c_char_p already yields str
            return s
        # Python 3: c_char_p yields bytes.  str(s) would give the
        # "b'...'" repr, so decode instead; undecodable bytes are
        # replaced rather than raising.
        return s.decode('utf-8', 'replace')

    @staticmethod
    def _tobytes(b, none_ok=False):
        """
        Converts a file name argument to bytes, as required by c_char_p.
        With C{none_ok} set, C{None} is passed through unchanged so that
        the library receives a NULL pointer.
        """
        if b is None and none_ok:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def close(self):
        """
        Closes the magic database and deallocates any resources used.
        """
        _close(self._magic_t)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set.  A call to errno() will return the numeric error code.
        """
        return Magic._tostr(_file(self._magic_t, Magic._tobytes(filename)))

    def descriptor(self, fd):
        """
        Like the file method, but the argument is a file descriptor.
        """
        return Magic._tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic._tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic._tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic._tobytes(filename, none_ok=True))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        Returns 0 on success and -1 on failure.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.
        """
        return _compile(self._magic_t, Magic._tobytes(dbs, none_ok=True))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.
        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic._tobytes(dbs, none_ok=True))

    def list(self, dbs=None):
        """
        Dump the magic entries in the colon separated list of database
        files passed as argument or the default database file if no
        argument, in a human readable format.
        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic._tobytes(dbs, none_ok=True))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. Use the errno module or os.strerror() to obtain
        detailed error information.
        """
        return _errno(self._magic_t)
225b6cee71dSXin LI
226b6cee71dSXin LI
def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    ms = _open(flags)
    if not ms:
        # A NULL magic_t comes back as a falsy ctypes pointer; report the
        # failure as documented instead of wrapping an unusable handle.
        return None
    return Magic(ms)
233*3e41d09dSXin LI
234*3e41d09dSXin LI
def _loaded_magic(flags):
    """
    Opens a magic handle with C{flags}, calls load() on it with no
    argument and returns the wrapping Magic object.
    """
    handle = Magic(_open(flags))
    handle.load()
    return handle


# Objects used by `detect_from_` functions
mime_magic = _loaded_magic(MAGIC_MIME)
none_magic = _loaded_magic(MAGIC_NONE)
240*3e41d09dSXin LI
241*3e41d09dSXin LI
def _create_filemagic(mime_detected, type_detected):
    '''Build a `FileMagic` from a MIME description and a type description

    `mime_detected` must have the form ``<mime type>; <encoding>``; a
    leading ``charset=`` is removed from the encoding part.
    `type_detected` is stored unchanged as `name`.

    Raises `ValueError` if `mime_detected` is None or does not consist of
    exactly two parts separated by ``'; '``.
    '''
    if mime_detected is None:
        raise ValueError('no MIME description available')

    parts = mime_detected.split('; ')
    if len(parts) != 2:
        # Surface the text that was actually received rather than an
        # opaque tuple-unpacking error.
        raise ValueError(mime_detected)
    mime_type, mime_encoding = parts

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))
247*3e41d09dSXin LI
248*3e41d09dSXin LI
def detect_from_filename(filename):
    '''Identify the file stored at `filename`

    The path is examined first by `mime_magic` and then by `none_magic`;
    both answers are combined into the `FileMagic` namedtuple that is
    returned.
    '''
    mime_detected = mime_magic.file(filename)
    type_detected = none_magic.file(filename)
    return _create_filemagic(mime_detected, type_detected)
257*3e41d09dSXin LI
258*3e41d09dSXin LI
def detect_from_fobj(fobj):
    '''Identify the contents behind a file-like object

    `fobj` must provide `fileno()`; the descriptor it returns is examined
    first by `mime_magic` and then by `none_magic`, and both answers are
    combined into the `FileMagic` namedtuple that is returned.
    '''
    fd = fobj.fileno()
    mime_detected = mime_magic.descriptor(fd)
    type_detected = none_magic.descriptor(fd)
    return _create_filemagic(mime_detected, type_detected)
268*3e41d09dSXin LI
269*3e41d09dSXin LI
def detect_from_content(byte_content):
    '''Identify in-memory data given as bytes

    `byte_content` is examined first by `mime_magic` and then by
    `none_magic`; both answers are combined into the `FileMagic`
    namedtuple that is returned.
    '''
    mime_detected = mime_magic.buffer(byte_content)
    type_detected = none_magic.buffer(byte_content)
    return _create_filemagic(mime_detected, type_detected)
278