xref: /netbsd-src/external/bsd/file/dist/python/magic.py (revision a77ebd868432a4d7e595fb7709cfc1b8f144789b)
1ae9cfef6Schristos# coding: utf-8
2ae9cfef6Schristos
35ccaa8c0Schristos'''
45ccaa8c0SchristosPython bindings for libmagic
55ccaa8c0Schristos'''
65ccaa8c0Schristos
75ccaa8c0Schristosimport ctypes
8*a77ebd86Schristosimport threading
95ccaa8c0Schristos
10ae9cfef6Schristosfrom collections import namedtuple
11ae9cfef6Schristos
125ccaa8c0Schristosfrom ctypes import *
135ccaa8c0Schristosfrom ctypes.util import find_library
145ccaa8c0Schristos
15012fd133Schristos
def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance

    Raises ImportError if no libmagic shared library can be located.
    """
    path = find_library('magic')
    if path is None:
        # find_library() reports "not found" as None.  Passing that on to
        # LoadLibrary() does not reliably fail (on POSIX it typically yields
        # a handle to the running process), so the problem would only show
        # up later as an obscure AttributeError on the first symbol lookup.
        raise ImportError('failed to find the libmagic shared library')
    return ctypes.cdll.LoadLibrary(path)
225ccaa8c0Schristos
# Loaded shared library handles, keyed by short library name.
_libraries = {'magic': _init()}
255ccaa8c0Schristos
# Flag constants for open and setflags.  Each one is bound twice: under its
# MAGIC_-prefixed name and under the same name without the prefix.
MAGIC_NONE = NONE = 0x000000
MAGIC_DEBUG = DEBUG = 0x000001
MAGIC_SYMLINK = SYMLINK = 0x000002
MAGIC_COMPRESS = COMPRESS = 0x000004
MAGIC_DEVICES = DEVICES = 0x000008
MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
MAGIC_CONTINUE = CONTINUE = 0x000020
MAGIC_CHECK = CHECK = 0x000040
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
MAGIC_RAW = RAW = 0x000100
MAGIC_ERROR = ERROR = 0x000200
MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
MAGIC_MIME = MIME = MIME_TYPE | MIME_ENCODING
MAGIC_APPLE = APPLE = 0x000800

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000

# Combined mask: every NO_CHECK_* bit above except NO_CHECK_SOFT, plus bit
# 0x080000, which has no named constant in this module.
MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000
535ccaa8c0Schristos
# Parameter selectors accepted by Magic.getparam() and Magic.setparam(),
# numbered consecutively from zero.
(PARAM_INDIR_MAX,
 PARAM_NAME_MAX,
 PARAM_ELF_PHNUM_MAX,
 PARAM_ELF_SHNUM_MAX,
 PARAM_ELF_NOTES_MAX,
 PARAM_REGEX_MAX,
 PARAM_BYTES_MAX) = range(7)

# The same selectors under their MAGIC_-prefixed names.
MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX
MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX
MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX
MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX
MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX
MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX
MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX
6125f16eeaSchristos
# Result record handed out by the detect_from_* functions.
FileMagic = namedtuple('FileMagic', ['mime_type', 'encoding', 'name'])
63ae9cfef6Schristos
64012fd133Schristos
class magic_set(Structure):
    """
    Opaque libmagic state; no fields are declared because it is only ever
    handled through pointers.
    """
    _fields_ = []


# Pointer type used for the handle argument of every libmagic call.
magic_t = POINTER(magic_set)
695ccaa8c0Schristos
def _prototype(symbol, restype, *argtypes):
    """
    Looks up symbol in the loaded magic library, declares its C return and
    argument types, and returns the configured foreign function.
    """
    function = getattr(_libraries['magic'], symbol)
    function.restype = restype
    function.argtypes = list(argtypes)
    return function


# Handle lifetime.
_open = _prototype('magic_open', magic_t, c_int)
_close = _prototype('magic_close', None, magic_t)

# Queries; each returns a C string.
_file = _prototype('magic_file', c_char_p, magic_t, c_char_p)
_descriptor = _prototype('magic_descriptor', c_char_p, magic_t, c_int)
_buffer = _prototype('magic_buffer', c_char_p, magic_t, c_void_p, c_size_t)
_error = _prototype('magic_error', c_char_p, magic_t)

# Configuration and database handling; each returns an int.
_setflags = _prototype('magic_setflags', c_int, magic_t, c_int)
_load = _prototype('magic_load', c_int, magic_t, c_char_p)
_compile = _prototype('magic_compile', c_int, magic_t, c_char_p)
_check = _prototype('magic_check', c_int, magic_t, c_char_p)
_list = _prototype('magic_list', c_int, magic_t, c_char_p)
_errno = _prototype('magic_errno', c_int, magic_t)

# Parameter access; the value is exchanged through an untyped pointer.
_getparam = _prototype('magic_getparam', c_int, magic_t, c_int, c_void_p)
_setparam = _prototype('magic_setparam', c_int, magic_t, c_int, c_void_p)
12525f16eeaSchristos
126012fd133Schristos
class Magic(object):
    """
    Object-oriented wrapper around a libmagic handle (magic_t), normally
    obtained through the module-level open() function.
    """

    def __init__(self, ms):
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.

        Calling close() on an already closed object does nothing.
        """
        # Drop our reference before releasing it so that a second close()
        # can never hand the same handle to libmagic twice.
        ms, self._magic_t = self._magic_t, None
        if ms is not None:
            _close(ms)

    @staticmethod
    def __tostr(s):
        """
        Converts a libmagic result to str: None and str pass through,
        anything else is decoded as UTF-8.
        """
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        """
        Converts an argument to bytes: None and bytes pass through, text is
        encoded as UTF-8 and other objects go through bytes().
        """
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.

        Text is encoded as UTF-8 before it is examined.
        """
        # The C parameter is an untyped pointer plus a byte count.  Normalise
        # to bytes first so the pointer and the length always describe the
        # same data; a str would otherwise be passed as a wide-character
        # pointer together with its character count.
        data = Magic.__tobytes(buf)
        return Magic.__tostr(_buffer(self._magic_t, data, len(data)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs=None):
        """
        Dump all magic entries in a human readable format, taken from the
        colon separated list of database files passed as argument or the
        default database file if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic(3) documents every parameter as a size_t, so the output
        # slot must be that wide; a c_int would be overrun on LP64 platforms.
        v = c_size_t()
        i = _getparam(self._magic_t, param, byref(v))
        if i == -1:
            return -1
        return v.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # Same width requirement as in getparam(): libmagic reads a size_t.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))
26825f16eeaSchristos
269012fd133Schristos
def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    ms = _open(flags)
    # _open is declared with a pointer restype, so a failed magic_open()
    # comes back as a NULL pointer object -- which is falsy -- and never as
    # None.  Test truthiness, otherwise failure would go unnoticed here.
    if not ms:
        return None
    return Magic(ms)
279ae9cfef6Schristos
280ae9cfef6Schristos
281ae9cfef6Schristos# Objects used by `detect_from_` functions
class error(Exception):
    """Raised when a magic handle cannot be opened or its database loaded."""
284ae9cfef6Schristos
class MagicDetect(object):
    """
    Pair of loaded magic handles used by the detect_from_* functions:
    mime_magic is opened with MAGIC_MIME and none_magic with MAGIC_NONE.
    """

    def __init__(self):
        """
        Opens both handles and loads the default database into each.

        Raises error if either handle cannot be opened or loaded; nothing
        is left open in that case.
        """
        # Bind both attributes first so that __del__ stays safe even when
        # construction fails half-way through.
        self.mime_magic = None
        self.none_magic = None
        try:
            self.mime_magic = self._open_loaded(MAGIC_MIME)
            self.none_magic = self._open_loaded(MAGIC_NONE)
        except error:
            self._release()
            raise

    @staticmethod
    def _open_loaded(flags):
        """Returns a handle opened with flags and loaded with the default
        database, or raises error."""
        handle = open(flags)
        if handle is None:
            raise error('cannot open a magic handle (flags=%d)' % flags)
        if handle.load() == -1:
            # Fetch the explanation before the handle goes away.
            message = handle.error()
            handle.close()
            raise error(message or 'cannot load the default magic database')
        return handle

    def _release(self):
        """Closes whichever handles are currently open, each at most once."""
        for attr in ('mime_magic', 'none_magic'):
            handle = getattr(self, attr, None)
            if handle is not None:
                setattr(self, attr, None)
                handle.close()

    def __del__(self):
        self._release()
311*a77ebd86Schristos
# Per-thread storage for the MagicDetect instance of each thread.
threadlocal = threading.local()


def _detect_make():
    """
    Returns the MagicDetect instance stored for the calling thread, creating
    and storing one first if there is none yet.
    """
    instance = getattr(threadlocal, "magic_instance", None)
    if instance is not None:
        return instance
    instance = MagicDetect()
    threadlocal.magic_instance = instance
    return instance
320ae9cfef6Schristos
def _create_filemagic(mime_detected, type_detected):
    """
    Builds a FileMagic tuple from a MIME description and a type description.

    mime_detected must look like 'type/subtype; charset=name'; the part
    before '; ' becomes mime_type and the rest, with 'charset=' removed,
    becomes encoding. type_detected is stored unchanged as name.

    Raises ValueError if mime_detected is None or does not consist of
    exactly two '; '-separated parts; in the latter case the offending
    text is the exception argument.
    """
    if mime_detected is None:
        # The query produced no result at all; report it the same way as
        # any other unusable MIME description instead of failing on .split.
        raise ValueError('no MIME description available')
    try:
        mime_type, mime_encoding = mime_detected.split('; ')
    except ValueError:
        raise ValueError(mime_detected)

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))
329ae9cfef6Schristos
330ae9cfef6Schristos
def detect_from_filename(filename):
    '''Identify the file stored under the given name

    Returns a `FileMagic` namedtuple holding its mime type, encoding and
    file type.
    '''
    detector = _detect_make()
    mime_detected = detector.mime_magic.file(filename)
    type_detected = detector.none_magic.file(filename)
    return _create_filemagic(mime_detected, type_detected)
339ae9cfef6Schristos
340ae9cfef6Schristos
def detect_from_fobj(fobj):
    '''Identify the file behind a file-like object

    The object must provide fileno(); detection runs on that descriptor.
    Returns a `FileMagic` namedtuple holding its mime type, encoding and
    file type.
    '''
    fd = fobj.fileno()
    detector = _detect_make()
    mime_detected = detector.mime_magic.descriptor(fd)
    type_detected = detector.none_magic.descriptor(fd)
    return _create_filemagic(mime_detected, type_detected)
351ae9cfef6Schristos
352ae9cfef6Schristos
def detect_from_content(byte_content):
    '''Identify data that is already in memory as bytes

    Returns a `FileMagic` namedtuple holding its mime type, encoding and
    file type.
    '''
    detector = _detect_make()
    mime_detected = detector.mime_magic.buffer(byte_content)
    type_detected = detector.none_magic.buffer(byte_content)
    return _create_filemagic(mime_detected, type_detected)
362