10b57cec5SDimitry Andric //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "llvm/BinaryFormat/Magic.h"
105ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h"
115ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h"
120b57cec5SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
130b57cec5SDimitry Andric #include "llvm/BinaryFormat/MachO.h"
140b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
150b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
160b57cec5SDimitry Andric
170b57cec5SDimitry Andric #if !defined(_MSC_VER) && !defined(__MINGW32__)
180b57cec5SDimitry Andric #include <unistd.h>
190b57cec5SDimitry Andric #else
200b57cec5SDimitry Andric #include <io.h>
210b57cec5SDimitry Andric #endif
220b57cec5SDimitry Andric
230b57cec5SDimitry Andric using namespace llvm;
240b57cec5SDimitry Andric using namespace llvm::support::endian;
250b57cec5SDimitry Andric using namespace llvm::sys::fs;
260b57cec5SDimitry Andric
270b57cec5SDimitry Andric template <size_t N>
startswith(StringRef Magic,const char (& S)[N])280b57cec5SDimitry Andric static bool startswith(StringRef Magic, const char (&S)[N]) {
295f757f3fSDimitry Andric return Magic.starts_with(StringRef(S, N - 1));
300b57cec5SDimitry Andric }
310b57cec5SDimitry Andric
320b57cec5SDimitry Andric /// Identify the magic in magic.
identify_magic(StringRef Magic)330b57cec5SDimitry Andric file_magic llvm::identify_magic(StringRef Magic) {
340b57cec5SDimitry Andric if (Magic.size() < 4)
350b57cec5SDimitry Andric return file_magic::unknown;
360b57cec5SDimitry Andric switch ((unsigned char)Magic[0]) {
370b57cec5SDimitry Andric case 0x00: {
380b57cec5SDimitry Andric // COFF bigobj, CL.exe's LTO object file, or short import library file
390b57cec5SDimitry Andric if (startswith(Magic, "\0\0\xFF\xFF")) {
400b57cec5SDimitry Andric size_t MinSize =
410b57cec5SDimitry Andric offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
420b57cec5SDimitry Andric if (Magic.size() < MinSize)
430b57cec5SDimitry Andric return file_magic::coff_import_library;
440b57cec5SDimitry Andric
450b57cec5SDimitry Andric const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
460b57cec5SDimitry Andric if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
470b57cec5SDimitry Andric return file_magic::coff_object;
480b57cec5SDimitry Andric if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
490b57cec5SDimitry Andric return file_magic::coff_cl_gl_object;
500b57cec5SDimitry Andric return file_magic::coff_import_library;
510b57cec5SDimitry Andric }
520b57cec5SDimitry Andric // Windows resource file
530b57cec5SDimitry Andric if (Magic.size() >= sizeof(COFF::WinResMagic) &&
540b57cec5SDimitry Andric memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
550b57cec5SDimitry Andric return file_magic::windows_resource;
560b57cec5SDimitry Andric // 0x0000 = COFF unknown machine type
570b57cec5SDimitry Andric if (Magic[1] == 0)
580b57cec5SDimitry Andric return file_magic::coff_object;
590b57cec5SDimitry Andric if (startswith(Magic, "\0asm"))
600b57cec5SDimitry Andric return file_magic::wasm_object;
610b57cec5SDimitry Andric break;
620b57cec5SDimitry Andric }
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric case 0x01:
650b57cec5SDimitry Andric // XCOFF format
660b57cec5SDimitry Andric if (startswith(Magic, "\x01\xDF"))
670b57cec5SDimitry Andric return file_magic::xcoff_object_32;
680b57cec5SDimitry Andric if (startswith(Magic, "\x01\xF7"))
690b57cec5SDimitry Andric return file_magic::xcoff_object_64;
700b57cec5SDimitry Andric break;
710b57cec5SDimitry Andric
72fe6060f1SDimitry Andric case 0x03:
73fe6060f1SDimitry Andric if (startswith(Magic, "\x03\xF0\x00"))
74fe6060f1SDimitry Andric return file_magic::goff_object;
755f757f3fSDimitry Andric // SPIR-V format in little-endian mode.
765f757f3fSDimitry Andric if (startswith(Magic, "\x03\x02\x23\x07"))
775f757f3fSDimitry Andric return file_magic::spirv_object;
785f757f3fSDimitry Andric break;
795f757f3fSDimitry Andric
805f757f3fSDimitry Andric case 0x07: // SPIR-V format in big-endian mode.
815f757f3fSDimitry Andric if (startswith(Magic, "\x07\x23\x02\x03"))
825f757f3fSDimitry Andric return file_magic::spirv_object;
83fe6060f1SDimitry Andric break;
84fe6060f1SDimitry Andric
8581ad6265SDimitry Andric case 0x10:
8681ad6265SDimitry Andric if (startswith(Magic, "\x10\xFF\x10\xAD"))
8781ad6265SDimitry Andric return file_magic::offload_binary;
8881ad6265SDimitry Andric break;
8981ad6265SDimitry Andric
900b57cec5SDimitry Andric case 0xDE: // 0x0B17C0DE = BC wraper
910b57cec5SDimitry Andric if (startswith(Magic, "\xDE\xC0\x17\x0B"))
920b57cec5SDimitry Andric return file_magic::bitcode;
930b57cec5SDimitry Andric break;
940b57cec5SDimitry Andric case 'B':
950b57cec5SDimitry Andric if (startswith(Magic, "BC\xC0\xDE"))
960b57cec5SDimitry Andric return file_magic::bitcode;
970b57cec5SDimitry Andric break;
985f757f3fSDimitry Andric case 'C':
995f757f3fSDimitry Andric if (startswith(Magic, "CCOB"))
1005f757f3fSDimitry Andric return file_magic::offload_bundle_compressed;
101*7a6dacacSDimitry Andric if (startswith(Magic, "CPCH"))
102*7a6dacacSDimitry Andric return file_magic::clang_ast;
1035f757f3fSDimitry Andric break;
1040b57cec5SDimitry Andric case '!':
1050b57cec5SDimitry Andric if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
1060b57cec5SDimitry Andric return file_magic::archive;
1070b57cec5SDimitry Andric break;
10804eeddc0SDimitry Andric case '<':
10904eeddc0SDimitry Andric if (startswith(Magic, "<bigaf>\n"))
11004eeddc0SDimitry Andric return file_magic::archive;
11104eeddc0SDimitry Andric break;
1120b57cec5SDimitry Andric case '\177':
1130b57cec5SDimitry Andric if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
1140b57cec5SDimitry Andric bool Data2MSB = Magic[5] == 2;
1150b57cec5SDimitry Andric unsigned high = Data2MSB ? 16 : 17;
1160b57cec5SDimitry Andric unsigned low = Data2MSB ? 17 : 16;
1170b57cec5SDimitry Andric if (Magic[high] == 0) {
1180b57cec5SDimitry Andric switch (Magic[low]) {
1190b57cec5SDimitry Andric default:
1200b57cec5SDimitry Andric return file_magic::elf;
1210b57cec5SDimitry Andric case 1:
1220b57cec5SDimitry Andric return file_magic::elf_relocatable;
1230b57cec5SDimitry Andric case 2:
1240b57cec5SDimitry Andric return file_magic::elf_executable;
1250b57cec5SDimitry Andric case 3:
1260b57cec5SDimitry Andric return file_magic::elf_shared_object;
1270b57cec5SDimitry Andric case 4:
1280b57cec5SDimitry Andric return file_magic::elf_core;
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric // It's still some type of ELF file.
1320b57cec5SDimitry Andric return file_magic::elf;
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric break;
1350b57cec5SDimitry Andric
1360b57cec5SDimitry Andric case 0xCA:
1370b57cec5SDimitry Andric if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
1380b57cec5SDimitry Andric startswith(Magic, "\xCA\xFE\xBA\xBF")) {
1390b57cec5SDimitry Andric // This is complicated by an overlap with Java class files.
1400b57cec5SDimitry Andric // See the Mach-O section in /usr/share/file/magic for details.
1410b57cec5SDimitry Andric if (Magic.size() >= 8 && Magic[7] < 43)
1420b57cec5SDimitry Andric return file_magic::macho_universal_binary;
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric break;
1450b57cec5SDimitry Andric
1460b57cec5SDimitry Andric // The two magic numbers for mach-o are:
1470b57cec5SDimitry Andric // 0xfeedface - 32-bit mach-o
1480b57cec5SDimitry Andric // 0xfeedfacf - 64-bit mach-o
1490b57cec5SDimitry Andric case 0xFE:
1500b57cec5SDimitry Andric case 0xCE:
1510b57cec5SDimitry Andric case 0xCF: {
1520b57cec5SDimitry Andric uint16_t type = 0;
1530b57cec5SDimitry Andric if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
1540b57cec5SDimitry Andric startswith(Magic, "\xFE\xED\xFA\xCF")) {
1550b57cec5SDimitry Andric /* Native endian */
1560b57cec5SDimitry Andric size_t MinSize;
1570b57cec5SDimitry Andric if (Magic[3] == char(0xCE))
1580b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header);
1590b57cec5SDimitry Andric else
1600b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header_64);
1610b57cec5SDimitry Andric if (Magic.size() >= MinSize)
1620b57cec5SDimitry Andric type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
1630b57cec5SDimitry Andric } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
1640b57cec5SDimitry Andric startswith(Magic, "\xCF\xFA\xED\xFE")) {
1650b57cec5SDimitry Andric /* Reverse endian */
1660b57cec5SDimitry Andric size_t MinSize;
1670b57cec5SDimitry Andric if (Magic[0] == char(0xCE))
1680b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header);
1690b57cec5SDimitry Andric else
1700b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header_64);
1710b57cec5SDimitry Andric if (Magic.size() >= MinSize)
1720b57cec5SDimitry Andric type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric switch (type) {
1750b57cec5SDimitry Andric default:
1760b57cec5SDimitry Andric break;
1770b57cec5SDimitry Andric case 1:
1780b57cec5SDimitry Andric return file_magic::macho_object;
1790b57cec5SDimitry Andric case 2:
1800b57cec5SDimitry Andric return file_magic::macho_executable;
1810b57cec5SDimitry Andric case 3:
1820b57cec5SDimitry Andric return file_magic::macho_fixed_virtual_memory_shared_lib;
1830b57cec5SDimitry Andric case 4:
1840b57cec5SDimitry Andric return file_magic::macho_core;
1850b57cec5SDimitry Andric case 5:
1860b57cec5SDimitry Andric return file_magic::macho_preload_executable;
1870b57cec5SDimitry Andric case 6:
1880b57cec5SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib;
1890b57cec5SDimitry Andric case 7:
1900b57cec5SDimitry Andric return file_magic::macho_dynamic_linker;
1910b57cec5SDimitry Andric case 8:
1920b57cec5SDimitry Andric return file_magic::macho_bundle;
1930b57cec5SDimitry Andric case 9:
1940b57cec5SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib_stub;
1950b57cec5SDimitry Andric case 10:
1960b57cec5SDimitry Andric return file_magic::macho_dsym_companion;
1970b57cec5SDimitry Andric case 11:
1980b57cec5SDimitry Andric return file_magic::macho_kext_bundle;
199bdd1243dSDimitry Andric case 12:
200bdd1243dSDimitry Andric return file_magic::macho_file_set;
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric break;
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric case 0xF0: // PowerPC Windows
2050b57cec5SDimitry Andric case 0x83: // Alpha 32-bit
2060b57cec5SDimitry Andric case 0x84: // Alpha 64-bit
2070b57cec5SDimitry Andric case 0x66: // MPS R4000 Windows
2080b57cec5SDimitry Andric case 0x50: // mc68K
20981ad6265SDimitry Andric if (startswith(Magic, "\x50\xed\x55\xba"))
21081ad6265SDimitry Andric return file_magic::cuda_fatbinary;
211bdd1243dSDimitry Andric [[fallthrough]];
21281ad6265SDimitry Andric
2130b57cec5SDimitry Andric case 0x4c: // 80386 Windows
2140b57cec5SDimitry Andric case 0xc4: // ARMNT Windows
2150b57cec5SDimitry Andric if (Magic[1] == 0x01)
2160b57cec5SDimitry Andric return file_magic::coff_object;
217bdd1243dSDimitry Andric [[fallthrough]];
2180b57cec5SDimitry Andric
2190b57cec5SDimitry Andric case 0x90: // PA-RISC Windows
2200b57cec5SDimitry Andric case 0x68: // mc68K Windows
2210b57cec5SDimitry Andric if (Magic[1] == 0x02)
2220b57cec5SDimitry Andric return file_magic::coff_object;
2230b57cec5SDimitry Andric break;
2240b57cec5SDimitry Andric
2250b57cec5SDimitry Andric case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
2260b57cec5SDimitry Andric // Minidump file.
2270b57cec5SDimitry Andric if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
2280b57cec5SDimitry Andric uint32_t off = read32le(Magic.data() + 0x3c);
2290b57cec5SDimitry Andric // PE/COFF file, either EXE or DLL.
2305f757f3fSDimitry Andric if (Magic.substr(off).starts_with(
2310b57cec5SDimitry Andric StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
2320b57cec5SDimitry Andric return file_magic::pecoff_executable;
2330b57cec5SDimitry Andric }
2345f757f3fSDimitry Andric if (Magic.starts_with("Microsoft C/C++ MSF 7.00\r\n"))
2350b57cec5SDimitry Andric return file_magic::pdb;
2360b57cec5SDimitry Andric if (startswith(Magic, "MDMP"))
2370b57cec5SDimitry Andric return file_magic::minidump;
2380b57cec5SDimitry Andric break;
2390b57cec5SDimitry Andric
2400b57cec5SDimitry Andric case 0x64: // x86-64 or ARM64 Windows.
2410b57cec5SDimitry Andric if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
2420b57cec5SDimitry Andric return file_magic::coff_object;
2430b57cec5SDimitry Andric break;
2440b57cec5SDimitry Andric
24506c3fb27SDimitry Andric case 0x2d: // YAML '-' MachO TBD.
2468bcb0991SDimitry Andric if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
2478bcb0991SDimitry Andric return file_magic::tapi_file;
2488bcb0991SDimitry Andric break;
24906c3fb27SDimitry Andric case 0x7b: // JSON '{' MachO TBD.
25006c3fb27SDimitry Andric return file_magic::tapi_file;
25106c3fb27SDimitry Andric break;
2528bcb0991SDimitry Andric
25381ad6265SDimitry Andric case 'D': // DirectX container file - DXBC
25481ad6265SDimitry Andric if (startswith(Magic, "DXBC"))
25581ad6265SDimitry Andric return file_magic::dxcontainer_object;
25681ad6265SDimitry Andric break;
25781ad6265SDimitry Andric
258bdd1243dSDimitry Andric case 0x41: // ARM64EC windows
259bdd1243dSDimitry Andric if (Magic[1] == char(0xA6))
260bdd1243dSDimitry Andric return file_magic::coff_object;
261bdd1243dSDimitry Andric break;
262bdd1243dSDimitry Andric
26306c3fb27SDimitry Andric case 0x4e: // ARM64X windows
26406c3fb27SDimitry Andric if (Magic[1] == char(0xA6))
26506c3fb27SDimitry Andric return file_magic::coff_object;
26606c3fb27SDimitry Andric break;
26706c3fb27SDimitry Andric
2685f757f3fSDimitry Andric case '_': {
2695f757f3fSDimitry Andric const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
2705f757f3fSDimitry Andric if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
2715f757f3fSDimitry Andric return file_magic::offload_bundle;
2725f757f3fSDimitry Andric break;
2735f757f3fSDimitry Andric }
2745f757f3fSDimitry Andric
2750b57cec5SDimitry Andric default:
2760b57cec5SDimitry Andric break;
2770b57cec5SDimitry Andric }
2780b57cec5SDimitry Andric return file_magic::unknown;
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric
identify_magic(const Twine & Path,file_magic & Result)2810b57cec5SDimitry Andric std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
282fe6060f1SDimitry Andric auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
283fe6060f1SDimitry Andric /*RequiresNullTerminator=*/false);
2840b57cec5SDimitry Andric if (!FileOrError)
2850b57cec5SDimitry Andric return FileOrError.getError();
2860b57cec5SDimitry Andric
2870b57cec5SDimitry Andric std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
2880b57cec5SDimitry Andric Result = identify_magic(FileBuffer->getBuffer());
2890b57cec5SDimitry Andric
2900b57cec5SDimitry Andric return std::error_code();
2910b57cec5SDimitry Andric }
292