1264b5d9eSZachary Turner //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2264b5d9eSZachary Turner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6264b5d9eSZachary Turner //
7264b5d9eSZachary Turner //===----------------------------------------------------------------------===//
8264b5d9eSZachary Turner
9264b5d9eSZachary Turner #include "llvm/BinaryFormat/Magic.h"
1070d77b5fSSimon Pilgrim #include "llvm/ADT/StringRef.h"
1170d77b5fSSimon Pilgrim #include "llvm/ADT/Twine.h"
12264b5d9eSZachary Turner #include "llvm/BinaryFormat/COFF.h"
13264b5d9eSZachary Turner #include "llvm/BinaryFormat/MachO.h"
14264b5d9eSZachary Turner #include "llvm/Support/Endian.h"
159899b5feSZachary Turner #include "llvm/Support/MemoryBuffer.h"
16264b5d9eSZachary Turner
17264b5d9eSZachary Turner #if !defined(_MSC_VER) && !defined(__MINGW32__)
18264b5d9eSZachary Turner #include <unistd.h>
19264b5d9eSZachary Turner #else
20264b5d9eSZachary Turner #include <io.h>
21264b5d9eSZachary Turner #endif
22264b5d9eSZachary Turner
23264b5d9eSZachary Turner using namespace llvm;
24264b5d9eSZachary Turner using namespace llvm::support::endian;
25264b5d9eSZachary Turner using namespace llvm::sys::fs;
26264b5d9eSZachary Turner
27264b5d9eSZachary Turner template <size_t N>
startswith(StringRef Magic,const char (& S)[N])28264b5d9eSZachary Turner static bool startswith(StringRef Magic, const char (&S)[N]) {
29586ecdf2SKazu Hirata return Magic.starts_with(StringRef(S, N - 1));
30264b5d9eSZachary Turner }
31264b5d9eSZachary Turner
324dfcc4a7SAdrian Prantl /// Identify the magic in magic.
identify_magic(StringRef Magic)33264b5d9eSZachary Turner file_magic llvm::identify_magic(StringRef Magic) {
34264b5d9eSZachary Turner if (Magic.size() < 4)
35264b5d9eSZachary Turner return file_magic::unknown;
36264b5d9eSZachary Turner switch ((unsigned char)Magic[0]) {
37264b5d9eSZachary Turner case 0x00: {
38264b5d9eSZachary Turner // COFF bigobj, CL.exe's LTO object file, or short import library file
39264b5d9eSZachary Turner if (startswith(Magic, "\0\0\xFF\xFF")) {
40264b5d9eSZachary Turner size_t MinSize =
41264b5d9eSZachary Turner offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
42264b5d9eSZachary Turner if (Magic.size() < MinSize)
43264b5d9eSZachary Turner return file_magic::coff_import_library;
44264b5d9eSZachary Turner
45264b5d9eSZachary Turner const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
46264b5d9eSZachary Turner if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
47264b5d9eSZachary Turner return file_magic::coff_object;
48264b5d9eSZachary Turner if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
49264b5d9eSZachary Turner return file_magic::coff_cl_gl_object;
50264b5d9eSZachary Turner return file_magic::coff_import_library;
51264b5d9eSZachary Turner }
52264b5d9eSZachary Turner // Windows resource file
53c8dba240SEric Beckmann if (Magic.size() >= sizeof(COFF::WinResMagic) &&
54c8dba240SEric Beckmann memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
55264b5d9eSZachary Turner return file_magic::windows_resource;
56264b5d9eSZachary Turner // 0x0000 = COFF unknown machine type
57264b5d9eSZachary Turner if (Magic[1] == 0)
58264b5d9eSZachary Turner return file_magic::coff_object;
59264b5d9eSZachary Turner if (startswith(Magic, "\0asm"))
60264b5d9eSZachary Turner return file_magic::wasm_object;
61264b5d9eSZachary Turner break;
62264b5d9eSZachary Turner }
63ab2eb2bfSHubert Tong
64ab2eb2bfSHubert Tong case 0x01:
65ab2eb2bfSHubert Tong // XCOFF format
66ab2eb2bfSHubert Tong if (startswith(Magic, "\x01\xDF"))
67ab2eb2bfSHubert Tong return file_magic::xcoff_object_32;
68837ae69fSSean Fertile if (startswith(Magic, "\x01\xF7"))
69837ae69fSSean Fertile return file_magic::xcoff_object_64;
70ab2eb2bfSHubert Tong break;
71ab2eb2bfSHubert Tong
720977f31cSAnirudh Prasad case 0x03:
730977f31cSAnirudh Prasad if (startswith(Magic, "\x03\xF0\x00"))
740977f31cSAnirudh Prasad return file_magic::goff_object;
75edc83886SJoseph Huber // SPIR-V format in little-endian mode.
76edc83886SJoseph Huber if (startswith(Magic, "\x03\x02\x23\x07"))
77edc83886SJoseph Huber return file_magic::spirv_object;
78edc83886SJoseph Huber break;
79edc83886SJoseph Huber
80edc83886SJoseph Huber case 0x07: // SPIR-V format in big-endian mode.
81edc83886SJoseph Huber if (startswith(Magic, "\x07\x23\x02\x03"))
82edc83886SJoseph Huber return file_magic::spirv_object;
830977f31cSAnirudh Prasad break;
840977f31cSAnirudh Prasad
85afd2f7e9SJoseph Huber case 0x10:
86afd2f7e9SJoseph Huber if (startswith(Magic, "\x10\xFF\x10\xAD"))
87afd2f7e9SJoseph Huber return file_magic::offload_binary;
88afd2f7e9SJoseph Huber break;
89afd2f7e9SJoseph Huber
90264b5d9eSZachary Turner case 0xDE: // 0x0B17C0DE = BC wraper
91264b5d9eSZachary Turner if (startswith(Magic, "\xDE\xC0\x17\x0B"))
92264b5d9eSZachary Turner return file_magic::bitcode;
93264b5d9eSZachary Turner break;
94264b5d9eSZachary Turner case 'B':
95264b5d9eSZachary Turner if (startswith(Magic, "BC\xC0\xDE"))
96264b5d9eSZachary Turner return file_magic::bitcode;
97264b5d9eSZachary Turner break;
987e282343SYaxun (Sam) Liu case 'C':
997e282343SYaxun (Sam) Liu if (startswith(Magic, "CCOB"))
1007e282343SYaxun (Sam) Liu return file_magic::offload_bundle_compressed;
101*894c2240SMichael Spencer if (startswith(Magic, "CPCH"))
102*894c2240SMichael Spencer return file_magic::clang_ast;
1037e282343SYaxun (Sam) Liu break;
104264b5d9eSZachary Turner case '!':
105264b5d9eSZachary Turner if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
106264b5d9eSZachary Turner return file_magic::archive;
107264b5d9eSZachary Turner break;
1083062a146Szhijian case '<':
1093062a146Szhijian if (startswith(Magic, "<bigaf>\n"))
1103062a146Szhijian return file_magic::archive;
1113062a146Szhijian break;
112264b5d9eSZachary Turner case '\177':
113264b5d9eSZachary Turner if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
114264b5d9eSZachary Turner bool Data2MSB = Magic[5] == 2;
115264b5d9eSZachary Turner unsigned high = Data2MSB ? 16 : 17;
116264b5d9eSZachary Turner unsigned low = Data2MSB ? 17 : 16;
117264b5d9eSZachary Turner if (Magic[high] == 0) {
118264b5d9eSZachary Turner switch (Magic[low]) {
119264b5d9eSZachary Turner default:
120264b5d9eSZachary Turner return file_magic::elf;
121264b5d9eSZachary Turner case 1:
122264b5d9eSZachary Turner return file_magic::elf_relocatable;
123264b5d9eSZachary Turner case 2:
124264b5d9eSZachary Turner return file_magic::elf_executable;
125264b5d9eSZachary Turner case 3:
126264b5d9eSZachary Turner return file_magic::elf_shared_object;
127264b5d9eSZachary Turner case 4:
128264b5d9eSZachary Turner return file_magic::elf_core;
129264b5d9eSZachary Turner }
130264b5d9eSZachary Turner }
131264b5d9eSZachary Turner // It's still some type of ELF file.
132264b5d9eSZachary Turner return file_magic::elf;
133264b5d9eSZachary Turner }
134264b5d9eSZachary Turner break;
135264b5d9eSZachary Turner
136264b5d9eSZachary Turner case 0xCA:
137264b5d9eSZachary Turner if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
138264b5d9eSZachary Turner startswith(Magic, "\xCA\xFE\xBA\xBF")) {
139264b5d9eSZachary Turner // This is complicated by an overlap with Java class files.
140264b5d9eSZachary Turner // See the Mach-O section in /usr/share/file/magic for details.
141264b5d9eSZachary Turner if (Magic.size() >= 8 && Magic[7] < 43)
142264b5d9eSZachary Turner return file_magic::macho_universal_binary;
143264b5d9eSZachary Turner }
144264b5d9eSZachary Turner break;
145264b5d9eSZachary Turner
146264b5d9eSZachary Turner // The two magic numbers for mach-o are:
147264b5d9eSZachary Turner // 0xfeedface - 32-bit mach-o
148264b5d9eSZachary Turner // 0xfeedfacf - 64-bit mach-o
149264b5d9eSZachary Turner case 0xFE:
150264b5d9eSZachary Turner case 0xCE:
151264b5d9eSZachary Turner case 0xCF: {
152264b5d9eSZachary Turner uint16_t type = 0;
153264b5d9eSZachary Turner if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
154264b5d9eSZachary Turner startswith(Magic, "\xFE\xED\xFA\xCF")) {
155264b5d9eSZachary Turner /* Native endian */
156264b5d9eSZachary Turner size_t MinSize;
157264b5d9eSZachary Turner if (Magic[3] == char(0xCE))
158264b5d9eSZachary Turner MinSize = sizeof(MachO::mach_header);
159264b5d9eSZachary Turner else
160264b5d9eSZachary Turner MinSize = sizeof(MachO::mach_header_64);
161264b5d9eSZachary Turner if (Magic.size() >= MinSize)
162264b5d9eSZachary Turner type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
163264b5d9eSZachary Turner } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
164264b5d9eSZachary Turner startswith(Magic, "\xCF\xFA\xED\xFE")) {
165264b5d9eSZachary Turner /* Reverse endian */
166264b5d9eSZachary Turner size_t MinSize;
167264b5d9eSZachary Turner if (Magic[0] == char(0xCE))
168264b5d9eSZachary Turner MinSize = sizeof(MachO::mach_header);
169264b5d9eSZachary Turner else
170264b5d9eSZachary Turner MinSize = sizeof(MachO::mach_header_64);
171264b5d9eSZachary Turner if (Magic.size() >= MinSize)
172264b5d9eSZachary Turner type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
173264b5d9eSZachary Turner }
174264b5d9eSZachary Turner switch (type) {
175264b5d9eSZachary Turner default:
176264b5d9eSZachary Turner break;
177264b5d9eSZachary Turner case 1:
178264b5d9eSZachary Turner return file_magic::macho_object;
179264b5d9eSZachary Turner case 2:
180264b5d9eSZachary Turner return file_magic::macho_executable;
181264b5d9eSZachary Turner case 3:
182264b5d9eSZachary Turner return file_magic::macho_fixed_virtual_memory_shared_lib;
183264b5d9eSZachary Turner case 4:
184264b5d9eSZachary Turner return file_magic::macho_core;
185264b5d9eSZachary Turner case 5:
186264b5d9eSZachary Turner return file_magic::macho_preload_executable;
187264b5d9eSZachary Turner case 6:
188264b5d9eSZachary Turner return file_magic::macho_dynamically_linked_shared_lib;
189264b5d9eSZachary Turner case 7:
190264b5d9eSZachary Turner return file_magic::macho_dynamic_linker;
191264b5d9eSZachary Turner case 8:
192264b5d9eSZachary Turner return file_magic::macho_bundle;
193264b5d9eSZachary Turner case 9:
194264b5d9eSZachary Turner return file_magic::macho_dynamically_linked_shared_lib_stub;
195264b5d9eSZachary Turner case 10:
196264b5d9eSZachary Turner return file_magic::macho_dsym_companion;
197264b5d9eSZachary Turner case 11:
198264b5d9eSZachary Turner return file_magic::macho_kext_bundle;
19961139980SPeter Cooper case 12:
20061139980SPeter Cooper return file_magic::macho_file_set;
201264b5d9eSZachary Turner }
202264b5d9eSZachary Turner break;
203264b5d9eSZachary Turner }
204264b5d9eSZachary Turner case 0xF0: // PowerPC Windows
205264b5d9eSZachary Turner case 0x83: // Alpha 32-bit
206264b5d9eSZachary Turner case 0x84: // Alpha 64-bit
207264b5d9eSZachary Turner case 0x66: // MPS R4000 Windows
208264b5d9eSZachary Turner case 0x50: // mc68K
20924ebdb6cSJoseph Huber if (startswith(Magic, "\x50\xed\x55\xba"))
21024ebdb6cSJoseph Huber return file_magic::cuda_fatbinary;
211de9d80c1SFangrui Song [[fallthrough]];
21224ebdb6cSJoseph Huber
213264b5d9eSZachary Turner case 0x4c: // 80386 Windows
214264b5d9eSZachary Turner case 0xc4: // ARMNT Windows
215264b5d9eSZachary Turner if (Magic[1] == 0x01)
216264b5d9eSZachary Turner return file_magic::coff_object;
217de9d80c1SFangrui Song [[fallthrough]];
218264b5d9eSZachary Turner
219264b5d9eSZachary Turner case 0x90: // PA-RISC Windows
220264b5d9eSZachary Turner case 0x68: // mc68K Windows
221264b5d9eSZachary Turner if (Magic[1] == 0x02)
222264b5d9eSZachary Turner return file_magic::coff_object;
223264b5d9eSZachary Turner break;
224264b5d9eSZachary Turner
225581d79a4SPavel Labath case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
226581d79a4SPavel Labath // Minidump file.
22780df6423SBenjamin Kramer if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
228264b5d9eSZachary Turner uint32_t off = read32le(Magic.data() + 0x3c);
229264b5d9eSZachary Turner // PE/COFF file, either EXE or DLL.
230586ecdf2SKazu Hirata if (Magic.substr(off).starts_with(
231deaba386SRafael Espindola StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
232264b5d9eSZachary Turner return file_magic::pecoff_executable;
233264b5d9eSZachary Turner }
234586ecdf2SKazu Hirata if (Magic.starts_with("Microsoft C/C++ MSF 7.00\r\n"))
235d860fa64SZachary Turner return file_magic::pdb;
236581d79a4SPavel Labath if (startswith(Magic, "MDMP"))
237581d79a4SPavel Labath return file_magic::minidump;
238264b5d9eSZachary Turner break;
239264b5d9eSZachary Turner
2403fa12130SMartin Storsjo case 0x64: // x86-64 or ARM64 Windows.
2413fa12130SMartin Storsjo if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
242264b5d9eSZachary Turner return file_magic::coff_object;
243264b5d9eSZachary Turner break;
244264b5d9eSZachary Turner
245bc85cf16SCyndy Ishida case 0x2d: // YAML '-' MachO TBD.
246359840a6SCyndy Ishida if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
247359840a6SCyndy Ishida return file_magic::tapi_file;
248359840a6SCyndy Ishida break;
249bc85cf16SCyndy Ishida case 0x7b: // JSON '{' MachO TBD.
250bc85cf16SCyndy Ishida return file_magic::tapi_file;
251bc85cf16SCyndy Ishida break;
252966c40aeSChris Bieneman
253966c40aeSChris Bieneman case 'D': // DirectX container file - DXBC
25415d20b97SChris Bieneman if (startswith(Magic, "DXBC"))
255966c40aeSChris Bieneman return file_magic::dxcontainer_object;
256966c40aeSChris Bieneman break;
257359840a6SCyndy Ishida
258488ad99eSEli Friedman case 0x41: // ARM64EC windows
259488ad99eSEli Friedman if (Magic[1] == char(0xA6))
260488ad99eSEli Friedman return file_magic::coff_object;
261488ad99eSEli Friedman break;
262488ad99eSEli Friedman
263cea5d287SJacek Caban case 0x4e: // ARM64X windows
264cea5d287SJacek Caban if (Magic[1] == char(0xA6))
265cea5d287SJacek Caban return file_magic::coff_object;
266cea5d287SJacek Caban break;
267cea5d287SJacek Caban
2687e282343SYaxun (Sam) Liu case '_': {
2697e282343SYaxun (Sam) Liu const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
2707e282343SYaxun (Sam) Liu if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
2717e282343SYaxun (Sam) Liu return file_magic::offload_bundle;
2727e282343SYaxun (Sam) Liu break;
2737e282343SYaxun (Sam) Liu }
2747e282343SYaxun (Sam) Liu
275264b5d9eSZachary Turner default:
276264b5d9eSZachary Turner break;
277264b5d9eSZachary Turner }
278264b5d9eSZachary Turner return file_magic::unknown;
279264b5d9eSZachary Turner }
280264b5d9eSZachary Turner
identify_magic(const Twine & Path,file_magic & Result)281264b5d9eSZachary Turner std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
282c83cd8feSAbhina Sreeskantharajan auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
283c83cd8feSAbhina Sreeskantharajan /*RequiresNullTerminator=*/false);
2849899b5feSZachary Turner if (!FileOrError)
2859899b5feSZachary Turner return FileOrError.getError();
286264b5d9eSZachary Turner
2879899b5feSZachary Turner std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
2889899b5feSZachary Turner Result = identify_magic(FileBuffer->getBuffer());
289264b5d9eSZachary Turner
290264b5d9eSZachary Turner return std::error_code();
291264b5d9eSZachary Turner }
292