xref: /llvm-project/llvm/lib/BinaryFormat/Magic.cpp (revision 894c22406f68af6574a62a40ec49e058344c324b)
1264b5d9eSZachary Turner //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2264b5d9eSZachary Turner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6264b5d9eSZachary Turner //
7264b5d9eSZachary Turner //===----------------------------------------------------------------------===//
8264b5d9eSZachary Turner 
9264b5d9eSZachary Turner #include "llvm/BinaryFormat/Magic.h"
1070d77b5fSSimon Pilgrim #include "llvm/ADT/StringRef.h"
1170d77b5fSSimon Pilgrim #include "llvm/ADT/Twine.h"
12264b5d9eSZachary Turner #include "llvm/BinaryFormat/COFF.h"
13264b5d9eSZachary Turner #include "llvm/BinaryFormat/MachO.h"
14264b5d9eSZachary Turner #include "llvm/Support/Endian.h"
159899b5feSZachary Turner #include "llvm/Support/MemoryBuffer.h"
16264b5d9eSZachary Turner 
17264b5d9eSZachary Turner #if !defined(_MSC_VER) && !defined(__MINGW32__)
18264b5d9eSZachary Turner #include <unistd.h>
19264b5d9eSZachary Turner #else
20264b5d9eSZachary Turner #include <io.h>
21264b5d9eSZachary Turner #endif
22264b5d9eSZachary Turner 
23264b5d9eSZachary Turner using namespace llvm;
24264b5d9eSZachary Turner using namespace llvm::support::endian;
25264b5d9eSZachary Turner using namespace llvm::sys::fs;
26264b5d9eSZachary Turner 
27264b5d9eSZachary Turner template <size_t N>
startswith(StringRef Magic,const char (& S)[N])28264b5d9eSZachary Turner static bool startswith(StringRef Magic, const char (&S)[N]) {
29586ecdf2SKazu Hirata   return Magic.starts_with(StringRef(S, N - 1));
30264b5d9eSZachary Turner }
31264b5d9eSZachary Turner 
324dfcc4a7SAdrian Prantl /// Identify the magic in magic.
identify_magic(StringRef Magic)33264b5d9eSZachary Turner file_magic llvm::identify_magic(StringRef Magic) {
34264b5d9eSZachary Turner   if (Magic.size() < 4)
35264b5d9eSZachary Turner     return file_magic::unknown;
36264b5d9eSZachary Turner   switch ((unsigned char)Magic[0]) {
37264b5d9eSZachary Turner   case 0x00: {
38264b5d9eSZachary Turner     // COFF bigobj, CL.exe's LTO object file, or short import library file
39264b5d9eSZachary Turner     if (startswith(Magic, "\0\0\xFF\xFF")) {
40264b5d9eSZachary Turner       size_t MinSize =
41264b5d9eSZachary Turner           offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
42264b5d9eSZachary Turner       if (Magic.size() < MinSize)
43264b5d9eSZachary Turner         return file_magic::coff_import_library;
44264b5d9eSZachary Turner 
45264b5d9eSZachary Turner       const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
46264b5d9eSZachary Turner       if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
47264b5d9eSZachary Turner         return file_magic::coff_object;
48264b5d9eSZachary Turner       if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
49264b5d9eSZachary Turner         return file_magic::coff_cl_gl_object;
50264b5d9eSZachary Turner       return file_magic::coff_import_library;
51264b5d9eSZachary Turner     }
52264b5d9eSZachary Turner     // Windows resource file
53c8dba240SEric Beckmann     if (Magic.size() >= sizeof(COFF::WinResMagic) &&
54c8dba240SEric Beckmann         memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
55264b5d9eSZachary Turner       return file_magic::windows_resource;
56264b5d9eSZachary Turner     // 0x0000 = COFF unknown machine type
57264b5d9eSZachary Turner     if (Magic[1] == 0)
58264b5d9eSZachary Turner       return file_magic::coff_object;
59264b5d9eSZachary Turner     if (startswith(Magic, "\0asm"))
60264b5d9eSZachary Turner       return file_magic::wasm_object;
61264b5d9eSZachary Turner     break;
62264b5d9eSZachary Turner   }
63ab2eb2bfSHubert Tong 
64ab2eb2bfSHubert Tong   case 0x01:
65ab2eb2bfSHubert Tong     // XCOFF format
66ab2eb2bfSHubert Tong     if (startswith(Magic, "\x01\xDF"))
67ab2eb2bfSHubert Tong       return file_magic::xcoff_object_32;
68837ae69fSSean Fertile     if (startswith(Magic, "\x01\xF7"))
69837ae69fSSean Fertile       return file_magic::xcoff_object_64;
70ab2eb2bfSHubert Tong     break;
71ab2eb2bfSHubert Tong 
720977f31cSAnirudh Prasad   case 0x03:
730977f31cSAnirudh Prasad     if (startswith(Magic, "\x03\xF0\x00"))
740977f31cSAnirudh Prasad       return file_magic::goff_object;
75edc83886SJoseph Huber     // SPIR-V format in little-endian mode.
76edc83886SJoseph Huber     if (startswith(Magic, "\x03\x02\x23\x07"))
77edc83886SJoseph Huber       return file_magic::spirv_object;
78edc83886SJoseph Huber     break;
79edc83886SJoseph Huber 
80edc83886SJoseph Huber   case 0x07: // SPIR-V format in big-endian mode.
81edc83886SJoseph Huber     if (startswith(Magic, "\x07\x23\x02\x03"))
82edc83886SJoseph Huber       return file_magic::spirv_object;
830977f31cSAnirudh Prasad     break;
840977f31cSAnirudh Prasad 
85afd2f7e9SJoseph Huber   case 0x10:
86afd2f7e9SJoseph Huber     if (startswith(Magic, "\x10\xFF\x10\xAD"))
87afd2f7e9SJoseph Huber       return file_magic::offload_binary;
88afd2f7e9SJoseph Huber     break;
89afd2f7e9SJoseph Huber 
90264b5d9eSZachary Turner   case 0xDE: // 0x0B17C0DE = BC wraper
91264b5d9eSZachary Turner     if (startswith(Magic, "\xDE\xC0\x17\x0B"))
92264b5d9eSZachary Turner       return file_magic::bitcode;
93264b5d9eSZachary Turner     break;
94264b5d9eSZachary Turner   case 'B':
95264b5d9eSZachary Turner     if (startswith(Magic, "BC\xC0\xDE"))
96264b5d9eSZachary Turner       return file_magic::bitcode;
97264b5d9eSZachary Turner     break;
987e282343SYaxun (Sam) Liu   case 'C':
997e282343SYaxun (Sam) Liu     if (startswith(Magic, "CCOB"))
1007e282343SYaxun (Sam) Liu       return file_magic::offload_bundle_compressed;
101*894c2240SMichael Spencer     if (startswith(Magic, "CPCH"))
102*894c2240SMichael Spencer       return file_magic::clang_ast;
1037e282343SYaxun (Sam) Liu     break;
104264b5d9eSZachary Turner   case '!':
105264b5d9eSZachary Turner     if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
106264b5d9eSZachary Turner       return file_magic::archive;
107264b5d9eSZachary Turner     break;
1083062a146Szhijian   case '<':
1093062a146Szhijian     if (startswith(Magic, "<bigaf>\n"))
1103062a146Szhijian       return file_magic::archive;
1113062a146Szhijian     break;
112264b5d9eSZachary Turner   case '\177':
113264b5d9eSZachary Turner     if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
114264b5d9eSZachary Turner       bool Data2MSB = Magic[5] == 2;
115264b5d9eSZachary Turner       unsigned high = Data2MSB ? 16 : 17;
116264b5d9eSZachary Turner       unsigned low = Data2MSB ? 17 : 16;
117264b5d9eSZachary Turner       if (Magic[high] == 0) {
118264b5d9eSZachary Turner         switch (Magic[low]) {
119264b5d9eSZachary Turner         default:
120264b5d9eSZachary Turner           return file_magic::elf;
121264b5d9eSZachary Turner         case 1:
122264b5d9eSZachary Turner           return file_magic::elf_relocatable;
123264b5d9eSZachary Turner         case 2:
124264b5d9eSZachary Turner           return file_magic::elf_executable;
125264b5d9eSZachary Turner         case 3:
126264b5d9eSZachary Turner           return file_magic::elf_shared_object;
127264b5d9eSZachary Turner         case 4:
128264b5d9eSZachary Turner           return file_magic::elf_core;
129264b5d9eSZachary Turner         }
130264b5d9eSZachary Turner       }
131264b5d9eSZachary Turner       // It's still some type of ELF file.
132264b5d9eSZachary Turner       return file_magic::elf;
133264b5d9eSZachary Turner     }
134264b5d9eSZachary Turner     break;
135264b5d9eSZachary Turner 
136264b5d9eSZachary Turner   case 0xCA:
137264b5d9eSZachary Turner     if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
138264b5d9eSZachary Turner         startswith(Magic, "\xCA\xFE\xBA\xBF")) {
139264b5d9eSZachary Turner       // This is complicated by an overlap with Java class files.
140264b5d9eSZachary Turner       // See the Mach-O section in /usr/share/file/magic for details.
141264b5d9eSZachary Turner       if (Magic.size() >= 8 && Magic[7] < 43)
142264b5d9eSZachary Turner         return file_magic::macho_universal_binary;
143264b5d9eSZachary Turner     }
144264b5d9eSZachary Turner     break;
145264b5d9eSZachary Turner 
146264b5d9eSZachary Turner   // The two magic numbers for mach-o are:
147264b5d9eSZachary Turner   // 0xfeedface - 32-bit mach-o
148264b5d9eSZachary Turner   // 0xfeedfacf - 64-bit mach-o
149264b5d9eSZachary Turner   case 0xFE:
150264b5d9eSZachary Turner   case 0xCE:
151264b5d9eSZachary Turner   case 0xCF: {
152264b5d9eSZachary Turner     uint16_t type = 0;
153264b5d9eSZachary Turner     if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
154264b5d9eSZachary Turner         startswith(Magic, "\xFE\xED\xFA\xCF")) {
155264b5d9eSZachary Turner       /* Native endian */
156264b5d9eSZachary Turner       size_t MinSize;
157264b5d9eSZachary Turner       if (Magic[3] == char(0xCE))
158264b5d9eSZachary Turner         MinSize = sizeof(MachO::mach_header);
159264b5d9eSZachary Turner       else
160264b5d9eSZachary Turner         MinSize = sizeof(MachO::mach_header_64);
161264b5d9eSZachary Turner       if (Magic.size() >= MinSize)
162264b5d9eSZachary Turner         type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
163264b5d9eSZachary Turner     } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
164264b5d9eSZachary Turner                startswith(Magic, "\xCF\xFA\xED\xFE")) {
165264b5d9eSZachary Turner       /* Reverse endian */
166264b5d9eSZachary Turner       size_t MinSize;
167264b5d9eSZachary Turner       if (Magic[0] == char(0xCE))
168264b5d9eSZachary Turner         MinSize = sizeof(MachO::mach_header);
169264b5d9eSZachary Turner       else
170264b5d9eSZachary Turner         MinSize = sizeof(MachO::mach_header_64);
171264b5d9eSZachary Turner       if (Magic.size() >= MinSize)
172264b5d9eSZachary Turner         type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
173264b5d9eSZachary Turner     }
174264b5d9eSZachary Turner     switch (type) {
175264b5d9eSZachary Turner     default:
176264b5d9eSZachary Turner       break;
177264b5d9eSZachary Turner     case 1:
178264b5d9eSZachary Turner       return file_magic::macho_object;
179264b5d9eSZachary Turner     case 2:
180264b5d9eSZachary Turner       return file_magic::macho_executable;
181264b5d9eSZachary Turner     case 3:
182264b5d9eSZachary Turner       return file_magic::macho_fixed_virtual_memory_shared_lib;
183264b5d9eSZachary Turner     case 4:
184264b5d9eSZachary Turner       return file_magic::macho_core;
185264b5d9eSZachary Turner     case 5:
186264b5d9eSZachary Turner       return file_magic::macho_preload_executable;
187264b5d9eSZachary Turner     case 6:
188264b5d9eSZachary Turner       return file_magic::macho_dynamically_linked_shared_lib;
189264b5d9eSZachary Turner     case 7:
190264b5d9eSZachary Turner       return file_magic::macho_dynamic_linker;
191264b5d9eSZachary Turner     case 8:
192264b5d9eSZachary Turner       return file_magic::macho_bundle;
193264b5d9eSZachary Turner     case 9:
194264b5d9eSZachary Turner       return file_magic::macho_dynamically_linked_shared_lib_stub;
195264b5d9eSZachary Turner     case 10:
196264b5d9eSZachary Turner       return file_magic::macho_dsym_companion;
197264b5d9eSZachary Turner     case 11:
198264b5d9eSZachary Turner       return file_magic::macho_kext_bundle;
19961139980SPeter Cooper     case 12:
20061139980SPeter Cooper       return file_magic::macho_file_set;
201264b5d9eSZachary Turner     }
202264b5d9eSZachary Turner     break;
203264b5d9eSZachary Turner   }
204264b5d9eSZachary Turner   case 0xF0: // PowerPC Windows
205264b5d9eSZachary Turner   case 0x83: // Alpha 32-bit
206264b5d9eSZachary Turner   case 0x84: // Alpha 64-bit
207264b5d9eSZachary Turner   case 0x66: // MPS R4000 Windows
208264b5d9eSZachary Turner   case 0x50: // mc68K
20924ebdb6cSJoseph Huber     if (startswith(Magic, "\x50\xed\x55\xba"))
21024ebdb6cSJoseph Huber       return file_magic::cuda_fatbinary;
211de9d80c1SFangrui Song     [[fallthrough]];
21224ebdb6cSJoseph Huber 
213264b5d9eSZachary Turner   case 0x4c: // 80386 Windows
214264b5d9eSZachary Turner   case 0xc4: // ARMNT Windows
215264b5d9eSZachary Turner     if (Magic[1] == 0x01)
216264b5d9eSZachary Turner       return file_magic::coff_object;
217de9d80c1SFangrui Song     [[fallthrough]];
218264b5d9eSZachary Turner 
219264b5d9eSZachary Turner   case 0x90: // PA-RISC Windows
220264b5d9eSZachary Turner   case 0x68: // mc68K Windows
221264b5d9eSZachary Turner     if (Magic[1] == 0x02)
222264b5d9eSZachary Turner       return file_magic::coff_object;
223264b5d9eSZachary Turner     break;
224264b5d9eSZachary Turner 
225581d79a4SPavel Labath   case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
226581d79a4SPavel Labath             // Minidump file.
22780df6423SBenjamin Kramer     if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
228264b5d9eSZachary Turner       uint32_t off = read32le(Magic.data() + 0x3c);
229264b5d9eSZachary Turner       // PE/COFF file, either EXE or DLL.
230586ecdf2SKazu Hirata       if (Magic.substr(off).starts_with(
231deaba386SRafael Espindola               StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
232264b5d9eSZachary Turner         return file_magic::pecoff_executable;
233264b5d9eSZachary Turner     }
234586ecdf2SKazu Hirata     if (Magic.starts_with("Microsoft C/C++ MSF 7.00\r\n"))
235d860fa64SZachary Turner       return file_magic::pdb;
236581d79a4SPavel Labath     if (startswith(Magic, "MDMP"))
237581d79a4SPavel Labath       return file_magic::minidump;
238264b5d9eSZachary Turner     break;
239264b5d9eSZachary Turner 
2403fa12130SMartin Storsjo   case 0x64: // x86-64 or ARM64 Windows.
2413fa12130SMartin Storsjo     if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
242264b5d9eSZachary Turner       return file_magic::coff_object;
243264b5d9eSZachary Turner     break;
244264b5d9eSZachary Turner 
245bc85cf16SCyndy Ishida   case 0x2d: // YAML '-' MachO TBD.
246359840a6SCyndy Ishida     if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
247359840a6SCyndy Ishida       return file_magic::tapi_file;
248359840a6SCyndy Ishida     break;
249bc85cf16SCyndy Ishida   case 0x7b: // JSON '{' MachO TBD.
250bc85cf16SCyndy Ishida     return file_magic::tapi_file;
251bc85cf16SCyndy Ishida     break;
252966c40aeSChris Bieneman 
253966c40aeSChris Bieneman   case 'D': // DirectX container file - DXBC
25415d20b97SChris Bieneman     if (startswith(Magic, "DXBC"))
255966c40aeSChris Bieneman       return file_magic::dxcontainer_object;
256966c40aeSChris Bieneman     break;
257359840a6SCyndy Ishida 
258488ad99eSEli Friedman   case 0x41: // ARM64EC windows
259488ad99eSEli Friedman     if (Magic[1] == char(0xA6))
260488ad99eSEli Friedman       return file_magic::coff_object;
261488ad99eSEli Friedman     break;
262488ad99eSEli Friedman 
263cea5d287SJacek Caban   case 0x4e: // ARM64X windows
264cea5d287SJacek Caban     if (Magic[1] == char(0xA6))
265cea5d287SJacek Caban       return file_magic::coff_object;
266cea5d287SJacek Caban     break;
267cea5d287SJacek Caban 
2687e282343SYaxun (Sam) Liu   case '_': {
2697e282343SYaxun (Sam) Liu     const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
2707e282343SYaxun (Sam) Liu     if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
2717e282343SYaxun (Sam) Liu       return file_magic::offload_bundle;
2727e282343SYaxun (Sam) Liu     break;
2737e282343SYaxun (Sam) Liu   }
2747e282343SYaxun (Sam) Liu 
275264b5d9eSZachary Turner   default:
276264b5d9eSZachary Turner     break;
277264b5d9eSZachary Turner   }
278264b5d9eSZachary Turner   return file_magic::unknown;
279264b5d9eSZachary Turner }
280264b5d9eSZachary Turner 
identify_magic(const Twine & Path,file_magic & Result)281264b5d9eSZachary Turner std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
282c83cd8feSAbhina Sreeskantharajan   auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
283c83cd8feSAbhina Sreeskantharajan                                            /*RequiresNullTerminator=*/false);
2849899b5feSZachary Turner   if (!FileOrError)
2859899b5feSZachary Turner     return FileOrError.getError();
286264b5d9eSZachary Turner 
2879899b5feSZachary Turner   std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
2889899b5feSZachary Turner   Result = identify_magic(FileBuffer->getBuffer());
289264b5d9eSZachary Turner 
290264b5d9eSZachary Turner   return std::error_code();
291264b5d9eSZachary Turner }
292