xref: /llvm-project/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp (revision 7f17b6b740bd49b84430a46b366381bfc8b74fb0)
1 //===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "llvm-c/Target.h"
12 #include "llvm/MC/MCAsmBackend.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCCodeEmitter.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCInstPrinter.h"
17 #include "llvm/MC/MCInstrInfo.h"
18 #include "llvm/MC/MCObjectFileInfo.h"
19 #include "llvm/MC/MCObjectWriter.h"
20 #include "llvm/MC/MCParser/AsmLexer.h"
21 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCSectionMachO.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/FileUtilities.h"
30 #include "llvm/Support/MemoryBuffer.h"
31 #include "llvm/Support/SourceMgr.h"
32 #include "llvm/Support/TargetSelect.h"
33 #include "llvm/Support/ToolOutputFile.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/TargetParser/Host.h"
36 #include "llvm/TargetParser/SubtargetFeature.h"
37 
38 using namespace llvm;
39 
40 static mc::RegisterMCTargetOptionsFlags MOF;
41 
42 static cl::opt<std::string>
43     TripleName("triple", cl::desc("Target triple to assemble for, "
44                                   "see -version for available targets"));
45 
46 static cl::opt<std::string>
47     MCPU("mcpu",
48          cl::desc("Target a specific cpu type (-mcpu=help for details)"),
49          cl::value_desc("cpu-name"), cl::init(""));
50 
51 // This is useful for variable-length instruction sets.
52 static cl::opt<unsigned> InsnLimit(
53     "insn-limit",
54     cl::desc("Limit the number of instructions to process (0 for no limit)"),
55     cl::value_desc("count"), cl::init(0));
56 
57 static cl::list<std::string>
58     MAttrs("mattr", cl::CommaSeparated,
59            cl::desc("Target specific attributes (-mattr=help for details)"),
60            cl::value_desc("a1,+a2,-a3,..."));
61 // The feature string derived from -mattr's values.
62 std::string FeaturesStr;
63 
64 static cl::list<std::string>
65     FuzzerArgs("fuzzer-args", cl::Positional,
66                cl::desc("Options to pass to the fuzzer"),
67                cl::PositionalEatsArgs);
68 static std::vector<char *> ModifiedArgv;
69 
70 enum OutputFileType {
71   OFT_Null,
72   OFT_AssemblyFile,
73   OFT_ObjectFile
74 };
75 static cl::opt<OutputFileType>
76 FileType("filetype", cl::init(OFT_AssemblyFile),
77   cl::desc("Choose an output file type:"),
78   cl::values(
79        clEnumValN(OFT_AssemblyFile, "asm",
80                   "Emit an assembly ('.s') file"),
81        clEnumValN(OFT_Null, "null",
82                   "Don't emit anything (for timing purposes)"),
83        clEnumValN(OFT_ObjectFile, "obj",
84                   "Emit a native object ('.o') file")));
85 
86 
87 class LLVMFuzzerInputBuffer : public MemoryBuffer
88 {
89   public:
90     LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
91       : Data(reinterpret_cast<const char *>(data_)),
92         Size(size_) {
93         init(Data, Data+Size, false);
94       }
95 
96 
97     virtual BufferKind getBufferKind() const {
98       return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
99                                   // the intent ... though AFAIK it
100                                   // probably came from an mmap or sbrk
101     }
102 
103   private:
104     const char *Data;
105     size_t Size;
106 };
107 
108 static int AssembleInput(const char *ProgName, const Target *TheTarget,
109                          SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
110                          MCAsmInfo &MAI, MCSubtargetInfo &STI,
111                          MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
112   static const bool NoInitialTextSection = false;
113 
114   std::unique_ptr<MCAsmParser> Parser(
115     createMCAsmParser(SrcMgr, Ctx, Str, MAI));
116 
117   std::unique_ptr<MCTargetAsmParser> TAP(
118     TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions));
119 
120   if (!TAP) {
121     errs() << ProgName
122            << ": error: this target '" << TripleName
123            << "', does not support assembly parsing.\n";
124     abort();
125   }
126 
127   Parser->setTargetParser(*TAP);
128 
129   return Parser->Run(NoInitialTextSection);
130 }
131 
132 
133 int AssembleOneInput(const uint8_t *Data, size_t Size) {
134   Triple TheTriple(Triple::normalize(TripleName));
135 
136   SourceMgr SrcMgr;
137 
138   std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
139 
140   // Tell SrcMgr about this buffer, which is what the parser will pick up.
141   SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
142 
143   static const std::vector<std::string> NoIncludeDirs;
144   SrcMgr.setIncludeDirs(NoIncludeDirs);
145 
146   static std::string ArchName;
147   std::string Error;
148   const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
149       Error);
150   if (!TheTarget) {
151     errs() << "error: this target '" << TheTriple.normalize()
152       << "/" << ArchName << "', was not found: '" << Error << "'\n";
153 
154     abort();
155   }
156 
157   std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
158   if (!MRI) {
159     errs() << "Unable to create target register info!";
160     abort();
161   }
162 
163   MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
164   std::unique_ptr<MCAsmInfo> MAI(
165       TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
166   if (!MAI) {
167     errs() << "Unable to create target asm info!";
168     abort();
169   }
170 
171   std::unique_ptr<MCSubtargetInfo> STI(
172       TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
173 
174   MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
175   std::unique_ptr<MCObjectFileInfo> MOFI(
176       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
177   Ctx.setObjectFileInfo(MOFI.get());
178 
179   const unsigned OutputAsmVariant = 0;
180   std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
181   MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
182       *MAI, *MCII, *MRI);
183   if (!IP) {
184     errs()
185       << "error: unable to create instruction printer for target triple '"
186       << TheTriple.normalize() << "' with assembly variant "
187       << OutputAsmVariant << ".\n";
188 
189     abort();
190   }
191 
192   const char *ProgName = "llvm-mc-fuzzer";
193   std::unique_ptr<MCCodeEmitter> CE = nullptr;
194   std::unique_ptr<MCAsmBackend> MAB = nullptr;
195 
196   std::string OutputString;
197   raw_string_ostream Out(OutputString);
198   auto FOut = std::make_unique<formatted_raw_ostream>(Out);
199 
200   std::unique_ptr<MCStreamer> Str;
201 
202   if (FileType == OFT_AssemblyFile) {
203     Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), IP,
204                                            std::move(CE), std::move(MAB)));
205   } else {
206     assert(FileType == OFT_ObjectFile && "Invalid file type!");
207 
208     std::error_code EC;
209     const std::string OutputFilename = "-";
210     auto Out =
211         std::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::OF_None);
212     if (EC) {
213       errs() << EC.message() << '\n';
214       abort();
215     }
216 
217     // Don't waste memory on names of temp labels.
218     Ctx.setUseNamesOnTempLabels(false);
219 
220     std::unique_ptr<buffer_ostream> BOS;
221     raw_pwrite_stream *OS = &Out->os();
222     if (!Out->os().supportsSeeking()) {
223       BOS = std::make_unique<buffer_ostream>(Out->os());
224       OS = BOS.get();
225     }
226 
227     MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
228     MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
229     Str.reset(TheTarget->createMCObjectStreamer(
230         TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
231         MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE),
232         *STI));
233   }
234   const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
235       *MCII, MCOptions);
236 
237   (void) Res;
238 
239   return 0;
240 }
241 
242 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
243   return AssembleOneInput(Data, Size);
244 }
245 
246 extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
247                                                         char ***argv) {
248   // The command line is unusual compared to other fuzzers due to the need to
249   // specify the target. Options like -triple, -mcpu, and -mattr work like
250   // their counterparts in llvm-mc, while -fuzzer-args collects options for the
251   // fuzzer itself.
252   //
253   // Examples:
254   //
255   // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
256   // 4-bytes each and use the contents of ./corpus as the test corpus:
257   //   llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
258   //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
259   //
260   // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
261   // feature enabled using up to 64-byte inputs:
262   //   llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
263   //       -disassemble -fuzzer-args ./corpus
264   //
265   // If your aim is to find instructions that are not tested, then it is
266   // advisable to constrain the maximum input size to a single instruction
267   // using -max_len as in the first example. This results in a test corpus of
268   // individual instructions that test unique paths. Without this constraint,
269   // there will be considerable redundancy in the corpus.
270 
271   char **OriginalArgv = *argv;
272 
273   LLVMInitializeAllTargetInfos();
274   LLVMInitializeAllTargetMCs();
275   LLVMInitializeAllAsmParsers();
276 
277   cl::ParseCommandLineOptions(*argc, OriginalArgv);
278 
279   // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
280   // the driver can parse its arguments.
281   //
282   // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
283   // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
284   // non-const buffer to avoid the need to clean up when the fuzzer terminates.
285   ModifiedArgv.push_back(OriginalArgv[0]);
286   for (const auto &FuzzerArg : FuzzerArgs) {
287     for (int i = 1; i < *argc; ++i) {
288       if (FuzzerArg == OriginalArgv[i])
289         ModifiedArgv.push_back(OriginalArgv[i]);
290     }
291   }
292   *argc = ModifiedArgv.size();
293   *argv = ModifiedArgv.data();
294 
295   // Package up features to be passed to target/subtarget
296   // We have to pass it via a global since the callback doesn't
297   // permit any user data.
298   if (MAttrs.size()) {
299     SubtargetFeatures Features;
300     for (unsigned i = 0; i != MAttrs.size(); ++i)
301       Features.AddFeature(MAttrs[i]);
302     FeaturesStr = Features.getString();
303   }
304 
305   if (TripleName.empty())
306     TripleName = sys::getDefaultTargetTriple();
307 
308   return 0;
309 }
310