xref: /llvm-project/llvm/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp (revision 8de9f2b558a046da15cf73191da627bdd83676ca)
1990061b6SFangrui Song //===-- llvm-mc-disassemble-fuzzer.cpp - Fuzzer for the MC layer ----------===//
250aa37b9SBrian Cain //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
650aa37b9SBrian Cain //
750aa37b9SBrian Cain //===----------------------------------------------------------------------===//
850aa37b9SBrian Cain //
950aa37b9SBrian Cain //===----------------------------------------------------------------------===//
1050aa37b9SBrian Cain 
1150aa37b9SBrian Cain #include "llvm-c/Disassembler.h"
1250aa37b9SBrian Cain #include "llvm-c/Target.h"
1350aa37b9SBrian Cain #include "llvm/Support/CommandLine.h"
1450aa37b9SBrian Cain #include "llvm/Support/raw_ostream.h"
15d768bf99SArchibald Elliott #include "llvm/TargetParser/Host.h"
16*8de9f2b5SJob Noorman #include "llvm/TargetParser/SubtargetFeature.h"
1750aa37b9SBrian Cain 
1850aa37b9SBrian Cain using namespace llvm;
1950aa37b9SBrian Cain 
2050aa37b9SBrian Cain const unsigned AssemblyTextBufSize = 80;
2150aa37b9SBrian Cain 
2250aa37b9SBrian Cain static cl::opt<std::string>
2350aa37b9SBrian Cain     TripleName("triple", cl::desc("Target triple to assemble for, "
2450aa37b9SBrian Cain                                   "see -version for available targets"));
2550aa37b9SBrian Cain 
2650aa37b9SBrian Cain static cl::opt<std::string>
2750aa37b9SBrian Cain     MCPU("mcpu",
2850aa37b9SBrian Cain          cl::desc("Target a specific cpu type (-mcpu=help for details)"),
2950aa37b9SBrian Cain          cl::value_desc("cpu-name"), cl::init(""));
3050aa37b9SBrian Cain 
3150aa37b9SBrian Cain // This is useful for variable-length instruction sets.
3250aa37b9SBrian Cain static cl::opt<unsigned> InsnLimit(
3350aa37b9SBrian Cain     "insn-limit",
3450aa37b9SBrian Cain     cl::desc("Limit the number of instructions to process (0 for no limit)"),
3550aa37b9SBrian Cain     cl::value_desc("count"), cl::init(0));
3650aa37b9SBrian Cain 
3750aa37b9SBrian Cain static cl::list<std::string>
3850aa37b9SBrian Cain     MAttrs("mattr", cl::CommaSeparated,
3950aa37b9SBrian Cain            cl::desc("Target specific attributes (-mattr=help for details)"),
4050aa37b9SBrian Cain            cl::value_desc("a1,+a2,-a3,..."));
4150aa37b9SBrian Cain // The feature string derived from -mattr's values.
4250aa37b9SBrian Cain std::string FeaturesStr;
4350aa37b9SBrian Cain 
4450aa37b9SBrian Cain static cl::list<std::string>
4550aa37b9SBrian Cain     FuzzerArgs("fuzzer-args", cl::Positional,
46d0d1c416SFangrui Song                cl::desc("Options to pass to the fuzzer"),
4750aa37b9SBrian Cain                cl::PositionalEatsArgs);
4850aa37b9SBrian Cain static std::vector<char *> ModifiedArgv;
4950aa37b9SBrian Cain 
DisassembleOneInput(const uint8_t * Data,size_t Size)5050aa37b9SBrian Cain int DisassembleOneInput(const uint8_t *Data, size_t Size) {
5150aa37b9SBrian Cain   char AssemblyText[AssemblyTextBufSize];
5250aa37b9SBrian Cain 
5350aa37b9SBrian Cain   std::vector<uint8_t> DataCopy(Data, Data + Size);
5450aa37b9SBrian Cain 
5550aa37b9SBrian Cain   LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
5650aa37b9SBrian Cain       TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0,
5750aa37b9SBrian Cain       nullptr, nullptr);
5850aa37b9SBrian Cain   assert(Ctx);
5950aa37b9SBrian Cain   uint8_t *p = DataCopy.data();
6050aa37b9SBrian Cain   unsigned Consumed;
6150aa37b9SBrian Cain   unsigned InstructionsProcessed = 0;
6250aa37b9SBrian Cain   do {
6350aa37b9SBrian Cain     Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
6450aa37b9SBrian Cain                                      AssemblyTextBufSize);
6550aa37b9SBrian Cain     Size -= Consumed;
6650aa37b9SBrian Cain     p += Consumed;
6750aa37b9SBrian Cain 
6850aa37b9SBrian Cain     InstructionsProcessed ++;
6950aa37b9SBrian Cain     if (InsnLimit != 0 && InstructionsProcessed < InsnLimit)
7050aa37b9SBrian Cain       break;
7150aa37b9SBrian Cain   } while (Consumed != 0);
7250aa37b9SBrian Cain   LLVMDisasmDispose(Ctx);
7350aa37b9SBrian Cain   return 0;
7450aa37b9SBrian Cain }
7550aa37b9SBrian Cain 
LLVMFuzzerTestOneInput(const uint8_t * Data,size_t Size)7629c5d02fSJustin Bogner extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
7750aa37b9SBrian Cain   return DisassembleOneInput(Data, Size);
7850aa37b9SBrian Cain }
7950aa37b9SBrian Cain 
LLVMFuzzerInitialize(int * argc,char *** argv)8029c5d02fSJustin Bogner extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
8129c5d02fSJustin Bogner                                                         char ***argv) {
8250aa37b9SBrian Cain   // The command line is unusual compared to other fuzzers due to the need to
8350aa37b9SBrian Cain   // specify the target. Options like -triple, -mcpu, and -mattr work like
8450aa37b9SBrian Cain   // their counterparts in llvm-mc, while -fuzzer-args collects options for the
8550aa37b9SBrian Cain   // fuzzer itself.
8650aa37b9SBrian Cain   //
8750aa37b9SBrian Cain   // Examples:
8850aa37b9SBrian Cain   //
8950aa37b9SBrian Cain   // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
9050aa37b9SBrian Cain   // 4-bytes each and use the contents of ./corpus as the test corpus:
9150aa37b9SBrian Cain   //   llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
9250aa37b9SBrian Cain   //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
9350aa37b9SBrian Cain   //
9450aa37b9SBrian Cain   // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
9550aa37b9SBrian Cain   // feature enabled using up to 64-byte inputs:
9650aa37b9SBrian Cain   //   llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
9750aa37b9SBrian Cain   //       -disassemble -fuzzer-args ./corpus
9850aa37b9SBrian Cain   //
9950aa37b9SBrian Cain   // If your aim is to find instructions that are not tested, then it is
10050aa37b9SBrian Cain   // advisable to constrain the maximum input size to a single instruction
10150aa37b9SBrian Cain   // using -max_len as in the first example. This results in a test corpus of
10250aa37b9SBrian Cain   // individual instructions that test unique paths. Without this constraint,
10350aa37b9SBrian Cain   // there will be considerable redundancy in the corpus.
10450aa37b9SBrian Cain 
10550aa37b9SBrian Cain   char **OriginalArgv = *argv;
10650aa37b9SBrian Cain 
10750aa37b9SBrian Cain   LLVMInitializeAllTargetInfos();
10850aa37b9SBrian Cain   LLVMInitializeAllTargetMCs();
10950aa37b9SBrian Cain   LLVMInitializeAllDisassemblers();
11050aa37b9SBrian Cain 
11150aa37b9SBrian Cain   cl::ParseCommandLineOptions(*argc, OriginalArgv);
11250aa37b9SBrian Cain 
11350aa37b9SBrian Cain   // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
11450aa37b9SBrian Cain   // the driver can parse its arguments.
11550aa37b9SBrian Cain   //
11650aa37b9SBrian Cain   // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
11750aa37b9SBrian Cain   // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
11850aa37b9SBrian Cain   // non-const buffer to avoid the need to clean up when the fuzzer terminates.
11950aa37b9SBrian Cain   ModifiedArgv.push_back(OriginalArgv[0]);
12050aa37b9SBrian Cain   for (const auto &FuzzerArg : FuzzerArgs) {
12150aa37b9SBrian Cain     for (int i = 1; i < *argc; ++i) {
12250aa37b9SBrian Cain       if (FuzzerArg == OriginalArgv[i])
12350aa37b9SBrian Cain         ModifiedArgv.push_back(OriginalArgv[i]);
12450aa37b9SBrian Cain     }
12550aa37b9SBrian Cain   }
12650aa37b9SBrian Cain   *argc = ModifiedArgv.size();
12750aa37b9SBrian Cain   *argv = ModifiedArgv.data();
12850aa37b9SBrian Cain 
12950aa37b9SBrian Cain   // Package up features to be passed to target/subtarget
13050aa37b9SBrian Cain   // We have to pass it via a global since the callback doesn't
13150aa37b9SBrian Cain   // permit any user data.
13250aa37b9SBrian Cain   if (MAttrs.size()) {
13350aa37b9SBrian Cain     SubtargetFeatures Features;
13450aa37b9SBrian Cain     for (unsigned i = 0; i != MAttrs.size(); ++i)
13550aa37b9SBrian Cain       Features.AddFeature(MAttrs[i]);
13650aa37b9SBrian Cain     FeaturesStr = Features.getString();
13750aa37b9SBrian Cain   }
13850aa37b9SBrian Cain 
13950aa37b9SBrian Cain   if (TripleName.empty())
14050aa37b9SBrian Cain     TripleName = sys::getDefaultTargetTriple();
14150aa37b9SBrian Cain 
14250aa37b9SBrian Cain   return 0;
14350aa37b9SBrian Cain }
144