1 //===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "llvm-c/Target.h" 12 #include "llvm/MC/MCAsmBackend.h" 13 #include "llvm/MC/MCAsmInfo.h" 14 #include "llvm/MC/MCCodeEmitter.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCInstPrinter.h" 17 #include "llvm/MC/MCInstrInfo.h" 18 #include "llvm/MC/MCObjectFileInfo.h" 19 #include "llvm/MC/MCObjectWriter.h" 20 #include "llvm/MC/MCParser/AsmLexer.h" 21 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 22 #include "llvm/MC/MCRegisterInfo.h" 23 #include "llvm/MC/MCSectionMachO.h" 24 #include "llvm/MC/MCStreamer.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/MC/MCTargetOptionsCommandFlags.h" 27 #include "llvm/MC/TargetRegistry.h" 28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Support/FileUtilities.h" 30 #include "llvm/Support/MemoryBuffer.h" 31 #include "llvm/Support/SourceMgr.h" 32 #include "llvm/Support/TargetSelect.h" 33 #include "llvm/Support/ToolOutputFile.h" 34 #include "llvm/Support/raw_ostream.h" 35 #include "llvm/TargetParser/Host.h" 36 #include "llvm/TargetParser/SubtargetFeature.h" 37 38 using namespace llvm; 39 40 static mc::RegisterMCTargetOptionsFlags MOF; 41 42 static cl::opt<std::string> 43 TripleName("triple", cl::desc("Target triple to assemble for, " 44 "see -version for available targets")); 45 46 static cl::opt<std::string> 47 MCPU("mcpu", 48 cl::desc("Target a specific cpu type (-mcpu=help for details)"), 49 cl::value_desc("cpu-name"), cl::init("")); 50 51 // This is useful for variable-length instruction sets. 52 static cl::opt<unsigned> InsnLimit( 53 "insn-limit", 54 cl::desc("Limit the number of instructions to process (0 for no limit)"), 55 cl::value_desc("count"), cl::init(0)); 56 57 static cl::list<std::string> 58 MAttrs("mattr", cl::CommaSeparated, 59 cl::desc("Target specific attributes (-mattr=help for details)"), 60 cl::value_desc("a1,+a2,-a3,...")); 61 // The feature string derived from -mattr's values. 62 std::string FeaturesStr; 63 64 static cl::list<std::string> 65 FuzzerArgs("fuzzer-args", cl::Positional, 66 cl::desc("Options to pass to the fuzzer"), 67 cl::PositionalEatsArgs); 68 static std::vector<char *> ModifiedArgv; 69 70 enum OutputFileType { 71 OFT_Null, 72 OFT_AssemblyFile, 73 OFT_ObjectFile 74 }; 75 static cl::opt<OutputFileType> 76 FileType("filetype", cl::init(OFT_AssemblyFile), 77 cl::desc("Choose an output file type:"), 78 cl::values( 79 clEnumValN(OFT_AssemblyFile, "asm", 80 "Emit an assembly ('.s') file"), 81 clEnumValN(OFT_Null, "null", 82 "Don't emit anything (for timing purposes)"), 83 clEnumValN(OFT_ObjectFile, "obj", 84 "Emit a native object ('.o') file"))); 85 86 87 class LLVMFuzzerInputBuffer : public MemoryBuffer 88 { 89 public: 90 LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_) 91 : Data(reinterpret_cast<const char *>(data_)), 92 Size(size_) { 93 init(Data, Data+Size, false); 94 } 95 96 97 virtual BufferKind getBufferKind() const { 98 return MemoryBuffer_Malloc; // it's not disk-backed so I think that's 99 // the intent ... though AFAIK it 100 // probably came from an mmap or sbrk 101 } 102 103 private: 104 const char *Data; 105 size_t Size; 106 }; 107 108 static int AssembleInput(const char *ProgName, const Target *TheTarget, 109 SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, 110 MCAsmInfo &MAI, MCSubtargetInfo &STI, 111 MCInstrInfo &MCII, MCTargetOptions &MCOptions) { 112 static const bool NoInitialTextSection = false; 113 114 std::unique_ptr<MCAsmParser> Parser( 115 createMCAsmParser(SrcMgr, Ctx, Str, MAI)); 116 117 std::unique_ptr<MCTargetAsmParser> TAP( 118 TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); 119 120 if (!TAP) { 121 errs() << ProgName 122 << ": error: this target '" << TripleName 123 << "', does not support assembly parsing.\n"; 124 abort(); 125 } 126 127 Parser->setTargetParser(*TAP); 128 129 return Parser->Run(NoInitialTextSection); 130 } 131 132 133 int AssembleOneInput(const uint8_t *Data, size_t Size) { 134 Triple TheTriple(Triple::normalize(TripleName)); 135 136 SourceMgr SrcMgr; 137 138 std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size)); 139 140 // Tell SrcMgr about this buffer, which is what the parser will pick up. 141 SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc()); 142 143 static const std::vector<std::string> NoIncludeDirs; 144 SrcMgr.setIncludeDirs(NoIncludeDirs); 145 146 static std::string ArchName; 147 std::string Error; 148 const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, 149 Error); 150 if (!TheTarget) { 151 errs() << "error: this target '" << TheTriple.normalize() 152 << "/" << ArchName << "', was not found: '" << Error << "'\n"; 153 154 abort(); 155 } 156 157 std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName)); 158 if (!MRI) { 159 errs() << "Unable to create target register info!"; 160 abort(); 161 } 162 163 MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); 164 std::unique_ptr<MCAsmInfo> MAI( 165 TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); 166 if (!MAI) { 167 errs() << "Unable to create target asm info!"; 168 abort(); 169 } 170 171 std::unique_ptr<MCSubtargetInfo> STI( 172 TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr)); 173 174 MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr); 175 std::unique_ptr<MCObjectFileInfo> MOFI( 176 TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); 177 Ctx.setObjectFileInfo(MOFI.get()); 178 179 const unsigned OutputAsmVariant = 0; 180 std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo()); 181 MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant, 182 *MAI, *MCII, *MRI); 183 if (!IP) { 184 errs() 185 << "error: unable to create instruction printer for target triple '" 186 << TheTriple.normalize() << "' with assembly variant " 187 << OutputAsmVariant << ".\n"; 188 189 abort(); 190 } 191 192 const char *ProgName = "llvm-mc-fuzzer"; 193 std::unique_ptr<MCCodeEmitter> CE = nullptr; 194 std::unique_ptr<MCAsmBackend> MAB = nullptr; 195 196 std::string OutputString; 197 raw_string_ostream Out(OutputString); 198 auto FOut = std::make_unique<formatted_raw_ostream>(Out); 199 200 std::unique_ptr<MCStreamer> Str; 201 202 if (FileType == OFT_AssemblyFile) { 203 Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), IP, 204 std::move(CE), std::move(MAB))); 205 } else { 206 assert(FileType == OFT_ObjectFile && "Invalid file type!"); 207 208 std::error_code EC; 209 const std::string OutputFilename = "-"; 210 auto Out = 211 std::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::OF_None); 212 if (EC) { 213 errs() << EC.message() << '\n'; 214 abort(); 215 } 216 217 // Don't waste memory on names of temp labels. 218 Ctx.setUseNamesOnTempLabels(false); 219 220 std::unique_ptr<buffer_ostream> BOS; 221 raw_pwrite_stream *OS = &Out->os(); 222 if (!Out->os().supportsSeeking()) { 223 BOS = std::make_unique<buffer_ostream>(Out->os()); 224 OS = BOS.get(); 225 } 226 227 MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx); 228 MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); 229 Str.reset(TheTarget->createMCObjectStreamer( 230 TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB), 231 MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), 232 *STI)); 233 } 234 const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, 235 *MCII, MCOptions); 236 237 (void) Res; 238 239 return 0; 240 } 241 242 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { 243 return AssembleOneInput(Data, Size); 244 } 245 246 extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, 247 char ***argv) { 248 // The command line is unusual compared to other fuzzers due to the need to 249 // specify the target. Options like -triple, -mcpu, and -mattr work like 250 // their counterparts in llvm-mc, while -fuzzer-args collects options for the 251 // fuzzer itself. 252 // 253 // Examples: 254 // 255 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to 256 // 4-bytes each and use the contents of ./corpus as the test corpus: 257 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ 258 // -fuzzer-args -max_len=4 -runs=100000 ./corpus 259 // 260 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA 261 // feature enabled using up to 64-byte inputs: 262 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ 263 // -disassemble -fuzzer-args ./corpus 264 // 265 // If your aim is to find instructions that are not tested, then it is 266 // advisable to constrain the maximum input size to a single instruction 267 // using -max_len as in the first example. This results in a test corpus of 268 // individual instructions that test unique paths. Without this constraint, 269 // there will be considerable redundancy in the corpus. 270 271 char **OriginalArgv = *argv; 272 273 LLVMInitializeAllTargetInfos(); 274 LLVMInitializeAllTargetMCs(); 275 LLVMInitializeAllAsmParsers(); 276 277 cl::ParseCommandLineOptions(*argc, OriginalArgv); 278 279 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that 280 // the driver can parse its arguments. 281 // 282 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. 283 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a 284 // non-const buffer to avoid the need to clean up when the fuzzer terminates. 285 ModifiedArgv.push_back(OriginalArgv[0]); 286 for (const auto &FuzzerArg : FuzzerArgs) { 287 for (int i = 1; i < *argc; ++i) { 288 if (FuzzerArg == OriginalArgv[i]) 289 ModifiedArgv.push_back(OriginalArgv[i]); 290 } 291 } 292 *argc = ModifiedArgv.size(); 293 *argv = ModifiedArgv.data(); 294 295 // Package up features to be passed to target/subtarget 296 // We have to pass it via a global since the callback doesn't 297 // permit any user data. 298 if (MAttrs.size()) { 299 SubtargetFeatures Features; 300 for (unsigned i = 0; i != MAttrs.size(); ++i) 301 Features.AddFeature(MAttrs[i]); 302 FeaturesStr = Features.getString(); 303 } 304 305 if (TripleName.empty()) 306 TripleName = sys::getDefaultTargetTriple(); 307 308 return 0; 309 } 310