1 //===- Disassembler.cpp - Disassembler for hex strings --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This class implements the disassembler of strings of bytes written in 10 // hexadecimal, from standard input or from a file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "Disassembler.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/MC/MCAsmInfo.h" 17 #include "llvm/MC/MCContext.h" 18 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 19 #include "llvm/MC/MCInst.h" 20 #include "llvm/MC/MCObjectFileInfo.h" 21 #include "llvm/MC/MCRegisterInfo.h" 22 #include "llvm/MC/MCStreamer.h" 23 #include "llvm/MC/MCSubtargetInfo.h" 24 #include "llvm/MC/TargetRegistry.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/SourceMgr.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include "llvm/TargetParser/Triple.h" 29 30 using namespace llvm; 31 32 typedef std::pair<std::vector<unsigned char>, std::vector<const char *>> 33 ByteArrayTy; 34 35 static bool PrintInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes, 36 SourceMgr &SM, MCStreamer &Streamer, bool InAtomicBlock, 37 const MCSubtargetInfo &STI) { 38 ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size()); 39 40 // Disassemble it to strings. 41 uint64_t Size; 42 uint64_t Index; 43 44 for (Index = 0; Index < Bytes.first.size(); Index += Size) { 45 MCInst Inst; 46 47 MCDisassembler::DecodeStatus S; 48 S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls()); 49 switch (S) { 50 case MCDisassembler::Fail: 51 SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), 52 SourceMgr::DK_Warning, 53 "invalid instruction encoding"); 54 // Don't try to resynchronise the stream in a block 55 if (InAtomicBlock) 56 return true; 57 58 if (Size == 0) 59 Size = 1; // skip illegible bytes 60 61 break; 62 63 case MCDisassembler::SoftFail: 64 SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), 65 SourceMgr::DK_Warning, 66 "potentially undefined instruction encoding"); 67 [[fallthrough]]; 68 69 case MCDisassembler::Success: 70 Streamer.emitInstruction(Inst, STI); 71 break; 72 } 73 } 74 75 return false; 76 } 77 78 static bool SkipToToken(StringRef &Str) { 79 for (;;) { 80 if (Str.empty()) 81 return false; 82 83 // Strip horizontal whitespace and commas. 84 if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) { 85 Str = Str.substr(Pos); 86 continue; 87 } 88 89 // If this is the start of a comment, remove the rest of the line. 90 if (Str[0] == '#') { 91 Str = Str.substr(Str.find_first_of('\n')); 92 continue; 93 } 94 return true; 95 } 96 } 97 98 static bool byteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str, 99 SourceMgr &SM, bool HexBytes) { 100 while (SkipToToken(Str)) { 101 // Handled by higher level 102 if (Str[0] == '[' || Str[0] == ']') 103 return false; 104 105 // Get the current token. 106 size_t Next = Str.find_first_of(" \t\n\r,#[]"); 107 StringRef Value = Str.substr(0, Next); 108 109 // Convert to a byte and add to the byte vector. 110 unsigned ByteVal; 111 if (HexBytes) { 112 if (Next < 2) { 113 SM.PrintMessage(SMLoc::getFromPointer(Value.data()), 114 SourceMgr::DK_Error, "expected two hex digits"); 115 Str = Str.substr(Next); 116 return true; 117 } 118 Next = 2; 119 unsigned C0 = hexDigitValue(Value[0]); 120 unsigned C1 = hexDigitValue(Value[1]); 121 if (C0 == -1u || C1 == -1u) { 122 SM.PrintMessage(SMLoc::getFromPointer(Value.data()), 123 SourceMgr::DK_Error, "invalid input token"); 124 Str = Str.substr(Next); 125 return true; 126 } 127 ByteVal = C0 * 16 + C1; 128 } else if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) { 129 // If we have an error, print it and skip to the end of line. 130 SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error, 131 "invalid input token"); 132 Str = Str.substr(Str.find('\n')); 133 ByteArray.first.clear(); 134 ByteArray.second.clear(); 135 continue; 136 } 137 138 ByteArray.first.push_back(ByteVal); 139 ByteArray.second.push_back(Value.data()); 140 Str = Str.substr(Next); 141 } 142 143 return false; 144 } 145 146 int Disassembler::disassemble(const Target &T, const std::string &Triple, 147 MCSubtargetInfo &STI, MCStreamer &Streamer, 148 MemoryBuffer &Buffer, SourceMgr &SM, 149 MCContext &Ctx, const MCTargetOptions &MCOptions, 150 bool HexBytes) { 151 std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple)); 152 if (!MRI) { 153 errs() << "error: no register info for target " << Triple << "\n"; 154 return -1; 155 } 156 157 std::unique_ptr<const MCAsmInfo> MAI( 158 T.createMCAsmInfo(*MRI, Triple, MCOptions)); 159 if (!MAI) { 160 errs() << "error: no assembly info for target " << Triple << "\n"; 161 return -1; 162 } 163 164 std::unique_ptr<const MCDisassembler> DisAsm( 165 T.createMCDisassembler(STI, Ctx)); 166 if (!DisAsm) { 167 errs() << "error: no disassembler for target " << Triple << "\n"; 168 return -1; 169 } 170 171 bool ErrorOccurred = false; 172 173 // Convert the input to a vector for disassembly. 174 ByteArrayTy ByteArray; 175 StringRef Str = Buffer.getBuffer(); 176 bool InAtomicBlock = false; 177 178 while (SkipToToken(Str)) { 179 ByteArray.first.clear(); 180 ByteArray.second.clear(); 181 182 if (Str[0] == '[') { 183 if (InAtomicBlock) { 184 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, 185 "nested atomic blocks make no sense"); 186 ErrorOccurred = true; 187 } 188 InAtomicBlock = true; 189 Str = Str.drop_front(); 190 continue; 191 } else if (Str[0] == ']') { 192 if (!InAtomicBlock) { 193 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, 194 "attempt to close atomic block without opening"); 195 ErrorOccurred = true; 196 } 197 InAtomicBlock = false; 198 Str = Str.drop_front(); 199 continue; 200 } 201 202 // It's a real token, get the bytes and emit them 203 ErrorOccurred |= byteArrayFromString(ByteArray, Str, SM, HexBytes); 204 205 if (!ByteArray.first.empty()) 206 ErrorOccurred |= 207 PrintInsts(*DisAsm, ByteArray, SM, Streamer, InAtomicBlock, STI); 208 } 209 210 if (InAtomicBlock) { 211 SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, 212 "unclosed atomic block"); 213 ErrorOccurred = true; 214 } 215 216 return ErrorOccurred; 217 } 218