xref: /freebsd-src/contrib/llvm-project/llvm/tools/llvm-mc/Disassembler.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This class implements the disassembler of strings of bytes written in
100b57cec5SDimitry Andric // hexadecimal, from standard input or from a file.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "Disassembler.h"
150b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
160b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
170b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
180b57cec5SDimitry Andric #include "llvm/MC/MCInst.h"
198bcb0991SDimitry Andric #include "llvm/MC/MCObjectFileInfo.h"
200b57cec5SDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
210b57cec5SDimitry Andric #include "llvm/MC/MCStreamer.h"
220b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
23349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h"
240b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
250b57cec5SDimitry Andric #include "llvm/Support/SourceMgr.h"
260b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
2706c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h"
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric using namespace llvm;
300b57cec5SDimitry Andric 
/// Parsed input bytes paired with their source positions: `first` holds the
/// byte values, and `second` holds, for each byte, a pointer to the token in
/// the input buffer it was parsed from (used to position diagnostics).
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
330b57cec5SDimitry Andric 
PrintInsts(const MCDisassembler & DisAsm,const ByteArrayTy & Bytes,SourceMgr & SM,MCStreamer & Streamer,bool InAtomicBlock,const MCSubtargetInfo & STI)34*5f757f3fSDimitry Andric static bool PrintInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes,
35*5f757f3fSDimitry Andric                        SourceMgr &SM, MCStreamer &Streamer, bool InAtomicBlock,
360b57cec5SDimitry Andric                        const MCSubtargetInfo &STI) {
370b57cec5SDimitry Andric   ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size());
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric   // Disassemble it to strings.
400b57cec5SDimitry Andric   uint64_t Size;
410b57cec5SDimitry Andric   uint64_t Index;
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric   for (Index = 0; Index < Bytes.first.size(); Index += Size) {
440b57cec5SDimitry Andric     MCInst Inst;
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric     MCDisassembler::DecodeStatus S;
47480093f4SDimitry Andric     S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
480b57cec5SDimitry Andric     switch (S) {
490b57cec5SDimitry Andric     case MCDisassembler::Fail:
500b57cec5SDimitry Andric       SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
510b57cec5SDimitry Andric                       SourceMgr::DK_Warning,
520b57cec5SDimitry Andric                       "invalid instruction encoding");
530b57cec5SDimitry Andric       // Don't try to resynchronise the stream in a block
540b57cec5SDimitry Andric       if (InAtomicBlock)
550b57cec5SDimitry Andric         return true;
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric       if (Size == 0)
580b57cec5SDimitry Andric         Size = 1; // skip illegible bytes
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric       break;
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric     case MCDisassembler::SoftFail:
630b57cec5SDimitry Andric       SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
640b57cec5SDimitry Andric                       SourceMgr::DK_Warning,
650b57cec5SDimitry Andric                       "potentially undefined instruction encoding");
66bdd1243dSDimitry Andric       [[fallthrough]];
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric     case MCDisassembler::Success:
695ffd83dbSDimitry Andric       Streamer.emitInstruction(Inst, STI);
700b57cec5SDimitry Andric       break;
710b57cec5SDimitry Andric     }
720b57cec5SDimitry Andric   }
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric   return false;
750b57cec5SDimitry Andric }
760b57cec5SDimitry Andric 
SkipToToken(StringRef & Str)770b57cec5SDimitry Andric static bool SkipToToken(StringRef &Str) {
780b57cec5SDimitry Andric   for (;;) {
790b57cec5SDimitry Andric     if (Str.empty())
800b57cec5SDimitry Andric       return false;
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric     // Strip horizontal whitespace and commas.
830b57cec5SDimitry Andric     if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) {
840b57cec5SDimitry Andric       Str = Str.substr(Pos);
850b57cec5SDimitry Andric       continue;
860b57cec5SDimitry Andric     }
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric     // If this is the start of a comment, remove the rest of the line.
890b57cec5SDimitry Andric     if (Str[0] == '#') {
900b57cec5SDimitry Andric         Str = Str.substr(Str.find_first_of('\n'));
910b57cec5SDimitry Andric       continue;
920b57cec5SDimitry Andric     }
930b57cec5SDimitry Andric     return true;
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric 
ByteArrayFromString(ByteArrayTy & ByteArray,StringRef & Str,SourceMgr & SM)980b57cec5SDimitry Andric static bool ByteArrayFromString(ByteArrayTy &ByteArray,
990b57cec5SDimitry Andric                                 StringRef &Str,
1000b57cec5SDimitry Andric                                 SourceMgr &SM) {
1010b57cec5SDimitry Andric   while (SkipToToken(Str)) {
1020b57cec5SDimitry Andric     // Handled by higher level
1030b57cec5SDimitry Andric     if (Str[0] == '[' || Str[0] == ']')
1040b57cec5SDimitry Andric       return false;
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric     // Get the current token.
1070b57cec5SDimitry Andric     size_t Next = Str.find_first_of(" \t\n\r,#[]");
1080b57cec5SDimitry Andric     StringRef Value = Str.substr(0, Next);
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric     // Convert to a byte and add to the byte vector.
1110b57cec5SDimitry Andric     unsigned ByteVal;
1120b57cec5SDimitry Andric     if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
1130b57cec5SDimitry Andric       // If we have an error, print it and skip to the end of line.
1140b57cec5SDimitry Andric       SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
1150b57cec5SDimitry Andric                       "invalid input token");
1160b57cec5SDimitry Andric       Str = Str.substr(Str.find('\n'));
1170b57cec5SDimitry Andric       ByteArray.first.clear();
1180b57cec5SDimitry Andric       ByteArray.second.clear();
1190b57cec5SDimitry Andric       continue;
1200b57cec5SDimitry Andric     }
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric     ByteArray.first.push_back(ByteVal);
1230b57cec5SDimitry Andric     ByteArray.second.push_back(Value.data());
1240b57cec5SDimitry Andric     Str = Str.substr(Next);
1250b57cec5SDimitry Andric   }
1260b57cec5SDimitry Andric 
1270b57cec5SDimitry Andric   return false;
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric 
disassemble(const Target & T,const std::string & Triple,MCSubtargetInfo & STI,MCStreamer & Streamer,MemoryBuffer & Buffer,SourceMgr & SM,MCContext & Ctx,const MCTargetOptions & MCOptions)1308bcb0991SDimitry Andric int Disassembler::disassemble(const Target &T, const std::string &Triple,
1318bcb0991SDimitry Andric                               MCSubtargetInfo &STI, MCStreamer &Streamer,
1328bcb0991SDimitry Andric                               MemoryBuffer &Buffer, SourceMgr &SM,
133*5f757f3fSDimitry Andric                               MCContext &Ctx,
134480093f4SDimitry Andric                               const MCTargetOptions &MCOptions) {
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric   std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
1370b57cec5SDimitry Andric   if (!MRI) {
1380b57cec5SDimitry Andric     errs() << "error: no register info for target " << Triple << "\n";
1390b57cec5SDimitry Andric     return -1;
1400b57cec5SDimitry Andric   }
1410b57cec5SDimitry Andric 
142480093f4SDimitry Andric   std::unique_ptr<const MCAsmInfo> MAI(
143480093f4SDimitry Andric       T.createMCAsmInfo(*MRI, Triple, MCOptions));
1440b57cec5SDimitry Andric   if (!MAI) {
1450b57cec5SDimitry Andric     errs() << "error: no assembly info for target " << Triple << "\n";
1460b57cec5SDimitry Andric     return -1;
1470b57cec5SDimitry Andric   }
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   std::unique_ptr<const MCDisassembler> DisAsm(
1500b57cec5SDimitry Andric     T.createMCDisassembler(STI, Ctx));
1510b57cec5SDimitry Andric   if (!DisAsm) {
1520b57cec5SDimitry Andric     errs() << "error: no disassembler for target " << Triple << "\n";
1530b57cec5SDimitry Andric     return -1;
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric   // Set up initial section manually here
157349cc55cSDimitry Andric   Streamer.initSections(false, STI);
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric   bool ErrorOccurred = false;
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   // Convert the input to a vector for disassembly.
1620b57cec5SDimitry Andric   ByteArrayTy ByteArray;
1630b57cec5SDimitry Andric   StringRef Str = Buffer.getBuffer();
1640b57cec5SDimitry Andric   bool InAtomicBlock = false;
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric   while (SkipToToken(Str)) {
1670b57cec5SDimitry Andric     ByteArray.first.clear();
1680b57cec5SDimitry Andric     ByteArray.second.clear();
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric     if (Str[0] == '[') {
1710b57cec5SDimitry Andric       if (InAtomicBlock) {
1720b57cec5SDimitry Andric         SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
1730b57cec5SDimitry Andric                         "nested atomic blocks make no sense");
1740b57cec5SDimitry Andric         ErrorOccurred = true;
1750b57cec5SDimitry Andric       }
1760b57cec5SDimitry Andric       InAtomicBlock = true;
1770b57cec5SDimitry Andric       Str = Str.drop_front();
1780b57cec5SDimitry Andric       continue;
1790b57cec5SDimitry Andric     } else if (Str[0] == ']') {
1800b57cec5SDimitry Andric       if (!InAtomicBlock) {
1810b57cec5SDimitry Andric         SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
1820b57cec5SDimitry Andric                         "attempt to close atomic block without opening");
1830b57cec5SDimitry Andric         ErrorOccurred = true;
1840b57cec5SDimitry Andric       }
1850b57cec5SDimitry Andric       InAtomicBlock = false;
1860b57cec5SDimitry Andric       Str = Str.drop_front();
1870b57cec5SDimitry Andric       continue;
1880b57cec5SDimitry Andric     }
1890b57cec5SDimitry Andric 
1900b57cec5SDimitry Andric     // It's a real token, get the bytes and emit them
1910b57cec5SDimitry Andric     ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric     if (!ByteArray.first.empty())
194*5f757f3fSDimitry Andric       ErrorOccurred |=
195*5f757f3fSDimitry Andric           PrintInsts(*DisAsm, ByteArray, SM, Streamer, InAtomicBlock, STI);
1960b57cec5SDimitry Andric   }
1970b57cec5SDimitry Andric 
1980b57cec5SDimitry Andric   if (InAtomicBlock) {
1990b57cec5SDimitry Andric     SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
2000b57cec5SDimitry Andric                     "unclosed atomic block");
2010b57cec5SDimitry Andric     ErrorOccurred = true;
2020b57cec5SDimitry Andric   }
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   return ErrorOccurred;
2050b57cec5SDimitry Andric }
206