10b57cec5SDimitry Andric //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This class implements the disassembler of strings of bytes written in
100b57cec5SDimitry Andric // hexadecimal, from standard input or from a file.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #include "Disassembler.h"
150b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
160b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
170b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
180b57cec5SDimitry Andric #include "llvm/MC/MCInst.h"
198bcb0991SDimitry Andric #include "llvm/MC/MCObjectFileInfo.h"
200b57cec5SDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
210b57cec5SDimitry Andric #include "llvm/MC/MCStreamer.h"
220b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
23349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h"
240b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
250b57cec5SDimitry Andric #include "llvm/Support/SourceMgr.h"
260b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
2706c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h"
280b57cec5SDimitry Andric
290b57cec5SDimitry Andric using namespace llvm;
300b57cec5SDimitry Andric
/// Parsed input bytes plus their origin in the input text. The two vectors are
/// filled in lock-step: `first[i]` is a byte value and `second[i]` points at
/// the start of the token it was parsed from, which is used to attach
/// diagnostics to the right source location.
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
330b57cec5SDimitry Andric
PrintInsts(const MCDisassembler & DisAsm,const ByteArrayTy & Bytes,SourceMgr & SM,MCStreamer & Streamer,bool InAtomicBlock,const MCSubtargetInfo & STI)34*5f757f3fSDimitry Andric static bool PrintInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes,
35*5f757f3fSDimitry Andric SourceMgr &SM, MCStreamer &Streamer, bool InAtomicBlock,
360b57cec5SDimitry Andric const MCSubtargetInfo &STI) {
370b57cec5SDimitry Andric ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size());
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric // Disassemble it to strings.
400b57cec5SDimitry Andric uint64_t Size;
410b57cec5SDimitry Andric uint64_t Index;
420b57cec5SDimitry Andric
430b57cec5SDimitry Andric for (Index = 0; Index < Bytes.first.size(); Index += Size) {
440b57cec5SDimitry Andric MCInst Inst;
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric MCDisassembler::DecodeStatus S;
47480093f4SDimitry Andric S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
480b57cec5SDimitry Andric switch (S) {
490b57cec5SDimitry Andric case MCDisassembler::Fail:
500b57cec5SDimitry Andric SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
510b57cec5SDimitry Andric SourceMgr::DK_Warning,
520b57cec5SDimitry Andric "invalid instruction encoding");
530b57cec5SDimitry Andric // Don't try to resynchronise the stream in a block
540b57cec5SDimitry Andric if (InAtomicBlock)
550b57cec5SDimitry Andric return true;
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric if (Size == 0)
580b57cec5SDimitry Andric Size = 1; // skip illegible bytes
590b57cec5SDimitry Andric
600b57cec5SDimitry Andric break;
610b57cec5SDimitry Andric
620b57cec5SDimitry Andric case MCDisassembler::SoftFail:
630b57cec5SDimitry Andric SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
640b57cec5SDimitry Andric SourceMgr::DK_Warning,
650b57cec5SDimitry Andric "potentially undefined instruction encoding");
66bdd1243dSDimitry Andric [[fallthrough]];
670b57cec5SDimitry Andric
680b57cec5SDimitry Andric case MCDisassembler::Success:
695ffd83dbSDimitry Andric Streamer.emitInstruction(Inst, STI);
700b57cec5SDimitry Andric break;
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric }
730b57cec5SDimitry Andric
740b57cec5SDimitry Andric return false;
750b57cec5SDimitry Andric }
760b57cec5SDimitry Andric
SkipToToken(StringRef & Str)770b57cec5SDimitry Andric static bool SkipToToken(StringRef &Str) {
780b57cec5SDimitry Andric for (;;) {
790b57cec5SDimitry Andric if (Str.empty())
800b57cec5SDimitry Andric return false;
810b57cec5SDimitry Andric
820b57cec5SDimitry Andric // Strip horizontal whitespace and commas.
830b57cec5SDimitry Andric if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) {
840b57cec5SDimitry Andric Str = Str.substr(Pos);
850b57cec5SDimitry Andric continue;
860b57cec5SDimitry Andric }
870b57cec5SDimitry Andric
880b57cec5SDimitry Andric // If this is the start of a comment, remove the rest of the line.
890b57cec5SDimitry Andric if (Str[0] == '#') {
900b57cec5SDimitry Andric Str = Str.substr(Str.find_first_of('\n'));
910b57cec5SDimitry Andric continue;
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric return true;
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric }
960b57cec5SDimitry Andric
970b57cec5SDimitry Andric
ByteArrayFromString(ByteArrayTy & ByteArray,StringRef & Str,SourceMgr & SM)980b57cec5SDimitry Andric static bool ByteArrayFromString(ByteArrayTy &ByteArray,
990b57cec5SDimitry Andric StringRef &Str,
1000b57cec5SDimitry Andric SourceMgr &SM) {
1010b57cec5SDimitry Andric while (SkipToToken(Str)) {
1020b57cec5SDimitry Andric // Handled by higher level
1030b57cec5SDimitry Andric if (Str[0] == '[' || Str[0] == ']')
1040b57cec5SDimitry Andric return false;
1050b57cec5SDimitry Andric
1060b57cec5SDimitry Andric // Get the current token.
1070b57cec5SDimitry Andric size_t Next = Str.find_first_of(" \t\n\r,#[]");
1080b57cec5SDimitry Andric StringRef Value = Str.substr(0, Next);
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric // Convert to a byte and add to the byte vector.
1110b57cec5SDimitry Andric unsigned ByteVal;
1120b57cec5SDimitry Andric if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
1130b57cec5SDimitry Andric // If we have an error, print it and skip to the end of line.
1140b57cec5SDimitry Andric SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
1150b57cec5SDimitry Andric "invalid input token");
1160b57cec5SDimitry Andric Str = Str.substr(Str.find('\n'));
1170b57cec5SDimitry Andric ByteArray.first.clear();
1180b57cec5SDimitry Andric ByteArray.second.clear();
1190b57cec5SDimitry Andric continue;
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric ByteArray.first.push_back(ByteVal);
1230b57cec5SDimitry Andric ByteArray.second.push_back(Value.data());
1240b57cec5SDimitry Andric Str = Str.substr(Next);
1250b57cec5SDimitry Andric }
1260b57cec5SDimitry Andric
1270b57cec5SDimitry Andric return false;
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric
disassemble(const Target & T,const std::string & Triple,MCSubtargetInfo & STI,MCStreamer & Streamer,MemoryBuffer & Buffer,SourceMgr & SM,MCContext & Ctx,const MCTargetOptions & MCOptions)1308bcb0991SDimitry Andric int Disassembler::disassemble(const Target &T, const std::string &Triple,
1318bcb0991SDimitry Andric MCSubtargetInfo &STI, MCStreamer &Streamer,
1328bcb0991SDimitry Andric MemoryBuffer &Buffer, SourceMgr &SM,
133*5f757f3fSDimitry Andric MCContext &Ctx,
134480093f4SDimitry Andric const MCTargetOptions &MCOptions) {
1350b57cec5SDimitry Andric
1360b57cec5SDimitry Andric std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
1370b57cec5SDimitry Andric if (!MRI) {
1380b57cec5SDimitry Andric errs() << "error: no register info for target " << Triple << "\n";
1390b57cec5SDimitry Andric return -1;
1400b57cec5SDimitry Andric }
1410b57cec5SDimitry Andric
142480093f4SDimitry Andric std::unique_ptr<const MCAsmInfo> MAI(
143480093f4SDimitry Andric T.createMCAsmInfo(*MRI, Triple, MCOptions));
1440b57cec5SDimitry Andric if (!MAI) {
1450b57cec5SDimitry Andric errs() << "error: no assembly info for target " << Triple << "\n";
1460b57cec5SDimitry Andric return -1;
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric
1490b57cec5SDimitry Andric std::unique_ptr<const MCDisassembler> DisAsm(
1500b57cec5SDimitry Andric T.createMCDisassembler(STI, Ctx));
1510b57cec5SDimitry Andric if (!DisAsm) {
1520b57cec5SDimitry Andric errs() << "error: no disassembler for target " << Triple << "\n";
1530b57cec5SDimitry Andric return -1;
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric
1560b57cec5SDimitry Andric // Set up initial section manually here
157349cc55cSDimitry Andric Streamer.initSections(false, STI);
1580b57cec5SDimitry Andric
1590b57cec5SDimitry Andric bool ErrorOccurred = false;
1600b57cec5SDimitry Andric
1610b57cec5SDimitry Andric // Convert the input to a vector for disassembly.
1620b57cec5SDimitry Andric ByteArrayTy ByteArray;
1630b57cec5SDimitry Andric StringRef Str = Buffer.getBuffer();
1640b57cec5SDimitry Andric bool InAtomicBlock = false;
1650b57cec5SDimitry Andric
1660b57cec5SDimitry Andric while (SkipToToken(Str)) {
1670b57cec5SDimitry Andric ByteArray.first.clear();
1680b57cec5SDimitry Andric ByteArray.second.clear();
1690b57cec5SDimitry Andric
1700b57cec5SDimitry Andric if (Str[0] == '[') {
1710b57cec5SDimitry Andric if (InAtomicBlock) {
1720b57cec5SDimitry Andric SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
1730b57cec5SDimitry Andric "nested atomic blocks make no sense");
1740b57cec5SDimitry Andric ErrorOccurred = true;
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric InAtomicBlock = true;
1770b57cec5SDimitry Andric Str = Str.drop_front();
1780b57cec5SDimitry Andric continue;
1790b57cec5SDimitry Andric } else if (Str[0] == ']') {
1800b57cec5SDimitry Andric if (!InAtomicBlock) {
1810b57cec5SDimitry Andric SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
1820b57cec5SDimitry Andric "attempt to close atomic block without opening");
1830b57cec5SDimitry Andric ErrorOccurred = true;
1840b57cec5SDimitry Andric }
1850b57cec5SDimitry Andric InAtomicBlock = false;
1860b57cec5SDimitry Andric Str = Str.drop_front();
1870b57cec5SDimitry Andric continue;
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric
1900b57cec5SDimitry Andric // It's a real token, get the bytes and emit them
1910b57cec5SDimitry Andric ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric if (!ByteArray.first.empty())
194*5f757f3fSDimitry Andric ErrorOccurred |=
195*5f757f3fSDimitry Andric PrintInsts(*DisAsm, ByteArray, SM, Streamer, InAtomicBlock, STI);
1960b57cec5SDimitry Andric }
1970b57cec5SDimitry Andric
1980b57cec5SDimitry Andric if (InAtomicBlock) {
1990b57cec5SDimitry Andric SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
2000b57cec5SDimitry Andric "unclosed atomic block");
2010b57cec5SDimitry Andric ErrorOccurred = true;
2020b57cec5SDimitry Andric }
2030b57cec5SDimitry Andric
2040b57cec5SDimitry Andric return ErrorOccurred;
2050b57cec5SDimitry Andric }
206