//===-- llvm-mc.cpp - Machine Code Hacking Driver -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This utility is a simple driver that allows command line hacking on machine
// code.
//
//===----------------------------------------------------------------------===//

#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/Target/TargetAsmBackend.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"  // FIXME.
#include "llvm/Target/TargetSelect.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Host.h"
#include "llvm/System/Signals.h"
#include "Disassembler.h"
using namespace llvm;

// Positional input file name. Defaults to "-"; the name is handed to
// MemoryBuffer::getFileOrSTDIN by each action in this file.
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));

// Output file name (-o). GetOutputStream() treats an empty value as "-".
static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"),
               cl::value_desc("filename"));

// When set and assembly output is selected, AssembleInput() creates a code
// emitter and passes it to the assembly streamer.
static cl::opt<bool>
ShowEncoding("show-encoding", cl::desc("Show instruction encodings"));

// Forwarded as the last argument of createAsmStreamer() in AssembleInput().
static cl::opt<bool>
ShowInst("show-inst", cl::desc("Show internal instruction representation"));

// Syntax variant index passed to Target::createMCInstPrinter().
static cl::opt<unsigned>
OutputAsmVariant("output-asm-variant",
                 cl::desc("Syntax variant to use for output printing"));

// Forwarded to createMachOStreamer() when emitting an object file.
static cl::opt<bool>
RelaxAll("mc-relax-all", cl::desc("Relax all fixups"));

// Kinds of output AssembleInput() can produce; selected with -filetype.
enum OutputFileType {
  OFT_Null,
  OFT_AssemblyFile,
  OFT_ObjectFile
};
// Defaults to assembly output (OFT_AssemblyFile).
static cl::opt<OutputFileType>
FileType("filetype", cl::init(OFT_AssemblyFile),
  cl::desc("Choose an output file type:"),
  cl::values(
       clEnumValN(OFT_AssemblyFile, "asm",
                  "Emit an assembly ('.s') file"),
       clEnumValN(OFT_Null, "null",
                  "Don't emit anything (for timing purposes)"),
       clEnumValN(OFT_ObjectFile, "obj",
                  "Emit a native object ('.o') file"),
       clEnumValEnd));

// NOTE(review): this flag is declared here but is not read anywhere in the
// visible part of this file -- confirm whether a terminal check is missing.
static cl::opt<bool>
Force("f", cl::desc("Enable binary output on terminals"));

// Include search directories (-I<dir>); handed to SourceMgr::setIncludeDirs.
static cl::list<std::string>
IncludeDirs("I", cl::desc("Directory of include files"),
            cl::value_desc("directory"), cl::Prefix);

// Optional architecture name; when non-empty, GetTarget() uses it to override
// the architecture component of the target triple.
static cl::opt<std::string>
ArchName("arch", cl::desc("Target arch to assemble for, "
                          "see -version for available targets"));
88 89 static cl::opt<std::string> 90 TripleName("triple", cl::desc("Target triple to assemble for, " 91 "see -version for available targets")); 92 93 static cl::opt<bool> 94 NoInitialTextSection("n", cl::desc( 95 "Don't assume assembly file starts in the text section")); 96 97 enum ActionType { 98 AC_AsLex, 99 AC_Assemble, 100 AC_Disassemble, 101 AC_EDisassemble 102 }; 103 104 static cl::opt<ActionType> 105 Action(cl::desc("Action to perform:"), 106 cl::init(AC_Assemble), 107 cl::values(clEnumValN(AC_AsLex, "as-lex", 108 "Lex tokens from a .s file"), 109 clEnumValN(AC_Assemble, "assemble", 110 "Assemble a .s file (default)"), 111 clEnumValN(AC_Disassemble, "disassemble", 112 "Disassemble strings of hex bytes"), 113 clEnumValN(AC_EDisassemble, "edis", 114 "Enhanced disassembly of strings of hex bytes"), 115 clEnumValEnd)); 116 117 static const Target *GetTarget(const char *ProgName) { 118 // Figure out the target triple. 119 if (TripleName.empty()) 120 TripleName = sys::getHostTriple(); 121 if (!ArchName.empty()) { 122 llvm::Triple TT(TripleName); 123 TT.setArchName(ArchName); 124 TripleName = TT.str(); 125 } 126 127 // Get the target specific parser. 128 std::string Error; 129 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); 130 if (TheTarget) 131 return TheTarget; 132 133 errs() << ProgName << ": error: unable to get target for '" << TripleName 134 << "', see --version and --triple.\n"; 135 return 0; 136 } 137 138 static int AsLexInput(const char *ProgName) { 139 std::string ErrorMessage; 140 MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, 141 &ErrorMessage); 142 if (Buffer == 0) { 143 errs() << ProgName << ": "; 144 if (ErrorMessage.size()) 145 errs() << ErrorMessage << "\n"; 146 else 147 errs() << "input file didn't read correctly.\n"; 148 return 1; 149 } 150 151 SourceMgr SrcMgr; 152 153 // Tell SrcMgr about this buffer, which is what TGParser will pick up. 
154 SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); 155 156 // Record the location of the include directories so that the lexer can find 157 // it later. 158 SrcMgr.setIncludeDirs(IncludeDirs); 159 160 const Target *TheTarget = GetTarget(ProgName); 161 if (!TheTarget) 162 return 1; 163 164 llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createAsmInfo(TripleName)); 165 assert(MAI && "Unable to create target asm info!"); 166 167 AsmLexer Lexer(*MAI); 168 169 bool Error = false; 170 171 while (Lexer.Lex().isNot(AsmToken::Eof)) { 172 switch (Lexer.getKind()) { 173 default: 174 SrcMgr.PrintMessage(Lexer.getLoc(), "unknown token", "warning"); 175 Error = true; 176 break; 177 case AsmToken::Error: 178 Error = true; // error already printed. 179 break; 180 case AsmToken::Identifier: 181 outs() << "identifier: " << Lexer.getTok().getString() << '\n'; 182 break; 183 case AsmToken::String: 184 outs() << "string: " << Lexer.getTok().getString() << '\n'; 185 break; 186 case AsmToken::Integer: 187 outs() << "int: " << Lexer.getTok().getString() << '\n'; 188 break; 189 190 case AsmToken::Amp: outs() << "Amp\n"; break; 191 case AsmToken::AmpAmp: outs() << "AmpAmp\n"; break; 192 case AsmToken::Caret: outs() << "Caret\n"; break; 193 case AsmToken::Colon: outs() << "Colon\n"; break; 194 case AsmToken::Comma: outs() << "Comma\n"; break; 195 case AsmToken::Dollar: outs() << "Dollar\n"; break; 196 case AsmToken::EndOfStatement: outs() << "EndOfStatement\n"; break; 197 case AsmToken::Eof: outs() << "Eof\n"; break; 198 case AsmToken::Equal: outs() << "Equal\n"; break; 199 case AsmToken::EqualEqual: outs() << "EqualEqual\n"; break; 200 case AsmToken::Exclaim: outs() << "Exclaim\n"; break; 201 case AsmToken::ExclaimEqual: outs() << "ExclaimEqual\n"; break; 202 case AsmToken::Greater: outs() << "Greater\n"; break; 203 case AsmToken::GreaterEqual: outs() << "GreaterEqual\n"; break; 204 case AsmToken::GreaterGreater: outs() << "GreaterGreater\n"; break; 205 case AsmToken::LParen: outs() << "LParen\n"; 
break; 206 case AsmToken::Less: outs() << "Less\n"; break; 207 case AsmToken::LessEqual: outs() << "LessEqual\n"; break; 208 case AsmToken::LessGreater: outs() << "LessGreater\n"; break; 209 case AsmToken::LessLess: outs() << "LessLess\n"; break; 210 case AsmToken::Minus: outs() << "Minus\n"; break; 211 case AsmToken::Percent: outs() << "Percent\n"; break; 212 case AsmToken::Pipe: outs() << "Pipe\n"; break; 213 case AsmToken::PipePipe: outs() << "PipePipe\n"; break; 214 case AsmToken::Plus: outs() << "Plus\n"; break; 215 case AsmToken::RParen: outs() << "RParen\n"; break; 216 case AsmToken::Slash: outs() << "Slash\n"; break; 217 case AsmToken::Star: outs() << "Star\n"; break; 218 case AsmToken::Tilde: outs() << "Tilde\n"; break; 219 } 220 } 221 222 return Error; 223 } 224 225 static formatted_raw_ostream *GetOutputStream() { 226 if (OutputFilename == "") 227 OutputFilename = "-"; 228 229 // Make sure that the Out file gets unlinked from the disk if we get a 230 // SIGINT. 231 if (OutputFilename != "-") 232 sys::RemoveFileOnSignal(sys::Path(OutputFilename)); 233 234 std::string Err; 235 raw_fd_ostream *Out = new raw_fd_ostream(OutputFilename.c_str(), Err, 236 raw_fd_ostream::F_Binary); 237 if (!Err.empty()) { 238 errs() << Err << '\n'; 239 delete Out; 240 return 0; 241 } 242 243 return new formatted_raw_ostream(*Out, formatted_raw_ostream::DELETE_STREAM); 244 } 245 246 static int AssembleInput(const char *ProgName) { 247 const Target *TheTarget = GetTarget(ProgName); 248 if (!TheTarget) 249 return 1; 250 251 std::string Error; 252 MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, &Error); 253 if (Buffer == 0) { 254 errs() << ProgName << ": "; 255 if (Error.size()) 256 errs() << Error << "\n"; 257 else 258 errs() << "input file didn't read correctly.\n"; 259 return 1; 260 } 261 262 SourceMgr SrcMgr; 263 264 // Tell SrcMgr about this buffer, which is what the parser will pick up. 
265 SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); 266 267 // Record the location of the include directories so that the lexer can find 268 // it later. 269 SrcMgr.setIncludeDirs(IncludeDirs); 270 271 272 llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createAsmInfo(TripleName)); 273 assert(MAI && "Unable to create target asm info!"); 274 275 MCContext Ctx(*MAI); 276 formatted_raw_ostream *Out = GetOutputStream(); 277 if (!Out) 278 return 1; 279 280 281 // FIXME: We shouldn't need to do this (and link in codegen). 282 OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName, "")); 283 284 if (!TM) { 285 errs() << ProgName << ": error: could not create target for triple '" 286 << TripleName << "'.\n"; 287 return 1; 288 } 289 290 OwningPtr<MCCodeEmitter> CE; 291 OwningPtr<MCStreamer> Str; 292 OwningPtr<TargetAsmBackend> TAB; 293 294 if (FileType == OFT_AssemblyFile) { 295 MCInstPrinter *IP = 296 TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI); 297 if (ShowEncoding) 298 CE.reset(TheTarget->createCodeEmitter(*TM, Ctx)); 299 Str.reset(createAsmStreamer(Ctx, *Out,TM->getTargetData()->isLittleEndian(), 300 /*asmverbose*/true, IP, CE.get(), ShowInst)); 301 } else if (FileType == OFT_Null) { 302 Str.reset(createNullStreamer(Ctx)); 303 } else { 304 assert(FileType == OFT_ObjectFile && "Invalid file type!"); 305 CE.reset(TheTarget->createCodeEmitter(*TM, Ctx)); 306 TAB.reset(TheTarget->createAsmBackend(TripleName)); 307 Str.reset(createMachOStreamer(Ctx, *TAB, *Out, CE.get(), RelaxAll)); 308 } 309 310 AsmParser Parser(SrcMgr, Ctx, *Str.get(), *MAI); 311 OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(Parser)); 312 if (!TAP) { 313 errs() << ProgName 314 << ": error: this target does not support assembly parsing.\n"; 315 return 1; 316 } 317 318 Parser.setTargetParser(*TAP.get()); 319 320 int Res = Parser.Run(NoInitialTextSection); 321 if (Out != &fouts()) 322 delete Out; 323 324 // Delete output on errors. 
325 if (Res && OutputFilename != "-") 326 sys::Path(OutputFilename).eraseFromDisk(); 327 328 return Res; 329 } 330 331 static int DisassembleInput(const char *ProgName, bool Enhanced) { 332 const Target *TheTarget = GetTarget(ProgName); 333 if (!TheTarget) 334 return 0; 335 336 std::string ErrorMessage; 337 338 MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, 339 &ErrorMessage); 340 341 if (Buffer == 0) { 342 errs() << ProgName << ": "; 343 if (ErrorMessage.size()) 344 errs() << ErrorMessage << "\n"; 345 else 346 errs() << "input file didn't read correctly.\n"; 347 return 1; 348 } 349 350 if (Enhanced) 351 return Disassembler::disassembleEnhanced(TripleName, *Buffer); 352 else 353 return Disassembler::disassemble(*TheTarget, TripleName, *Buffer); 354 } 355 356 357 int main(int argc, char **argv) { 358 // Print a stack trace if we signal out. 359 sys::PrintStackTraceOnErrorSignal(); 360 PrettyStackTraceProgram X(argc, argv); 361 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 362 363 // Initialize targets and assembly printers/parsers. 364 llvm::InitializeAllTargetInfos(); 365 // FIXME: We shouldn't need to initialize the Target(Machine)s. 366 llvm::InitializeAllTargets(); 367 llvm::InitializeAllAsmPrinters(); 368 llvm::InitializeAllAsmParsers(); 369 llvm::InitializeAllDisassemblers(); 370 371 cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); 372 373 switch (Action) { 374 default: 375 case AC_AsLex: 376 return AsLexInput(argv[0]); 377 case AC_Assemble: 378 return AssembleInput(argv[0]); 379 case AC_Disassemble: 380 return DisassembleInput(argv[0], false); 381 case AC_EDisassemble: 382 return DisassembleInput(argv[0], true); 383 } 384 385 return 0; 386 } 387 388