13cab2bb3Spatrick //===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===// 23cab2bb3Spatrick // 33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information. 53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 63cab2bb3Spatrick // 73cab2bb3Spatrick //===----------------------------------------------------------------------===// 83cab2bb3Spatrick // 93cab2bb3Spatrick // Header for internal classes and functions to be used by implementations of 103cab2bb3Spatrick // symbolizers. 113cab2bb3Spatrick // 123cab2bb3Spatrick //===----------------------------------------------------------------------===// 133cab2bb3Spatrick #ifndef SANITIZER_SYMBOLIZER_INTERNAL_H 143cab2bb3Spatrick #define SANITIZER_SYMBOLIZER_INTERNAL_H 153cab2bb3Spatrick 163cab2bb3Spatrick #include "sanitizer_symbolizer.h" 173cab2bb3Spatrick #include "sanitizer_file.h" 183cab2bb3Spatrick #include "sanitizer_vector.h" 193cab2bb3Spatrick 203cab2bb3Spatrick namespace __sanitizer { 213cab2bb3Spatrick 223cab2bb3Spatrick // Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr 233cab2bb3Spatrick // is extracted. When extracting a string, a newly allocated (using 24*810390e3Srobert // InternalAlloc) and null-terminated buffer is returned. They return a pointer 253cab2bb3Spatrick // to the next characted after the found delimiter. 263cab2bb3Spatrick const char *ExtractToken(const char *str, const char *delims, char **result); 273cab2bb3Spatrick const char *ExtractInt(const char *str, const char *delims, int *result); 283cab2bb3Spatrick const char *ExtractUptr(const char *str, const char *delims, uptr *result); 293cab2bb3Spatrick const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 303cab2bb3Spatrick char **result); 313cab2bb3Spatrick 323cab2bb3Spatrick const char *DemangleSwiftAndCXX(const char *name); 333cab2bb3Spatrick 343cab2bb3Spatrick // SymbolizerTool is an interface that is implemented by individual "tools" 353cab2bb3Spatrick // that can perform symbolication (external llvm-symbolizer, libbacktrace, 363cab2bb3Spatrick // Windows DbgHelp symbolizer, etc.). 373cab2bb3Spatrick class SymbolizerTool { 383cab2bb3Spatrick public: 393cab2bb3Spatrick // The main |Symbolizer| class implements a "fallback chain" of symbolizer 403cab2bb3Spatrick // tools. In a request to symbolize an address, if one tool returns false, 413cab2bb3Spatrick // the next tool in the chain will be tried. 423cab2bb3Spatrick SymbolizerTool *next; 433cab2bb3Spatrick SymbolizerTool()443cab2bb3Spatrick SymbolizerTool() : next(nullptr) { } 453cab2bb3Spatrick 463cab2bb3Spatrick // Can't declare pure virtual functions in sanitizer runtimes: 473cab2bb3Spatrick // __cxa_pure_virtual might be unavailable. 483cab2bb3Spatrick 493cab2bb3Spatrick // The |stack| parameter is inout. It is pre-filled with the address, 503cab2bb3Spatrick // module base and module offset values and is to be used to construct 513cab2bb3Spatrick // other stack frames. SymbolizePC(uptr addr,SymbolizedStack * stack)523cab2bb3Spatrick virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) { 533cab2bb3Spatrick UNIMPLEMENTED(); 543cab2bb3Spatrick } 553cab2bb3Spatrick 563cab2bb3Spatrick // The |info| parameter is inout. It is pre-filled with the module base 573cab2bb3Spatrick // and module offset values. SymbolizeData(uptr addr,DataInfo * info)583cab2bb3Spatrick virtual bool SymbolizeData(uptr addr, DataInfo *info) { 593cab2bb3Spatrick UNIMPLEMENTED(); 603cab2bb3Spatrick } 613cab2bb3Spatrick SymbolizeFrame(uptr addr,FrameInfo * info)623cab2bb3Spatrick virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) { 633cab2bb3Spatrick return false; 643cab2bb3Spatrick } 653cab2bb3Spatrick Flush()663cab2bb3Spatrick virtual void Flush() {} 673cab2bb3Spatrick 683cab2bb3Spatrick // Return nullptr to fallback to the default platform-specific demangler. Demangle(const char * name)693cab2bb3Spatrick virtual const char *Demangle(const char *name) { 703cab2bb3Spatrick return nullptr; 713cab2bb3Spatrick } 721f9cb04fSpatrick 73d89ec533Spatrick protected: ~SymbolizerTool()74d89ec533Spatrick ~SymbolizerTool() {} 753cab2bb3Spatrick }; 763cab2bb3Spatrick 773cab2bb3Spatrick // SymbolizerProcess encapsulates communication between the tool and 783cab2bb3Spatrick // external symbolizer program, running in a different subprocess. 793cab2bb3Spatrick // SymbolizerProcess may not be used from two threads simultaneously. 803cab2bb3Spatrick class SymbolizerProcess { 813cab2bb3Spatrick public: 823cab2bb3Spatrick explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false); 833cab2bb3Spatrick const char *SendCommand(const char *command); 843cab2bb3Spatrick 853cab2bb3Spatrick protected: ~SymbolizerProcess()86d89ec533Spatrick ~SymbolizerProcess() {} 87d89ec533Spatrick 883cab2bb3Spatrick /// The maximum number of arguments required to invoke a tool process. 89*810390e3Srobert static const unsigned kArgVMax = 16; 903cab2bb3Spatrick 913cab2bb3Spatrick // Customizable by subclasses. 923cab2bb3Spatrick virtual bool StartSymbolizerSubprocess(); 93*810390e3Srobert virtual bool ReadFromSymbolizer(); 941f9cb04fSpatrick // Return the environment to run the symbolizer in. GetEnvP()951f9cb04fSpatrick virtual char **GetEnvP() { return GetEnviron(); } GetBuff()96*810390e3Srobert InternalMmapVector<char> &GetBuff() { return buffer_; } 973cab2bb3Spatrick 983cab2bb3Spatrick private: ReachedEndOfOutput(const char * buffer,uptr length)993cab2bb3Spatrick virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { 1003cab2bb3Spatrick UNIMPLEMENTED(); 1013cab2bb3Spatrick } 1023cab2bb3Spatrick 1033cab2bb3Spatrick /// Fill in an argv array to invoke the child process. GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax])1043cab2bb3Spatrick virtual void GetArgV(const char *path_to_binary, 1053cab2bb3Spatrick const char *(&argv)[kArgVMax]) const { 1063cab2bb3Spatrick UNIMPLEMENTED(); 1073cab2bb3Spatrick } 1083cab2bb3Spatrick 1093cab2bb3Spatrick bool Restart(); 1103cab2bb3Spatrick const char *SendCommandImpl(const char *command); 1113cab2bb3Spatrick bool WriteToSymbolizer(const char *buffer, uptr length); 1123cab2bb3Spatrick 1133cab2bb3Spatrick const char *path_; 1143cab2bb3Spatrick fd_t input_fd_; 1153cab2bb3Spatrick fd_t output_fd_; 1163cab2bb3Spatrick 117*810390e3Srobert InternalMmapVector<char> buffer_; 1183cab2bb3Spatrick 1193cab2bb3Spatrick static const uptr kMaxTimesRestarted = 5; 1203cab2bb3Spatrick static const int kSymbolizerStartupTimeMillis = 10; 1213cab2bb3Spatrick uptr times_restarted_; 1223cab2bb3Spatrick bool failed_to_start_; 1233cab2bb3Spatrick bool reported_invalid_path_; 1243cab2bb3Spatrick bool use_posix_spawn_; 1253cab2bb3Spatrick }; 1263cab2bb3Spatrick 1273cab2bb3Spatrick class LLVMSymbolizerProcess; 1283cab2bb3Spatrick 1293cab2bb3Spatrick // This tool invokes llvm-symbolizer in a subprocess. It should be as portable 1303cab2bb3Spatrick // as the llvm-symbolizer tool is. 131d89ec533Spatrick class LLVMSymbolizer final : public SymbolizerTool { 1323cab2bb3Spatrick public: 1333cab2bb3Spatrick explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator); 1343cab2bb3Spatrick 1353cab2bb3Spatrick bool SymbolizePC(uptr addr, SymbolizedStack *stack) override; 1363cab2bb3Spatrick bool SymbolizeData(uptr addr, DataInfo *info) override; 1373cab2bb3Spatrick bool SymbolizeFrame(uptr addr, FrameInfo *info) override; 1383cab2bb3Spatrick 1393cab2bb3Spatrick private: 1403cab2bb3Spatrick const char *FormatAndSendCommand(const char *command_prefix, 1413cab2bb3Spatrick const char *module_name, uptr module_offset, 1423cab2bb3Spatrick ModuleArch arch); 1433cab2bb3Spatrick 1443cab2bb3Spatrick LLVMSymbolizerProcess *symbolizer_process_; 1453cab2bb3Spatrick static const uptr kBufferSize = 16 * 1024; 1463cab2bb3Spatrick char buffer_[kBufferSize]; 1473cab2bb3Spatrick }; 1483cab2bb3Spatrick 1493cab2bb3Spatrick // Parses one or more two-line strings in the following format: 1503cab2bb3Spatrick // <function_name> 1513cab2bb3Spatrick // <file_name>:<line_number>[:<column_number>] 1523cab2bb3Spatrick // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 1533cab2bb3Spatrick // them use the same output format. Returns true if any useful debug 1543cab2bb3Spatrick // information was found. 1553cab2bb3Spatrick void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res); 1563cab2bb3Spatrick 1573cab2bb3Spatrick // Parses a two-line string in the following format: 1583cab2bb3Spatrick // <symbol_name> 1593cab2bb3Spatrick // <start_address> <size> 1603cab2bb3Spatrick // Used by LLVMSymbolizer and InternalSymbolizer. 1613cab2bb3Spatrick void ParseSymbolizeDataOutput(const char *str, DataInfo *info); 1623cab2bb3Spatrick 1633cab2bb3Spatrick } // namespace __sanitizer 1643cab2bb3Spatrick 1653cab2bb3Spatrick #endif // SANITIZER_SYMBOLIZER_INTERNAL_H 166