//===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Header for internal classes and functions to be used by implementations of
// symbolizers.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_SYMBOLIZER_INTERNAL_H
#define SANITIZER_SYMBOLIZER_INTERNAL_H

#include "sanitizer_file.h"
#include "sanitizer_symbolizer.h"
#include "sanitizer_vector.h"

namespace __sanitizer {

// Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr
// is extracted. When extracting a string, a newly allocated (using
// InternalAlloc) and null-terminated buffer is returned. They return a pointer
// to the next character after the found delimiter.
260b57cec5SDimitry Andric const char *ExtractToken(const char *str, const char *delims, char **result); 270b57cec5SDimitry Andric const char *ExtractInt(const char *str, const char *delims, int *result); 280b57cec5SDimitry Andric const char *ExtractUptr(const char *str, const char *delims, uptr *result); 290b57cec5SDimitry Andric const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 300b57cec5SDimitry Andric char **result); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric const char *DemangleSwiftAndCXX(const char *name); 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric // SymbolizerTool is an interface that is implemented by individual "tools" 350b57cec5SDimitry Andric // that can perform symbolication (external llvm-symbolizer, libbacktrace, 360b57cec5SDimitry Andric // Windows DbgHelp symbolizer, etc.). 370b57cec5SDimitry Andric class SymbolizerTool { 380b57cec5SDimitry Andric public: 390b57cec5SDimitry Andric // The main |Symbolizer| class implements a "fallback chain" of symbolizer 400b57cec5SDimitry Andric // tools. In a request to symbolize an address, if one tool returns false, 410b57cec5SDimitry Andric // the next tool in the chain will be tried. 420b57cec5SDimitry Andric SymbolizerTool *next; 430b57cec5SDimitry Andric SymbolizerTool()440b57cec5SDimitry Andric SymbolizerTool() : next(nullptr) { } 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric // Can't declare pure virtual functions in sanitizer runtimes: 470b57cec5SDimitry Andric // __cxa_pure_virtual might be unavailable. 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric // The |stack| parameter is inout. It is pre-filled with the address, 500b57cec5SDimitry Andric // module base and module offset values and is to be used to construct 510b57cec5SDimitry Andric // other stack frames. 
SymbolizePC(uptr addr,SymbolizedStack * stack)520b57cec5SDimitry Andric virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) { 530b57cec5SDimitry Andric UNIMPLEMENTED(); 540b57cec5SDimitry Andric } 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric // The |info| parameter is inout. It is pre-filled with the module base 570b57cec5SDimitry Andric // and module offset values. SymbolizeData(uptr addr,DataInfo * info)580b57cec5SDimitry Andric virtual bool SymbolizeData(uptr addr, DataInfo *info) { 590b57cec5SDimitry Andric UNIMPLEMENTED(); 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric SymbolizeFrame(uptr addr,FrameInfo * info)620b57cec5SDimitry Andric virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) { 630b57cec5SDimitry Andric return false; 640b57cec5SDimitry Andric } 650b57cec5SDimitry Andric Flush()660b57cec5SDimitry Andric virtual void Flush() {} 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric // Return nullptr to fallback to the default platform-specific demangler. Demangle(const char * name)690b57cec5SDimitry Andric virtual const char *Demangle(const char *name) { 700b57cec5SDimitry Andric return nullptr; 710b57cec5SDimitry Andric } 725ffd83dbSDimitry Andric 73e8d8bef9SDimitry Andric protected: ~SymbolizerTool()74e8d8bef9SDimitry Andric ~SymbolizerTool() {} 750b57cec5SDimitry Andric }; 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric // SymbolizerProcess encapsulates communication between the tool and 780b57cec5SDimitry Andric // external symbolizer program, running in a different subprocess. 790b57cec5SDimitry Andric // SymbolizerProcess may not be used from two threads simultaneously. 
800b57cec5SDimitry Andric class SymbolizerProcess { 810b57cec5SDimitry Andric public: 8268d75effSDimitry Andric explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false); 830b57cec5SDimitry Andric const char *SendCommand(const char *command); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric protected: ~SymbolizerProcess()86e8d8bef9SDimitry Andric ~SymbolizerProcess() {} 87e8d8bef9SDimitry Andric 8868d75effSDimitry Andric /// The maximum number of arguments required to invoke a tool process. 890eae32dcSDimitry Andric static const unsigned kArgVMax = 16; 9068d75effSDimitry Andric 9168d75effSDimitry Andric // Customizable by subclasses. 9268d75effSDimitry Andric virtual bool StartSymbolizerSubprocess(); 9381ad6265SDimitry Andric virtual bool ReadFromSymbolizer(); 945ffd83dbSDimitry Andric // Return the environment to run the symbolizer in. GetEnvP()955ffd83dbSDimitry Andric virtual char **GetEnvP() { return GetEnviron(); } GetBuff()9681ad6265SDimitry Andric InternalMmapVector<char> &GetBuff() { return buffer_; } 9768d75effSDimitry Andric 9868d75effSDimitry Andric private: ReachedEndOfOutput(const char * buffer,uptr length)990b57cec5SDimitry Andric virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const { 1000b57cec5SDimitry Andric UNIMPLEMENTED(); 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric /// Fill in an argv array to invoke the child process. 
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax])1040b57cec5SDimitry Andric virtual void GetArgV(const char *path_to_binary, 1050b57cec5SDimitry Andric const char *(&argv)[kArgVMax]) const { 1060b57cec5SDimitry Andric UNIMPLEMENTED(); 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric bool Restart(); 1100b57cec5SDimitry Andric const char *SendCommandImpl(const char *command); 1110b57cec5SDimitry Andric bool WriteToSymbolizer(const char *buffer, uptr length); 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric const char *path_; 1140b57cec5SDimitry Andric fd_t input_fd_; 1150b57cec5SDimitry Andric fd_t output_fd_; 1160b57cec5SDimitry Andric 11781ad6265SDimitry Andric InternalMmapVector<char> buffer_; 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric static const uptr kMaxTimesRestarted = 5; 1200b57cec5SDimitry Andric static const int kSymbolizerStartupTimeMillis = 10; 1210b57cec5SDimitry Andric uptr times_restarted_; 1220b57cec5SDimitry Andric bool failed_to_start_; 1230b57cec5SDimitry Andric bool reported_invalid_path_; 12468d75effSDimitry Andric bool use_posix_spawn_; 1250b57cec5SDimitry Andric }; 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric class LLVMSymbolizerProcess; 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric // This tool invokes llvm-symbolizer in a subprocess. It should be as portable 1300b57cec5SDimitry Andric // as the llvm-symbolizer tool is. 
131e8d8bef9SDimitry Andric class LLVMSymbolizer final : public SymbolizerTool { 1320b57cec5SDimitry Andric public: 1330b57cec5SDimitry Andric explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator); 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric bool SymbolizePC(uptr addr, SymbolizedStack *stack) override; 1360b57cec5SDimitry Andric bool SymbolizeData(uptr addr, DataInfo *info) override; 1370b57cec5SDimitry Andric bool SymbolizeFrame(uptr addr, FrameInfo *info) override; 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric private: 1400b57cec5SDimitry Andric const char *FormatAndSendCommand(const char *command_prefix, 1410b57cec5SDimitry Andric const char *module_name, uptr module_offset, 1420b57cec5SDimitry Andric ModuleArch arch); 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric LLVMSymbolizerProcess *symbolizer_process_; 1450b57cec5SDimitry Andric static const uptr kBufferSize = 16 * 1024; 1460b57cec5SDimitry Andric char buffer_[kBufferSize]; 1470b57cec5SDimitry Andric }; 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric // Parses one or more two-line strings in the following format: 1500b57cec5SDimitry Andric // <function_name> 1510b57cec5SDimitry Andric // <file_name>:<line_number>[:<column_number>] 1520b57cec5SDimitry Andric // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 1530b57cec5SDimitry Andric // them use the same output format. Returns true if any useful debug 1540b57cec5SDimitry Andric // information was found. 1550b57cec5SDimitry Andric void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res); 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric // Parses a two-line string in the following format: 1580b57cec5SDimitry Andric // <symbol_name> 1590b57cec5SDimitry Andric // <start_address> <size> 1600b57cec5SDimitry Andric // Used by LLVMSymbolizer and InternalSymbolizer. 
1610b57cec5SDimitry Andric void ParseSymbolizeDataOutput(const char *str, DataInfo *info); 1620b57cec5SDimitry Andric 163*5f757f3fSDimitry Andric // Parses repeated strings in the following format: 164*5f757f3fSDimitry Andric // <function_name> 165*5f757f3fSDimitry Andric // <var_name> 166*5f757f3fSDimitry Andric // <file_name>:<line_number>[:<column_number>] 167*5f757f3fSDimitry Andric // [<frame_offset>|??] [<size>|??] [<tag_offset>|??] 168*5f757f3fSDimitry Andric // Used by LLVMSymbolizer and InternalSymbolizer. 169*5f757f3fSDimitry Andric void ParseSymbolizeFrameOutput(const char *str, 170*5f757f3fSDimitry Andric InternalMmapVector<LocalInfo> *locals); 171*5f757f3fSDimitry Andric 1720b57cec5SDimitry Andric } // namespace __sanitizer 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric #endif // SANITIZER_SYMBOLIZER_INTERNAL_H 175