//===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Header for internal classes and functions to be used by implementations of
// symbolizers.
//
//===----------------------------------------------------------------------===//
#ifndef SANITIZER_SYMBOLIZER_INTERNAL_H
#define SANITIZER_SYMBOLIZER_INTERNAL_H

#include "sanitizer_file.h"
#include "sanitizer_symbolizer.h"
#include "sanitizer_vector.h"

namespace __sanitizer {

220b57cec5SDimitry Andric // Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr
230b57cec5SDimitry Andric // is extracted. When extracting a string, a newly allocated (using
24349cc55cSDimitry Andric // InternalAlloc) and null-terminated buffer is returned. They return a pointer
250b57cec5SDimitry Andric // to the next characted after the found delimiter.
260b57cec5SDimitry Andric const char *ExtractToken(const char *str, const char *delims, char **result);
270b57cec5SDimitry Andric const char *ExtractInt(const char *str, const char *delims, int *result);
280b57cec5SDimitry Andric const char *ExtractUptr(const char *str, const char *delims, uptr *result);
290b57cec5SDimitry Andric const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
300b57cec5SDimitry Andric                                       char **result);
310b57cec5SDimitry Andric 
// Declared here for use by symbolizer implementations; the definition is not
// part of this header.
// NOTE(review): presumably returns a demangled form of 'name', handling both
// Swift and C++ manglings -- confirm against the definition, including who
// owns the returned string.
const char *DemangleSwiftAndCXX(const char *name);

340b57cec5SDimitry Andric // SymbolizerTool is an interface that is implemented by individual "tools"
350b57cec5SDimitry Andric // that can perform symbolication (external llvm-symbolizer, libbacktrace,
360b57cec5SDimitry Andric // Windows DbgHelp symbolizer, etc.).
370b57cec5SDimitry Andric class SymbolizerTool {
380b57cec5SDimitry Andric  public:
390b57cec5SDimitry Andric   // The main |Symbolizer| class implements a "fallback chain" of symbolizer
400b57cec5SDimitry Andric   // tools. In a request to symbolize an address, if one tool returns false,
410b57cec5SDimitry Andric   // the next tool in the chain will be tried.
420b57cec5SDimitry Andric   SymbolizerTool *next;
430b57cec5SDimitry Andric 
SymbolizerTool()440b57cec5SDimitry Andric   SymbolizerTool() : next(nullptr) { }
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   // Can't declare pure virtual functions in sanitizer runtimes:
470b57cec5SDimitry Andric   // __cxa_pure_virtual might be unavailable.
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric   // The |stack| parameter is inout. It is pre-filled with the address,
500b57cec5SDimitry Andric   // module base and module offset values and is to be used to construct
510b57cec5SDimitry Andric   // other stack frames.
SymbolizePC(uptr addr,SymbolizedStack * stack)520b57cec5SDimitry Andric   virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) {
530b57cec5SDimitry Andric     UNIMPLEMENTED();
540b57cec5SDimitry Andric   }
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric   // The |info| parameter is inout. It is pre-filled with the module base
570b57cec5SDimitry Andric   // and module offset values.
SymbolizeData(uptr addr,DataInfo * info)580b57cec5SDimitry Andric   virtual bool SymbolizeData(uptr addr, DataInfo *info) {
590b57cec5SDimitry Andric     UNIMPLEMENTED();
600b57cec5SDimitry Andric   }
610b57cec5SDimitry Andric 
SymbolizeFrame(uptr addr,FrameInfo * info)620b57cec5SDimitry Andric   virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) {
630b57cec5SDimitry Andric     return false;
640b57cec5SDimitry Andric   }
650b57cec5SDimitry Andric 
Flush()660b57cec5SDimitry Andric   virtual void Flush() {}
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   // Return nullptr to fallback to the default platform-specific demangler.
Demangle(const char * name)690b57cec5SDimitry Andric   virtual const char *Demangle(const char *name) {
700b57cec5SDimitry Andric     return nullptr;
710b57cec5SDimitry Andric   }
725ffd83dbSDimitry Andric 
73e8d8bef9SDimitry Andric  protected:
~SymbolizerTool()74e8d8bef9SDimitry Andric   ~SymbolizerTool() {}
750b57cec5SDimitry Andric };
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric // SymbolizerProcess encapsulates communication between the tool and
780b57cec5SDimitry Andric // external symbolizer program, running in a different subprocess.
790b57cec5SDimitry Andric // SymbolizerProcess may not be used from two threads simultaneously.
800b57cec5SDimitry Andric class SymbolizerProcess {
810b57cec5SDimitry Andric  public:
8268d75effSDimitry Andric   explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false);
830b57cec5SDimitry Andric   const char *SendCommand(const char *command);
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric  protected:
~SymbolizerProcess()86e8d8bef9SDimitry Andric   ~SymbolizerProcess() {}
87e8d8bef9SDimitry Andric 
8868d75effSDimitry Andric   /// The maximum number of arguments required to invoke a tool process.
890eae32dcSDimitry Andric   static const unsigned kArgVMax = 16;
9068d75effSDimitry Andric 
9168d75effSDimitry Andric   // Customizable by subclasses.
9268d75effSDimitry Andric   virtual bool StartSymbolizerSubprocess();
9381ad6265SDimitry Andric   virtual bool ReadFromSymbolizer();
945ffd83dbSDimitry Andric   // Return the environment to run the symbolizer in.
GetEnvP()955ffd83dbSDimitry Andric   virtual char **GetEnvP() { return GetEnviron(); }
GetBuff()9681ad6265SDimitry Andric   InternalMmapVector<char> &GetBuff() { return buffer_; }
9768d75effSDimitry Andric 
9868d75effSDimitry Andric  private:
ReachedEndOfOutput(const char * buffer,uptr length)990b57cec5SDimitry Andric   virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
1000b57cec5SDimitry Andric     UNIMPLEMENTED();
1010b57cec5SDimitry Andric   }
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric   /// Fill in an argv array to invoke the child process.
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax])1040b57cec5SDimitry Andric   virtual void GetArgV(const char *path_to_binary,
1050b57cec5SDimitry Andric                        const char *(&argv)[kArgVMax]) const {
1060b57cec5SDimitry Andric     UNIMPLEMENTED();
1070b57cec5SDimitry Andric   }
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric   bool Restart();
1100b57cec5SDimitry Andric   const char *SendCommandImpl(const char *command);
1110b57cec5SDimitry Andric   bool WriteToSymbolizer(const char *buffer, uptr length);
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric   const char *path_;
1140b57cec5SDimitry Andric   fd_t input_fd_;
1150b57cec5SDimitry Andric   fd_t output_fd_;
1160b57cec5SDimitry Andric 
11781ad6265SDimitry Andric   InternalMmapVector<char> buffer_;
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric   static const uptr kMaxTimesRestarted = 5;
1200b57cec5SDimitry Andric   static const int kSymbolizerStartupTimeMillis = 10;
1210b57cec5SDimitry Andric   uptr times_restarted_;
1220b57cec5SDimitry Andric   bool failed_to_start_;
1230b57cec5SDimitry Andric   bool reported_invalid_path_;
12468d75effSDimitry Andric   bool use_posix_spawn_;
1250b57cec5SDimitry Andric };
1260b57cec5SDimitry Andric 
1270b57cec5SDimitry Andric class LLVMSymbolizerProcess;
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric // This tool invokes llvm-symbolizer in a subprocess. It should be as portable
1300b57cec5SDimitry Andric // as the llvm-symbolizer tool is.
131e8d8bef9SDimitry Andric class LLVMSymbolizer final : public SymbolizerTool {
1320b57cec5SDimitry Andric  public:
1330b57cec5SDimitry Andric   explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator);
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric   bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
1360b57cec5SDimitry Andric   bool SymbolizeData(uptr addr, DataInfo *info) override;
1370b57cec5SDimitry Andric   bool SymbolizeFrame(uptr addr, FrameInfo *info) override;
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric  private:
1400b57cec5SDimitry Andric   const char *FormatAndSendCommand(const char *command_prefix,
1410b57cec5SDimitry Andric                                    const char *module_name, uptr module_offset,
1420b57cec5SDimitry Andric                                    ModuleArch arch);
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   LLVMSymbolizerProcess *symbolizer_process_;
1450b57cec5SDimitry Andric   static const uptr kBufferSize = 16 * 1024;
1460b57cec5SDimitry Andric   char buffer_[kBufferSize];
1470b57cec5SDimitry Andric };
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric // Parses one or more two-line strings in the following format:
1500b57cec5SDimitry Andric //   <function_name>
1510b57cec5SDimitry Andric //   <file_name>:<line_number>[:<column_number>]
1520b57cec5SDimitry Andric // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
1530b57cec5SDimitry Andric // them use the same output format.  Returns true if any useful debug
1540b57cec5SDimitry Andric // information was found.
1550b57cec5SDimitry Andric void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric // Parses a two-line string in the following format:
1580b57cec5SDimitry Andric //   <symbol_name>
1590b57cec5SDimitry Andric //   <start_address> <size>
1600b57cec5SDimitry Andric // Used by LLVMSymbolizer and InternalSymbolizer.
1610b57cec5SDimitry Andric void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
1620b57cec5SDimitry Andric 
163*5f757f3fSDimitry Andric // Parses repeated strings in the following format:
164*5f757f3fSDimitry Andric //   <function_name>
165*5f757f3fSDimitry Andric //   <var_name>
166*5f757f3fSDimitry Andric //   <file_name>:<line_number>[:<column_number>]
167*5f757f3fSDimitry Andric //   [<frame_offset>|??] [<size>|??] [<tag_offset>|??]
168*5f757f3fSDimitry Andric // Used by LLVMSymbolizer and InternalSymbolizer.
169*5f757f3fSDimitry Andric void ParseSymbolizeFrameOutput(const char *str,
170*5f757f3fSDimitry Andric                                InternalMmapVector<LocalInfo> *locals);
171*5f757f3fSDimitry Andric 
}  // namespace __sanitizer

#endif  // SANITIZER_SYMBOLIZER_INTERNAL_H