xref: /openbsd-src/gnu/llvm/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h (revision 810390e339a5425391477d5d41c78d7cab2424ac)
13cab2bb3Spatrick //===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===//
23cab2bb3Spatrick //
33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information.
53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63cab2bb3Spatrick //
73cab2bb3Spatrick //===----------------------------------------------------------------------===//
83cab2bb3Spatrick //
93cab2bb3Spatrick // Header for internal classes and functions to be used by implementations of
103cab2bb3Spatrick // symbolizers.
113cab2bb3Spatrick //
123cab2bb3Spatrick //===----------------------------------------------------------------------===//
133cab2bb3Spatrick #ifndef SANITIZER_SYMBOLIZER_INTERNAL_H
143cab2bb3Spatrick #define SANITIZER_SYMBOLIZER_INTERNAL_H
153cab2bb3Spatrick 
163cab2bb3Spatrick #include "sanitizer_symbolizer.h"
173cab2bb3Spatrick #include "sanitizer_file.h"
183cab2bb3Spatrick #include "sanitizer_vector.h"
193cab2bb3Spatrick 
203cab2bb3Spatrick namespace __sanitizer {
213cab2bb3Spatrick 
223cab2bb3Spatrick // Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr
233cab2bb3Spatrick // is extracted. When extracting a string, a newly allocated (using
24*810390e3Srobert // InternalAlloc) and null-terminated buffer is returned. They return a pointer
253cab2bb3Spatrick // to the next characted after the found delimiter.
263cab2bb3Spatrick const char *ExtractToken(const char *str, const char *delims, char **result);
273cab2bb3Spatrick const char *ExtractInt(const char *str, const char *delims, int *result);
283cab2bb3Spatrick const char *ExtractUptr(const char *str, const char *delims, uptr *result);
293cab2bb3Spatrick const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
303cab2bb3Spatrick                                       char **result);
313cab2bb3Spatrick 
// Demangles 'name'; defined outside this header.
// NOTE(review): judging by the function name this covers both Swift and C++
// manglings; what is returned for an unrecognized name, and who owns the
// returned string, is not visible here -- confirm against the definition.
const char *DemangleSwiftAndCXX(const char *name);
333cab2bb3Spatrick 
343cab2bb3Spatrick // SymbolizerTool is an interface that is implemented by individual "tools"
353cab2bb3Spatrick // that can perform symbolication (external llvm-symbolizer, libbacktrace,
363cab2bb3Spatrick // Windows DbgHelp symbolizer, etc.).
373cab2bb3Spatrick class SymbolizerTool {
383cab2bb3Spatrick  public:
393cab2bb3Spatrick   // The main |Symbolizer| class implements a "fallback chain" of symbolizer
403cab2bb3Spatrick   // tools. In a request to symbolize an address, if one tool returns false,
413cab2bb3Spatrick   // the next tool in the chain will be tried.
423cab2bb3Spatrick   SymbolizerTool *next;
433cab2bb3Spatrick 
SymbolizerTool()443cab2bb3Spatrick   SymbolizerTool() : next(nullptr) { }
453cab2bb3Spatrick 
463cab2bb3Spatrick   // Can't declare pure virtual functions in sanitizer runtimes:
473cab2bb3Spatrick   // __cxa_pure_virtual might be unavailable.
483cab2bb3Spatrick 
493cab2bb3Spatrick   // The |stack| parameter is inout. It is pre-filled with the address,
503cab2bb3Spatrick   // module base and module offset values and is to be used to construct
513cab2bb3Spatrick   // other stack frames.
SymbolizePC(uptr addr,SymbolizedStack * stack)523cab2bb3Spatrick   virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) {
533cab2bb3Spatrick     UNIMPLEMENTED();
543cab2bb3Spatrick   }
553cab2bb3Spatrick 
563cab2bb3Spatrick   // The |info| parameter is inout. It is pre-filled with the module base
573cab2bb3Spatrick   // and module offset values.
SymbolizeData(uptr addr,DataInfo * info)583cab2bb3Spatrick   virtual bool SymbolizeData(uptr addr, DataInfo *info) {
593cab2bb3Spatrick     UNIMPLEMENTED();
603cab2bb3Spatrick   }
613cab2bb3Spatrick 
SymbolizeFrame(uptr addr,FrameInfo * info)623cab2bb3Spatrick   virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) {
633cab2bb3Spatrick     return false;
643cab2bb3Spatrick   }
653cab2bb3Spatrick 
Flush()663cab2bb3Spatrick   virtual void Flush() {}
673cab2bb3Spatrick 
683cab2bb3Spatrick   // Return nullptr to fallback to the default platform-specific demangler.
Demangle(const char * name)693cab2bb3Spatrick   virtual const char *Demangle(const char *name) {
703cab2bb3Spatrick     return nullptr;
713cab2bb3Spatrick   }
721f9cb04fSpatrick 
73d89ec533Spatrick  protected:
~SymbolizerTool()74d89ec533Spatrick   ~SymbolizerTool() {}
753cab2bb3Spatrick };
763cab2bb3Spatrick 
773cab2bb3Spatrick // SymbolizerProcess encapsulates communication between the tool and
783cab2bb3Spatrick // external symbolizer program, running in a different subprocess.
793cab2bb3Spatrick // SymbolizerProcess may not be used from two threads simultaneously.
803cab2bb3Spatrick class SymbolizerProcess {
813cab2bb3Spatrick  public:
823cab2bb3Spatrick   explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false);
833cab2bb3Spatrick   const char *SendCommand(const char *command);
843cab2bb3Spatrick 
853cab2bb3Spatrick  protected:
~SymbolizerProcess()86d89ec533Spatrick   ~SymbolizerProcess() {}
87d89ec533Spatrick 
883cab2bb3Spatrick   /// The maximum number of arguments required to invoke a tool process.
89*810390e3Srobert   static const unsigned kArgVMax = 16;
903cab2bb3Spatrick 
913cab2bb3Spatrick   // Customizable by subclasses.
923cab2bb3Spatrick   virtual bool StartSymbolizerSubprocess();
93*810390e3Srobert   virtual bool ReadFromSymbolizer();
941f9cb04fSpatrick   // Return the environment to run the symbolizer in.
GetEnvP()951f9cb04fSpatrick   virtual char **GetEnvP() { return GetEnviron(); }
GetBuff()96*810390e3Srobert   InternalMmapVector<char> &GetBuff() { return buffer_; }
973cab2bb3Spatrick 
983cab2bb3Spatrick  private:
ReachedEndOfOutput(const char * buffer,uptr length)993cab2bb3Spatrick   virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
1003cab2bb3Spatrick     UNIMPLEMENTED();
1013cab2bb3Spatrick   }
1023cab2bb3Spatrick 
1033cab2bb3Spatrick   /// Fill in an argv array to invoke the child process.
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax])1043cab2bb3Spatrick   virtual void GetArgV(const char *path_to_binary,
1053cab2bb3Spatrick                        const char *(&argv)[kArgVMax]) const {
1063cab2bb3Spatrick     UNIMPLEMENTED();
1073cab2bb3Spatrick   }
1083cab2bb3Spatrick 
1093cab2bb3Spatrick   bool Restart();
1103cab2bb3Spatrick   const char *SendCommandImpl(const char *command);
1113cab2bb3Spatrick   bool WriteToSymbolizer(const char *buffer, uptr length);
1123cab2bb3Spatrick 
1133cab2bb3Spatrick   const char *path_;
1143cab2bb3Spatrick   fd_t input_fd_;
1153cab2bb3Spatrick   fd_t output_fd_;
1163cab2bb3Spatrick 
117*810390e3Srobert   InternalMmapVector<char> buffer_;
1183cab2bb3Spatrick 
1193cab2bb3Spatrick   static const uptr kMaxTimesRestarted = 5;
1203cab2bb3Spatrick   static const int kSymbolizerStartupTimeMillis = 10;
1213cab2bb3Spatrick   uptr times_restarted_;
1223cab2bb3Spatrick   bool failed_to_start_;
1233cab2bb3Spatrick   bool reported_invalid_path_;
1243cab2bb3Spatrick   bool use_posix_spawn_;
1253cab2bb3Spatrick };
1263cab2bb3Spatrick 
// Forward declaration only: this header refers to the type solely through a
// pointer member of LLVMSymbolizer, so the full definition is not needed here.
class LLVMSymbolizerProcess;
1283cab2bb3Spatrick 
1293cab2bb3Spatrick // This tool invokes llvm-symbolizer in a subprocess. It should be as portable
1303cab2bb3Spatrick // as the llvm-symbolizer tool is.
131d89ec533Spatrick class LLVMSymbolizer final : public SymbolizerTool {
1323cab2bb3Spatrick  public:
1333cab2bb3Spatrick   explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator);
1343cab2bb3Spatrick 
1353cab2bb3Spatrick   bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
1363cab2bb3Spatrick   bool SymbolizeData(uptr addr, DataInfo *info) override;
1373cab2bb3Spatrick   bool SymbolizeFrame(uptr addr, FrameInfo *info) override;
1383cab2bb3Spatrick 
1393cab2bb3Spatrick  private:
1403cab2bb3Spatrick   const char *FormatAndSendCommand(const char *command_prefix,
1413cab2bb3Spatrick                                    const char *module_name, uptr module_offset,
1423cab2bb3Spatrick                                    ModuleArch arch);
1433cab2bb3Spatrick 
1443cab2bb3Spatrick   LLVMSymbolizerProcess *symbolizer_process_;
1453cab2bb3Spatrick   static const uptr kBufferSize = 16 * 1024;
1463cab2bb3Spatrick   char buffer_[kBufferSize];
1473cab2bb3Spatrick };
1483cab2bb3Spatrick 
1493cab2bb3Spatrick // Parses one or more two-line strings in the following format:
1503cab2bb3Spatrick //   <function_name>
1513cab2bb3Spatrick //   <file_name>:<line_number>[:<column_number>]
1523cab2bb3Spatrick // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
1533cab2bb3Spatrick // them use the same output format.  Returns true if any useful debug
1543cab2bb3Spatrick // information was found.
1553cab2bb3Spatrick void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
1563cab2bb3Spatrick 
1573cab2bb3Spatrick // Parses a two-line string in the following format:
1583cab2bb3Spatrick //   <symbol_name>
1593cab2bb3Spatrick //   <start_address> <size>
1603cab2bb3Spatrick // Used by LLVMSymbolizer and InternalSymbolizer.
1613cab2bb3Spatrick void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
1623cab2bb3Spatrick 
1633cab2bb3Spatrick }  // namespace __sanitizer
1643cab2bb3Spatrick 
1653cab2bb3Spatrick #endif  // SANITIZER_SYMBOLIZER_INTERNAL_H
166