xref: /openbsd-src/gnu/llvm/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp (revision 810390e339a5425391477d5d41c78d7cab2424ac)
13cab2bb3Spatrick //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
23cab2bb3Spatrick //
33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information.
53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63cab2bb3Spatrick //
73cab2bb3Spatrick //===----------------------------------------------------------------------===//
83cab2bb3Spatrick //
93cab2bb3Spatrick // This file is shared between AddressSanitizer and ThreadSanitizer
103cab2bb3Spatrick // run-time libraries.
113cab2bb3Spatrick //===----------------------------------------------------------------------===//
123cab2bb3Spatrick 
133cab2bb3Spatrick #include "sanitizer_allocator_internal.h"
143cab2bb3Spatrick #include "sanitizer_internal_defs.h"
15d89ec533Spatrick #include "sanitizer_platform.h"
163cab2bb3Spatrick #include "sanitizer_symbolizer_internal.h"
173cab2bb3Spatrick 
183cab2bb3Spatrick namespace __sanitizer {
193cab2bb3Spatrick 
GetOrInit()203cab2bb3Spatrick Symbolizer *Symbolizer::GetOrInit() {
213cab2bb3Spatrick   SpinMutexLock l(&init_mu_);
223cab2bb3Spatrick   if (symbolizer_)
233cab2bb3Spatrick     return symbolizer_;
243cab2bb3Spatrick   symbolizer_ = PlatformInit();
253cab2bb3Spatrick   CHECK(symbolizer_);
263cab2bb3Spatrick   return symbolizer_;
273cab2bb3Spatrick }
283cab2bb3Spatrick 
293cab2bb3Spatrick // See sanitizer_symbolizer_markup.cpp.
303cab2bb3Spatrick #if !SANITIZER_SYMBOLIZER_MARKUP
313cab2bb3Spatrick 
ExtractToken(const char * str,const char * delims,char ** result)323cab2bb3Spatrick const char *ExtractToken(const char *str, const char *delims, char **result) {
333cab2bb3Spatrick   uptr prefix_len = internal_strcspn(str, delims);
343cab2bb3Spatrick   *result = (char*)InternalAlloc(prefix_len + 1);
353cab2bb3Spatrick   internal_memcpy(*result, str, prefix_len);
363cab2bb3Spatrick   (*result)[prefix_len] = '\0';
373cab2bb3Spatrick   const char *prefix_end = str + prefix_len;
383cab2bb3Spatrick   if (*prefix_end != '\0') prefix_end++;
393cab2bb3Spatrick   return prefix_end;
403cab2bb3Spatrick }
413cab2bb3Spatrick 
ExtractInt(const char * str,const char * delims,int * result)423cab2bb3Spatrick const char *ExtractInt(const char *str, const char *delims, int *result) {
431f9cb04fSpatrick   char *buff = nullptr;
443cab2bb3Spatrick   const char *ret = ExtractToken(str, delims, &buff);
451f9cb04fSpatrick   if (buff) {
463cab2bb3Spatrick     *result = (int)internal_atoll(buff);
473cab2bb3Spatrick   }
483cab2bb3Spatrick   InternalFree(buff);
493cab2bb3Spatrick   return ret;
503cab2bb3Spatrick }
513cab2bb3Spatrick 
ExtractUptr(const char * str,const char * delims,uptr * result)523cab2bb3Spatrick const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
531f9cb04fSpatrick   char *buff = nullptr;
543cab2bb3Spatrick   const char *ret = ExtractToken(str, delims, &buff);
551f9cb04fSpatrick   if (buff) {
563cab2bb3Spatrick     *result = (uptr)internal_atoll(buff);
573cab2bb3Spatrick   }
583cab2bb3Spatrick   InternalFree(buff);
593cab2bb3Spatrick   return ret;
603cab2bb3Spatrick }
613cab2bb3Spatrick 
ExtractSptr(const char * str,const char * delims,sptr * result)623cab2bb3Spatrick const char *ExtractSptr(const char *str, const char *delims, sptr *result) {
631f9cb04fSpatrick   char *buff = nullptr;
643cab2bb3Spatrick   const char *ret = ExtractToken(str, delims, &buff);
651f9cb04fSpatrick   if (buff) {
663cab2bb3Spatrick     *result = (sptr)internal_atoll(buff);
673cab2bb3Spatrick   }
683cab2bb3Spatrick   InternalFree(buff);
693cab2bb3Spatrick   return ret;
703cab2bb3Spatrick }
713cab2bb3Spatrick 
ExtractTokenUpToDelimiter(const char * str,const char * delimiter,char ** result)723cab2bb3Spatrick const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
733cab2bb3Spatrick                                       char **result) {
743cab2bb3Spatrick   const char *found_delimiter = internal_strstr(str, delimiter);
753cab2bb3Spatrick   uptr prefix_len =
763cab2bb3Spatrick       found_delimiter ? found_delimiter - str : internal_strlen(str);
773cab2bb3Spatrick   *result = (char *)InternalAlloc(prefix_len + 1);
783cab2bb3Spatrick   internal_memcpy(*result, str, prefix_len);
793cab2bb3Spatrick   (*result)[prefix_len] = '\0';
803cab2bb3Spatrick   const char *prefix_end = str + prefix_len;
813cab2bb3Spatrick   if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
823cab2bb3Spatrick   return prefix_end;
833cab2bb3Spatrick }
843cab2bb3Spatrick 
SymbolizePC(uptr addr)853cab2bb3Spatrick SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
86*810390e3Srobert   Lock l(&mu_);
873cab2bb3Spatrick   SymbolizedStack *res = SymbolizedStack::New(addr);
88*810390e3Srobert   auto *mod = FindModuleForAddress(addr);
89*810390e3Srobert   if (!mod)
903cab2bb3Spatrick     return res;
913cab2bb3Spatrick   // Always fill data about module name and offset.
92*810390e3Srobert   res->info.FillModuleInfo(*mod);
933cab2bb3Spatrick   for (auto &tool : tools_) {
943cab2bb3Spatrick     SymbolizerScope sym_scope(this);
953cab2bb3Spatrick     if (tool.SymbolizePC(addr, res)) {
963cab2bb3Spatrick       return res;
973cab2bb3Spatrick     }
983cab2bb3Spatrick   }
993cab2bb3Spatrick   return res;
1003cab2bb3Spatrick }
1013cab2bb3Spatrick 
SymbolizeData(uptr addr,DataInfo * info)1023cab2bb3Spatrick bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
103*810390e3Srobert   Lock l(&mu_);
1041f9cb04fSpatrick   const char *module_name = nullptr;
1053cab2bb3Spatrick   uptr module_offset;
1063cab2bb3Spatrick   ModuleArch arch;
1073cab2bb3Spatrick   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
1083cab2bb3Spatrick                                          &arch))
1093cab2bb3Spatrick     return false;
1103cab2bb3Spatrick   info->Clear();
1113cab2bb3Spatrick   info->module = internal_strdup(module_name);
1123cab2bb3Spatrick   info->module_offset = module_offset;
1133cab2bb3Spatrick   info->module_arch = arch;
1143cab2bb3Spatrick   for (auto &tool : tools_) {
1153cab2bb3Spatrick     SymbolizerScope sym_scope(this);
1163cab2bb3Spatrick     if (tool.SymbolizeData(addr, info)) {
1173cab2bb3Spatrick       return true;
1183cab2bb3Spatrick     }
1193cab2bb3Spatrick   }
1203cab2bb3Spatrick   return true;
1213cab2bb3Spatrick }
1223cab2bb3Spatrick 
SymbolizeFrame(uptr addr,FrameInfo * info)1233cab2bb3Spatrick bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
124*810390e3Srobert   Lock l(&mu_);
1251f9cb04fSpatrick   const char *module_name = nullptr;
1263cab2bb3Spatrick   if (!FindModuleNameAndOffsetForAddress(
1273cab2bb3Spatrick           addr, &module_name, &info->module_offset, &info->module_arch))
1283cab2bb3Spatrick     return false;
1293cab2bb3Spatrick   info->module = internal_strdup(module_name);
1303cab2bb3Spatrick   for (auto &tool : tools_) {
1313cab2bb3Spatrick     SymbolizerScope sym_scope(this);
1323cab2bb3Spatrick     if (tool.SymbolizeFrame(addr, info)) {
1333cab2bb3Spatrick       return true;
1343cab2bb3Spatrick     }
1353cab2bb3Spatrick   }
1363cab2bb3Spatrick   return true;
1373cab2bb3Spatrick }
1383cab2bb3Spatrick 
GetModuleNameAndOffsetForPC(uptr pc,const char ** module_name,uptr * module_address)1393cab2bb3Spatrick bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
1403cab2bb3Spatrick                                              uptr *module_address) {
141*810390e3Srobert   Lock l(&mu_);
1423cab2bb3Spatrick   const char *internal_module_name = nullptr;
1433cab2bb3Spatrick   ModuleArch arch;
1443cab2bb3Spatrick   if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
1453cab2bb3Spatrick                                          module_address, &arch))
1463cab2bb3Spatrick     return false;
1473cab2bb3Spatrick 
1483cab2bb3Spatrick   if (module_name)
1493cab2bb3Spatrick     *module_name = module_names_.GetOwnedCopy(internal_module_name);
1503cab2bb3Spatrick   return true;
1513cab2bb3Spatrick }
1523cab2bb3Spatrick 
Flush()1533cab2bb3Spatrick void Symbolizer::Flush() {
154*810390e3Srobert   Lock l(&mu_);
1553cab2bb3Spatrick   for (auto &tool : tools_) {
1563cab2bb3Spatrick     SymbolizerScope sym_scope(this);
1573cab2bb3Spatrick     tool.Flush();
1583cab2bb3Spatrick   }
1593cab2bb3Spatrick }
1603cab2bb3Spatrick 
Demangle(const char * name)1613cab2bb3Spatrick const char *Symbolizer::Demangle(const char *name) {
162*810390e3Srobert   Lock l(&mu_);
1633cab2bb3Spatrick   for (auto &tool : tools_) {
1643cab2bb3Spatrick     SymbolizerScope sym_scope(this);
1653cab2bb3Spatrick     if (const char *demangled = tool.Demangle(name))
1663cab2bb3Spatrick       return demangled;
1673cab2bb3Spatrick   }
1683cab2bb3Spatrick   return PlatformDemangle(name);
1693cab2bb3Spatrick }
1703cab2bb3Spatrick 
FindModuleNameAndOffsetForAddress(uptr address,const char ** module_name,uptr * module_offset,ModuleArch * module_arch)1713cab2bb3Spatrick bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
1723cab2bb3Spatrick                                                    const char **module_name,
1733cab2bb3Spatrick                                                    uptr *module_offset,
1743cab2bb3Spatrick                                                    ModuleArch *module_arch) {
1753cab2bb3Spatrick   const LoadedModule *module = FindModuleForAddress(address);
1761f9cb04fSpatrick   if (!module)
1773cab2bb3Spatrick     return false;
1783cab2bb3Spatrick   *module_name = module->full_name();
1793cab2bb3Spatrick   *module_offset = address - module->base_address();
1803cab2bb3Spatrick   *module_arch = module->arch();
1813cab2bb3Spatrick   return true;
1823cab2bb3Spatrick }
1833cab2bb3Spatrick 
RefreshModules()1843cab2bb3Spatrick void Symbolizer::RefreshModules() {
1853cab2bb3Spatrick   modules_.init();
1863cab2bb3Spatrick   fallback_modules_.fallbackInit();
1873cab2bb3Spatrick   RAW_CHECK(modules_.size() > 0);
1883cab2bb3Spatrick   modules_fresh_ = true;
1893cab2bb3Spatrick }
1903cab2bb3Spatrick 
SearchForModule(const ListOfModules & modules,uptr address)1913cab2bb3Spatrick static const LoadedModule *SearchForModule(const ListOfModules &modules,
1923cab2bb3Spatrick                                            uptr address) {
1933cab2bb3Spatrick   for (uptr i = 0; i < modules.size(); i++) {
1943cab2bb3Spatrick     if (modules[i].containsAddress(address)) {
1953cab2bb3Spatrick       return &modules[i];
1963cab2bb3Spatrick     }
1973cab2bb3Spatrick   }
1983cab2bb3Spatrick   return nullptr;
1993cab2bb3Spatrick }
2003cab2bb3Spatrick 
FindModuleForAddress(uptr address)2013cab2bb3Spatrick const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
2023cab2bb3Spatrick   bool modules_were_reloaded = false;
2033cab2bb3Spatrick   if (!modules_fresh_) {
2043cab2bb3Spatrick     RefreshModules();
2053cab2bb3Spatrick     modules_were_reloaded = true;
2063cab2bb3Spatrick   }
2073cab2bb3Spatrick   const LoadedModule *module = SearchForModule(modules_, address);
2083cab2bb3Spatrick   if (module) return module;
2093cab2bb3Spatrick 
2103cab2bb3Spatrick   // dlopen/dlclose interceptors invalidate the module list, but when
2113cab2bb3Spatrick   // interception is disabled, we need to retry if the lookup fails in
2123cab2bb3Spatrick   // case the module list changed.
2133cab2bb3Spatrick #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
2143cab2bb3Spatrick   if (!modules_were_reloaded) {
2153cab2bb3Spatrick     RefreshModules();
2163cab2bb3Spatrick     module = SearchForModule(modules_, address);
2173cab2bb3Spatrick     if (module) return module;
2183cab2bb3Spatrick   }
2193cab2bb3Spatrick #endif
2203cab2bb3Spatrick 
2213cab2bb3Spatrick   if (fallback_modules_.size()) {
2223cab2bb3Spatrick     module = SearchForModule(fallback_modules_, address);
2233cab2bb3Spatrick   }
2243cab2bb3Spatrick   return module;
2253cab2bb3Spatrick }
2263cab2bb3Spatrick 
2273cab2bb3Spatrick // For now we assume the following protocol:
2283cab2bb3Spatrick // For each request of the form
2293cab2bb3Spatrick //   <module_name> <module_offset>
2303cab2bb3Spatrick // passed to STDIN, external symbolizer prints to STDOUT response:
2313cab2bb3Spatrick //   <function_name>
2323cab2bb3Spatrick //   <file_name>:<line_number>:<column_number>
2333cab2bb3Spatrick //   <function_name>
2343cab2bb3Spatrick //   <file_name>:<line_number>:<column_number>
2353cab2bb3Spatrick //   ...
2363cab2bb3Spatrick //   <empty line>
237d89ec533Spatrick class LLVMSymbolizerProcess final : public SymbolizerProcess {
2383cab2bb3Spatrick  public:
LLVMSymbolizerProcess(const char * path)2393cab2bb3Spatrick   explicit LLVMSymbolizerProcess(const char *path)
240*810390e3Srobert       : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {}
2413cab2bb3Spatrick 
2423cab2bb3Spatrick  private:
ReachedEndOfOutput(const char * buffer,uptr length) const2433cab2bb3Spatrick   bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
2443cab2bb3Spatrick     // Empty line marks the end of llvm-symbolizer output.
2453cab2bb3Spatrick     return length >= 2 && buffer[length - 1] == '\n' &&
2463cab2bb3Spatrick            buffer[length - 2] == '\n';
2473cab2bb3Spatrick   }
2483cab2bb3Spatrick 
2493cab2bb3Spatrick   // When adding a new architecture, don't forget to also update
2503cab2bb3Spatrick   // script/asan_symbolize.py and sanitizer_common.h.
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax]) const2513cab2bb3Spatrick   void GetArgV(const char *path_to_binary,
2523cab2bb3Spatrick                const char *(&argv)[kArgVMax]) const override {
2533cab2bb3Spatrick #if defined(__x86_64h__)
2543cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=x86_64h";
2553cab2bb3Spatrick #elif defined(__x86_64__)
2563cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=x86_64";
2573cab2bb3Spatrick #elif defined(__i386__)
2583cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=i386";
259*810390e3Srobert #elif SANITIZER_LOONGARCH64
260*810390e3Srobert     const char *const kSymbolizerArch = "--default-arch=loongarch64";
261d89ec533Spatrick #elif SANITIZER_RISCV64
262d89ec533Spatrick     const char *const kSymbolizerArch = "--default-arch=riscv64";
2633cab2bb3Spatrick #elif defined(__aarch64__)
2643cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=arm64";
2653cab2bb3Spatrick #elif defined(__arm__)
2663cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=arm";
2673cab2bb3Spatrick #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
2683cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=powerpc64";
2693cab2bb3Spatrick #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
2703cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=powerpc64le";
2713cab2bb3Spatrick #elif defined(__s390x__)
2723cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=s390x";
2733cab2bb3Spatrick #elif defined(__s390__)
2743cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=s390";
2753cab2bb3Spatrick #else
2763cab2bb3Spatrick     const char* const kSymbolizerArch = "--default-arch=unknown";
2773cab2bb3Spatrick #endif
2783cab2bb3Spatrick 
279*810390e3Srobert     const char *const demangle_flag =
280*810390e3Srobert         common_flags()->demangle ? "--demangle" : "--no-demangle";
281*810390e3Srobert     const char *const inline_flag =
282*810390e3Srobert         common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines";
2833cab2bb3Spatrick     int i = 0;
2843cab2bb3Spatrick     argv[i++] = path_to_binary;
285*810390e3Srobert     argv[i++] = demangle_flag;
2863cab2bb3Spatrick     argv[i++] = inline_flag;
2873cab2bb3Spatrick     argv[i++] = kSymbolizerArch;
2883cab2bb3Spatrick     argv[i++] = nullptr;
289*810390e3Srobert     CHECK_LE(i, kArgVMax);
2903cab2bb3Spatrick   }
2913cab2bb3Spatrick };
2923cab2bb3Spatrick 
LLVMSymbolizer(const char * path,LowLevelAllocator * allocator)2933cab2bb3Spatrick LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
2943cab2bb3Spatrick     : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
2953cab2bb3Spatrick 
2963cab2bb3Spatrick // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
2973cab2bb3Spatrick // Windows, so extract tokens from the right hand side first. The column info is
2983cab2bb3Spatrick // also optional.
ParseFileLineInfo(AddressInfo * info,const char * str)2993cab2bb3Spatrick static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
3001f9cb04fSpatrick   char *file_line_info = nullptr;
3013cab2bb3Spatrick   str = ExtractToken(str, "\n", &file_line_info);
3023cab2bb3Spatrick   CHECK(file_line_info);
3033cab2bb3Spatrick 
3043cab2bb3Spatrick   if (uptr size = internal_strlen(file_line_info)) {
3053cab2bb3Spatrick     char *back = file_line_info + size - 1;
3063cab2bb3Spatrick     for (int i = 0; i < 2; ++i) {
3073cab2bb3Spatrick       while (back > file_line_info && IsDigit(*back)) --back;
3083cab2bb3Spatrick       if (*back != ':' || !IsDigit(back[1])) break;
3093cab2bb3Spatrick       info->column = info->line;
3103cab2bb3Spatrick       info->line = internal_atoll(back + 1);
3113cab2bb3Spatrick       // Truncate the string at the colon to keep only filename.
3123cab2bb3Spatrick       *back = '\0';
3133cab2bb3Spatrick       --back;
3143cab2bb3Spatrick     }
3153cab2bb3Spatrick     ExtractToken(file_line_info, "", &info->file);
3163cab2bb3Spatrick   }
3173cab2bb3Spatrick 
3183cab2bb3Spatrick   InternalFree(file_line_info);
3193cab2bb3Spatrick   return str;
3203cab2bb3Spatrick }
3213cab2bb3Spatrick 
3223cab2bb3Spatrick // Parses one or more two-line strings in the following format:
3233cab2bb3Spatrick //   <function_name>
3243cab2bb3Spatrick //   <file_name>:<line_number>[:<column_number>]
3253cab2bb3Spatrick // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
3263cab2bb3Spatrick // them use the same output format.
ParseSymbolizePCOutput(const char * str,SymbolizedStack * res)3273cab2bb3Spatrick void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
3283cab2bb3Spatrick   bool top_frame = true;
3293cab2bb3Spatrick   SymbolizedStack *last = res;
3303cab2bb3Spatrick   while (true) {
3311f9cb04fSpatrick     char *function_name = nullptr;
3323cab2bb3Spatrick     str = ExtractToken(str, "\n", &function_name);
3333cab2bb3Spatrick     CHECK(function_name);
3343cab2bb3Spatrick     if (function_name[0] == '\0') {
3353cab2bb3Spatrick       // There are no more frames.
3363cab2bb3Spatrick       InternalFree(function_name);
3373cab2bb3Spatrick       break;
3383cab2bb3Spatrick     }
3393cab2bb3Spatrick     SymbolizedStack *cur;
3403cab2bb3Spatrick     if (top_frame) {
3413cab2bb3Spatrick       cur = res;
3423cab2bb3Spatrick       top_frame = false;
3433cab2bb3Spatrick     } else {
3443cab2bb3Spatrick       cur = SymbolizedStack::New(res->info.address);
3453cab2bb3Spatrick       cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
3463cab2bb3Spatrick                                res->info.module_arch);
3473cab2bb3Spatrick       last->next = cur;
3483cab2bb3Spatrick       last = cur;
3493cab2bb3Spatrick     }
3503cab2bb3Spatrick 
3513cab2bb3Spatrick     AddressInfo *info = &cur->info;
3523cab2bb3Spatrick     info->function = function_name;
3533cab2bb3Spatrick     str = ParseFileLineInfo(info, str);
3543cab2bb3Spatrick 
3553cab2bb3Spatrick     // Functions and filenames can be "??", in which case we write 0
3563cab2bb3Spatrick     // to address info to mark that names are unknown.
3573cab2bb3Spatrick     if (0 == internal_strcmp(info->function, "??")) {
3583cab2bb3Spatrick       InternalFree(info->function);
3593cab2bb3Spatrick       info->function = 0;
3603cab2bb3Spatrick     }
361d89ec533Spatrick     if (info->file && 0 == internal_strcmp(info->file, "??")) {
3623cab2bb3Spatrick       InternalFree(info->file);
3633cab2bb3Spatrick       info->file = 0;
3643cab2bb3Spatrick     }
3653cab2bb3Spatrick   }
3663cab2bb3Spatrick }
3673cab2bb3Spatrick 
368*810390e3Srobert // Parses a two- or three-line string in the following format:
3693cab2bb3Spatrick //   <symbol_name>
3703cab2bb3Spatrick //   <start_address> <size>
//   <filename>:<line>
372*810390e3Srobert // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
373*810390e3Srobert // for symbolizing the third line in D123538, but we support the older two-line
374*810390e3Srobert // information as well.
ParseSymbolizeDataOutput(const char * str,DataInfo * info)3753cab2bb3Spatrick void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
3763cab2bb3Spatrick   str = ExtractToken(str, "\n", &info->name);
3773cab2bb3Spatrick   str = ExtractUptr(str, " ", &info->start);
3783cab2bb3Spatrick   str = ExtractUptr(str, "\n", &info->size);
379*810390e3Srobert   // Note: If the third line isn't present, these calls will set info.{file,
380*810390e3Srobert   // line} to empty strings.
381*810390e3Srobert   str = ExtractToken(str, ":", &info->file);
382*810390e3Srobert   str = ExtractUptr(str, "\n", &info->line);
3833cab2bb3Spatrick }
3843cab2bb3Spatrick 
ParseSymbolizeFrameOutput(const char * str,InternalMmapVector<LocalInfo> * locals)3853cab2bb3Spatrick static void ParseSymbolizeFrameOutput(const char *str,
3863cab2bb3Spatrick                                       InternalMmapVector<LocalInfo> *locals) {
3873cab2bb3Spatrick   if (internal_strncmp(str, "??", 2) == 0)
3883cab2bb3Spatrick     return;
3893cab2bb3Spatrick 
3903cab2bb3Spatrick   while (*str) {
3913cab2bb3Spatrick     LocalInfo local;
3923cab2bb3Spatrick     str = ExtractToken(str, "\n", &local.function_name);
3933cab2bb3Spatrick     str = ExtractToken(str, "\n", &local.name);
3943cab2bb3Spatrick 
3953cab2bb3Spatrick     AddressInfo addr;
3963cab2bb3Spatrick     str = ParseFileLineInfo(&addr, str);
3973cab2bb3Spatrick     local.decl_file = addr.file;
3983cab2bb3Spatrick     local.decl_line = addr.line;
3993cab2bb3Spatrick 
4003cab2bb3Spatrick     local.has_frame_offset = internal_strncmp(str, "??", 2) != 0;
4013cab2bb3Spatrick     str = ExtractSptr(str, " ", &local.frame_offset);
4023cab2bb3Spatrick 
4033cab2bb3Spatrick     local.has_size = internal_strncmp(str, "??", 2) != 0;
4043cab2bb3Spatrick     str = ExtractUptr(str, " ", &local.size);
4053cab2bb3Spatrick 
4063cab2bb3Spatrick     local.has_tag_offset = internal_strncmp(str, "??", 2) != 0;
4073cab2bb3Spatrick     str = ExtractUptr(str, "\n", &local.tag_offset);
4083cab2bb3Spatrick 
4093cab2bb3Spatrick     locals->push_back(local);
4103cab2bb3Spatrick   }
4113cab2bb3Spatrick }
4123cab2bb3Spatrick 
SymbolizePC(uptr addr,SymbolizedStack * stack)4133cab2bb3Spatrick bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
4143cab2bb3Spatrick   AddressInfo *info = &stack->info;
4153cab2bb3Spatrick   const char *buf = FormatAndSendCommand(
4163cab2bb3Spatrick       "CODE", info->module, info->module_offset, info->module_arch);
4171f9cb04fSpatrick   if (!buf)
4181f9cb04fSpatrick     return false;
4193cab2bb3Spatrick   ParseSymbolizePCOutput(buf, stack);
4203cab2bb3Spatrick   return true;
4213cab2bb3Spatrick }
4223cab2bb3Spatrick 
SymbolizeData(uptr addr,DataInfo * info)4233cab2bb3Spatrick bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
4243cab2bb3Spatrick   const char *buf = FormatAndSendCommand(
4253cab2bb3Spatrick       "DATA", info->module, info->module_offset, info->module_arch);
4261f9cb04fSpatrick   if (!buf)
4271f9cb04fSpatrick     return false;
4283cab2bb3Spatrick   ParseSymbolizeDataOutput(buf, info);
4293cab2bb3Spatrick   info->start += (addr - info->module_offset); // Add the base address.
4303cab2bb3Spatrick   return true;
4313cab2bb3Spatrick }
4323cab2bb3Spatrick 
SymbolizeFrame(uptr addr,FrameInfo * info)4333cab2bb3Spatrick bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
4343cab2bb3Spatrick   const char *buf = FormatAndSendCommand(
4353cab2bb3Spatrick       "FRAME", info->module, info->module_offset, info->module_arch);
4361f9cb04fSpatrick   if (!buf)
4371f9cb04fSpatrick     return false;
4383cab2bb3Spatrick   ParseSymbolizeFrameOutput(buf, &info->locals);
4393cab2bb3Spatrick   return true;
4403cab2bb3Spatrick }
4413cab2bb3Spatrick 
FormatAndSendCommand(const char * command_prefix,const char * module_name,uptr module_offset,ModuleArch arch)4423cab2bb3Spatrick const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
4433cab2bb3Spatrick                                                  const char *module_name,
4443cab2bb3Spatrick                                                  uptr module_offset,
4453cab2bb3Spatrick                                                  ModuleArch arch) {
4463cab2bb3Spatrick   CHECK(module_name);
4471f9cb04fSpatrick   int size_needed = 0;
4481f9cb04fSpatrick   if (arch == kModuleArchUnknown)
4491f9cb04fSpatrick     size_needed = internal_snprintf(buffer_, kBufferSize, "%s \"%s\" 0x%zx\n",
4501f9cb04fSpatrick                                     command_prefix, module_name, module_offset);
4511f9cb04fSpatrick   else
4521f9cb04fSpatrick     size_needed = internal_snprintf(buffer_, kBufferSize,
4531f9cb04fSpatrick                                     "%s \"%s:%s\" 0x%zx\n", command_prefix,
4541f9cb04fSpatrick                                     module_name, ModuleArchToString(arch),
4551f9cb04fSpatrick                                     module_offset);
4561f9cb04fSpatrick 
4571f9cb04fSpatrick   if (size_needed >= static_cast<int>(kBufferSize)) {
4583cab2bb3Spatrick     Report("WARNING: Command buffer too small");
4593cab2bb3Spatrick     return nullptr;
4603cab2bb3Spatrick   }
4611f9cb04fSpatrick 
4623cab2bb3Spatrick   return symbolizer_process_->SendCommand(buffer_);
4633cab2bb3Spatrick }
4643cab2bb3Spatrick 
SymbolizerProcess(const char * path,bool use_posix_spawn)4653cab2bb3Spatrick SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
4663cab2bb3Spatrick     : path_(path),
4673cab2bb3Spatrick       input_fd_(kInvalidFd),
4683cab2bb3Spatrick       output_fd_(kInvalidFd),
4693cab2bb3Spatrick       times_restarted_(0),
4703cab2bb3Spatrick       failed_to_start_(false),
4713cab2bb3Spatrick       reported_invalid_path_(false),
4723cab2bb3Spatrick       use_posix_spawn_(use_posix_spawn) {
4733cab2bb3Spatrick   CHECK(path_);
4743cab2bb3Spatrick   CHECK_NE(path_[0], '\0');
4753cab2bb3Spatrick }
4763cab2bb3Spatrick 
IsSameModule(const char * path)4773cab2bb3Spatrick static bool IsSameModule(const char* path) {
4783cab2bb3Spatrick   if (const char* ProcessName = GetProcessName()) {
4793cab2bb3Spatrick     if (const char* SymbolizerName = StripModuleName(path)) {
4803cab2bb3Spatrick       return !internal_strcmp(ProcessName, SymbolizerName);
4813cab2bb3Spatrick     }
4823cab2bb3Spatrick   }
4833cab2bb3Spatrick   return false;
4843cab2bb3Spatrick }
4853cab2bb3Spatrick 
SendCommand(const char * command)4863cab2bb3Spatrick const char *SymbolizerProcess::SendCommand(const char *command) {
4873cab2bb3Spatrick   if (failed_to_start_)
4883cab2bb3Spatrick     return nullptr;
4893cab2bb3Spatrick   if (IsSameModule(path_)) {
4903cab2bb3Spatrick     Report("WARNING: Symbolizer was blocked from starting itself!\n");
4913cab2bb3Spatrick     failed_to_start_ = true;
4923cab2bb3Spatrick     return nullptr;
4933cab2bb3Spatrick   }
4943cab2bb3Spatrick   for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
4953cab2bb3Spatrick     // Start or restart symbolizer if we failed to send command to it.
4963cab2bb3Spatrick     if (const char *res = SendCommandImpl(command))
4973cab2bb3Spatrick       return res;
4983cab2bb3Spatrick     Restart();
4993cab2bb3Spatrick   }
5003cab2bb3Spatrick   if (!failed_to_start_) {
5013cab2bb3Spatrick     Report("WARNING: Failed to use and restart external symbolizer!\n");
5023cab2bb3Spatrick     failed_to_start_ = true;
5033cab2bb3Spatrick   }
5041f9cb04fSpatrick   return nullptr;
5053cab2bb3Spatrick }
5063cab2bb3Spatrick 
SendCommandImpl(const char * command)5073cab2bb3Spatrick const char *SymbolizerProcess::SendCommandImpl(const char *command) {
5083cab2bb3Spatrick   if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
5091f9cb04fSpatrick       return nullptr;
5103cab2bb3Spatrick   if (!WriteToSymbolizer(command, internal_strlen(command)))
5111f9cb04fSpatrick       return nullptr;
512*810390e3Srobert   if (!ReadFromSymbolizer())
5131f9cb04fSpatrick     return nullptr;
514*810390e3Srobert   return buffer_.data();
5153cab2bb3Spatrick }
5163cab2bb3Spatrick 
Restart()5173cab2bb3Spatrick bool SymbolizerProcess::Restart() {
5183cab2bb3Spatrick   if (input_fd_ != kInvalidFd)
5193cab2bb3Spatrick     CloseFile(input_fd_);
5203cab2bb3Spatrick   if (output_fd_ != kInvalidFd)
5213cab2bb3Spatrick     CloseFile(output_fd_);
5223cab2bb3Spatrick   return StartSymbolizerSubprocess();
5233cab2bb3Spatrick }
5243cab2bb3Spatrick 
ReadFromSymbolizer()525*810390e3Srobert bool SymbolizerProcess::ReadFromSymbolizer() {
526*810390e3Srobert   buffer_.clear();
527*810390e3Srobert   constexpr uptr max_length = 1024;
528*810390e3Srobert   bool ret = true;
529*810390e3Srobert   do {
5303cab2bb3Spatrick     uptr just_read = 0;
531*810390e3Srobert     uptr size_before = buffer_.size();
532*810390e3Srobert     buffer_.resize(size_before + max_length);
533*810390e3Srobert     buffer_.resize(buffer_.capacity());
534*810390e3Srobert     bool ret = ReadFromFile(input_fd_, &buffer_[size_before],
535*810390e3Srobert                             buffer_.size() - size_before, &just_read);
536*810390e3Srobert 
537*810390e3Srobert     if (!ret)
538*810390e3Srobert       just_read = 0;
539*810390e3Srobert 
540*810390e3Srobert     buffer_.resize(size_before + just_read);
541*810390e3Srobert 
5423cab2bb3Spatrick     // We can't read 0 bytes, as we don't expect external symbolizer to close
5433cab2bb3Spatrick     // its stdout.
544*810390e3Srobert     if (just_read == 0) {
5453cab2bb3Spatrick       Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
546*810390e3Srobert       ret = false;
5473cab2bb3Spatrick       break;
5483cab2bb3Spatrick     }
549*810390e3Srobert   } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size()));
550*810390e3Srobert   buffer_.push_back('\0');
551*810390e3Srobert   return ret;
5523cab2bb3Spatrick }
5533cab2bb3Spatrick 
WriteToSymbolizer(const char * buffer,uptr length)5543cab2bb3Spatrick bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
5553cab2bb3Spatrick   if (length == 0)
5563cab2bb3Spatrick     return true;
5573cab2bb3Spatrick   uptr write_len = 0;
5583cab2bb3Spatrick   bool success = WriteToFile(output_fd_, buffer, length, &write_len);
5593cab2bb3Spatrick   if (!success || write_len != length) {
5603cab2bb3Spatrick     Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
5613cab2bb3Spatrick     return false;
5623cab2bb3Spatrick   }
5633cab2bb3Spatrick   return true;
5643cab2bb3Spatrick }
5653cab2bb3Spatrick 
5663cab2bb3Spatrick #endif  // !SANITIZER_SYMBOLIZER_MARKUP
5673cab2bb3Spatrick 
5683cab2bb3Spatrick }  // namespace __sanitizer
569