168d75effSDimitry Andric //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
268d75effSDimitry Andric //
368d75effSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
468d75effSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
568d75effSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
668d75effSDimitry Andric //
768d75effSDimitry Andric //===----------------------------------------------------------------------===//
868d75effSDimitry Andric //
968d75effSDimitry Andric // This file is shared between AddressSanitizer and ThreadSanitizer
1068d75effSDimitry Andric // run-time libraries.
1168d75effSDimitry Andric //===----------------------------------------------------------------------===//
1268d75effSDimitry Andric
1368d75effSDimitry Andric #include "sanitizer_allocator_internal.h"
1468d75effSDimitry Andric #include "sanitizer_internal_defs.h"
15e8d8bef9SDimitry Andric #include "sanitizer_platform.h"
1668d75effSDimitry Andric #include "sanitizer_symbolizer_internal.h"
1768d75effSDimitry Andric
1868d75effSDimitry Andric namespace __sanitizer {
1968d75effSDimitry Andric
GetOrInit()2068d75effSDimitry Andric Symbolizer *Symbolizer::GetOrInit() {
2168d75effSDimitry Andric SpinMutexLock l(&init_mu_);
2268d75effSDimitry Andric if (symbolizer_)
2368d75effSDimitry Andric return symbolizer_;
2468d75effSDimitry Andric symbolizer_ = PlatformInit();
2568d75effSDimitry Andric CHECK(symbolizer_);
2668d75effSDimitry Andric return symbolizer_;
2768d75effSDimitry Andric }
2868d75effSDimitry Andric
2968d75effSDimitry Andric // See sanitizer_symbolizer_markup.cpp.
3068d75effSDimitry Andric #if !SANITIZER_SYMBOLIZER_MARKUP
3168d75effSDimitry Andric
ExtractToken(const char * str,const char * delims,char ** result)3268d75effSDimitry Andric const char *ExtractToken(const char *str, const char *delims, char **result) {
3368d75effSDimitry Andric uptr prefix_len = internal_strcspn(str, delims);
3468d75effSDimitry Andric *result = (char*)InternalAlloc(prefix_len + 1);
3568d75effSDimitry Andric internal_memcpy(*result, str, prefix_len);
3668d75effSDimitry Andric (*result)[prefix_len] = '\0';
3768d75effSDimitry Andric const char *prefix_end = str + prefix_len;
3868d75effSDimitry Andric if (*prefix_end != '\0') prefix_end++;
3968d75effSDimitry Andric return prefix_end;
4068d75effSDimitry Andric }
4168d75effSDimitry Andric
ExtractInt(const char * str,const char * delims,int * result)4268d75effSDimitry Andric const char *ExtractInt(const char *str, const char *delims, int *result) {
435ffd83dbSDimitry Andric char *buff = nullptr;
4468d75effSDimitry Andric const char *ret = ExtractToken(str, delims, &buff);
455ffd83dbSDimitry Andric if (buff) {
4668d75effSDimitry Andric *result = (int)internal_atoll(buff);
4768d75effSDimitry Andric }
4868d75effSDimitry Andric InternalFree(buff);
4968d75effSDimitry Andric return ret;
5068d75effSDimitry Andric }
5168d75effSDimitry Andric
ExtractUptr(const char * str,const char * delims,uptr * result)5268d75effSDimitry Andric const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
535ffd83dbSDimitry Andric char *buff = nullptr;
5468d75effSDimitry Andric const char *ret = ExtractToken(str, delims, &buff);
555ffd83dbSDimitry Andric if (buff) {
5668d75effSDimitry Andric *result = (uptr)internal_atoll(buff);
5768d75effSDimitry Andric }
5868d75effSDimitry Andric InternalFree(buff);
5968d75effSDimitry Andric return ret;
6068d75effSDimitry Andric }
6168d75effSDimitry Andric
ExtractSptr(const char * str,const char * delims,sptr * result)6268d75effSDimitry Andric const char *ExtractSptr(const char *str, const char *delims, sptr *result) {
635ffd83dbSDimitry Andric char *buff = nullptr;
6468d75effSDimitry Andric const char *ret = ExtractToken(str, delims, &buff);
655ffd83dbSDimitry Andric if (buff) {
6668d75effSDimitry Andric *result = (sptr)internal_atoll(buff);
6768d75effSDimitry Andric }
6868d75effSDimitry Andric InternalFree(buff);
6968d75effSDimitry Andric return ret;
7068d75effSDimitry Andric }
7168d75effSDimitry Andric
ExtractTokenUpToDelimiter(const char * str,const char * delimiter,char ** result)7268d75effSDimitry Andric const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
7368d75effSDimitry Andric char **result) {
7468d75effSDimitry Andric const char *found_delimiter = internal_strstr(str, delimiter);
7568d75effSDimitry Andric uptr prefix_len =
7668d75effSDimitry Andric found_delimiter ? found_delimiter - str : internal_strlen(str);
7768d75effSDimitry Andric *result = (char *)InternalAlloc(prefix_len + 1);
7868d75effSDimitry Andric internal_memcpy(*result, str, prefix_len);
7968d75effSDimitry Andric (*result)[prefix_len] = '\0';
8068d75effSDimitry Andric const char *prefix_end = str + prefix_len;
8168d75effSDimitry Andric if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
8268d75effSDimitry Andric return prefix_end;
8368d75effSDimitry Andric }
8468d75effSDimitry Andric
SymbolizePC(uptr addr)8568d75effSDimitry Andric SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
86349cc55cSDimitry Andric Lock l(&mu_);
8768d75effSDimitry Andric SymbolizedStack *res = SymbolizedStack::New(addr);
880eae32dcSDimitry Andric auto *mod = FindModuleForAddress(addr);
890eae32dcSDimitry Andric if (!mod)
9068d75effSDimitry Andric return res;
9168d75effSDimitry Andric // Always fill data about module name and offset.
920eae32dcSDimitry Andric res->info.FillModuleInfo(*mod);
9368d75effSDimitry Andric for (auto &tool : tools_) {
9468d75effSDimitry Andric SymbolizerScope sym_scope(this);
9568d75effSDimitry Andric if (tool.SymbolizePC(addr, res)) {
9668d75effSDimitry Andric return res;
9768d75effSDimitry Andric }
9868d75effSDimitry Andric }
9968d75effSDimitry Andric return res;
10068d75effSDimitry Andric }
10168d75effSDimitry Andric
SymbolizeData(uptr addr,DataInfo * info)10268d75effSDimitry Andric bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
103349cc55cSDimitry Andric Lock l(&mu_);
1045ffd83dbSDimitry Andric const char *module_name = nullptr;
10568d75effSDimitry Andric uptr module_offset;
10668d75effSDimitry Andric ModuleArch arch;
10768d75effSDimitry Andric if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
10868d75effSDimitry Andric &arch))
10968d75effSDimitry Andric return false;
11068d75effSDimitry Andric info->Clear();
11168d75effSDimitry Andric info->module = internal_strdup(module_name);
11268d75effSDimitry Andric info->module_offset = module_offset;
11368d75effSDimitry Andric info->module_arch = arch;
11468d75effSDimitry Andric for (auto &tool : tools_) {
11568d75effSDimitry Andric SymbolizerScope sym_scope(this);
11668d75effSDimitry Andric if (tool.SymbolizeData(addr, info)) {
11768d75effSDimitry Andric return true;
11868d75effSDimitry Andric }
11968d75effSDimitry Andric }
120*5f757f3fSDimitry Andric return false;
12168d75effSDimitry Andric }
12268d75effSDimitry Andric
SymbolizeFrame(uptr addr,FrameInfo * info)12368d75effSDimitry Andric bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
124349cc55cSDimitry Andric Lock l(&mu_);
1255ffd83dbSDimitry Andric const char *module_name = nullptr;
12668d75effSDimitry Andric if (!FindModuleNameAndOffsetForAddress(
12768d75effSDimitry Andric addr, &module_name, &info->module_offset, &info->module_arch))
12868d75effSDimitry Andric return false;
12968d75effSDimitry Andric info->module = internal_strdup(module_name);
13068d75effSDimitry Andric for (auto &tool : tools_) {
13168d75effSDimitry Andric SymbolizerScope sym_scope(this);
13268d75effSDimitry Andric if (tool.SymbolizeFrame(addr, info)) {
13368d75effSDimitry Andric return true;
13468d75effSDimitry Andric }
13568d75effSDimitry Andric }
136*5f757f3fSDimitry Andric return false;
13768d75effSDimitry Andric }
13868d75effSDimitry Andric
GetModuleNameAndOffsetForPC(uptr pc,const char ** module_name,uptr * module_address)13968d75effSDimitry Andric bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
14068d75effSDimitry Andric uptr *module_address) {
141349cc55cSDimitry Andric Lock l(&mu_);
14268d75effSDimitry Andric const char *internal_module_name = nullptr;
14368d75effSDimitry Andric ModuleArch arch;
14468d75effSDimitry Andric if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
14568d75effSDimitry Andric module_address, &arch))
14668d75effSDimitry Andric return false;
14768d75effSDimitry Andric
14868d75effSDimitry Andric if (module_name)
14968d75effSDimitry Andric *module_name = module_names_.GetOwnedCopy(internal_module_name);
15068d75effSDimitry Andric return true;
15168d75effSDimitry Andric }
15268d75effSDimitry Andric
Flush()15368d75effSDimitry Andric void Symbolizer::Flush() {
154349cc55cSDimitry Andric Lock l(&mu_);
15568d75effSDimitry Andric for (auto &tool : tools_) {
15668d75effSDimitry Andric SymbolizerScope sym_scope(this);
15768d75effSDimitry Andric tool.Flush();
15868d75effSDimitry Andric }
15968d75effSDimitry Andric }
16068d75effSDimitry Andric
Demangle(const char * name)16168d75effSDimitry Andric const char *Symbolizer::Demangle(const char *name) {
162*5f757f3fSDimitry Andric CHECK(name);
163349cc55cSDimitry Andric Lock l(&mu_);
16468d75effSDimitry Andric for (auto &tool : tools_) {
16568d75effSDimitry Andric SymbolizerScope sym_scope(this);
16668d75effSDimitry Andric if (const char *demangled = tool.Demangle(name))
16768d75effSDimitry Andric return demangled;
16868d75effSDimitry Andric }
169*5f757f3fSDimitry Andric if (const char *demangled = PlatformDemangle(name))
170*5f757f3fSDimitry Andric return demangled;
171*5f757f3fSDimitry Andric return name;
17268d75effSDimitry Andric }
17368d75effSDimitry Andric
FindModuleNameAndOffsetForAddress(uptr address,const char ** module_name,uptr * module_offset,ModuleArch * module_arch)17468d75effSDimitry Andric bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
17568d75effSDimitry Andric const char **module_name,
17668d75effSDimitry Andric uptr *module_offset,
17768d75effSDimitry Andric ModuleArch *module_arch) {
17868d75effSDimitry Andric const LoadedModule *module = FindModuleForAddress(address);
1795ffd83dbSDimitry Andric if (!module)
18068d75effSDimitry Andric return false;
18168d75effSDimitry Andric *module_name = module->full_name();
18268d75effSDimitry Andric *module_offset = address - module->base_address();
18368d75effSDimitry Andric *module_arch = module->arch();
18468d75effSDimitry Andric return true;
18568d75effSDimitry Andric }
18668d75effSDimitry Andric
RefreshModules()18768d75effSDimitry Andric void Symbolizer::RefreshModules() {
18868d75effSDimitry Andric modules_.init();
18968d75effSDimitry Andric fallback_modules_.fallbackInit();
19068d75effSDimitry Andric RAW_CHECK(modules_.size() > 0);
19168d75effSDimitry Andric modules_fresh_ = true;
19268d75effSDimitry Andric }
19368d75effSDimitry Andric
GetRefreshedListOfModules()194*5f757f3fSDimitry Andric const ListOfModules &Symbolizer::GetRefreshedListOfModules() {
195*5f757f3fSDimitry Andric if (!modules_fresh_)
196*5f757f3fSDimitry Andric RefreshModules();
197*5f757f3fSDimitry Andric
198*5f757f3fSDimitry Andric return modules_;
199*5f757f3fSDimitry Andric }
200*5f757f3fSDimitry Andric
SearchForModule(const ListOfModules & modules,uptr address)20168d75effSDimitry Andric static const LoadedModule *SearchForModule(const ListOfModules &modules,
20268d75effSDimitry Andric uptr address) {
20368d75effSDimitry Andric for (uptr i = 0; i < modules.size(); i++) {
20468d75effSDimitry Andric if (modules[i].containsAddress(address)) {
20568d75effSDimitry Andric return &modules[i];
20668d75effSDimitry Andric }
20768d75effSDimitry Andric }
20868d75effSDimitry Andric return nullptr;
20968d75effSDimitry Andric }
21068d75effSDimitry Andric
FindModuleForAddress(uptr address)21168d75effSDimitry Andric const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
21268d75effSDimitry Andric bool modules_were_reloaded = false;
21368d75effSDimitry Andric if (!modules_fresh_) {
21468d75effSDimitry Andric RefreshModules();
21568d75effSDimitry Andric modules_were_reloaded = true;
21668d75effSDimitry Andric }
21768d75effSDimitry Andric const LoadedModule *module = SearchForModule(modules_, address);
21868d75effSDimitry Andric if (module) return module;
21968d75effSDimitry Andric
22068d75effSDimitry Andric // dlopen/dlclose interceptors invalidate the module list, but when
22168d75effSDimitry Andric // interception is disabled, we need to retry if the lookup fails in
22268d75effSDimitry Andric // case the module list changed.
22368d75effSDimitry Andric #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
22468d75effSDimitry Andric if (!modules_were_reloaded) {
22568d75effSDimitry Andric RefreshModules();
22668d75effSDimitry Andric module = SearchForModule(modules_, address);
22768d75effSDimitry Andric if (module) return module;
22868d75effSDimitry Andric }
22968d75effSDimitry Andric #endif
23068d75effSDimitry Andric
23168d75effSDimitry Andric if (fallback_modules_.size()) {
23268d75effSDimitry Andric module = SearchForModule(fallback_modules_, address);
23368d75effSDimitry Andric }
23468d75effSDimitry Andric return module;
23568d75effSDimitry Andric }
23668d75effSDimitry Andric
23768d75effSDimitry Andric // For now we assume the following protocol:
23868d75effSDimitry Andric // For each request of the form
23968d75effSDimitry Andric // <module_name> <module_offset>
24068d75effSDimitry Andric // passed to STDIN, external symbolizer prints to STDOUT response:
24168d75effSDimitry Andric // <function_name>
24268d75effSDimitry Andric // <file_name>:<line_number>:<column_number>
24368d75effSDimitry Andric // <function_name>
24468d75effSDimitry Andric // <file_name>:<line_number>:<column_number>
24568d75effSDimitry Andric // ...
24668d75effSDimitry Andric // <empty line>
247e8d8bef9SDimitry Andric class LLVMSymbolizerProcess final : public SymbolizerProcess {
24868d75effSDimitry Andric public:
LLVMSymbolizerProcess(const char * path)24968d75effSDimitry Andric explicit LLVMSymbolizerProcess(const char *path)
25081ad6265SDimitry Andric : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {}
25168d75effSDimitry Andric
25268d75effSDimitry Andric private:
ReachedEndOfOutput(const char * buffer,uptr length) const25368d75effSDimitry Andric bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
25468d75effSDimitry Andric // Empty line marks the end of llvm-symbolizer output.
25568d75effSDimitry Andric return length >= 2 && buffer[length - 1] == '\n' &&
25668d75effSDimitry Andric buffer[length - 2] == '\n';
25768d75effSDimitry Andric }
25868d75effSDimitry Andric
25968d75effSDimitry Andric // When adding a new architecture, don't forget to also update
26068d75effSDimitry Andric // script/asan_symbolize.py and sanitizer_common.h.
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax]) const26168d75effSDimitry Andric void GetArgV(const char *path_to_binary,
26268d75effSDimitry Andric const char *(&argv)[kArgVMax]) const override {
26368d75effSDimitry Andric #if defined(__x86_64h__)
26468d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=x86_64h";
26568d75effSDimitry Andric #elif defined(__x86_64__)
26668d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=x86_64";
26768d75effSDimitry Andric #elif defined(__i386__)
26868d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=i386";
269bdd1243dSDimitry Andric #elif SANITIZER_LOONGARCH64
270bdd1243dSDimitry Andric const char *const kSymbolizerArch = "--default-arch=loongarch64";
271e8d8bef9SDimitry Andric #elif SANITIZER_RISCV64
272e8d8bef9SDimitry Andric const char *const kSymbolizerArch = "--default-arch=riscv64";
27368d75effSDimitry Andric #elif defined(__aarch64__)
27468d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=arm64";
27568d75effSDimitry Andric #elif defined(__arm__)
27668d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=arm";
27768d75effSDimitry Andric #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
27868d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=powerpc64";
27968d75effSDimitry Andric #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
28068d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=powerpc64le";
28168d75effSDimitry Andric #elif defined(__s390x__)
28268d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=s390x";
28368d75effSDimitry Andric #elif defined(__s390__)
28468d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=s390";
28568d75effSDimitry Andric #else
28668d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=unknown";
28768d75effSDimitry Andric #endif
28868d75effSDimitry Andric
2890eae32dcSDimitry Andric const char *const demangle_flag =
2900eae32dcSDimitry Andric common_flags()->demangle ? "--demangle" : "--no-demangle";
2910eae32dcSDimitry Andric const char *const inline_flag =
2920eae32dcSDimitry Andric common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines";
29368d75effSDimitry Andric int i = 0;
29468d75effSDimitry Andric argv[i++] = path_to_binary;
2950eae32dcSDimitry Andric argv[i++] = demangle_flag;
29668d75effSDimitry Andric argv[i++] = inline_flag;
29768d75effSDimitry Andric argv[i++] = kSymbolizerArch;
29868d75effSDimitry Andric argv[i++] = nullptr;
2990eae32dcSDimitry Andric CHECK_LE(i, kArgVMax);
30068d75effSDimitry Andric }
30168d75effSDimitry Andric };
30268d75effSDimitry Andric
LLVMSymbolizer(const char * path,LowLevelAllocator * allocator)30368d75effSDimitry Andric LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
30468d75effSDimitry Andric : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
30568d75effSDimitry Andric
30668d75effSDimitry Andric // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
30768d75effSDimitry Andric // Windows, so extract tokens from the right hand side first. The column info is
30868d75effSDimitry Andric // also optional.
ParseFileLineInfo(AddressInfo * info,const char * str)30968d75effSDimitry Andric static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
3105ffd83dbSDimitry Andric char *file_line_info = nullptr;
31168d75effSDimitry Andric str = ExtractToken(str, "\n", &file_line_info);
31268d75effSDimitry Andric CHECK(file_line_info);
31368d75effSDimitry Andric
31468d75effSDimitry Andric if (uptr size = internal_strlen(file_line_info)) {
31568d75effSDimitry Andric char *back = file_line_info + size - 1;
31668d75effSDimitry Andric for (int i = 0; i < 2; ++i) {
31768d75effSDimitry Andric while (back > file_line_info && IsDigit(*back)) --back;
31868d75effSDimitry Andric if (*back != ':' || !IsDigit(back[1])) break;
31968d75effSDimitry Andric info->column = info->line;
32068d75effSDimitry Andric info->line = internal_atoll(back + 1);
32168d75effSDimitry Andric // Truncate the string at the colon to keep only filename.
32268d75effSDimitry Andric *back = '\0';
32368d75effSDimitry Andric --back;
32468d75effSDimitry Andric }
32568d75effSDimitry Andric ExtractToken(file_line_info, "", &info->file);
32668d75effSDimitry Andric }
32768d75effSDimitry Andric
32868d75effSDimitry Andric InternalFree(file_line_info);
32968d75effSDimitry Andric return str;
33068d75effSDimitry Andric }
33168d75effSDimitry Andric
33268d75effSDimitry Andric // Parses one or more two-line strings in the following format:
33368d75effSDimitry Andric // <function_name>
33468d75effSDimitry Andric // <file_name>:<line_number>[:<column_number>]
33568d75effSDimitry Andric // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
33668d75effSDimitry Andric // them use the same output format.
ParseSymbolizePCOutput(const char * str,SymbolizedStack * res)33768d75effSDimitry Andric void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
33868d75effSDimitry Andric bool top_frame = true;
33968d75effSDimitry Andric SymbolizedStack *last = res;
34068d75effSDimitry Andric while (true) {
3415ffd83dbSDimitry Andric char *function_name = nullptr;
34268d75effSDimitry Andric str = ExtractToken(str, "\n", &function_name);
34368d75effSDimitry Andric CHECK(function_name);
34468d75effSDimitry Andric if (function_name[0] == '\0') {
34568d75effSDimitry Andric // There are no more frames.
34668d75effSDimitry Andric InternalFree(function_name);
34768d75effSDimitry Andric break;
34868d75effSDimitry Andric }
34968d75effSDimitry Andric SymbolizedStack *cur;
35068d75effSDimitry Andric if (top_frame) {
35168d75effSDimitry Andric cur = res;
35268d75effSDimitry Andric top_frame = false;
35368d75effSDimitry Andric } else {
35468d75effSDimitry Andric cur = SymbolizedStack::New(res->info.address);
35568d75effSDimitry Andric cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
35668d75effSDimitry Andric res->info.module_arch);
35768d75effSDimitry Andric last->next = cur;
35868d75effSDimitry Andric last = cur;
35968d75effSDimitry Andric }
36068d75effSDimitry Andric
36168d75effSDimitry Andric AddressInfo *info = &cur->info;
36268d75effSDimitry Andric info->function = function_name;
36368d75effSDimitry Andric str = ParseFileLineInfo(info, str);
36468d75effSDimitry Andric
36568d75effSDimitry Andric // Functions and filenames can be "??", in which case we write 0
36668d75effSDimitry Andric // to address info to mark that names are unknown.
36768d75effSDimitry Andric if (0 == internal_strcmp(info->function, "??")) {
36868d75effSDimitry Andric InternalFree(info->function);
36968d75effSDimitry Andric info->function = 0;
37068d75effSDimitry Andric }
371fe6060f1SDimitry Andric if (info->file && 0 == internal_strcmp(info->file, "??")) {
37268d75effSDimitry Andric InternalFree(info->file);
37368d75effSDimitry Andric info->file = 0;
37468d75effSDimitry Andric }
37568d75effSDimitry Andric }
37668d75effSDimitry Andric }
37768d75effSDimitry Andric
37881ad6265SDimitry Andric // Parses a two- or three-line string in the following format:
37968d75effSDimitry Andric // <symbol_name>
38068d75effSDimitry Andric // <start_address> <size>
//   <filename>:<line>
38281ad6265SDimitry Andric // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
38381ad6265SDimitry Andric // for symbolizing the third line in D123538, but we support the older two-line
38481ad6265SDimitry Andric // information as well.
ParseSymbolizeDataOutput(const char * str,DataInfo * info)38568d75effSDimitry Andric void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
38668d75effSDimitry Andric str = ExtractToken(str, "\n", &info->name);
38768d75effSDimitry Andric str = ExtractUptr(str, " ", &info->start);
38868d75effSDimitry Andric str = ExtractUptr(str, "\n", &info->size);
38981ad6265SDimitry Andric // Note: If the third line isn't present, these calls will set info.{file,
39081ad6265SDimitry Andric // line} to empty strings.
39181ad6265SDimitry Andric str = ExtractToken(str, ":", &info->file);
39281ad6265SDimitry Andric str = ExtractUptr(str, "\n", &info->line);
39368d75effSDimitry Andric }
39468d75effSDimitry Andric
ParseSymbolizeFrameOutput(const char * str,InternalMmapVector<LocalInfo> * locals)395*5f757f3fSDimitry Andric void ParseSymbolizeFrameOutput(const char *str,
39668d75effSDimitry Andric InternalMmapVector<LocalInfo> *locals) {
39768d75effSDimitry Andric if (internal_strncmp(str, "??", 2) == 0)
39868d75effSDimitry Andric return;
39968d75effSDimitry Andric
40068d75effSDimitry Andric while (*str) {
40168d75effSDimitry Andric LocalInfo local;
40268d75effSDimitry Andric str = ExtractToken(str, "\n", &local.function_name);
40368d75effSDimitry Andric str = ExtractToken(str, "\n", &local.name);
40468d75effSDimitry Andric
40568d75effSDimitry Andric AddressInfo addr;
40668d75effSDimitry Andric str = ParseFileLineInfo(&addr, str);
40768d75effSDimitry Andric local.decl_file = addr.file;
40868d75effSDimitry Andric local.decl_line = addr.line;
40968d75effSDimitry Andric
41068d75effSDimitry Andric local.has_frame_offset = internal_strncmp(str, "??", 2) != 0;
41168d75effSDimitry Andric str = ExtractSptr(str, " ", &local.frame_offset);
41268d75effSDimitry Andric
41368d75effSDimitry Andric local.has_size = internal_strncmp(str, "??", 2) != 0;
41468d75effSDimitry Andric str = ExtractUptr(str, " ", &local.size);
41568d75effSDimitry Andric
41668d75effSDimitry Andric local.has_tag_offset = internal_strncmp(str, "??", 2) != 0;
41768d75effSDimitry Andric str = ExtractUptr(str, "\n", &local.tag_offset);
41868d75effSDimitry Andric
41968d75effSDimitry Andric locals->push_back(local);
42068d75effSDimitry Andric }
42168d75effSDimitry Andric }
42268d75effSDimitry Andric
SymbolizePC(uptr addr,SymbolizedStack * stack)42368d75effSDimitry Andric bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
42468d75effSDimitry Andric AddressInfo *info = &stack->info;
42568d75effSDimitry Andric const char *buf = FormatAndSendCommand(
42668d75effSDimitry Andric "CODE", info->module, info->module_offset, info->module_arch);
4275ffd83dbSDimitry Andric if (!buf)
4285ffd83dbSDimitry Andric return false;
42968d75effSDimitry Andric ParseSymbolizePCOutput(buf, stack);
43068d75effSDimitry Andric return true;
43168d75effSDimitry Andric }
43268d75effSDimitry Andric
SymbolizeData(uptr addr,DataInfo * info)43368d75effSDimitry Andric bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
43468d75effSDimitry Andric const char *buf = FormatAndSendCommand(
43568d75effSDimitry Andric "DATA", info->module, info->module_offset, info->module_arch);
4365ffd83dbSDimitry Andric if (!buf)
4375ffd83dbSDimitry Andric return false;
43868d75effSDimitry Andric ParseSymbolizeDataOutput(buf, info);
43968d75effSDimitry Andric info->start += (addr - info->module_offset); // Add the base address.
44068d75effSDimitry Andric return true;
44168d75effSDimitry Andric }
44268d75effSDimitry Andric
SymbolizeFrame(uptr addr,FrameInfo * info)44368d75effSDimitry Andric bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
44468d75effSDimitry Andric const char *buf = FormatAndSendCommand(
44568d75effSDimitry Andric "FRAME", info->module, info->module_offset, info->module_arch);
4465ffd83dbSDimitry Andric if (!buf)
4475ffd83dbSDimitry Andric return false;
44868d75effSDimitry Andric ParseSymbolizeFrameOutput(buf, &info->locals);
44968d75effSDimitry Andric return true;
45068d75effSDimitry Andric }
45168d75effSDimitry Andric
FormatAndSendCommand(const char * command_prefix,const char * module_name,uptr module_offset,ModuleArch arch)45268d75effSDimitry Andric const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
45368d75effSDimitry Andric const char *module_name,
45468d75effSDimitry Andric uptr module_offset,
45568d75effSDimitry Andric ModuleArch arch) {
45668d75effSDimitry Andric CHECK(module_name);
4575ffd83dbSDimitry Andric int size_needed = 0;
4585ffd83dbSDimitry Andric if (arch == kModuleArchUnknown)
4595ffd83dbSDimitry Andric size_needed = internal_snprintf(buffer_, kBufferSize, "%s \"%s\" 0x%zx\n",
4605ffd83dbSDimitry Andric command_prefix, module_name, module_offset);
4615ffd83dbSDimitry Andric else
4625ffd83dbSDimitry Andric size_needed = internal_snprintf(buffer_, kBufferSize,
4635ffd83dbSDimitry Andric "%s \"%s:%s\" 0x%zx\n", command_prefix,
4645ffd83dbSDimitry Andric module_name, ModuleArchToString(arch),
4655ffd83dbSDimitry Andric module_offset);
4665ffd83dbSDimitry Andric
4675ffd83dbSDimitry Andric if (size_needed >= static_cast<int>(kBufferSize)) {
46868d75effSDimitry Andric Report("WARNING: Command buffer too small");
46968d75effSDimitry Andric return nullptr;
47068d75effSDimitry Andric }
4715ffd83dbSDimitry Andric
47268d75effSDimitry Andric return symbolizer_process_->SendCommand(buffer_);
47368d75effSDimitry Andric }
47468d75effSDimitry Andric
SymbolizerProcess(const char * path,bool use_posix_spawn)47568d75effSDimitry Andric SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
47668d75effSDimitry Andric : path_(path),
47768d75effSDimitry Andric input_fd_(kInvalidFd),
47868d75effSDimitry Andric output_fd_(kInvalidFd),
47968d75effSDimitry Andric times_restarted_(0),
48068d75effSDimitry Andric failed_to_start_(false),
48168d75effSDimitry Andric reported_invalid_path_(false),
48268d75effSDimitry Andric use_posix_spawn_(use_posix_spawn) {
48368d75effSDimitry Andric CHECK(path_);
48468d75effSDimitry Andric CHECK_NE(path_[0], '\0');
48568d75effSDimitry Andric }
48668d75effSDimitry Andric
IsSameModule(const char * path)48768d75effSDimitry Andric static bool IsSameModule(const char* path) {
48868d75effSDimitry Andric if (const char* ProcessName = GetProcessName()) {
48968d75effSDimitry Andric if (const char* SymbolizerName = StripModuleName(path)) {
49068d75effSDimitry Andric return !internal_strcmp(ProcessName, SymbolizerName);
49168d75effSDimitry Andric }
49268d75effSDimitry Andric }
49368d75effSDimitry Andric return false;
49468d75effSDimitry Andric }
49568d75effSDimitry Andric
SendCommand(const char * command)49668d75effSDimitry Andric const char *SymbolizerProcess::SendCommand(const char *command) {
49768d75effSDimitry Andric if (failed_to_start_)
49868d75effSDimitry Andric return nullptr;
49968d75effSDimitry Andric if (IsSameModule(path_)) {
50068d75effSDimitry Andric Report("WARNING: Symbolizer was blocked from starting itself!\n");
50168d75effSDimitry Andric failed_to_start_ = true;
50268d75effSDimitry Andric return nullptr;
50368d75effSDimitry Andric }
50468d75effSDimitry Andric for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
50568d75effSDimitry Andric // Start or restart symbolizer if we failed to send command to it.
50668d75effSDimitry Andric if (const char *res = SendCommandImpl(command))
50768d75effSDimitry Andric return res;
50868d75effSDimitry Andric Restart();
50968d75effSDimitry Andric }
51068d75effSDimitry Andric if (!failed_to_start_) {
51168d75effSDimitry Andric Report("WARNING: Failed to use and restart external symbolizer!\n");
51268d75effSDimitry Andric failed_to_start_ = true;
51368d75effSDimitry Andric }
5145ffd83dbSDimitry Andric return nullptr;
51568d75effSDimitry Andric }
51668d75effSDimitry Andric
SendCommandImpl(const char * command)51768d75effSDimitry Andric const char *SymbolizerProcess::SendCommandImpl(const char *command) {
51868d75effSDimitry Andric if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
5195ffd83dbSDimitry Andric return nullptr;
52068d75effSDimitry Andric if (!WriteToSymbolizer(command, internal_strlen(command)))
5215ffd83dbSDimitry Andric return nullptr;
52281ad6265SDimitry Andric if (!ReadFromSymbolizer())
5235ffd83dbSDimitry Andric return nullptr;
52481ad6265SDimitry Andric return buffer_.data();
52568d75effSDimitry Andric }
52668d75effSDimitry Andric
Restart()52768d75effSDimitry Andric bool SymbolizerProcess::Restart() {
52868d75effSDimitry Andric if (input_fd_ != kInvalidFd)
52968d75effSDimitry Andric CloseFile(input_fd_);
53068d75effSDimitry Andric if (output_fd_ != kInvalidFd)
53168d75effSDimitry Andric CloseFile(output_fd_);
53268d75effSDimitry Andric return StartSymbolizerSubprocess();
53368d75effSDimitry Andric }
53468d75effSDimitry Andric
ReadFromSymbolizer()53581ad6265SDimitry Andric bool SymbolizerProcess::ReadFromSymbolizer() {
53681ad6265SDimitry Andric buffer_.clear();
53781ad6265SDimitry Andric constexpr uptr max_length = 1024;
53881ad6265SDimitry Andric bool ret = true;
53981ad6265SDimitry Andric do {
54068d75effSDimitry Andric uptr just_read = 0;
54181ad6265SDimitry Andric uptr size_before = buffer_.size();
54281ad6265SDimitry Andric buffer_.resize(size_before + max_length);
54381ad6265SDimitry Andric buffer_.resize(buffer_.capacity());
54481ad6265SDimitry Andric bool ret = ReadFromFile(input_fd_, &buffer_[size_before],
54581ad6265SDimitry Andric buffer_.size() - size_before, &just_read);
54681ad6265SDimitry Andric
54781ad6265SDimitry Andric if (!ret)
54881ad6265SDimitry Andric just_read = 0;
54981ad6265SDimitry Andric
55081ad6265SDimitry Andric buffer_.resize(size_before + just_read);
55181ad6265SDimitry Andric
55268d75effSDimitry Andric // We can't read 0 bytes, as we don't expect external symbolizer to close
55368d75effSDimitry Andric // its stdout.
55481ad6265SDimitry Andric if (just_read == 0) {
55568d75effSDimitry Andric Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
55681ad6265SDimitry Andric ret = false;
55768d75effSDimitry Andric break;
55868d75effSDimitry Andric }
55981ad6265SDimitry Andric } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size()));
56081ad6265SDimitry Andric buffer_.push_back('\0');
56181ad6265SDimitry Andric return ret;
56268d75effSDimitry Andric }
56368d75effSDimitry Andric
WriteToSymbolizer(const char * buffer,uptr length)56468d75effSDimitry Andric bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
56568d75effSDimitry Andric if (length == 0)
56668d75effSDimitry Andric return true;
56768d75effSDimitry Andric uptr write_len = 0;
56868d75effSDimitry Andric bool success = WriteToFile(output_fd_, buffer, length, &write_len);
56968d75effSDimitry Andric if (!success || write_len != length) {
57068d75effSDimitry Andric Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
57168d75effSDimitry Andric return false;
57268d75effSDimitry Andric }
57368d75effSDimitry Andric return true;
57468d75effSDimitry Andric }
57568d75effSDimitry Andric
57668d75effSDimitry Andric #endif // !SANITIZER_SYMBOLIZER_MARKUP
57768d75effSDimitry Andric
57868d75effSDimitry Andric } // namespace __sanitizer
579