xref: /netbsd-src/external/gpl3/gcc.old/dist/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cc (revision c0a68be459da21030695f60d10265c2fc49758f8)
1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
2 //
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // This file is shared between AddressSanitizer and ThreadSanitizer
9 // run-time libraries.
10 //===----------------------------------------------------------------------===//
11 
12 #include "sanitizer_allocator_internal.h"
13 #include "sanitizer_internal_defs.h"
14 #include "sanitizer_symbolizer_internal.h"
15 
16 namespace __sanitizer {
17 
GetOrInit()18 Symbolizer *Symbolizer::GetOrInit() {
19   SpinMutexLock l(&init_mu_);
20   if (symbolizer_)
21     return symbolizer_;
22   symbolizer_ = PlatformInit();
23   CHECK(symbolizer_);
24   return symbolizer_;
25 }
26 
27 // See sanitizer_symbolizer_markup.cc.
28 #if !SANITIZER_SYMBOLIZER_MARKUP
29 
ExtractToken(const char * str,const char * delims,char ** result)30 const char *ExtractToken(const char *str, const char *delims, char **result) {
31   uptr prefix_len = internal_strcspn(str, delims);
32   *result = (char*)InternalAlloc(prefix_len + 1);
33   internal_memcpy(*result, str, prefix_len);
34   (*result)[prefix_len] = '\0';
35   const char *prefix_end = str + prefix_len;
36   if (*prefix_end != '\0') prefix_end++;
37   return prefix_end;
38 }
39 
ExtractInt(const char * str,const char * delims,int * result)40 const char *ExtractInt(const char *str, const char *delims, int *result) {
41   char *buff;
42   const char *ret = ExtractToken(str, delims, &buff);
43   if (buff != 0) {
44     *result = (int)internal_atoll(buff);
45   }
46   InternalFree(buff);
47   return ret;
48 }
49 
ExtractUptr(const char * str,const char * delims,uptr * result)50 const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
51   char *buff;
52   const char *ret = ExtractToken(str, delims, &buff);
53   if (buff != 0) {
54     *result = (uptr)internal_atoll(buff);
55   }
56   InternalFree(buff);
57   return ret;
58 }
59 
ExtractTokenUpToDelimiter(const char * str,const char * delimiter,char ** result)60 const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
61                                       char **result) {
62   const char *found_delimiter = internal_strstr(str, delimiter);
63   uptr prefix_len =
64       found_delimiter ? found_delimiter - str : internal_strlen(str);
65   *result = (char *)InternalAlloc(prefix_len + 1);
66   internal_memcpy(*result, str, prefix_len);
67   (*result)[prefix_len] = '\0';
68   const char *prefix_end = str + prefix_len;
69   if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
70   return prefix_end;
71 }
72 
SymbolizePC(uptr addr)73 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
74   BlockingMutexLock l(&mu_);
75   const char *module_name;
76   uptr module_offset;
77   ModuleArch arch;
78   SymbolizedStack *res = SymbolizedStack::New(addr);
79   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
80                                          &arch))
81     return res;
82   // Always fill data about module name and offset.
83   res->info.FillModuleInfo(module_name, module_offset, arch);
84   for (auto &tool : tools_) {
85     SymbolizerScope sym_scope(this);
86     if (tool.SymbolizePC(addr, res)) {
87       return res;
88     }
89   }
90   return res;
91 }
92 
SymbolizeData(uptr addr,DataInfo * info)93 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
94   BlockingMutexLock l(&mu_);
95   const char *module_name;
96   uptr module_offset;
97   ModuleArch arch;
98   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
99                                          &arch))
100     return false;
101   info->Clear();
102   info->module = internal_strdup(module_name);
103   info->module_offset = module_offset;
104   info->module_arch = arch;
105   for (auto &tool : tools_) {
106     SymbolizerScope sym_scope(this);
107     if (tool.SymbolizeData(addr, info)) {
108       return true;
109     }
110   }
111   return true;
112 }
113 
GetModuleNameAndOffsetForPC(uptr pc,const char ** module_name,uptr * module_address)114 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
115                                              uptr *module_address) {
116   BlockingMutexLock l(&mu_);
117   const char *internal_module_name = nullptr;
118   ModuleArch arch;
119   if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
120                                          module_address, &arch))
121     return false;
122 
123   if (module_name)
124     *module_name = module_names_.GetOwnedCopy(internal_module_name);
125   return true;
126 }
127 
Flush()128 void Symbolizer::Flush() {
129   BlockingMutexLock l(&mu_);
130   for (auto &tool : tools_) {
131     SymbolizerScope sym_scope(this);
132     tool.Flush();
133   }
134 }
135 
Demangle(const char * name)136 const char *Symbolizer::Demangle(const char *name) {
137   BlockingMutexLock l(&mu_);
138   for (auto &tool : tools_) {
139     SymbolizerScope sym_scope(this);
140     if (const char *demangled = tool.Demangle(name))
141       return demangled;
142   }
143   return PlatformDemangle(name);
144 }
145 
FindModuleNameAndOffsetForAddress(uptr address,const char ** module_name,uptr * module_offset,ModuleArch * module_arch)146 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
147                                                    const char **module_name,
148                                                    uptr *module_offset,
149                                                    ModuleArch *module_arch) {
150   const LoadedModule *module = FindModuleForAddress(address);
151   if (module == nullptr)
152     return false;
153   *module_name = module->full_name();
154   *module_offset = address - module->base_address();
155   *module_arch = module->arch();
156   return true;
157 }
158 
RefreshModules()159 void Symbolizer::RefreshModules() {
160   modules_.init();
161   fallback_modules_.fallbackInit();
162   RAW_CHECK(modules_.size() > 0);
163   modules_fresh_ = true;
164 }
165 
SearchForModule(const ListOfModules & modules,uptr address)166 static const LoadedModule *SearchForModule(const ListOfModules &modules,
167                                            uptr address) {
168   for (uptr i = 0; i < modules.size(); i++) {
169     if (modules[i].containsAddress(address)) {
170       return &modules[i];
171     }
172   }
173   return nullptr;
174 }
175 
FindModuleForAddress(uptr address)176 const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
177   bool modules_were_reloaded = false;
178   if (!modules_fresh_) {
179     RefreshModules();
180     modules_were_reloaded = true;
181   }
182   const LoadedModule *module = SearchForModule(modules_, address);
183   if (module) return module;
184 
185   // dlopen/dlclose interceptors invalidate the module list, but when
186   // interception is disabled, we need to retry if the lookup fails in
187   // case the module list changed.
188 #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
189   if (!modules_were_reloaded) {
190     RefreshModules();
191     module = SearchForModule(modules_, address);
192     if (module) return module;
193   }
194 #endif
195 
196   if (fallback_modules_.size()) {
197     module = SearchForModule(fallback_modules_, address);
198   }
199   return module;
200 }
201 
202 // For now we assume the following protocol:
203 // For each request of the form
204 //   <module_name> <module_offset>
205 // passed to STDIN, external symbolizer prints to STDOUT response:
206 //   <function_name>
207 //   <file_name>:<line_number>:<column_number>
208 //   <function_name>
209 //   <file_name>:<line_number>:<column_number>
210 //   ...
211 //   <empty line>
212 class LLVMSymbolizerProcess : public SymbolizerProcess {
213  public:
LLVMSymbolizerProcess(const char * path)214   explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {}
215 
216  private:
ReachedEndOfOutput(const char * buffer,uptr length) const217   bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
218     // Empty line marks the end of llvm-symbolizer output.
219     return length >= 2 && buffer[length - 1] == '\n' &&
220            buffer[length - 2] == '\n';
221   }
222 
223   // When adding a new architecture, don't forget to also update
224   // script/asan_symbolize.py and sanitizer_common.h.
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax]) const225   void GetArgV(const char *path_to_binary,
226                const char *(&argv)[kArgVMax]) const override {
227 #if defined(__x86_64h__)
228     const char* const kSymbolizerArch = "--default-arch=x86_64h";
229 #elif defined(__x86_64__)
230     const char* const kSymbolizerArch = "--default-arch=x86_64";
231 #elif defined(__i386__)
232     const char* const kSymbolizerArch = "--default-arch=i386";
233 #elif defined(__aarch64__)
234     const char* const kSymbolizerArch = "--default-arch=arm64";
235 #elif defined(__arm__)
236     const char* const kSymbolizerArch = "--default-arch=arm";
237 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
238     const char* const kSymbolizerArch = "--default-arch=powerpc64";
239 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
240     const char* const kSymbolizerArch = "--default-arch=powerpc64le";
241 #elif defined(__s390x__)
242     const char* const kSymbolizerArch = "--default-arch=s390x";
243 #elif defined(__s390__)
244     const char* const kSymbolizerArch = "--default-arch=s390";
245 #else
246     const char* const kSymbolizerArch = "--default-arch=unknown";
247 #endif
248 
249     const char *const inline_flag = common_flags()->symbolize_inline_frames
250                                         ? "--inlining=true"
251                                         : "--inlining=false";
252     int i = 0;
253     argv[i++] = path_to_binary;
254     argv[i++] = inline_flag;
255     argv[i++] = kSymbolizerArch;
256     argv[i++] = nullptr;
257   }
258 };
259 
LLVMSymbolizer(const char * path,LowLevelAllocator * allocator)260 LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
261     : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
262 
263 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
264 // Windows, so extract tokens from the right hand side first. The column info is
265 // also optional.
ParseFileLineInfo(AddressInfo * info,const char * str)266 static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
267   char *file_line_info = 0;
268   str = ExtractToken(str, "\n", &file_line_info);
269   CHECK(file_line_info);
270 
271   if (uptr size = internal_strlen(file_line_info)) {
272     char *back = file_line_info + size - 1;
273     for (int i = 0; i < 2; ++i) {
274       while (back > file_line_info && IsDigit(*back)) --back;
275       if (*back != ':' || !IsDigit(back[1])) break;
276       info->column = info->line;
277       info->line = internal_atoll(back + 1);
278       // Truncate the string at the colon to keep only filename.
279       *back = '\0';
280       --back;
281     }
282     ExtractToken(file_line_info, "", &info->file);
283   }
284 
285   InternalFree(file_line_info);
286   return str;
287 }
288 
289 // Parses one or more two-line strings in the following format:
290 //   <function_name>
291 //   <file_name>:<line_number>[:<column_number>]
292 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
293 // them use the same output format.
ParseSymbolizePCOutput(const char * str,SymbolizedStack * res)294 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
295   bool top_frame = true;
296   SymbolizedStack *last = res;
297   while (true) {
298     char *function_name = 0;
299     str = ExtractToken(str, "\n", &function_name);
300     CHECK(function_name);
301     if (function_name[0] == '\0') {
302       // There are no more frames.
303       InternalFree(function_name);
304       break;
305     }
306     SymbolizedStack *cur;
307     if (top_frame) {
308       cur = res;
309       top_frame = false;
310     } else {
311       cur = SymbolizedStack::New(res->info.address);
312       cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
313                                res->info.module_arch);
314       last->next = cur;
315       last = cur;
316     }
317 
318     AddressInfo *info = &cur->info;
319     info->function = function_name;
320     str = ParseFileLineInfo(info, str);
321 
322     // Functions and filenames can be "??", in which case we write 0
323     // to address info to mark that names are unknown.
324     if (0 == internal_strcmp(info->function, "??")) {
325       InternalFree(info->function);
326       info->function = 0;
327     }
328     if (0 == internal_strcmp(info->file, "??")) {
329       InternalFree(info->file);
330       info->file = 0;
331     }
332   }
333 }
334 
335 // Parses a two-line string in the following format:
336 //   <symbol_name>
337 //   <start_address> <size>
338 // Used by LLVMSymbolizer and InternalSymbolizer.
ParseSymbolizeDataOutput(const char * str,DataInfo * info)339 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
340   str = ExtractToken(str, "\n", &info->name);
341   str = ExtractUptr(str, " ", &info->start);
342   str = ExtractUptr(str, "\n", &info->size);
343 }
344 
SymbolizePC(uptr addr,SymbolizedStack * stack)345 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
346   AddressInfo *info = &stack->info;
347   const char *buf = FormatAndSendCommand(
348       /*is_data*/ false, info->module, info->module_offset, info->module_arch);
349   if (buf) {
350     ParseSymbolizePCOutput(buf, stack);
351     return true;
352   }
353   return false;
354 }
355 
SymbolizeData(uptr addr,DataInfo * info)356 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
357   const char *buf = FormatAndSendCommand(
358       /*is_data*/ true, info->module, info->module_offset, info->module_arch);
359   if (buf) {
360     ParseSymbolizeDataOutput(buf, info);
361     info->start += (addr - info->module_offset); // Add the base address.
362     return true;
363   }
364   return false;
365 }
366 
FormatAndSendCommand(bool is_data,const char * module_name,uptr module_offset,ModuleArch arch)367 const char *LLVMSymbolizer::FormatAndSendCommand(bool is_data,
368                                                  const char *module_name,
369                                                  uptr module_offset,
370                                                  ModuleArch arch) {
371   CHECK(module_name);
372   const char *is_data_str = is_data ? "DATA " : "";
373   if (arch == kModuleArchUnknown) {
374     if (internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", is_data_str,
375                           module_name,
376                           module_offset) >= static_cast<int>(kBufferSize)) {
377       Report("WARNING: Command buffer too small");
378       return nullptr;
379     }
380   } else {
381     if (internal_snprintf(buffer_, kBufferSize, "%s\"%s:%s\" 0x%zx\n",
382                           is_data_str, module_name, ModuleArchToString(arch),
383                           module_offset) >= static_cast<int>(kBufferSize)) {
384       Report("WARNING: Command buffer too small");
385       return nullptr;
386     }
387   }
388   return symbolizer_process_->SendCommand(buffer_);
389 }
390 
SymbolizerProcess(const char * path,bool use_forkpty)391 SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty)
392     : path_(path),
393       input_fd_(kInvalidFd),
394       output_fd_(kInvalidFd),
395       times_restarted_(0),
396       failed_to_start_(false),
397       reported_invalid_path_(false),
398       use_forkpty_(use_forkpty) {
399   CHECK(path_);
400   CHECK_NE(path_[0], '\0');
401 }
402 
IsSameModule(const char * path)403 static bool IsSameModule(const char* path) {
404   if (const char* ProcessName = GetProcessName()) {
405     if (const char* SymbolizerName = StripModuleName(path)) {
406       return !internal_strcmp(ProcessName, SymbolizerName);
407     }
408   }
409   return false;
410 }
411 
SendCommand(const char * command)412 const char *SymbolizerProcess::SendCommand(const char *command) {
413   if (failed_to_start_)
414     return nullptr;
415   if (IsSameModule(path_)) {
416     Report("WARNING: Symbolizer was blocked from starting itself!\n");
417     failed_to_start_ = true;
418     return nullptr;
419   }
420   for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
421     // Start or restart symbolizer if we failed to send command to it.
422     if (const char *res = SendCommandImpl(command))
423       return res;
424     Restart();
425   }
426   if (!failed_to_start_) {
427     Report("WARNING: Failed to use and restart external symbolizer!\n");
428     failed_to_start_ = true;
429   }
430   return 0;
431 }
432 
SendCommandImpl(const char * command)433 const char *SymbolizerProcess::SendCommandImpl(const char *command) {
434   if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
435       return 0;
436   if (!WriteToSymbolizer(command, internal_strlen(command)))
437       return 0;
438   if (!ReadFromSymbolizer(buffer_, kBufferSize))
439       return 0;
440   return buffer_;
441 }
442 
Restart()443 bool SymbolizerProcess::Restart() {
444   if (input_fd_ != kInvalidFd)
445     CloseFile(input_fd_);
446   if (output_fd_ != kInvalidFd)
447     CloseFile(output_fd_);
448   return StartSymbolizerSubprocess();
449 }
450 
ReadFromSymbolizer(char * buffer,uptr max_length)451 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) {
452   if (max_length == 0)
453     return true;
454   uptr read_len = 0;
455   while (true) {
456     uptr just_read = 0;
457     bool success = ReadFromFile(input_fd_, buffer + read_len,
458                                 max_length - read_len - 1, &just_read);
459     // We can't read 0 bytes, as we don't expect external symbolizer to close
460     // its stdout.
461     if (!success || just_read == 0) {
462       Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
463       return false;
464     }
465     read_len += just_read;
466     if (ReachedEndOfOutput(buffer, read_len))
467       break;
468     if (read_len + 1 == max_length) {
469       Report("WARNING: Symbolizer buffer too small\n");
470       read_len = 0;
471       break;
472     }
473   }
474   buffer[read_len] = '\0';
475   return true;
476 }
477 
WriteToSymbolizer(const char * buffer,uptr length)478 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
479   if (length == 0)
480     return true;
481   uptr write_len = 0;
482   bool success = WriteToFile(output_fd_, buffer, length, &write_len);
483   if (!success || write_len != length) {
484     Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
485     return false;
486   }
487   return true;
488 }
489 
490 #endif  // !SANITIZER_SYMBOLIZER_MARKUP
491 
492 }  // namespace __sanitizer
493