master
  1//===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===//
  2//
  3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4// See https://llvm.org/LICENSE.txt for license information.
  5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6//
  7//===----------------------------------------------------------------------===//
  8//
  9// Header for internal classes and functions to be used by implementations of
 10// symbolizers.
 11//
 12//===----------------------------------------------------------------------===//
 13#ifndef SANITIZER_SYMBOLIZER_INTERNAL_H
 14#define SANITIZER_SYMBOLIZER_INTERNAL_H
 15
 16#include "sanitizer_file.h"
 17#include "sanitizer_symbolizer.h"
 18#include "sanitizer_vector.h"
 19
 20namespace __sanitizer {
 21
 22// Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr
 23// is extracted. When extracting a string, a newly allocated (using
 24// InternalAlloc) and null-terminated buffer is returned. They return a pointer
 25// to the next characted after the found delimiter.
 26const char *ExtractToken(const char *str, const char *delims, char **result);
 27const char *ExtractInt(const char *str, const char *delims, int *result);
 28const char *ExtractUptr(const char *str, const char *delims, uptr *result);
 29const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
 30                                      char **result);
 31
 32const char *DemangleSwiftAndCXX(const char *name);
 33
 34// SymbolizerTool is an interface that is implemented by individual "tools"
 35// that can perform symbolication (external llvm-symbolizer, libbacktrace,
 36// Windows DbgHelp symbolizer, etc.).
 37class SymbolizerTool {
 38 public:
 39  // The main |Symbolizer| class implements a "fallback chain" of symbolizer
 40  // tools. In a request to symbolize an address, if one tool returns false,
 41  // the next tool in the chain will be tried.
 42  SymbolizerTool *next;
 43
 44  SymbolizerTool() : next(nullptr) { }
 45
 46  // Can't declare pure virtual functions in sanitizer runtimes:
 47  // __cxa_pure_virtual might be unavailable.
 48
 49  // The |stack| parameter is inout. It is pre-filled with the address,
 50  // module base and module offset values and is to be used to construct
 51  // other stack frames.
 52  virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) {
 53    UNIMPLEMENTED();
 54  }
 55
 56  // The |info| parameter is inout. It is pre-filled with the module base
 57  // and module offset values.
 58  virtual bool SymbolizeData(uptr addr, DataInfo *info) {
 59    UNIMPLEMENTED();
 60  }
 61
 62  virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) {
 63    return false;
 64  }
 65
 66  virtual void Flush() {}
 67
 68  // Return nullptr to fallback to the default platform-specific demangler.
 69  virtual const char *Demangle(const char *name) {
 70    return nullptr;
 71  }
 72
 73 protected:
 74  ~SymbolizerTool() {}
 75};
 76
 77// SymbolizerProcess encapsulates communication between the tool and
 78// external symbolizer program, running in a different subprocess.
 79// SymbolizerProcess may not be used from two threads simultaneously.
 80class SymbolizerProcess {
 81 public:
 82  explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false);
 83  const char *SendCommand(const char *command);
 84
 85 protected:
 86  ~SymbolizerProcess() {}
 87
 88  /// The maximum number of arguments required to invoke a tool process.
 89  static const unsigned kArgVMax = 16;
 90
 91  // Customizable by subclasses.
 92  virtual bool StartSymbolizerSubprocess();
 93  virtual bool ReadFromSymbolizer();
 94  // Return the environment to run the symbolizer in.
 95  virtual char **GetEnvP() { return GetEnviron(); }
 96  InternalMmapVector<char> &GetBuff() { return buffer_; }
 97
 98 private:
 99  virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
100    UNIMPLEMENTED();
101  }
102
103  /// Fill in an argv array to invoke the child process.
104  virtual void GetArgV(const char *path_to_binary,
105                       const char *(&argv)[kArgVMax]) const {
106    UNIMPLEMENTED();
107  }
108
109  bool Restart();
110  const char *SendCommandImpl(const char *command);
111  bool WriteToSymbolizer(const char *buffer, uptr length);
112
113  const char *path_;
114  fd_t input_fd_;
115  fd_t output_fd_;
116
117  InternalMmapVector<char> buffer_;
118
119  static const uptr kMaxTimesRestarted = 5;
120  static const int kSymbolizerStartupTimeMillis = 10;
121  uptr times_restarted_;
122  bool failed_to_start_;
123  bool reported_invalid_path_;
124  bool use_posix_spawn_;
125};
126
127class LLVMSymbolizerProcess;
128
129// This tool invokes llvm-symbolizer in a subprocess. It should be as portable
130// as the llvm-symbolizer tool is.
131class LLVMSymbolizer final : public SymbolizerTool {
132 public:
133  explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator);
134
135  bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
136  bool SymbolizeData(uptr addr, DataInfo *info) override;
137  bool SymbolizeFrame(uptr addr, FrameInfo *info) override;
138
139 private:
140  const char *FormatAndSendCommand(const char *command_prefix,
141                                   const char *module_name, uptr module_offset,
142                                   ModuleArch arch);
143
144  LLVMSymbolizerProcess *symbolizer_process_;
145  static const uptr kBufferSize = 16 * 1024;
146  char buffer_[kBufferSize];
147};
148
149// Parses one or more two-line strings in the following format:
150//   <function_name>
151//   <file_name>:<line_number>[:<column_number>]
152// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
153// them use the same output format.  Returns true if any useful debug
154// information was found.
155void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
156
157// Parses a two-line string in the following format:
158//   <symbol_name>
159//   <start_address> <size>
160// Used by LLVMSymbolizer and InternalSymbolizer.
161void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
162
163// Parses repeated strings in the following format:
164//   <function_name>
165//   <var_name>
166//   <file_name>:<line_number>[:<column_number>]
167//   [<frame_offset>|??] [<size>|??] [<tag_offset>|??]
168// Used by LLVMSymbolizer and InternalSymbolizer.
169void ParseSymbolizeFrameOutput(const char *str,
170                               InternalMmapVector<LocalInfo> *locals);
171
172}  // namespace __sanitizer
173
174#endif  // SANITIZER_SYMBOLIZER_INTERNAL_H