xref: /llvm-project/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp (revision 74eb079e06ae052feda28e63f4f63303efc01236)
1 //===-- IRDynamicChecks.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/Constants.h"
10 #include "llvm/IR/DataLayout.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/Instructions.h"
13 #include "llvm/IR/Module.h"
14 #include "llvm/IR/Value.h"
15 #include "llvm/Support/raw_ostream.h"
16 
17 #include "IRDynamicChecks.h"
18 
19 #include "lldb/Expression/UtilityFunction.h"
20 #include "lldb/Target/ExecutionContext.h"
21 #include "lldb/Target/Process.h"
22 #include "lldb/Target/StackFrame.h"
23 #include "lldb/Target/Target.h"
24 #include "lldb/Utility/ConstString.h"
25 #include "lldb/Utility/LLDBLog.h"
26 #include "lldb/Utility/Log.h"
27 
28 #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
29 
30 using namespace llvm;
31 using namespace lldb_private;
32 
// Handed to the llvm::ModulePass base-class constructor by IRDynamicChecks.
static char ID;

// Symbol names given to the two checker utility functions.
#define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
#define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"

// Source text of the pointer-validity checker: a function that reads one byte
// through its pointer argument.  The function name is spliced in from
// VALID_POINTER_CHECK_NAME so the definition and the name passed to
// CreateUtilityFunction come from one place.
static const char g_valid_pointer_check_text[] =
    "extern \"C\" void\n" VALID_POINTER_CHECK_NAME
    " (unsigned char *$__lldb_arg_ptr)\n"
    "{\n"
    "    unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
    "}";
44 
45 ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions()
46     : DynamicCheckerFunctions(DCF_Clang) {}
47 
48 ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default;
49 
50 llvm::Error ClangDynamicCheckerFunctions::Install(
51     DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) {
52   Expected<std::unique_ptr<UtilityFunction>> utility_fn =
53       exe_ctx.GetTargetRef().CreateUtilityFunction(
54           g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME,
55           lldb::eLanguageTypeC, exe_ctx);
56   if (!utility_fn)
57     return utility_fn.takeError();
58   m_valid_pointer_check = std::move(*utility_fn);
59 
60   if (Process *process = exe_ctx.GetProcessPtr()) {
61     ObjCLanguageRuntime *objc_language_runtime =
62         ObjCLanguageRuntime::Get(*process);
63 
64     if (objc_language_runtime) {
65       Expected<std::unique_ptr<UtilityFunction>> checker_fn =
66           objc_language_runtime->CreateObjectChecker(VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx);
67       if (!checker_fn)
68         return checker_fn.takeError();
69       m_objc_object_check = std::move(*checker_fn);
70     }
71   }
72 
73   return Error::success();
74 }
75 
76 bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr,
77                                                          Stream &message) {
78   // FIXME: We have to get the checkers to know why they scotched the call in
79   // more detail,
80   // so we can print a better message here.
81   if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) {
82     message.Printf("Attempted to dereference an invalid pointer.");
83     return true;
84   } else if (m_objc_object_check &&
85              m_objc_object_check->ContainsAddress(addr)) {
86     message.Printf("Attempted to dereference an invalid ObjC Object or send it "
87                    "an unrecognized selector");
88     return true;
89   }
90   return false;
91 }
92 
93 static std::string PrintValue(llvm::Value *V, bool truncate = false) {
94   std::string s;
95   raw_string_ostream rso(s);
96   V->print(rso);
97   if (truncate)
98     s.resize(s.length() - 1);
99   return s;
100 }
101 
102 /// \class Instrumenter IRDynamicChecks.cpp
103 /// Finds and instruments individual LLVM IR instructions
104 ///
105 /// When instrumenting LLVM IR, it is frequently desirable to first search for
106 /// instructions, and then later modify them.  This way iterators remain
107 /// intact, and multiple passes can look at the same code base without
108 /// treading on each other's toes.
109 ///
110 /// The Instrumenter class implements this functionality.  A client first
111 /// calls Inspect on a function, which populates a list of instructions to be
112 /// instrumented.  Then, later, when all passes' Inspect functions have been
113 /// called, the client calls Instrument, which adds the desired
114 /// instrumentation.
115 ///
116 /// A subclass of Instrumenter must override InstrumentInstruction, which
117 /// is responsible for adding whatever instrumentation is necessary.
118 ///
119 /// A subclass of Instrumenter may override:
120 ///
121 /// - InspectInstruction [default: does nothing]
122 ///
123 /// - InspectBasicBlock [default: iterates through the instructions in a
124 ///   basic block calling InspectInstruction]
125 ///
126 /// - InspectFunction [default: iterates through the basic blocks in a
127 ///   function calling InspectBasicBlock]
128 class Instrumenter {
129 public:
130   /// Constructor
131   ///
132   /// \param[in] module
133   ///     The module being instrumented.
134   Instrumenter(llvm::Module &module,
135                std::shared_ptr<UtilityFunction> checker_function)
136       : m_module(module), m_checker_function(checker_function) {}
137 
138   virtual ~Instrumenter() = default;
139 
140   /// Inspect a function to find instructions to instrument
141   ///
142   /// \param[in] function
143   ///     The function to inspect.
144   ///
145   /// \return
146   ///     True on success; false on error.
147   bool Inspect(llvm::Function &function) { return InspectFunction(function); }
148 
149   /// Instrument all the instructions found by Inspect()
150   ///
151   /// \return
152   ///     True on success; false on error.
153   bool Instrument() {
154     for (InstIterator ii = m_to_instrument.begin(),
155                       last_ii = m_to_instrument.end();
156          ii != last_ii; ++ii) {
157       if (!InstrumentInstruction(*ii))
158         return false;
159     }
160 
161     return true;
162   }
163 
164 protected:
165   /// Add instrumentation to a single instruction
166   ///
167   /// \param[in] inst
168   ///     The instruction to be instrumented.
169   ///
170   /// \return
171   ///     True on success; false otherwise.
172   virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0;
173 
174   /// Register a single instruction to be instrumented
175   ///
176   /// \param[in] inst
177   ///     The instruction to be instrumented.
178   void RegisterInstruction(llvm::Instruction &inst) {
179     m_to_instrument.push_back(&inst);
180   }
181 
182   /// Determine whether a single instruction is interesting to instrument,
183   /// and, if so, call RegisterInstruction
184   ///
185   /// \param[in] i
186   ///     The instruction to be inspected.
187   ///
188   /// \return
189   ///     False if there was an error scanning; true otherwise.
190   virtual bool InspectInstruction(llvm::Instruction &i) { return true; }
191 
192   /// Scan a basic block to see if any instructions are interesting
193   ///
194   /// \param[in] bb
195   ///     The basic block to be inspected.
196   ///
197   /// \return
198   ///     False if there was an error scanning; true otherwise.
199   virtual bool InspectBasicBlock(llvm::BasicBlock &bb) {
200     for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end();
201          ii != last_ii; ++ii) {
202       if (!InspectInstruction(*ii))
203         return false;
204     }
205 
206     return true;
207   }
208 
209   /// Scan a function to see if any instructions are interesting
210   ///
211   /// \param[in] f
212   ///     The function to be inspected.
213   ///
214   /// \return
215   ///     False if there was an error scanning; true otherwise.
216   virtual bool InspectFunction(llvm::Function &f) {
217     for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end();
218          bbi != last_bbi; ++bbi) {
219       if (!InspectBasicBlock(*bbi))
220         return false;
221     }
222 
223     return true;
224   }
225 
226   /// Build a function pointer for a function with signature void
227   /// (*)(uint8_t*) with a given address
228   ///
229   /// \param[in] start_address
230   ///     The address of the function.
231   ///
232   /// \return
233   ///     The function pointer, for use in a CallInst.
234   llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) {
235     llvm::Type *param_array[1];
236 
237     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
238 
239     ArrayRef<llvm::Type *> params(param_array, 1);
240 
241     FunctionType *fun_ty = FunctionType::get(
242         llvm::Type::getVoidTy(m_module.getContext()), params, true);
243     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
244     Constant *fun_addr_int =
245         ConstantInt::get(GetIntptrTy(), start_address, false);
246     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
247   }
248 
249   /// Build a function pointer for a function with signature void
250   /// (*)(uint8_t*, uint8_t*) with a given address
251   ///
252   /// \param[in] start_address
253   ///     The address of the function.
254   ///
255   /// \return
256   ///     The function pointer, for use in a CallInst.
257   llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) {
258     llvm::Type *param_array[2];
259 
260     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
261     param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy());
262 
263     ArrayRef<llvm::Type *> params(param_array, 2);
264 
265     FunctionType *fun_ty = FunctionType::get(
266         llvm::Type::getVoidTy(m_module.getContext()), params, true);
267     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
268     Constant *fun_addr_int =
269         ConstantInt::get(GetIntptrTy(), start_address, false);
270     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
271   }
272 
273   PointerType *GetI8PtrTy() {
274     if (!m_i8ptr_ty)
275       m_i8ptr_ty = llvm::PointerType::getUnqual(m_module.getContext());
276 
277     return m_i8ptr_ty;
278   }
279 
280   IntegerType *GetIntptrTy() {
281     if (!m_intptr_ty) {
282       m_intptr_ty = llvm::Type::getIntNTy(
283           m_module.getContext(),
284           m_module.getDataLayout().getPointerSizeInBits());
285     }
286 
287     return m_intptr_ty;
288   }
289 
290   typedef std::vector<llvm::Instruction *> InstVector;
291   typedef InstVector::iterator InstIterator;
292 
293   InstVector m_to_instrument; ///< List of instructions the inspector found
294   llvm::Module &m_module;     ///< The module which is being instrumented
295   std::shared_ptr<UtilityFunction>
296       m_checker_function; ///< The dynamic checker function for the process
297 
298 private:
299   PointerType *m_i8ptr_ty = nullptr;
300   IntegerType *m_intptr_ty = nullptr;
301 };
302 
303 class ValidPointerChecker : public Instrumenter {
304 public:
305   ValidPointerChecker(llvm::Module &module,
306                       std::shared_ptr<UtilityFunction> checker_function)
307       : Instrumenter(module, checker_function),
308         m_valid_pointer_check_func(nullptr) {}
309 
310   ~ValidPointerChecker() override = default;
311 
312 protected:
313   bool InstrumentInstruction(llvm::Instruction *inst) override {
314     Log *log = GetLog(LLDBLog::Expressions);
315 
316     LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n",
317               PrintValue(inst).c_str());
318 
319     if (!m_valid_pointer_check_func)
320       m_valid_pointer_check_func =
321           BuildPointerValidatorFunc(m_checker_function->StartAddress());
322 
323     llvm::Value *dereferenced_ptr = nullptr;
324 
325     if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst))
326       dereferenced_ptr = li->getPointerOperand();
327     else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst))
328       dereferenced_ptr = si->getPointerOperand();
329     else
330       return false;
331 
332     // Insert an instruction to call the helper with the result
333     CallInst::Create(m_valid_pointer_check_func, dereferenced_ptr, "",
334                      inst->getIterator());
335 
336     return true;
337   }
338 
339   bool InspectInstruction(llvm::Instruction &i) override {
340     if (isa<llvm::LoadInst>(&i) || isa<llvm::StoreInst>(&i))
341       RegisterInstruction(i);
342 
343     return true;
344   }
345 
346 private:
347   llvm::FunctionCallee m_valid_pointer_check_func;
348 };
349 
350 class ObjcObjectChecker : public Instrumenter {
351 public:
352   ObjcObjectChecker(llvm::Module &module,
353                     std::shared_ptr<UtilityFunction> checker_function)
354       : Instrumenter(module, checker_function),
355         m_objc_object_check_func(nullptr) {}
356 
357   ~ObjcObjectChecker() override = default;
358 
359   enum msgSend_type {
360     eMsgSend = 0,
361     eMsgSendSuper,
362     eMsgSendSuper_stret,
363     eMsgSend_fpret,
364     eMsgSend_stret
365   };
366 
367   std::map<llvm::Instruction *, msgSend_type> msgSend_types;
368 
369 protected:
370   bool InstrumentInstruction(llvm::Instruction *inst) override {
371     CallInst *call_inst = dyn_cast<CallInst>(inst);
372 
373     if (!call_inst)
374       return false; // call_inst really shouldn't be nullptr, because otherwise
375                     // InspectInstruction wouldn't have registered it
376 
377     if (!m_objc_object_check_func)
378       m_objc_object_check_func =
379           BuildObjectCheckerFunc(m_checker_function->StartAddress());
380 
381     // id objc_msgSend(id theReceiver, SEL theSelector, ...)
382 
383     llvm::Value *target_object;
384     llvm::Value *selector;
385 
386     switch (msgSend_types[inst]) {
387     case eMsgSend:
388     case eMsgSend_fpret:
389       // On arm64, clang uses objc_msgSend for scalar and struct return
390       // calls.  The call instruction will record which was used.
391       if (call_inst->hasStructRetAttr()) {
392         target_object = call_inst->getArgOperand(1);
393         selector = call_inst->getArgOperand(2);
394       } else {
395         target_object = call_inst->getArgOperand(0);
396         selector = call_inst->getArgOperand(1);
397       }
398       break;
399     case eMsgSend_stret:
400       target_object = call_inst->getArgOperand(1);
401       selector = call_inst->getArgOperand(2);
402       break;
403     case eMsgSendSuper:
404     case eMsgSendSuper_stret:
405       return true;
406     }
407 
408     // These objects should always be valid according to Sean Calannan
409     assert(target_object);
410     assert(selector);
411 
412     // Insert an instruction to call the helper with the result
413 
414     llvm::Value *arg_array[2];
415 
416     arg_array[0] = target_object;
417     arg_array[1] = selector;
418 
419     ArrayRef<llvm::Value *> args(arg_array, 2);
420 
421     CallInst::Create(m_objc_object_check_func, args, "", inst->getIterator());
422 
423     return true;
424   }
425 
426   static llvm::Function *GetFunction(llvm::Value *value) {
427     if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) {
428       return function;
429     }
430 
431     if (llvm::ConstantExpr *const_expr =
432             llvm::dyn_cast<llvm::ConstantExpr>(value)) {
433       switch (const_expr->getOpcode()) {
434       default:
435         return nullptr;
436       case llvm::Instruction::BitCast:
437         return GetFunction(const_expr->getOperand(0));
438       }
439     }
440 
441     return nullptr;
442   }
443 
444   static llvm::Function *GetCalledFunction(llvm::CallInst *inst) {
445     return GetFunction(inst->getCalledOperand());
446   }
447 
448   bool InspectInstruction(llvm::Instruction &i) override {
449     Log *log = GetLog(LLDBLog::Expressions);
450 
451     CallInst *call_inst = dyn_cast<CallInst>(&i);
452 
453     if (call_inst) {
454       const llvm::Function *called_function = GetCalledFunction(call_inst);
455 
456       if (!called_function)
457         return true;
458 
459       std::string name_str = called_function->getName().str();
460       const char *name_cstr = name_str.c_str();
461 
462       LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr,
463                 PrintValue(call_inst).c_str());
464 
465       if (name_str.find("objc_msgSend") == std::string::npos)
466         return true;
467 
468       if (!strcmp(name_cstr, "objc_msgSend")) {
469         RegisterInstruction(i);
470         msgSend_types[&i] = eMsgSend;
471         return true;
472       }
473 
474       if (!strcmp(name_cstr, "objc_msgSend_stret")) {
475         RegisterInstruction(i);
476         msgSend_types[&i] = eMsgSend_stret;
477         return true;
478       }
479 
480       if (!strcmp(name_cstr, "objc_msgSend_fpret")) {
481         RegisterInstruction(i);
482         msgSend_types[&i] = eMsgSend_fpret;
483         return true;
484       }
485 
486       if (!strcmp(name_cstr, "objc_msgSendSuper")) {
487         RegisterInstruction(i);
488         msgSend_types[&i] = eMsgSendSuper;
489         return true;
490       }
491 
492       if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) {
493         RegisterInstruction(i);
494         msgSend_types[&i] = eMsgSendSuper_stret;
495         return true;
496       }
497 
498       LLDB_LOGF(log,
499                 "Function name '%s' contains 'objc_msgSend' but is not handled",
500                 name_str.c_str());
501 
502       return true;
503     }
504 
505     return true;
506   }
507 
508 private:
509   llvm::FunctionCallee m_objc_object_check_func;
510 };
511 
512 IRDynamicChecks::IRDynamicChecks(
513     ClangDynamicCheckerFunctions &checker_functions, const char *func_name)
514     : ModulePass(ID), m_func_name(func_name),
515       m_checker_functions(checker_functions) {}
516 
517 IRDynamicChecks::~IRDynamicChecks() = default;
518 
519 bool IRDynamicChecks::runOnModule(llvm::Module &M) {
520   Log *log = GetLog(LLDBLog::Expressions);
521 
522   llvm::Function *function = M.getFunction(StringRef(m_func_name));
523 
524   if (!function) {
525     LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str());
526 
527     return false;
528   }
529 
530   if (m_checker_functions.m_valid_pointer_check) {
531     ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check);
532 
533     if (!vpc.Inspect(*function))
534       return false;
535 
536     if (!vpc.Instrument())
537       return false;
538   }
539 
540   if (m_checker_functions.m_objc_object_check) {
541     ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check);
542 
543     if (!ooc.Inspect(*function))
544       return false;
545 
546     if (!ooc.Instrument())
547       return false;
548   }
549 
550   if (log && log->GetVerbose()) {
551     std::string s;
552     raw_string_ostream oss(s);
553 
554     M.print(oss, nullptr);
555 
556     LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str());
557   }
558 
559   return true;
560 }
561 
562 void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {}
563 
564 PassManagerType IRDynamicChecks::getPotentialPassManagerType() const {
565   return PMT_ModulePassManager;
566 }
567