xref: /freebsd-src/contrib/llvm-project/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1 //===-- IRDynamicChecks.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/Constants.h"
10 #include "llvm/IR/DataLayout.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/Instructions.h"
13 #include "llvm/IR/Module.h"
14 #include "llvm/IR/Value.h"
15 #include "llvm/Support/raw_ostream.h"
16 
17 #include "IRDynamicChecks.h"
18 
19 #include "lldb/Expression/UtilityFunction.h"
20 #include "lldb/Target/ExecutionContext.h"
21 #include "lldb/Target/Process.h"
22 #include "lldb/Target/StackFrame.h"
23 #include "lldb/Target/Target.h"
24 #include "lldb/Utility/ConstString.h"
25 #include "lldb/Utility/Log.h"
26 
27 #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
28 
29 using namespace llvm;
30 using namespace lldb_private;
31 
/// Pass identification token; IRDynamicChecks hands it to the ModulePass
/// constructor.  Nothing in this file reads its value.
static char ID;

// Names under which the two checker utility functions are created.
#define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
#define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"

/// Source text of the pointer-validity checker.  The function simply reads
/// one byte through its argument.  The function name is spliced in from
/// VALID_POINTER_CHECK_NAME so the text and the name used to look the
/// function up cannot drift apart.
static const char g_valid_pointer_check_text[] =
    "extern \"C\" void\n" VALID_POINTER_CHECK_NAME
    " (unsigned char *$__lldb_arg_ptr)\n"
    "{\n"
    "    unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
    "}";
43 
44 ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions()
45     : DynamicCheckerFunctions(DCF_Clang) {}
46 
47 ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default;
48 
49 bool ClangDynamicCheckerFunctions::Install(
50     DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) {
51   Status error;
52   m_valid_pointer_check.reset(
53       exe_ctx.GetTargetRef().GetUtilityFunctionForLanguage(
54           g_valid_pointer_check_text, lldb::eLanguageTypeC,
55           VALID_POINTER_CHECK_NAME, error));
56   if (error.Fail())
57     return false;
58 
59   if (!m_valid_pointer_check->Install(diagnostic_manager, exe_ctx))
60     return false;
61 
62   Process *process = exe_ctx.GetProcessPtr();
63 
64   if (process) {
65     ObjCLanguageRuntime *objc_language_runtime =
66         ObjCLanguageRuntime::Get(*process);
67 
68     if (objc_language_runtime) {
69       m_objc_object_check.reset(objc_language_runtime->CreateObjectChecker(
70           VALID_OBJC_OBJECT_CHECK_NAME));
71 
72       if (!m_objc_object_check->Install(diagnostic_manager, exe_ctx))
73         return false;
74     }
75   }
76 
77   return true;
78 }
79 
80 bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr,
81                                                          Stream &message) {
82   // FIXME: We have to get the checkers to know why they scotched the call in
83   // more detail,
84   // so we can print a better message here.
85   if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) {
86     message.Printf("Attempted to dereference an invalid pointer.");
87     return true;
88   } else if (m_objc_object_check &&
89              m_objc_object_check->ContainsAddress(addr)) {
90     message.Printf("Attempted to dereference an invalid ObjC Object or send it "
91                    "an unrecognized selector");
92     return true;
93   }
94   return false;
95 }
96 
97 static std::string PrintValue(llvm::Value *V, bool truncate = false) {
98   std::string s;
99   raw_string_ostream rso(s);
100   V->print(rso);
101   rso.flush();
102   if (truncate)
103     s.resize(s.length() - 1);
104   return s;
105 }
106 
107 /// \class Instrumenter IRDynamicChecks.cpp
108 /// Finds and instruments individual LLVM IR instructions
109 ///
110 /// When instrumenting LLVM IR, it is frequently desirable to first search for
111 /// instructions, and then later modify them.  This way iterators remain
112 /// intact, and multiple passes can look at the same code base without
113 /// treading on each other's toes.
114 ///
115 /// The Instrumenter class implements this functionality.  A client first
116 /// calls Inspect on a function, which populates a list of instructions to be
117 /// instrumented.  Then, later, when all passes' Inspect functions have been
118 /// called, the client calls Instrument, which adds the desired
119 /// instrumentation.
120 ///
121 /// A subclass of Instrumenter must override InstrumentInstruction, which
122 /// is responsible for adding whatever instrumentation is necessary.
123 ///
124 /// A subclass of Instrumenter may override:
125 ///
126 /// - InspectInstruction [default: does nothing]
127 ///
128 /// - InspectBasicBlock [default: iterates through the instructions in a
129 ///   basic block calling InspectInstruction]
130 ///
131 /// - InspectFunction [default: iterates through the basic blocks in a
132 ///   function calling InspectBasicBlock]
133 class Instrumenter {
134 public:
135   /// Constructor
136   ///
137   /// \param[in] module
138   ///     The module being instrumented.
139   Instrumenter(llvm::Module &module,
140                std::shared_ptr<UtilityFunction> checker_function)
141       : m_module(module), m_checker_function(checker_function),
142         m_i8ptr_ty(nullptr), m_intptr_ty(nullptr) {}
143 
144   virtual ~Instrumenter() = default;
145 
146   /// Inspect a function to find instructions to instrument
147   ///
148   /// \param[in] function
149   ///     The function to inspect.
150   ///
151   /// \return
152   ///     True on success; false on error.
153   bool Inspect(llvm::Function &function) { return InspectFunction(function); }
154 
155   /// Instrument all the instructions found by Inspect()
156   ///
157   /// \return
158   ///     True on success; false on error.
159   bool Instrument() {
160     for (InstIterator ii = m_to_instrument.begin(),
161                       last_ii = m_to_instrument.end();
162          ii != last_ii; ++ii) {
163       if (!InstrumentInstruction(*ii))
164         return false;
165     }
166 
167     return true;
168   }
169 
170 protected:
171   /// Add instrumentation to a single instruction
172   ///
173   /// \param[in] inst
174   ///     The instruction to be instrumented.
175   ///
176   /// \return
177   ///     True on success; false otherwise.
178   virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0;
179 
180   /// Register a single instruction to be instrumented
181   ///
182   /// \param[in] inst
183   ///     The instruction to be instrumented.
184   void RegisterInstruction(llvm::Instruction &i) {
185     m_to_instrument.push_back(&i);
186   }
187 
188   /// Determine whether a single instruction is interesting to instrument,
189   /// and, if so, call RegisterInstruction
190   ///
191   /// \param[in] i
192   ///     The instruction to be inspected.
193   ///
194   /// \return
195   ///     False if there was an error scanning; true otherwise.
196   virtual bool InspectInstruction(llvm::Instruction &i) { return true; }
197 
198   /// Scan a basic block to see if any instructions are interesting
199   ///
200   /// \param[in] bb
201   ///     The basic block to be inspected.
202   ///
203   /// \return
204   ///     False if there was an error scanning; true otherwise.
205   virtual bool InspectBasicBlock(llvm::BasicBlock &bb) {
206     for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end();
207          ii != last_ii; ++ii) {
208       if (!InspectInstruction(*ii))
209         return false;
210     }
211 
212     return true;
213   }
214 
215   /// Scan a function to see if any instructions are interesting
216   ///
217   /// \param[in] f
218   ///     The function to be inspected.
219   ///
220   /// \return
221   ///     False if there was an error scanning; true otherwise.
222   virtual bool InspectFunction(llvm::Function &f) {
223     for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end();
224          bbi != last_bbi; ++bbi) {
225       if (!InspectBasicBlock(*bbi))
226         return false;
227     }
228 
229     return true;
230   }
231 
232   /// Build a function pointer for a function with signature void
233   /// (*)(uint8_t*) with a given address
234   ///
235   /// \param[in] start_address
236   ///     The address of the function.
237   ///
238   /// \return
239   ///     The function pointer, for use in a CallInst.
240   llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) {
241     llvm::Type *param_array[1];
242 
243     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
244 
245     ArrayRef<llvm::Type *> params(param_array, 1);
246 
247     FunctionType *fun_ty = FunctionType::get(
248         llvm::Type::getVoidTy(m_module.getContext()), params, true);
249     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
250     Constant *fun_addr_int =
251         ConstantInt::get(GetIntptrTy(), start_address, false);
252     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
253   }
254 
255   /// Build a function pointer for a function with signature void
256   /// (*)(uint8_t*, uint8_t*) with a given address
257   ///
258   /// \param[in] start_address
259   ///     The address of the function.
260   ///
261   /// \return
262   ///     The function pointer, for use in a CallInst.
263   llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) {
264     llvm::Type *param_array[2];
265 
266     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
267     param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy());
268 
269     ArrayRef<llvm::Type *> params(param_array, 2);
270 
271     FunctionType *fun_ty = FunctionType::get(
272         llvm::Type::getVoidTy(m_module.getContext()), params, true);
273     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
274     Constant *fun_addr_int =
275         ConstantInt::get(GetIntptrTy(), start_address, false);
276     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
277   }
278 
279   PointerType *GetI8PtrTy() {
280     if (!m_i8ptr_ty)
281       m_i8ptr_ty = llvm::Type::getInt8PtrTy(m_module.getContext());
282 
283     return m_i8ptr_ty;
284   }
285 
286   IntegerType *GetIntptrTy() {
287     if (!m_intptr_ty) {
288       llvm::DataLayout data_layout(&m_module);
289 
290       m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(),
291                                           data_layout.getPointerSizeInBits());
292     }
293 
294     return m_intptr_ty;
295   }
296 
297   typedef std::vector<llvm::Instruction *> InstVector;
298   typedef InstVector::iterator InstIterator;
299 
300   InstVector m_to_instrument; ///< List of instructions the inspector found
301   llvm::Module &m_module;     ///< The module which is being instrumented
302   std::shared_ptr<UtilityFunction>
303       m_checker_function; ///< The dynamic checker function for the process
304 
305 private:
306   PointerType *m_i8ptr_ty;
307   IntegerType *m_intptr_ty;
308 };
309 
310 class ValidPointerChecker : public Instrumenter {
311 public:
312   ValidPointerChecker(llvm::Module &module,
313                       std::shared_ptr<UtilityFunction> checker_function)
314       : Instrumenter(module, checker_function),
315         m_valid_pointer_check_func(nullptr) {}
316 
317   ~ValidPointerChecker() override = default;
318 
319 protected:
320   bool InstrumentInstruction(llvm::Instruction *inst) override {
321     Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
322 
323     if (log)
324       log->Printf("Instrumenting load/store instruction: %s\n",
325                   PrintValue(inst).c_str());
326 
327     if (!m_valid_pointer_check_func)
328       m_valid_pointer_check_func =
329           BuildPointerValidatorFunc(m_checker_function->StartAddress());
330 
331     llvm::Value *dereferenced_ptr = nullptr;
332 
333     if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst))
334       dereferenced_ptr = li->getPointerOperand();
335     else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst))
336       dereferenced_ptr = si->getPointerOperand();
337     else
338       return false;
339 
340     // Insert an instruction to cast the loaded value to int8_t*
341 
342     BitCastInst *bit_cast =
343         new BitCastInst(dereferenced_ptr, GetI8PtrTy(), "", inst);
344 
345     // Insert an instruction to call the helper with the result
346 
347     llvm::Value *arg_array[1];
348 
349     arg_array[0] = bit_cast;
350 
351     llvm::ArrayRef<llvm::Value *> args(arg_array, 1);
352 
353     CallInst::Create(m_valid_pointer_check_func, args, "", inst);
354 
355     return true;
356   }
357 
358   bool InspectInstruction(llvm::Instruction &i) override {
359     if (dyn_cast<llvm::LoadInst>(&i) || dyn_cast<llvm::StoreInst>(&i))
360       RegisterInstruction(i);
361 
362     return true;
363   }
364 
365 private:
366   llvm::FunctionCallee m_valid_pointer_check_func;
367 };
368 
369 class ObjcObjectChecker : public Instrumenter {
370 public:
371   ObjcObjectChecker(llvm::Module &module,
372                     std::shared_ptr<UtilityFunction> checker_function)
373       : Instrumenter(module, checker_function),
374         m_objc_object_check_func(nullptr) {}
375 
376   ~ObjcObjectChecker() override = default;
377 
378   enum msgSend_type {
379     eMsgSend = 0,
380     eMsgSendSuper,
381     eMsgSendSuper_stret,
382     eMsgSend_fpret,
383     eMsgSend_stret
384   };
385 
386   std::map<llvm::Instruction *, msgSend_type> msgSend_types;
387 
388 protected:
389   bool InstrumentInstruction(llvm::Instruction *inst) override {
390     CallInst *call_inst = dyn_cast<CallInst>(inst);
391 
392     if (!call_inst)
393       return false; // call_inst really shouldn't be nullptr, because otherwise
394                     // InspectInstruction wouldn't have registered it
395 
396     if (!m_objc_object_check_func)
397       m_objc_object_check_func =
398           BuildObjectCheckerFunc(m_checker_function->StartAddress());
399 
400     // id objc_msgSend(id theReceiver, SEL theSelector, ...)
401 
402     llvm::Value *target_object;
403     llvm::Value *selector;
404 
405     switch (msgSend_types[inst]) {
406     case eMsgSend:
407     case eMsgSend_fpret:
408       // On arm64, clang uses objc_msgSend for scalar and struct return
409       // calls.  The call instruction will record which was used.
410       if (call_inst->hasStructRetAttr()) {
411         target_object = call_inst->getArgOperand(1);
412         selector = call_inst->getArgOperand(2);
413       } else {
414         target_object = call_inst->getArgOperand(0);
415         selector = call_inst->getArgOperand(1);
416       }
417       break;
418     case eMsgSend_stret:
419       target_object = call_inst->getArgOperand(1);
420       selector = call_inst->getArgOperand(2);
421       break;
422     case eMsgSendSuper:
423     case eMsgSendSuper_stret:
424       return true;
425     }
426 
427     // These objects should always be valid according to Sean Calannan
428     assert(target_object);
429     assert(selector);
430 
431     // Insert an instruction to cast the receiver id to int8_t*
432 
433     BitCastInst *bit_cast =
434         new BitCastInst(target_object, GetI8PtrTy(), "", inst);
435 
436     // Insert an instruction to call the helper with the result
437 
438     llvm::Value *arg_array[2];
439 
440     arg_array[0] = bit_cast;
441     arg_array[1] = selector;
442 
443     ArrayRef<llvm::Value *> args(arg_array, 2);
444 
445     CallInst::Create(m_objc_object_check_func, args, "", inst);
446 
447     return true;
448   }
449 
450   static llvm::Function *GetFunction(llvm::Value *value) {
451     if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) {
452       return function;
453     }
454 
455     if (llvm::ConstantExpr *const_expr =
456             llvm::dyn_cast<llvm::ConstantExpr>(value)) {
457       switch (const_expr->getOpcode()) {
458       default:
459         return nullptr;
460       case llvm::Instruction::BitCast:
461         return GetFunction(const_expr->getOperand(0));
462       }
463     }
464 
465     return nullptr;
466   }
467 
468   static llvm::Function *GetCalledFunction(llvm::CallInst *inst) {
469     return GetFunction(inst->getCalledValue());
470   }
471 
472   bool InspectInstruction(llvm::Instruction &i) override {
473     Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
474 
475     CallInst *call_inst = dyn_cast<CallInst>(&i);
476 
477     if (call_inst) {
478       const llvm::Function *called_function = GetCalledFunction(call_inst);
479 
480       if (!called_function)
481         return true;
482 
483       std::string name_str = called_function->getName().str();
484       const char *name_cstr = name_str.c_str();
485 
486       if (log)
487         log->Printf("Found call to %s: %s\n", name_cstr,
488                     PrintValue(call_inst).c_str());
489 
490       if (name_str.find("objc_msgSend") == std::string::npos)
491         return true;
492 
493       if (!strcmp(name_cstr, "objc_msgSend")) {
494         RegisterInstruction(i);
495         msgSend_types[&i] = eMsgSend;
496         return true;
497       }
498 
499       if (!strcmp(name_cstr, "objc_msgSend_stret")) {
500         RegisterInstruction(i);
501         msgSend_types[&i] = eMsgSend_stret;
502         return true;
503       }
504 
505       if (!strcmp(name_cstr, "objc_msgSend_fpret")) {
506         RegisterInstruction(i);
507         msgSend_types[&i] = eMsgSend_fpret;
508         return true;
509       }
510 
511       if (!strcmp(name_cstr, "objc_msgSendSuper")) {
512         RegisterInstruction(i);
513         msgSend_types[&i] = eMsgSendSuper;
514         return true;
515       }
516 
517       if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) {
518         RegisterInstruction(i);
519         msgSend_types[&i] = eMsgSendSuper_stret;
520         return true;
521       }
522 
523       if (log)
524         log->Printf(
525             "Function name '%s' contains 'objc_msgSend' but is not handled",
526             name_str.c_str());
527 
528       return true;
529     }
530 
531     return true;
532   }
533 
534 private:
535   llvm::FunctionCallee m_objc_object_check_func;
536 };
537 
538 IRDynamicChecks::IRDynamicChecks(
539     ClangDynamicCheckerFunctions &checker_functions, const char *func_name)
540     : ModulePass(ID), m_func_name(func_name),
541       m_checker_functions(checker_functions) {}
542 
543 IRDynamicChecks::~IRDynamicChecks() = default;
544 
545 bool IRDynamicChecks::runOnModule(llvm::Module &M) {
546   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
547 
548   llvm::Function *function = M.getFunction(StringRef(m_func_name));
549 
550   if (!function) {
551     if (log)
552       log->Printf("Couldn't find %s() in the module", m_func_name.c_str());
553 
554     return false;
555   }
556 
557   if (m_checker_functions.m_valid_pointer_check) {
558     ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check);
559 
560     if (!vpc.Inspect(*function))
561       return false;
562 
563     if (!vpc.Instrument())
564       return false;
565   }
566 
567   if (m_checker_functions.m_objc_object_check) {
568     ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check);
569 
570     if (!ooc.Inspect(*function))
571       return false;
572 
573     if (!ooc.Instrument())
574       return false;
575   }
576 
577   if (log && log->GetVerbose()) {
578     std::string s;
579     raw_string_ostream oss(s);
580 
581     M.print(oss, nullptr);
582 
583     oss.flush();
584 
585     log->Printf("Module after dynamic checks: \n%s", s.c_str());
586   }
587 
588   return true;
589 }
590 
591 void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {}
592 
593 PassManagerType IRDynamicChecks::getPotentialPassManagerType() const {
594   return PMT_ModulePassManager;
595 }
596