1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a checker that reports uninitialized fields in objects
10 // created after a constructor call.
11 //
12 // To read about command line options and how the checker works, refer to the
13 // top of the file and inline comments in UninitializedObject.h.
14 //
15 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
16 // complexity of this file.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21 #include "UninitializedObject.h"
22 #include "clang/ASTMatchers/ASTMatchFinder.h"
23 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
24 #include "clang/StaticAnalyzer/Core/Checker.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
27 
28 using namespace clang;
29 using namespace clang::ento;
30 using namespace clang::ast_matchers;
31 
/// Set of memory regions stored in the program state. A field region (and,
/// for dereferenceable fields, the region of its pointee) is inserted once it
/// has been confirmed to be uninitialized, so that the very same region is not
/// recorded as uninitialized a second time.
REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
35 
36 namespace {
37 
38 class UninitializedObjectChecker
39     : public Checker<check::EndFunction, check::DeadSymbols> {
40   std::unique_ptr<BuiltinBug> BT_uninitField;
41 
42 public:
43   // The fields of this struct will be initialized when registering the checker.
44   UninitObjCheckerOptions Opts;
45 
46   UninitializedObjectChecker()
47       : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
48 
49   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
50   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
51 };
52 
53 /// A basic field type, that is not a pointer or a reference, it's dynamic and
54 /// static type is the same.
55 class RegularField final : public FieldNode {
56 public:
57   RegularField(const FieldRegion *FR) : FieldNode(FR) {}
58 
59   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
60     Out << "uninitialized field ";
61   }
62 
63   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
64 
65   virtual void printNode(llvm::raw_ostream &Out) const override {
66     Out << getVariableName(getDecl());
67   }
68 
69   virtual void printSeparator(llvm::raw_ostream &Out) const override {
70     Out << '.';
71   }
72 };
73 
74 /// Represents that the FieldNode that comes after this is declared in a base
75 /// of the previous FieldNode. As such, this descendant doesn't wrap a
76 /// FieldRegion, and is purely a tool to describe a relation between two other
77 /// FieldRegion wrapping descendants.
78 class BaseClass final : public FieldNode {
79   const QualType BaseClassT;
80 
81 public:
82   BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
83     assert(!T.isNull());
84     assert(T->getAsCXXRecordDecl());
85   }
86 
87   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
88     llvm_unreachable("This node can never be the final node in the "
89                      "fieldchain!");
90   }
91 
92   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
93 
94   virtual void printNode(llvm::raw_ostream &Out) const override {
95     Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
96   }
97 
98   virtual void printSeparator(llvm::raw_ostream &Out) const override {}
99 
100   virtual bool isBase() const override { return true; }
101 };
102 
103 } // end of anonymous namespace
104 
105 // Utility function declarations.
106 
/// Returns the region that was constructed by \p CtorDecl, or nullptr if that
/// isn't possible.
///
/// The region is looked up through the value bound to the 'this' location of
/// \p CtorDecl in the stack frame of \p Context. A region whose value type is
/// not a C++ record yields nullptr.
static const TypedValueRegion *
getConstructedRegion(const CXXConstructorDecl *CtorDecl,
                     CheckerContext &Context);
112 
/// Checks whether the object constructed by \p Ctor will be analyzed later
/// (e.g. if the object is a field of another object, in which case we'd check
/// it multiple times).
///
/// This is decided by walking the parent location contexts of \p Context: if
/// one of them belongs to a constructor whose constructed region contains the
/// region constructed by \p Ctor as a subregion, the result is true.
static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
                                      CheckerContext &Context);
118 
/// Checks whether \p RD contains a field with a name or type name that matches
/// \p Pattern. \p Pattern is interpreted as a regular expression, and only the
/// fields declared directly in \p RD are considered.
static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
122 
/// Checks _syntactically_ whether it is possible to access \p FD from the
/// record that contains it without a preceding assert (even if that access
/// happens inside a method). This is mainly used for records that act like
/// unions, like having multiple bit fields, with only a fraction being properly
/// initialized. If these fields are properly guarded with asserts, this method
/// returns false.
///
/// Public fields, and fields whose parent is not a C++ record, are always
/// reported as having an unguarded access.
///
/// Since this check is done syntactically, this method could be inaccurate.
static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State);
132 
133 //===----------------------------------------------------------------------===//
134 //                  Methods for UninitializedObjectChecker.
135 //===----------------------------------------------------------------------===//
136 
137 void UninitializedObjectChecker::checkEndFunction(
138     const ReturnStmt *RS, CheckerContext &Context) const {
139 
140   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
141       Context.getLocationContext()->getDecl());
142   if (!CtorDecl)
143     return;
144 
145   if (!CtorDecl->isUserProvided())
146     return;
147 
148   if (CtorDecl->getParent()->isUnion())
149     return;
150 
151   // This avoids essentially the same error being reported multiple times.
152   if (willObjectBeAnalyzedLater(CtorDecl, Context))
153     return;
154 
155   const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
156   if (!R)
157     return;
158 
159   FindUninitializedFields F(Context.getState(), R, Opts);
160 
161   std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
162       F.getResults();
163 
164   ProgramStateRef UpdatedState = UninitInfo.first;
165   const UninitFieldMap &UninitFields = UninitInfo.second;
166 
167   if (UninitFields.empty()) {
168     Context.addTransition(UpdatedState);
169     return;
170   }
171 
172   // There are uninitialized fields in the record.
173 
174   ExplodedNode *Node = Context.generateNonFatalErrorNode(UpdatedState);
175   if (!Node)
176     return;
177 
178   PathDiagnosticLocation LocUsedForUniqueing;
179   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
180   if (CallSite)
181     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
182         CallSite, Context.getSourceManager(), Node->getLocationContext());
183 
184   // For Plist consumers that don't support notes just yet, we'll convert notes
185   // to warnings.
186   if (Opts.ShouldConvertNotesToWarnings) {
187     for (const auto &Pair : UninitFields) {
188 
189       auto Report = llvm::make_unique<BugReport>(
190           *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
191           Node->getLocationContext()->getDecl());
192       Context.emitReport(std::move(Report));
193     }
194     return;
195   }
196 
197   SmallString<100> WarningBuf;
198   llvm::raw_svector_ostream WarningOS(WarningBuf);
199   WarningOS << UninitFields.size() << " uninitialized field"
200             << (UninitFields.size() == 1 ? "" : "s")
201             << " at the end of the constructor call";
202 
203   auto Report = llvm::make_unique<BugReport>(
204       *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
205       Node->getLocationContext()->getDecl());
206 
207   for (const auto &Pair : UninitFields) {
208     Report->addNote(Pair.second,
209                     PathDiagnosticLocation::create(Pair.first->getDecl(),
210                                                    Context.getSourceManager()));
211   }
212   Context.emitReport(std::move(Report));
213 }
214 
215 void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
216                                                   CheckerContext &C) const {
217   ProgramStateRef State = C.getState();
218   for (const MemRegion *R : State->get<AnalyzedRegions>()) {
219     if (!SR.isLiveRegion(R))
220       State = State->remove<AnalyzedRegions>(R);
221   }
222 }
223 
224 //===----------------------------------------------------------------------===//
225 //                   Methods for FindUninitializedFields.
226 //===----------------------------------------------------------------------===//
227 
228 FindUninitializedFields::FindUninitializedFields(
229     ProgramStateRef State, const TypedValueRegion *const R,
230     const UninitObjCheckerOptions &Opts)
231     : State(State), ObjectR(R), Opts(Opts) {
232 
233   isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
234 
235   // In non-pedantic mode, if ObjectR doesn't contain a single initialized
236   // field, we'll assume that Object was intentionally left uninitialized.
237   if (!Opts.IsPedantic && !isAnyFieldInitialized())
238     UninitFields.clear();
239 }
240 
241 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
242                                                 const MemRegion *PointeeR) {
243   const FieldRegion *FR = Chain.getUninitRegion();
244 
245   assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
246          "One must also pass the pointee region as a parameter for "
247          "dereferenceable fields!");
248 
249   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
250           FR->getDecl()->getLocation()))
251     return false;
252 
253   if (Opts.IgnoreGuardedFields && !hasUnguardedAccess(FR->getDecl(), State))
254     return false;
255 
256   if (State->contains<AnalyzedRegions>(FR))
257     return false;
258 
259   if (PointeeR) {
260     if (State->contains<AnalyzedRegions>(PointeeR)) {
261       return false;
262     }
263     State = State->add<AnalyzedRegions>(PointeeR);
264   }
265 
266   State = State->add<AnalyzedRegions>(FR);
267 
268   UninitFieldMap::mapped_type NoteMsgBuf;
269   llvm::raw_svector_ostream OS(NoteMsgBuf);
270   Chain.printNoteMsg(OS);
271 
272   return UninitFields.insert({FR, std::move(NoteMsgBuf)}).second;
273 }
274 
275 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
276                                                FieldChainInfo LocalChain) {
277   assert(R->getValueType()->isRecordType() &&
278          !R->getValueType()->isUnionType() &&
279          "This method only checks non-union record objects!");
280 
281   const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
282 
283   if (!RD) {
284     IsAnyFieldInitialized = true;
285     return true;
286   }
287 
288   if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
289       shouldIgnoreRecord(RD, Opts.IgnoredRecordsWithFieldPattern)) {
290     IsAnyFieldInitialized = true;
291     return false;
292   }
293 
294   bool ContainsUninitField = false;
295 
296   // Are all of this non-union's fields initialized?
297   for (const FieldDecl *I : RD->fields()) {
298 
299     const auto FieldVal =
300         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
301     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
302     QualType T = I->getType();
303 
304     // If LocalChain already contains FR, then we encountered a cyclic
305     // reference. In this case, region FR is already under checking at an
306     // earlier node in the directed tree.
307     if (LocalChain.contains(FR))
308       return false;
309 
310     if (T->isStructureOrClassType()) {
311       if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
312         ContainsUninitField = true;
313       continue;
314     }
315 
316     if (T->isUnionType()) {
317       if (isUnionUninit(FR)) {
318         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
319           ContainsUninitField = true;
320       } else
321         IsAnyFieldInitialized = true;
322       continue;
323     }
324 
325     if (T->isArrayType()) {
326       IsAnyFieldInitialized = true;
327       continue;
328     }
329 
330     SVal V = State->getSVal(FieldVal);
331 
332     if (isDereferencableType(T) || V.getAs<nonloc::LocAsInteger>()) {
333       if (isDereferencableUninit(FR, LocalChain))
334         ContainsUninitField = true;
335       continue;
336     }
337 
338     if (isPrimitiveType(T)) {
339       if (isPrimitiveUninit(V)) {
340         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
341           ContainsUninitField = true;
342       }
343       continue;
344     }
345 
346     llvm_unreachable("All cases are handled!");
347   }
348 
349   // Checking bases. The checker will regard inherited data members as direct
350   // fields.
351   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
352   if (!CXXRD)
353     return ContainsUninitField;
354 
355   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
356     const auto *BaseRegion = State->getLValue(BaseSpec, R)
357                                  .castAs<loc::MemRegionVal>()
358                                  .getRegionAs<TypedValueRegion>();
359 
360     // If the head of the list is also a BaseClass, we'll overwrite it to avoid
361     // note messages like 'this->A::B::x'.
362     if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
363       if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
364                                            BaseClass(BaseSpec.getType()))))
365         ContainsUninitField = true;
366     } else {
367       if (isNonUnionUninit(BaseRegion,
368                            LocalChain.add(BaseClass(BaseSpec.getType()))))
369         ContainsUninitField = true;
370     }
371   }
372 
373   return ContainsUninitField;
374 }
375 
/// Placeholder for checking union objects: after asserting that \p R has a
/// union type, it always returns false, i.e. no union is ever reported as
/// uninitialized.
bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
  assert(R->getValueType()->isUnionType() &&
         "This method only checks union objects!");
  // TODO: Implement support for union fields.
  return false;
}
382 
383 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
384   if (V.isUndef())
385     return true;
386 
387   IsAnyFieldInitialized = true;
388   return false;
389 }
390 
391 //===----------------------------------------------------------------------===//
392 //                       Methods for FieldChainInfo.
393 //===----------------------------------------------------------------------===//
394 
395 bool FieldChainInfo::contains(const FieldRegion *FR) const {
396   for (const FieldNode &Node : Chain) {
397     if (Node.isSameRegion(FR))
398       return true;
399   }
400   return false;
401 }
402 
/// Prints every element of \p L to \p Out, each followed by its separator.
/// Since ImmutableLists store elements in reverse order, and have no reverse
/// iterators, we use a recursive function to print the fieldchain correctly.
/// FieldChainInfo::printNoteMsg passes in the chain without its head, and
/// prints that last element of the fieldchain itself.
static void printTail(llvm::raw_ostream &Out,
                      const FieldChainInfo::FieldChain L);
409 
410 // FIXME: This function constructs an incorrect string in the following case:
411 //
412 //   struct Base { int x; };
413 //   struct D1 : Base {}; struct D2 : Base {};
414 //
415 //   struct MostDerived : D1, D2 {
416 //     MostDerived() {}
417 //   }
418 //
419 // A call to MostDerived::MostDerived() will cause two notes that say
420 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
421 // we need an explicit namespace resolution whether the uninit field was
422 // 'D1::x' or 'D2::x'.
423 void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
424   if (Chain.isEmpty())
425     return;
426 
427   const FieldNode &LastField = getHead();
428 
429   LastField.printNoteMsg(Out);
430   Out << '\'';
431 
432   for (const FieldNode &Node : Chain)
433     Node.printPrefix(Out);
434 
435   Out << "this->";
436   printTail(Out, Chain.getTail());
437   LastField.printNode(Out);
438   Out << '\'';
439 }
440 
441 static void printTail(llvm::raw_ostream &Out,
442                       const FieldChainInfo::FieldChain L) {
443   if (L.isEmpty())
444     return;
445 
446   printTail(Out, L.getTail());
447 
448   L.getHead().printNode(Out);
449   L.getHead().printSeparator(Out);
450 }
451 
452 //===----------------------------------------------------------------------===//
453 //                           Utility functions.
454 //===----------------------------------------------------------------------===//
455 
456 static const TypedValueRegion *
457 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
458                      CheckerContext &Context) {
459 
460   Loc ThisLoc =
461       Context.getSValBuilder().getCXXThis(CtorDecl, Context.getStackFrame());
462 
463   SVal ObjectV = Context.getState()->getSVal(ThisLoc);
464 
465   auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>();
466   if (R && !R->getValueType()->getAsCXXRecordDecl())
467     return nullptr;
468 
469   return R;
470 }
471 
472 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
473                                       CheckerContext &Context) {
474 
475   const TypedValueRegion *CurrRegion = getConstructedRegion(Ctor, Context);
476   if (!CurrRegion)
477     return false;
478 
479   const LocationContext *LC = Context.getLocationContext();
480   while ((LC = LC->getParent())) {
481 
482     // If \p Ctor was called by another constructor.
483     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
484     if (!OtherCtor)
485       continue;
486 
487     const TypedValueRegion *OtherRegion =
488         getConstructedRegion(OtherCtor, Context);
489     if (!OtherRegion)
490       continue;
491 
492     // If the CurrRegion is a subregion of OtherRegion, it will be analyzed
493     // during the analysis of OtherRegion.
494     if (CurrRegion->isSubRegionOf(OtherRegion))
495       return true;
496   }
497 
498   return false;
499 }
500 
501 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) {
502   llvm::Regex R(Pattern);
503 
504   for (const FieldDecl *FD : RD->fields()) {
505     if (R.match(FD->getType().getAsString()))
506       return true;
507     if (R.match(FD->getName()))
508       return true;
509   }
510 
511   return false;
512 }
513 
514 static const Stmt *getMethodBody(const CXXMethodDecl *M) {
515   if (isa<CXXConstructorDecl>(M))
516     return nullptr;
517 
518   if (!M->isDefined())
519     return nullptr;
520 
521   return M->getDefinition()->getBody();
522 }
523 
524 static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State) {
525 
526   if (FD->getAccess() == AccessSpecifier::AS_public)
527     return true;
528 
529   const auto *Parent = dyn_cast<CXXRecordDecl>(FD->getParent());
530 
531   if (!Parent)
532     return true;
533 
534   Parent = Parent->getDefinition();
535   assert(Parent && "The record's definition must be avaible if an uninitialized"
536                    " field of it was found!");
537 
538   ASTContext &AC = State->getStateManager().getContext();
539 
540   auto FieldAccessM = memberExpr(hasDeclaration(equalsNode(FD))).bind("access");
541 
542   auto AssertLikeM = callExpr(callee(functionDecl(
543       anyOf(hasName("exit"), hasName("panic"), hasName("error"),
544             hasName("Assert"), hasName("assert"), hasName("ziperr"),
545             hasName("assfail"), hasName("db_error"), hasName("__assert"),
546             hasName("__assert2"), hasName("_wassert"), hasName("__assert_rtn"),
547             hasName("__assert_fail"), hasName("dtrace_assfail"),
548             hasName("yy_fatal_error"), hasName("_XCAssertionFailureHandler"),
549             hasName("_DTAssertionFailureHandler"),
550             hasName("_TSAssertionFailureHandler")))));
551 
552   auto NoReturnFuncM = callExpr(callee(functionDecl(isNoReturn())));
553 
554   auto GuardM =
555       stmt(anyOf(ifStmt(), switchStmt(), conditionalOperator(), AssertLikeM,
556             NoReturnFuncM))
557           .bind("guard");
558 
559   for (const CXXMethodDecl *M : Parent->methods()) {
560     const Stmt *MethodBody = getMethodBody(M);
561     if (!MethodBody)
562       continue;
563 
564     auto Accesses = match(stmt(hasDescendant(FieldAccessM)), *MethodBody, AC);
565     if (Accesses.empty())
566       continue;
567     const auto *FirstAccess = Accesses[0].getNodeAs<MemberExpr>("access");
568     assert(FirstAccess);
569 
570     auto Guards = match(stmt(hasDescendant(GuardM)), *MethodBody, AC);
571     if (Guards.empty())
572       return true;
573     const auto *FirstGuard = Guards[0].getNodeAs<Stmt>("guard");
574     assert(FirstGuard);
575 
576     if (FirstAccess->getBeginLoc() < FirstGuard->getBeginLoc())
577       return true;
578   }
579 
580   return false;
581 }
582 
583 std::string clang::ento::getVariableName(const FieldDecl *Field) {
584   // If Field is a captured lambda variable, Field->getName() will return with
585   // an empty string. We can however acquire it's name from the lambda's
586   // captures.
587   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
588 
589   if (CXXParent && CXXParent->isLambda()) {
590     assert(CXXParent->captures_begin());
591     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
592 
593     if (It->capturesVariable())
594       return llvm::Twine("/*captured variable*/" +
595                          It->getCapturedVar()->getName())
596           .str();
597 
598     if (It->capturesThis())
599       return "/*'this' capture*/";
600 
601     llvm_unreachable("No other capture type is expected!");
602   }
603 
604   return Field->getName();
605 }
606 
607 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
608   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
609 
610   AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
611   UninitObjCheckerOptions &ChOpts = Chk->Opts;
612 
613   ChOpts.IsPedantic =
614       AnOpts.getCheckerBooleanOption(Chk, "Pedantic", /*DefaultVal*/ false);
615   ChOpts.ShouldConvertNotesToWarnings = AnOpts.getCheckerBooleanOption(
616       Chk, "NotesAsWarnings", /*DefaultVal*/ false);
617   ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption(
618       Chk, "CheckPointeeInitialization", /*DefaultVal*/ false);
619   ChOpts.IgnoredRecordsWithFieldPattern =
620       AnOpts.getCheckerStringOption(Chk, "IgnoreRecordsWithField",
621                                     /*DefaultVal*/ "");
622   ChOpts.IgnoreGuardedFields =
623       AnOpts.getCheckerBooleanOption(Chk, "IgnoreGuardedFields",
624                                      /*DefaultVal*/ false);
625 }
626 
627 bool ento::shouldRegisterUninitializedObjectChecker(const LangOptions &LO) {
628   return true;
629 }
630