1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a checker that reports uninitialized fields in objects
11 // created after a constructor call.
12 //
13 // To read about command line options and how the checker works, refer to the
14 // top of the file and inline comments in UninitializedObject.h.
15 //
16 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
17 // complexity of this file.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "../ClangSACheckers.h"
22 #include "UninitializedObject.h"
23 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
24 #include "clang/StaticAnalyzer/Core/Checker.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
27 
28 using namespace clang;
29 using namespace clang::ento;
30 
/// Fields (and the pointees of fields) that are confirmed to be uninitialized
/// are marked as already analyzed, so that they are not reported again.
33 REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
34 
35 namespace {
36 
37 class UninitializedObjectChecker
38     : public Checker<check::EndFunction, check::DeadSymbols> {
39   std::unique_ptr<BuiltinBug> BT_uninitField;
40 
41 public:
42   // The fields of this struct will be initialized when registering the checker.
43   UninitObjCheckerOptions Opts;
44 
45   UninitializedObjectChecker()
46       : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
47 
48   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
49   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
50 };
51 
52 /// A basic field type, that is not a pointer or a reference, it's dynamic and
53 /// static type is the same.
54 class RegularField final : public FieldNode {
55 public:
56   RegularField(const FieldRegion *FR) : FieldNode(FR) {}
57 
58   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
59     Out << "uninitialized field ";
60   }
61 
62   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
63 
64   virtual void printNode(llvm::raw_ostream &Out) const override {
65     Out << getVariableName(getDecl());
66   }
67 
68   virtual void printSeparator(llvm::raw_ostream &Out) const override {
69     Out << '.';
70   }
71 };
72 
73 /// Represents that the FieldNode that comes after this is declared in a base
74 /// of the previous FieldNode. As such, this descendant doesn't wrap a
75 /// FieldRegion, and is purely a tool to describe a relation between two other
76 /// FieldRegion wrapping descendants.
77 class BaseClass final : public FieldNode {
78   const QualType BaseClassT;
79 
80 public:
81   BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
82     assert(!T.isNull());
83     assert(T->getAsCXXRecordDecl());
84   }
85 
86   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
87     llvm_unreachable("This node can never be the final node in the "
88                      "fieldchain!");
89   }
90 
91   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
92 
93   virtual void printNode(llvm::raw_ostream &Out) const override {
94     Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
95   }
96 
97   virtual void printSeparator(llvm::raw_ostream &Out) const override {}
98 
99   virtual bool isBase() const override { return true; }
100 };
101 
102 } // end of anonymous namespace
103 
104 // Utility function declarations.
105 
106 /// Returns the region that was constructed by CtorDecl, or nullptr if that
107 /// isn't possible.
108 static const TypedValueRegion *
109 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
110                      CheckerContext &Context);
111 
112 /// Checks whether the object constructed by \p Ctor will be analyzed later
113 /// (e.g. if the object is a field of another object, in which case we'd check
114 /// it multiple times).
115 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
116                                       CheckerContext &Context);
117 
118 /// Checks whether RD contains a field with a name or type name that matches
119 /// \p Pattern.
120 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
121 
122 //===----------------------------------------------------------------------===//
123 //                  Methods for UninitializedObjectChecker.
124 //===----------------------------------------------------------------------===//
125 
126 void UninitializedObjectChecker::checkEndFunction(
127     const ReturnStmt *RS, CheckerContext &Context) const {
128 
129   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
130       Context.getLocationContext()->getDecl());
131   if (!CtorDecl)
132     return;
133 
134   if (!CtorDecl->isUserProvided())
135     return;
136 
137   if (CtorDecl->getParent()->isUnion())
138     return;
139 
140   // This avoids essentially the same error being reported multiple times.
141   if (willObjectBeAnalyzedLater(CtorDecl, Context))
142     return;
143 
144   const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
145   if (!R)
146     return;
147 
148   FindUninitializedFields F(Context.getState(), R, Opts);
149 
150   std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
151       F.getResults();
152 
153   ProgramStateRef UpdatedState = UninitInfo.first;
154   const UninitFieldMap &UninitFields = UninitInfo.second;
155 
156   if (UninitFields.empty()) {
157     Context.addTransition(UpdatedState);
158     return;
159   }
160 
161   // There are uninitialized fields in the record.
162 
163   ExplodedNode *Node = Context.generateNonFatalErrorNode(UpdatedState);
164   if (!Node)
165     return;
166 
167   PathDiagnosticLocation LocUsedForUniqueing;
168   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
169   if (CallSite)
170     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
171         CallSite, Context.getSourceManager(), Node->getLocationContext());
172 
173   // For Plist consumers that don't support notes just yet, we'll convert notes
174   // to warnings.
175   if (Opts.ShouldConvertNotesToWarnings) {
176     for (const auto &Pair : UninitFields) {
177 
178       auto Report = llvm::make_unique<BugReport>(
179           *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
180           Node->getLocationContext()->getDecl());
181       Context.emitReport(std::move(Report));
182     }
183     return;
184   }
185 
186   SmallString<100> WarningBuf;
187   llvm::raw_svector_ostream WarningOS(WarningBuf);
188   WarningOS << UninitFields.size() << " uninitialized field"
189             << (UninitFields.size() == 1 ? "" : "s")
190             << " at the end of the constructor call";
191 
192   auto Report = llvm::make_unique<BugReport>(
193       *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
194       Node->getLocationContext()->getDecl());
195 
196   for (const auto &Pair : UninitFields) {
197     Report->addNote(Pair.second,
198                     PathDiagnosticLocation::create(Pair.first->getDecl(),
199                                                    Context.getSourceManager()));
200   }
201   Context.emitReport(std::move(Report));
202 }
203 
204 void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
205                                                   CheckerContext &C) const {
206   ProgramStateRef State = C.getState();
207   for (const MemRegion *R : State->get<AnalyzedRegions>()) {
208     if (!SR.isLiveRegion(R))
209       State = State->remove<AnalyzedRegions>(R);
210   }
211 }
212 
213 //===----------------------------------------------------------------------===//
214 //                   Methods for FindUninitializedFields.
215 //===----------------------------------------------------------------------===//
216 
217 FindUninitializedFields::FindUninitializedFields(
218     ProgramStateRef State, const TypedValueRegion *const R,
219     const UninitObjCheckerOptions &Opts)
220     : State(State), ObjectR(R), Opts(Opts) {
221 
222   isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
223 
224   // In non-pedantic mode, if ObjectR doesn't contain a single initialized
225   // field, we'll assume that Object was intentionally left uninitialized.
226   if (!Opts.IsPedantic && !isAnyFieldInitialized())
227     UninitFields.clear();
228 }
229 
230 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
231                                                 const MemRegion *PointeeR) {
232   const FieldRegion *FR = Chain.getUninitRegion();
233 
234   assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
235          "One must also pass the pointee region as a parameter for "
236          "dereferenceable fields!");
237 
238   if (State->contains<AnalyzedRegions>(FR))
239     return false;
240 
241   if (PointeeR) {
242     if (State->contains<AnalyzedRegions>(PointeeR)) {
243       return false;
244     }
245     State = State->add<AnalyzedRegions>(PointeeR);
246   }
247 
248   State = State->add<AnalyzedRegions>(FR);
249 
250   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
251           FR->getDecl()->getLocation()))
252     return false;
253 
254   UninitFieldMap::mapped_type NoteMsgBuf;
255   llvm::raw_svector_ostream OS(NoteMsgBuf);
256   Chain.printNoteMsg(OS);
257   return UninitFields.insert({FR, std::move(NoteMsgBuf)}).second;
258 }
259 
260 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
261                                                FieldChainInfo LocalChain) {
262   assert(R->getValueType()->isRecordType() &&
263          !R->getValueType()->isUnionType() &&
264          "This method only checks non-union record objects!");
265 
266   const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
267 
268   if (!RD) {
269     IsAnyFieldInitialized = true;
270     return true;
271   }
272 
273   if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
274       shouldIgnoreRecord(RD, Opts.IgnoredRecordsWithFieldPattern)) {
275     IsAnyFieldInitialized = true;
276     return false;
277   }
278 
279   bool ContainsUninitField = false;
280 
281   // Are all of this non-union's fields initialized?
282   for (const FieldDecl *I : RD->fields()) {
283 
284     const auto FieldVal =
285         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
286     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
287     QualType T = I->getType();
288 
289     // If LocalChain already contains FR, then we encountered a cyclic
290     // reference. In this case, region FR is already under checking at an
291     // earlier node in the directed tree.
292     if (LocalChain.contains(FR))
293       return false;
294 
295     if (T->isStructureOrClassType()) {
296       if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
297         ContainsUninitField = true;
298       continue;
299     }
300 
301     if (T->isUnionType()) {
302       if (isUnionUninit(FR)) {
303         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
304           ContainsUninitField = true;
305       } else
306         IsAnyFieldInitialized = true;
307       continue;
308     }
309 
310     if (T->isArrayType()) {
311       IsAnyFieldInitialized = true;
312       continue;
313     }
314 
315     SVal V = State->getSVal(FieldVal);
316 
317     if (isDereferencableType(T) || V.getAs<nonloc::LocAsInteger>()) {
318       if (isDereferencableUninit(FR, LocalChain))
319         ContainsUninitField = true;
320       continue;
321     }
322 
323     if (isPrimitiveType(T)) {
324       if (isPrimitiveUninit(V)) {
325         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
326           ContainsUninitField = true;
327       }
328       continue;
329     }
330 
331     llvm_unreachable("All cases are handled!");
332   }
333 
334   // Checking bases. The checker will regard inherited data members as direct
335   // fields.
336   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
337   if (!CXXRD)
338     return ContainsUninitField;
339 
340   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
341     const auto *BaseRegion = State->getLValue(BaseSpec, R)
342                                  .castAs<loc::MemRegionVal>()
343                                  .getRegionAs<TypedValueRegion>();
344 
345     // If the head of the list is also a BaseClass, we'll overwrite it to avoid
346     // note messages like 'this->A::B::x'.
347     if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
348       if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
349                                            BaseClass(BaseSpec.getType()))))
350         ContainsUninitField = true;
351     } else {
352       if (isNonUnionUninit(BaseRegion,
353                            LocalChain.add(BaseClass(BaseSpec.getType()))))
354         ContainsUninitField = true;
355     }
356   }
357 
358   return ContainsUninitField;
359 }
360 
361 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
362   assert(R->getValueType()->isUnionType() &&
363          "This method only checks union objects!");
364   // TODO: Implement support for union fields.
365   return false;
366 }
367 
368 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
369   if (V.isUndef())
370     return true;
371 
372   IsAnyFieldInitialized = true;
373   return false;
374 }
375 
376 //===----------------------------------------------------------------------===//
377 //                       Methods for FieldChainInfo.
378 //===----------------------------------------------------------------------===//
379 
380 bool FieldChainInfo::contains(const FieldRegion *FR) const {
381   for (const FieldNode &Node : Chain) {
382     if (Node.isSameRegion(FR))
383       return true;
384   }
385   return false;
386 }
387 
/// Prints every element except the last to `Out`. Since ImmutableLists store
/// elements in reverse order, and have no reverse iterators, we use a
/// recursive function to print the fieldchain correctly. The last element in
/// the chain is to be printed by `FieldChainInfo::printNoteMsg`.
392 static void printTail(llvm::raw_ostream &Out,
393                       const FieldChainInfo::FieldChain L);
394 
395 // FIXME: This function constructs an incorrect string in the following case:
396 //
397 //   struct Base { int x; };
398 //   struct D1 : Base {}; struct D2 : Base {};
399 //
400 //   struct MostDerived : D1, D2 {
401 //     MostDerived() {}
402 //   }
403 //
404 // A call to MostDerived::MostDerived() will cause two notes that say
405 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
406 // we need an explicit namespace resolution whether the uninit field was
407 // 'D1::x' or 'D2::x'.
408 void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
409   if (Chain.isEmpty())
410     return;
411 
412   const FieldNode &LastField = getHead();
413 
414   LastField.printNoteMsg(Out);
415   Out << '\'';
416 
417   for (const FieldNode &Node : Chain)
418     Node.printPrefix(Out);
419 
420   Out << "this->";
421   printTail(Out, Chain.getTail());
422   LastField.printNode(Out);
423   Out << '\'';
424 }
425 
426 static void printTail(llvm::raw_ostream &Out,
427                       const FieldChainInfo::FieldChain L) {
428   if (L.isEmpty())
429     return;
430 
431   printTail(Out, L.getTail());
432 
433   L.getHead().printNode(Out);
434   L.getHead().printSeparator(Out);
435 }
436 
437 //===----------------------------------------------------------------------===//
438 //                           Utility functions.
439 //===----------------------------------------------------------------------===//
440 
441 static const TypedValueRegion *
442 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
443                      CheckerContext &Context) {
444 
445   Loc ThisLoc = Context.getSValBuilder().getCXXThis(CtorDecl,
446                                                     Context.getStackFrame());
447 
448   SVal ObjectV = Context.getState()->getSVal(ThisLoc);
449 
450   auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>();
451   if (R && !R->getValueType()->getAsCXXRecordDecl())
452     return nullptr;
453 
454   return R;
455 }
456 
457 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
458                                       CheckerContext &Context) {
459 
460   const TypedValueRegion *CurrRegion = getConstructedRegion(Ctor, Context);
461   if (!CurrRegion)
462     return false;
463 
464   const LocationContext *LC = Context.getLocationContext();
465   while ((LC = LC->getParent())) {
466 
467     // If \p Ctor was called by another constructor.
468     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
469     if (!OtherCtor)
470       continue;
471 
472     const TypedValueRegion *OtherRegion =
473         getConstructedRegion(OtherCtor, Context);
474     if (!OtherRegion)
475       continue;
476 
477     // If the CurrRegion is a subregion of OtherRegion, it will be analyzed
478     // during the analysis of OtherRegion.
479     if (CurrRegion->isSubRegionOf(OtherRegion))
480       return true;
481   }
482 
483   return false;
484 }
485 
486 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) {
487   llvm::Regex R(Pattern);
488 
489   for (const FieldDecl *FD : RD->fields()) {
490     if (R.match(FD->getType().getAsString()))
491       return true;
492     if (R.match(FD->getName()))
493       return true;
494   }
495 
496   return false;
497 }
498 
499 std::string clang::ento::getVariableName(const FieldDecl *Field) {
500   // If Field is a captured lambda variable, Field->getName() will return with
501   // an empty string. We can however acquire it's name from the lambda's
502   // captures.
503   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
504 
505   if (CXXParent && CXXParent->isLambda()) {
506     assert(CXXParent->captures_begin());
507     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
508 
509     if (It->capturesVariable())
510       return llvm::Twine("/*captured variable*/" +
511                          It->getCapturedVar()->getName())
512           .str();
513 
514     if (It->capturesThis())
515       return "/*'this' capture*/";
516 
517     llvm_unreachable("No other capture type is expected!");
518   }
519 
520   return Field->getName();
521 }
522 
523 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
524   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
525 
526   AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
527   UninitObjCheckerOptions &ChOpts = Chk->Opts;
528 
529   ChOpts.IsPedantic =
530       AnOpts.getCheckerBooleanOption("Pedantic", /*DefaultVal*/ false, Chk);
531   ChOpts.ShouldConvertNotesToWarnings =
532       AnOpts.getCheckerBooleanOption("NotesAsWarnings", /*DefaultVal*/ false, Chk);
533   ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption(
534       "CheckPointeeInitialization", /*DefaultVal*/ false, Chk);
535   ChOpts.IgnoredRecordsWithFieldPattern =
536       AnOpts.getCheckerStringOption("IgnoreRecordsWithField",
537                                /*DefaultVal*/ "", Chk);
538 }
539