1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a checker that reports uninitialized fields in objects
11 // created after a constructor call.
12 //
13 // This checker has several options:
14 //   - "Pedantic" (boolean). If it's not set or is set to false, the checker
15 //     won't emit warnings for objects that don't have at least one initialized
16 //     field. This may be set with
17 //
18 //     `-analyzer-config alpha.cplusplus.UninitializedObject:Pedantic=true`.
19 //
20 //   - "NotesAsWarnings" (boolean). If set to true, the checker will emit a
21 //     warning for each uninitialized field, as opposed to emitting one warning
22 //     per constructor call, and listing the uninitialized fields that belong
23 //     to it in notes. Defaults to false.
24 //
25 //     `-analyzer-config \
26 //         alpha.cplusplus.UninitializedObject:NotesAsWarnings=true`.
27 //
28 //   - "CheckPointeeInitialization" (boolean). If set to false, the checker will
29 //     not analyze the pointee of pointer/reference fields, and will only check
30 //     whether the object itself is initialized. Defaults to false.
31 //
32 //     `-analyzer-config \
33 //         alpha.cplusplus.UninitializedObject:CheckPointeeInitialization=true`.
34 //
35 //     TODO: With some clever heuristics, some pointers should be dereferenced
36 //     by default. For example, if the pointee is constructed within the
37 //     constructor call, it's reasonable to say that no external object
38 //     references it, and we wouldn't generate multiple reports on the same
39 //     pointee.
40 //
41 // To read about how the checker works, refer to the comments in
42 // UninitializedObject.h.
43 //
44 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
45 // complexity of this file.
46 //
47 //===----------------------------------------------------------------------===//
48 
49 #include "../ClangSACheckers.h"
50 #include "UninitializedObject.h"
51 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
52 #include "clang/StaticAnalyzer/Core/Checker.h"
53 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
54 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
55 
56 using namespace clang;
57 using namespace clang::ento;
58 
59 namespace {
60 
61 class UninitializedObjectChecker : public Checker<check::EndFunction> {
62   std::unique_ptr<BuiltinBug> BT_uninitField;
63 
64 public:
65   // These fields will be initialized when registering the checker.
66   bool IsPedantic;
67   bool ShouldConvertNotesToWarnings;
68   bool CheckPointeeInitialization;
69 
70   UninitializedObjectChecker()
71       : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
72   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
73 };
74 
75 /// A basic field type, that is not a pointer or a reference, it's dynamic and
76 /// static type is the same.
77 class RegularField final : public FieldNode {
78 public:
79   RegularField(const FieldRegion *FR) : FieldNode(FR) {}
80 
81   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
82     Out << "uninitialized field ";
83   }
84 
85   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
86 
87   virtual void printNode(llvm::raw_ostream &Out) const override {
88     Out << getVariableName(getDecl());
89   }
90 
91   virtual void printSeparator(llvm::raw_ostream &Out) const override {
92     Out << '.';
93   }
94 };
95 
96 /// Represents that the FieldNode that comes after this is declared in a base
97 /// of the previous FieldNode. As such, this descendant doesn't wrap a
98 /// FieldRegion, and is purely a tool to describe a relation between two other
99 /// FieldRegion wrapping descendants.
100 class BaseClass final : public FieldNode {
101   const QualType BaseClassT;
102 
103 public:
104   BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
105     assert(!T.isNull());
106     assert(T->getAsCXXRecordDecl());
107   }
108 
109   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
110     llvm_unreachable("This node can never be the final node in the "
111                      "fieldchain!");
112   }
113 
114   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
115 
116   virtual void printNode(llvm::raw_ostream &Out) const override {
117     Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
118   }
119 
120   virtual void printSeparator(llvm::raw_ostream &Out) const override {}
121 
122   virtual bool isBase() const override { return true; }
123 };
124 
125 } // end of anonymous namespace
126 
127 // Utility function declarations.
128 
129 /// Returns the object that was constructed by CtorDecl, or None if that isn't
130 /// possible.
131 // TODO: Refactor this function so that it returns the constructed object's
132 // region.
133 static Optional<nonloc::LazyCompoundVal>
134 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context);
135 
136 /// Checks whether the object constructed by \p Ctor will be analyzed later
137 /// (e.g. if the object is a field of another object, in which case we'd check
138 /// it multiple times).
139 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
140                                       CheckerContext &Context);
141 
142 //===----------------------------------------------------------------------===//
143 //                  Methods for UninitializedObjectChecker.
144 //===----------------------------------------------------------------------===//
145 
146 void UninitializedObjectChecker::checkEndFunction(
147     const ReturnStmt *RS, CheckerContext &Context) const {
148 
149   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
150       Context.getLocationContext()->getDecl());
151   if (!CtorDecl)
152     return;
153 
154   if (!CtorDecl->isUserProvided())
155     return;
156 
157   if (CtorDecl->getParent()->isUnion())
158     return;
159 
160   // This avoids essentially the same error being reported multiple times.
161   if (willObjectBeAnalyzedLater(CtorDecl, Context))
162     return;
163 
164   Optional<nonloc::LazyCompoundVal> Object = getObjectVal(CtorDecl, Context);
165   if (!Object)
166     return;
167 
168   FindUninitializedFields F(Context.getState(), Object->getRegion(),
169                             CheckPointeeInitialization);
170 
171   const UninitFieldMap &UninitFields = F.getUninitFields();
172 
173   if (UninitFields.empty())
174     return;
175 
176   // In non-pedantic mode, if Object's region doesn't contain a single
177   // initialized field, we'll assume that Object was intentionally left
178   // uninitialized.
179   if (!IsPedantic && !F.isAnyFieldInitialized())
180     return;
181 
182   // There are uninitialized fields in the record.
183 
184   ExplodedNode *Node = Context.generateNonFatalErrorNode(Context.getState());
185   if (!Node)
186     return;
187 
188   PathDiagnosticLocation LocUsedForUniqueing;
189   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
190   if (CallSite)
191     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
192         CallSite, Context.getSourceManager(), Node->getLocationContext());
193 
194   // For Plist consumers that don't support notes just yet, we'll convert notes
195   // to warnings.
196   if (ShouldConvertNotesToWarnings) {
197     for (const auto &Pair : UninitFields) {
198 
199       auto Report = llvm::make_unique<BugReport>(
200           *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
201           Node->getLocationContext()->getDecl());
202       Context.emitReport(std::move(Report));
203     }
204     return;
205   }
206 
207   SmallString<100> WarningBuf;
208   llvm::raw_svector_ostream WarningOS(WarningBuf);
209   WarningOS << UninitFields.size() << " uninitialized field"
210             << (UninitFields.size() == 1 ? "" : "s")
211             << " at the end of the constructor call";
212 
213   auto Report = llvm::make_unique<BugReport>(
214       *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
215       Node->getLocationContext()->getDecl());
216 
217   for (const auto &Pair : UninitFields) {
218     Report->addNote(Pair.second,
219                     PathDiagnosticLocation::create(Pair.first->getDecl(),
220                                                    Context.getSourceManager()));
221   }
222   Context.emitReport(std::move(Report));
223 }
224 
225 //===----------------------------------------------------------------------===//
226 //                   Methods for FindUninitializedFields.
227 //===----------------------------------------------------------------------===//
228 
229 FindUninitializedFields::FindUninitializedFields(
230     ProgramStateRef State, const TypedValueRegion *const R,
231     bool CheckPointeeInitialization)
232     : State(State), ObjectR(R),
233       CheckPointeeInitialization(CheckPointeeInitialization) {
234 
235   isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
236 }
237 
238 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain) {
239   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
240           Chain.getUninitRegion()->getDecl()->getLocation()))
241     return false;
242 
243   UninitFieldMap::mapped_type NoteMsgBuf;
244   llvm::raw_svector_ostream OS(NoteMsgBuf);
245   Chain.printNoteMsg(OS);
246   return UninitFields
247       .insert(std::make_pair(Chain.getUninitRegion(), std::move(NoteMsgBuf)))
248       .second;
249 }
250 
251 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
252                                                FieldChainInfo LocalChain) {
253   assert(R->getValueType()->isRecordType() &&
254          !R->getValueType()->isUnionType() &&
255          "This method only checks non-union record objects!");
256 
257   const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
258 
259   if (!RD) {
260     IsAnyFieldInitialized = true;
261     return true;
262   }
263 
264   bool ContainsUninitField = false;
265 
266   // Are all of this non-union's fields initialized?
267   for (const FieldDecl *I : RD->fields()) {
268 
269     const auto FieldVal =
270         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
271     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
272     QualType T = I->getType();
273 
274     // If LocalChain already contains FR, then we encountered a cyclic
275     // reference. In this case, region FR is already under checking at an
276     // earlier node in the directed tree.
277     if (LocalChain.contains(FR))
278       return false;
279 
280     if (T->isStructureOrClassType()) {
281       if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
282         ContainsUninitField = true;
283       continue;
284     }
285 
286     if (T->isUnionType()) {
287       if (isUnionUninit(FR)) {
288         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
289           ContainsUninitField = true;
290       } else
291         IsAnyFieldInitialized = true;
292       continue;
293     }
294 
295     if (T->isArrayType()) {
296       IsAnyFieldInitialized = true;
297       continue;
298     }
299 
300     if (isDereferencableType(T)) {
301       if (isDereferencableUninit(FR, LocalChain))
302         ContainsUninitField = true;
303       continue;
304     }
305 
306     if (isPrimitiveType(T)) {
307       SVal V = State->getSVal(FieldVal);
308 
309       if (isPrimitiveUninit(V)) {
310         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
311           ContainsUninitField = true;
312       }
313       continue;
314     }
315 
316     llvm_unreachable("All cases are handled!");
317   }
318 
319   // Checking bases. The checker will regard inherited data members as direct
320   // fields.
321   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
322   if (!CXXRD)
323     return ContainsUninitField;
324 
325   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
326     const auto *BaseRegion = State->getLValue(BaseSpec, R)
327                                  .castAs<loc::MemRegionVal>()
328                                  .getRegionAs<TypedValueRegion>();
329 
330     // If the head of the list is also a BaseClass, we'll overwrite it to avoid
331     // note messages like 'this->A::B::x'.
332     if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
333       if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
334                                            BaseClass(BaseSpec.getType()))))
335         ContainsUninitField = true;
336     } else {
337       if (isNonUnionUninit(BaseRegion,
338                            LocalChain.add(BaseClass(BaseSpec.getType()))))
339         ContainsUninitField = true;
340     }
341   }
342 
343   return ContainsUninitField;
344 }
345 
346 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
347   assert(R->getValueType()->isUnionType() &&
348          "This method only checks union objects!");
349   // TODO: Implement support for union fields.
350   return false;
351 }
352 
353 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
354   if (V.isUndef())
355     return true;
356 
357   IsAnyFieldInitialized = true;
358   return false;
359 }
360 
361 //===----------------------------------------------------------------------===//
362 //                       Methods for FieldChainInfo.
363 //===----------------------------------------------------------------------===//
364 
365 const FieldRegion *FieldChainInfo::getUninitRegion() const {
366   assert(!Chain.isEmpty() && "Empty fieldchain!");
367 
368   // ImmutableList::getHead() isn't a const method, hence the not too nice
369   // implementation.
370   return (*Chain.begin()).getRegion();
371 }
372 
373 bool FieldChainInfo::contains(const FieldRegion *FR) const {
374   for (const FieldNode &Node : Chain) {
375     if (Node.isSameRegion(FR))
376       return true;
377   }
378   return false;
379 }
380 
381 /// Prints every element except the last to `Out`. Since ImmutableLists store
382 /// elements in reverse order, and have no reverse iterators, we use a
383 /// recursive function to print the fieldchain correctly. The last element in
384 /// the chain is to be printed by `FieldChainInfo::printNoteMsg`.
385 static void printTail(llvm::raw_ostream &Out,
386                       const FieldChainInfo::FieldChainImpl *L);
387 
388 // FIXME: This function constructs an incorrect string in the following case:
389 //
390 //   struct Base { int x; };
391 //   struct D1 : Base {}; struct D2 : Base {};
392 //
393 //   struct MostDerived : D1, D2 {
394 //     MostDerived() {}
395 //   }
396 //
397 // A call to MostDerived::MostDerived() will cause two notes that say
398 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
399 // we need an explicit namespace resolution whether the uninit field was
400 // 'D1::x' or 'D2::x'.
401 void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
402   if (Chain.isEmpty())
403     return;
404 
405   const FieldChainImpl *L = Chain.getInternalPointer();
406   const FieldNode &LastField = L->getHead();
407 
408   LastField.printNoteMsg(Out);
409   Out << '\'';
410 
411   for (const FieldNode &Node : Chain)
412     Node.printPrefix(Out);
413 
414   Out << "this->";
415   printTail(Out, L->getTail());
416   LastField.printNode(Out);
417   Out << '\'';
418 }
419 
420 static void printTail(llvm::raw_ostream &Out,
421                       const FieldChainInfo::FieldChainImpl *L) {
422   if (!L)
423     return;
424 
425   printTail(Out, L->getTail());
426 
427   L->getHead().printNode(Out);
428   L->getHead().printSeparator(Out);
429 }
430 
431 //===----------------------------------------------------------------------===//
432 //                           Utility functions.
433 //===----------------------------------------------------------------------===//
434 
435 static Optional<nonloc::LazyCompoundVal>
436 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context) {
437 
438   Loc ThisLoc = Context.getSValBuilder().getCXXThis(CtorDecl->getParent(),
439                                                     Context.getStackFrame());
440   // Getting the value for 'this'.
441   SVal This = Context.getState()->getSVal(ThisLoc);
442 
443   // Getting the value for '*this'.
444   SVal Object = Context.getState()->getSVal(This.castAs<Loc>());
445 
446   return Object.getAs<nonloc::LazyCompoundVal>();
447 }
448 
449 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
450                                       CheckerContext &Context) {
451 
452   Optional<nonloc::LazyCompoundVal> CurrentObject = getObjectVal(Ctor, Context);
453   if (!CurrentObject)
454     return false;
455 
456   const LocationContext *LC = Context.getLocationContext();
457   while ((LC = LC->getParent())) {
458 
459     // If \p Ctor was called by another constructor.
460     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
461     if (!OtherCtor)
462       continue;
463 
464     Optional<nonloc::LazyCompoundVal> OtherObject =
465         getObjectVal(OtherCtor, Context);
466     if (!OtherObject)
467       continue;
468 
469     // If the CurrentObject is a subregion of OtherObject, it will be analyzed
470     // during the analysis of OtherObject.
471     if (CurrentObject->getRegion()->isSubRegionOf(OtherObject->getRegion()))
472       return true;
473   }
474 
475   return false;
476 }
477 
478 std::string clang::ento::getVariableName(const FieldDecl *Field) {
479   // If Field is a captured lambda variable, Field->getName() will return with
480   // an empty string. We can however acquire it's name from the lambda's
481   // captures.
482   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
483 
484   if (CXXParent && CXXParent->isLambda()) {
485     assert(CXXParent->captures_begin());
486     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
487 
488     if (It->capturesVariable())
489       return llvm::Twine("/*captured variable*/" +
490                          It->getCapturedVar()->getName())
491           .str();
492 
493     if (It->capturesThis())
494       return "/*'this' capture*/";
495 
496     llvm_unreachable("No other capture type is expected!");
497   }
498 
499   return Field->getName();
500 }
501 
502 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
503   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
504 
505   Chk->IsPedantic = Mgr.getAnalyzerOptions().getBooleanOption(
506       "Pedantic", /*DefaultVal*/ false, Chk);
507   Chk->ShouldConvertNotesToWarnings = Mgr.getAnalyzerOptions().getBooleanOption(
508       "NotesAsWarnings", /*DefaultVal*/ false, Chk);
509   Chk->CheckPointeeInitialization = Mgr.getAnalyzerOptions().getBooleanOption(
510       "CheckPointeeInitialization", /*DefaultVal*/ false, Chk);
511 }
512