1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a checker that reports uninitialized fields in objects
11 // created after a constructor call.
12 //
13 // This checker has several options:
14 //   - "Pedantic" (boolean). If it's not set or is set to false, the checker
15 //     won't emit warnings for objects that don't have at least one initialized
16 //     field. This may be set with
17 //
18 //     `-analyzer-config alpha.cplusplus.UninitializedObject:Pedantic=true`.
19 //
20 //   - "NotesAsWarnings" (boolean). If set to true, the checker will emit a
21 //     warning for each uninitialized field, as opposed to emitting one warning
22 //     per constructor call, and listing the uninitialized fields that belong
23 //     to it in notes. Defaults to false.
24 //
25 //     `-analyzer-config \
26 //         alpha.cplusplus.UninitializedObject:NotesAsWarnings=true`.
27 //
28 //   - "CheckPointeeInitialization" (boolean). If set to false, the checker will
29 //     not analyze the pointee of pointer/reference fields, and will only check
30 //     whether the object itself is initialized. Defaults to false.
31 //
32 //     `-analyzer-config \
33 //         alpha.cplusplus.UninitializedObject:CheckPointeeInitialization=true`.
34 //
35 //     TODO: With some clever heuristics, some pointers should be dereferenced
36 //     by default. For example, if the pointee is constructed within the
37 //     constructor call, it's reasonable to say that no external object
38 //     references it, and we wouldn't generate multiple reports on the same
39 //     pointee.
40 //
41 // To read about how the checker works, refer to the comments in
42 // UninitializedObject.h.
43 //
44 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
45 // complexity of this file.
46 //
47 //===----------------------------------------------------------------------===//
48 
49 #include "../ClangSACheckers.h"
50 #include "UninitializedObject.h"
51 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
52 #include "clang/StaticAnalyzer/Core/Checker.h"
53 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
54 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
55 
56 using namespace clang;
57 using namespace clang::ento;
58 
59 namespace {
60 
61 class UninitializedObjectChecker : public Checker<check::EndFunction> {
62   std::unique_ptr<BuiltinBug> BT_uninitField;
63 
64 public:
65   // These fields will be initialized when registering the checker.
66   bool IsPedantic;
67   bool ShouldConvertNotesToWarnings;
68   bool CheckPointeeInitialization;
69 
70   UninitializedObjectChecker()
71       : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
72   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
73 };
74 
75 /// A basic field type, that is not a pointer or a reference, it's dynamic and
76 /// static type is the same.
77 class RegularField final : public FieldNode {
78 public:
79   RegularField(const FieldRegion *FR) : FieldNode(FR) {}
80 
81   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
82     Out << "uninitialized field ";
83   }
84 
85   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
86 
87   virtual void printNode(llvm::raw_ostream &Out) const override {
88     Out << getVariableName(getDecl());
89   }
90 
91   virtual void printSeparator(llvm::raw_ostream &Out) const override {
92     Out << '.';
93   }
94 };
95 
96 /// Represents that the FieldNode that comes after this is declared in a base
97 /// of the previous FieldNode.
98 class BaseClass final : public FieldNode {
99   const QualType BaseClassT;
100 
101 public:
102   BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
103     assert(!T.isNull());
104     assert(T->getAsCXXRecordDecl());
105   }
106 
107   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
108     llvm_unreachable("This node can never be the final node in the "
109                      "fieldchain!");
110   }
111 
112   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
113 
114   virtual void printNode(llvm::raw_ostream &Out) const override {
115     Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
116   }
117 
118   virtual void printSeparator(llvm::raw_ostream &Out) const override {}
119 
120   virtual bool isBase() const override { return true; }
121 };
122 
123 } // end of anonymous namespace
124 
125 // Utility function declarations.
126 
127 /// Returns the object that was constructed by CtorDecl, or None if that isn't
128 /// possible.
129 // TODO: Refactor this function so that it returns the constructed object's
130 // region.
131 static Optional<nonloc::LazyCompoundVal>
132 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context);
133 
134 /// Checks whether the object constructed by \p Ctor will be analyzed later
135 /// (e.g. if the object is a field of another object, in which case we'd check
136 /// it multiple times).
137 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
138                                       CheckerContext &Context);
139 
140 //===----------------------------------------------------------------------===//
141 //                  Methods for UninitializedObjectChecker.
142 //===----------------------------------------------------------------------===//
143 
144 void UninitializedObjectChecker::checkEndFunction(
145     const ReturnStmt *RS, CheckerContext &Context) const {
146 
147   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
148       Context.getLocationContext()->getDecl());
149   if (!CtorDecl)
150     return;
151 
152   if (!CtorDecl->isUserProvided())
153     return;
154 
155   if (CtorDecl->getParent()->isUnion())
156     return;
157 
158   // This avoids essentially the same error being reported multiple times.
159   if (willObjectBeAnalyzedLater(CtorDecl, Context))
160     return;
161 
162   Optional<nonloc::LazyCompoundVal> Object = getObjectVal(CtorDecl, Context);
163   if (!Object)
164     return;
165 
166   FindUninitializedFields F(Context.getState(), Object->getRegion(),
167                             CheckPointeeInitialization);
168 
169   const UninitFieldMap &UninitFields = F.getUninitFields();
170 
171   if (UninitFields.empty())
172     return;
173 
174   // In non-pedantic mode, if Object's region doesn't contain a single
175   // initialized field, we'll assume that Object was intentionally left
176   // uninitialized.
177   if (!IsPedantic && !F.isAnyFieldInitialized())
178     return;
179 
180   // There are uninitialized fields in the record.
181 
182   ExplodedNode *Node = Context.generateNonFatalErrorNode(Context.getState());
183   if (!Node)
184     return;
185 
186   PathDiagnosticLocation LocUsedForUniqueing;
187   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
188   if (CallSite)
189     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
190         CallSite, Context.getSourceManager(), Node->getLocationContext());
191 
192   // For Plist consumers that don't support notes just yet, we'll convert notes
193   // to warnings.
194   if (ShouldConvertNotesToWarnings) {
195     for (const auto &Pair : UninitFields) {
196 
197       auto Report = llvm::make_unique<BugReport>(
198           *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
199           Node->getLocationContext()->getDecl());
200       Context.emitReport(std::move(Report));
201     }
202     return;
203   }
204 
205   SmallString<100> WarningBuf;
206   llvm::raw_svector_ostream WarningOS(WarningBuf);
207   WarningOS << UninitFields.size() << " uninitialized field"
208             << (UninitFields.size() == 1 ? "" : "s")
209             << " at the end of the constructor call";
210 
211   auto Report = llvm::make_unique<BugReport>(
212       *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
213       Node->getLocationContext()->getDecl());
214 
215   for (const auto &Pair : UninitFields) {
216     Report->addNote(Pair.second,
217                     PathDiagnosticLocation::create(Pair.first->getDecl(),
218                                                    Context.getSourceManager()));
219   }
220   Context.emitReport(std::move(Report));
221 }
222 
223 //===----------------------------------------------------------------------===//
224 //                   Methods for FindUninitializedFields.
225 //===----------------------------------------------------------------------===//
226 
227 FindUninitializedFields::FindUninitializedFields(
228     ProgramStateRef State, const TypedValueRegion *const R,
229     bool CheckPointeeInitialization)
230     : State(State), ObjectR(R),
231       CheckPointeeInitialization(CheckPointeeInitialization) {
232 
233   isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
234 }
235 
236 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain) {
237   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
238           Chain.getUninitRegion()->getDecl()->getLocation()))
239     return false;
240 
241   UninitFieldMap::mapped_type NoteMsgBuf;
242   llvm::raw_svector_ostream OS(NoteMsgBuf);
243   Chain.printNoteMsg(OS);
244   return UninitFields
245       .insert(std::make_pair(Chain.getUninitRegion(), std::move(NoteMsgBuf)))
246       .second;
247 }
248 
249 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
250                                                FieldChainInfo LocalChain) {
251   assert(R->getValueType()->isRecordType() &&
252          !R->getValueType()->isUnionType() &&
253          "This method only checks non-union record objects!");
254 
255   const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
256 
257   if (!RD) {
258     IsAnyFieldInitialized = true;
259     return true;
260   }
261 
262   bool ContainsUninitField = false;
263 
264   // Are all of this non-union's fields initialized?
265   for (const FieldDecl *I : RD->fields()) {
266 
267     const auto FieldVal =
268         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
269     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
270     QualType T = I->getType();
271 
272     // If LocalChain already contains FR, then we encountered a cyclic
273     // reference. In this case, region FR is already under checking at an
274     // earlier node in the directed tree.
275     if (LocalChain.contains(FR))
276       return false;
277 
278     if (T->isStructureOrClassType()) {
279       if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
280         ContainsUninitField = true;
281       continue;
282     }
283 
284     if (T->isUnionType()) {
285       if (isUnionUninit(FR)) {
286         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
287           ContainsUninitField = true;
288       } else
289         IsAnyFieldInitialized = true;
290       continue;
291     }
292 
293     if (T->isArrayType()) {
294       IsAnyFieldInitialized = true;
295       continue;
296     }
297 
298     if (isDereferencableType(T)) {
299       if (isPointerOrReferenceUninit(FR, LocalChain))
300         ContainsUninitField = true;
301       continue;
302     }
303 
304     if (isPrimitiveType(T)) {
305       SVal V = State->getSVal(FieldVal);
306 
307       if (isPrimitiveUninit(V)) {
308         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
309           ContainsUninitField = true;
310       }
311       continue;
312     }
313 
314     llvm_unreachable("All cases are handled!");
315   }
316 
317   // Checking bases.
318   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
319   if (!CXXRD)
320     return ContainsUninitField;
321 
322   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
323     const auto *BaseRegion = State->getLValue(BaseSpec, R)
324                                  .castAs<loc::MemRegionVal>()
325                                  .getRegionAs<TypedValueRegion>();
326 
327     // If the head of the list is also a BaseClass, we'll overwrite it to avoid
328     // note messages like 'this->A::B::x'.
329     if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
330       if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
331                                            BaseClass(BaseSpec.getType()))))
332         ContainsUninitField = true;
333     } else {
334       if (isNonUnionUninit(BaseRegion,
335                            LocalChain.add(BaseClass(BaseSpec.getType()))))
336         ContainsUninitField = true;
337     }
338   }
339 
340   return ContainsUninitField;
341 }
342 
343 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
344   assert(R->getValueType()->isUnionType() &&
345          "This method only checks union objects!");
346   // TODO: Implement support for union fields.
347   return false;
348 }
349 
350 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
351   if (V.isUndef())
352     return true;
353 
354   IsAnyFieldInitialized = true;
355   return false;
356 }
357 
358 //===----------------------------------------------------------------------===//
359 //                       Methods for FieldChainInfo.
360 //===----------------------------------------------------------------------===//
361 
362 const FieldRegion *FieldChainInfo::getUninitRegion() const {
363   assert(!Chain.isEmpty() && "Empty fieldchain!");
364   return (*Chain.begin()).getRegion();
365 }
366 
367 bool FieldChainInfo::contains(const FieldRegion *FR) const {
368   for (const FieldNode &Node : Chain) {
369     if (Node.isSameRegion(FR))
370       return true;
371   }
372   return false;
373 }
374 
375 /// Prints every element except the last to `Out`. Since ImmutableLists store
376 /// elements in reverse order, and have no reverse iterators, we use a
377 /// recursive function to print the fieldchain correctly. The last element in
378 /// the chain is to be printed by `FieldChainInfo::printNoteMsg`.
379 static void printTail(llvm::raw_ostream &Out,
380                       const FieldChainInfo::FieldChainImpl *L);
381 
382 // TODO: This function constructs an incorrect string if a void pointer is a
383 // part of the chain:
384 //
385 //   struct B { int x; }
386 //
387 //   struct A {
388 //     void *vptr;
389 //     A(void* vptr) : vptr(vptr) {}
390 //   };
391 //
392 //   void f() {
393 //     B b;
394 //     A a(&b);
395 //   }
396 //
397 // The note message will be "uninitialized field 'this->vptr->x'", even though
398 // void pointers can't be dereferenced. This should be changed to "uninitialized
399 // field 'static_cast<B*>(this->vptr)->x'".
400 //
401 // TODO: This function constructs an incorrect fieldchain string in the
402 // following case:
403 //
404 //   struct Base { int x; };
405 //   struct D1 : Base {}; struct D2 : Base {};
406 //
407 //   struct MostDerived : D1, D2 {
408 //     MostDerived() {}
409 //   }
410 //
411 // A call to MostDerived::MostDerived() will cause two notes that say
412 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
413 // we need an explicit namespace resolution whether the uninit field was
414 // 'D1::x' or 'D2::x'.
415 void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
416   if (Chain.isEmpty())
417     return;
418 
419   const FieldChainImpl *L = Chain.getInternalPointer();
420   const FieldNode &LastField = L->getHead();
421 
422   LastField.printNoteMsg(Out);
423   Out << '\'';
424 
425   for (const FieldNode &Node : Chain)
426     Node.printPrefix(Out);
427 
428   Out << "this->";
429   printTail(Out, L->getTail());
430   LastField.printNode(Out);
431   Out << '\'';
432 }
433 
434 static void printTail(llvm::raw_ostream &Out,
435                       const FieldChainInfo::FieldChainImpl *L) {
436   if (!L)
437     return;
438 
439   printTail(Out, L->getTail());
440 
441   L->getHead().printNode(Out);
442   L->getHead().printSeparator(Out);
443 }
444 
445 //===----------------------------------------------------------------------===//
446 //                           Utility functions.
447 //===----------------------------------------------------------------------===//
448 
449 static Optional<nonloc::LazyCompoundVal>
450 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context) {
451 
452   Loc ThisLoc = Context.getSValBuilder().getCXXThis(CtorDecl->getParent(),
453                                                     Context.getStackFrame());
454   // Getting the value for 'this'.
455   SVal This = Context.getState()->getSVal(ThisLoc);
456 
457   // Getting the value for '*this'.
458   SVal Object = Context.getState()->getSVal(This.castAs<Loc>());
459 
460   return Object.getAs<nonloc::LazyCompoundVal>();
461 }
462 
463 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
464                                       CheckerContext &Context) {
465 
466   Optional<nonloc::LazyCompoundVal> CurrentObject = getObjectVal(Ctor, Context);
467   if (!CurrentObject)
468     return false;
469 
470   const LocationContext *LC = Context.getLocationContext();
471   while ((LC = LC->getParent())) {
472 
473     // If \p Ctor was called by another constructor.
474     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
475     if (!OtherCtor)
476       continue;
477 
478     Optional<nonloc::LazyCompoundVal> OtherObject =
479         getObjectVal(OtherCtor, Context);
480     if (!OtherObject)
481       continue;
482 
483     // If the CurrentObject is a subregion of OtherObject, it will be analyzed
484     // during the analysis of OtherObject.
485     if (CurrentObject->getRegion()->isSubRegionOf(OtherObject->getRegion()))
486       return true;
487   }
488 
489   return false;
490 }
491 
492 std::string clang::ento::getVariableName(const FieldDecl *Field) {
493   // If Field is a captured lambda variable, Field->getName() will return with
494   // an empty string. We can however acquire it's name from the lambda's
495   // captures.
496   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
497 
498   if (CXXParent && CXXParent->isLambda()) {
499     assert(CXXParent->captures_begin());
500     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
501 
502     if (It->capturesVariable())
503       return llvm::Twine("/*captured variable*/" +
504                          It->getCapturedVar()->getName())
505           .str();
506 
507     if (It->capturesThis())
508       return "/*'this' capture*/";
509 
510     llvm_unreachable("No other capture type is expected!");
511   }
512 
513   return Field->getName();
514 }
515 
516 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
517   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
518   Chk->IsPedantic = Mgr.getAnalyzerOptions().getBooleanOption(
519       "Pedantic", /*DefaultVal*/ false, Chk);
520   Chk->ShouldConvertNotesToWarnings = Mgr.getAnalyzerOptions().getBooleanOption(
521       "NotesAsWarnings", /*DefaultVal*/ false, Chk);
522   Chk->CheckPointeeInitialization = Mgr.getAnalyzerOptions().getBooleanOption(
523       "CheckPointeeInitialization", /*DefaultVal*/ false, Chk);
524 }
525