1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a checker that reports uninitialized fields in objects
11 // created after a constructor call.
12 //
13 // This checker has several options:
//   - "Pedantic" (boolean). If it's not set or is set to false, the checker
15 //     won't emit warnings for objects that don't have at least one initialized
16 //     field. This may be set with
17 //
18 //     `-analyzer-config alpha.cplusplus.UninitializedObject:Pedantic=true`.
19 //
20 //   - "NotesAsWarnings" (boolean). If set to true, the checker will emit a
//     warning for each uninitialized field, as opposed to emitting one warning
//     per constructor call, and listing the uninitialized fields that belong
23 //     to it in notes. Defaults to false.
24 //
25 //     `-analyzer-config \
26 //         alpha.cplusplus.UninitializedObject:NotesAsWarnings=true`.
27 //
28 //   - "CheckPointeeInitialization" (boolean). If set to false, the checker will
29 //     not analyze the pointee of pointer/reference fields, and will only check
30 //     whether the object itself is initialized. Defaults to false.
31 //
32 //     `-analyzer-config \
33 //         alpha.cplusplus.UninitializedObject:CheckPointeeInitialization=true`.
34 //
35 //     TODO: With some clever heuristics, some pointers should be dereferenced
36 //     by default. For example, if the pointee is constructed within the
37 //     constructor call, it's reasonable to say that no external object
//     references it, and we wouldn't generate multiple reports on the same
39 //     pointee.
40 //
41 // To read about how the checker works, refer to the comments in
42 // UninitializedObject.h.
43 //
44 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
45 // complexity of this file.
46 //
47 //===----------------------------------------------------------------------===//
48 
49 #include "UninitializedObject.h"
50 #include "ClangSACheckers.h"
51 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
52 #include "clang/StaticAnalyzer/Core/Checker.h"
53 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
54 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
55 
56 using namespace clang;
57 using namespace clang::ento;
58 
59 namespace {
60 
61 class UninitializedObjectChecker : public Checker<check::EndFunction> {
62   std::unique_ptr<BuiltinBug> BT_uninitField;
63 
64 public:
65   // These fields will be initialized when registering the checker.
66   bool IsPedantic;
67   bool ShouldConvertNotesToWarnings;
68   bool CheckPointeeInitialization;
69 
70   UninitializedObjectChecker()
71       : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
72   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
73 };
74 
75 } // end of anonymous namespace
76 
77 // Utility function declarations.
78 
79 /// Returns the object that was constructed by CtorDecl, or None if that isn't
80 /// possible.
81 // TODO: Refactor this function so that it returns the constructed object's
82 // region.
83 static Optional<nonloc::LazyCompoundVal>
84 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context);
85 
86 /// Checks whether the object constructed by \p Ctor will be analyzed later
87 /// (e.g. if the object is a field of another object, in which case we'd check
88 /// it multiple times).
89 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
90                                CheckerContext &Context);
91 
92 /// Constructs a note message for a given FieldChainInfo object.
93 static void printNoteMessage(llvm::raw_ostream &Out,
94                              const FieldChainInfo &Chain);
95 
96 /// Returns with Field's name. This is a helper function to get the correct name
97 /// even if Field is a captured lambda variable.
98 static StringRef getVariableName(const FieldDecl *Field);
99 
100 //===----------------------------------------------------------------------===//
101 //                  Methods for UninitializedObjectChecker.
102 //===----------------------------------------------------------------------===//
103 
104 void UninitializedObjectChecker::checkEndFunction(
105     const ReturnStmt *RS, CheckerContext &Context) const {
106 
107   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
108       Context.getLocationContext()->getDecl());
109   if (!CtorDecl)
110     return;
111 
112   if (!CtorDecl->isUserProvided())
113     return;
114 
115   if (CtorDecl->getParent()->isUnion())
116     return;
117 
118   // This avoids essentially the same error being reported multiple times.
119   if (willObjectBeAnalyzedLater(CtorDecl, Context))
120     return;
121 
122   Optional<nonloc::LazyCompoundVal> Object = getObjectVal(CtorDecl, Context);
123   if (!Object)
124     return;
125 
126   FindUninitializedFields F(Context.getState(), Object->getRegion(), IsPedantic,
127                             CheckPointeeInitialization);
128 
129   const UninitFieldSet &UninitFields = F.getUninitFields();
130 
131   if (UninitFields.empty())
132     return;
133 
134   // There are uninitialized fields in the record.
135 
136   ExplodedNode *Node = Context.generateNonFatalErrorNode(Context.getState());
137   if (!Node)
138     return;
139 
140   PathDiagnosticLocation LocUsedForUniqueing;
141   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
142   if (CallSite)
143     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
144         CallSite, Context.getSourceManager(), Node->getLocationContext());
145 
146   // For Plist consumers that don't support notes just yet, we'll convert notes
147   // to warnings.
148   if (ShouldConvertNotesToWarnings) {
149     for (const auto &Chain : UninitFields) {
150       SmallString<100> WarningBuf;
151       llvm::raw_svector_ostream WarningOS(WarningBuf);
152 
153       printNoteMessage(WarningOS, Chain);
154 
155       auto Report = llvm::make_unique<BugReport>(
156           *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
157           Node->getLocationContext()->getDecl());
158       Context.emitReport(std::move(Report));
159     }
160     return;
161   }
162 
163   SmallString<100> WarningBuf;
164   llvm::raw_svector_ostream WarningOS(WarningBuf);
165   WarningOS << UninitFields.size() << " uninitialized field"
166             << (UninitFields.size() == 1 ? "" : "s")
167             << " at the end of the constructor call";
168 
169   auto Report = llvm::make_unique<BugReport>(
170       *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
171       Node->getLocationContext()->getDecl());
172 
173   for (const auto &Chain : UninitFields) {
174     SmallString<200> NoteBuf;
175     llvm::raw_svector_ostream NoteOS(NoteBuf);
176 
177     printNoteMessage(NoteOS, Chain);
178 
179     Report->addNote(NoteOS.str(),
180                     PathDiagnosticLocation::create(Chain.getEndOfChain(),
181                                                    Context.getSourceManager()));
182   }
183   Context.emitReport(std::move(Report));
184 }
185 
186 //===----------------------------------------------------------------------===//
187 //                   Methods for FindUninitializedFields.
188 //===----------------------------------------------------------------------===//
189 
190 FindUninitializedFields::FindUninitializedFields(
191     ProgramStateRef State, const TypedValueRegion *const R, bool IsPedantic,
192     bool CheckPointeeInitialization)
193     : State(State), ObjectR(R), IsPedantic(IsPedantic),
194       CheckPointeeInitialization(CheckPointeeInitialization) {}
195 
196 const UninitFieldSet &FindUninitializedFields::getUninitFields() {
197   isNonUnionUninit(ObjectR, FieldChainInfo(Factory));
198 
199   if (!IsPedantic && !IsAnyFieldInitialized)
200     UninitFields.clear();
201 
202   return UninitFields;
203 }
204 
205 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain) {
206   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
207           Chain.getEndOfChain()->getLocation()))
208     return false;
209 
210   return UninitFields.insert(Chain).second;
211 }
212 
213 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
214                                                FieldChainInfo LocalChain) {
215   assert(R->getValueType()->isRecordType() &&
216          !R->getValueType()->isUnionType() &&
217          "This method only checks non-union record objects!");
218 
219   const RecordDecl *RD =
220       R->getValueType()->getAs<RecordType>()->getDecl()->getDefinition();
221   assert(RD && "Referred record has no definition");
222 
223   bool ContainsUninitField = false;
224 
225   // Are all of this non-union's fields initialized?
226   for (const FieldDecl *I : RD->fields()) {
227 
228     const auto FieldVal =
229         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
230     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
231     QualType T = I->getType();
232 
233     // If LocalChain already contains FR, then we encountered a cyclic
234     // reference. In this case, region FR is already under checking at an
235     // earlier node in the directed tree.
236     if (LocalChain.contains(FR))
237       return false;
238 
239     if (T->isStructureOrClassType()) {
240       if (isNonUnionUninit(FR, {LocalChain, FR}))
241         ContainsUninitField = true;
242       continue;
243     }
244 
245     if (T->isUnionType()) {
246       if (isUnionUninit(FR)) {
247         if (addFieldToUninits({LocalChain, FR}))
248           ContainsUninitField = true;
249       } else
250         IsAnyFieldInitialized = true;
251       continue;
252     }
253 
254     if (T->isArrayType()) {
255       IsAnyFieldInitialized = true;
256       continue;
257     }
258 
259     if (T->isPointerType() || T->isReferenceType() || T->isBlockPointerType()) {
260       if (isPointerOrReferenceUninit(FR, LocalChain))
261         ContainsUninitField = true;
262       continue;
263     }
264 
265     if (isPrimitiveType(T)) {
266       SVal V = State->getSVal(FieldVal);
267 
268       if (isPrimitiveUninit(V)) {
269         if (addFieldToUninits({LocalChain, FR}))
270           ContainsUninitField = true;
271       }
272       continue;
273     }
274 
275     llvm_unreachable("All cases are handled!");
276   }
277 
278   // Checking bases.
279   // FIXME: As of now, because of `willObjectBeAnalyzedLater`, objects whose
280   // type is a descendant of another type will emit warnings for uninitalized
281   // inherited members.
282   // This is not the only way to analyze bases of an object -- if we didn't
283   // filter them out, and didn't analyze the bases, this checker would run for
284   // each base of the object in order of base initailization and in theory would
285   // find every uninitalized field. This approach could also make handling
286   // diamond inheritances more easily.
287   //
288   // This rule (that a descendant type's cunstructor is responsible for
289   // initializing inherited data members) is not obvious, and should it should
290   // be.
291   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
292   if (!CXXRD)
293     return ContainsUninitField;
294 
295   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
296     const auto *BaseRegion = State->getLValue(BaseSpec, R)
297                                  .castAs<loc::MemRegionVal>()
298                                  .getRegionAs<TypedValueRegion>();
299 
300     if (isNonUnionUninit(BaseRegion, LocalChain))
301       ContainsUninitField = true;
302   }
303 
304   return ContainsUninitField;
305 }
306 
307 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
308   assert(R->getValueType()->isUnionType() &&
309          "This method only checks union objects!");
310   // TODO: Implement support for union fields.
311   return false;
312 }
313 
314 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
315   if (V.isUndef())
316     return true;
317 
318   IsAnyFieldInitialized = true;
319   return false;
320 }
321 
322 //===----------------------------------------------------------------------===//
323 //                       Methods for FieldChainInfo.
324 //===----------------------------------------------------------------------===//
325 
326 FieldChainInfo::FieldChainInfo(const FieldChainInfo &Other,
327                                const FieldRegion *FR, const bool IsDereferenced)
328     : FieldChainInfo(Other, IsDereferenced) {
329   assert(!contains(FR) && "Can't add a field that is already a part of the "
330                           "fieldchain! Is this a cyclic reference?");
331   Chain = Factory.add(FR, Other.Chain);
332 }
333 
334 bool FieldChainInfo::isPointer() const {
335   assert(!Chain.isEmpty() && "Empty fieldchain!");
336   return (*Chain.begin())->getDecl()->getType()->isPointerType();
337 }
338 
339 bool FieldChainInfo::isDereferenced() const {
340   assert(isPointer() && "Only pointers may or may not be dereferenced!");
341   return IsDereferenced;
342 }
343 
344 const FieldDecl *FieldChainInfo::getEndOfChain() const {
345   assert(!Chain.isEmpty() && "Empty fieldchain!");
346   return (*Chain.begin())->getDecl();
347 }
348 
349 // TODO: This function constructs an incorrect string if a void pointer is a
350 // part of the chain:
351 //
352 //   struct B { int x; }
353 //
354 //   struct A {
355 //     void *vptr;
356 //     A(void* vptr) : vptr(vptr) {}
357 //   };
358 //
359 //   void f() {
360 //     B b;
361 //     A a(&b);
362 //   }
363 //
364 // The note message will be "uninitialized field 'this->vptr->x'", even though
365 // void pointers can't be dereferenced. This should be changed to "uninitialized
366 // field 'static_cast<B*>(this->vptr)->x'".
367 //
368 // TODO: This function constructs an incorrect fieldchain string in the
369 // following case:
370 //
371 //   struct Base { int x; };
372 //   struct D1 : Base {}; struct D2 : Base {};
373 //
374 //   struct MostDerived : D1, D2 {
375 //     MostDerived() {}
376 //   }
377 //
378 // A call to MostDerived::MostDerived() will cause two notes that say
379 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
380 // we need an explicit namespace resolution whether the uninit field was
381 // 'D1::x' or 'D2::x'.
382 void FieldChainInfo::print(llvm::raw_ostream &Out) const {
383   if (Chain.isEmpty())
384     return;
385 
386   const llvm::ImmutableListImpl<const FieldRegion *> *L =
387       Chain.getInternalPointer();
388   printTail(Out, L->getTail());
389   Out << getVariableName(L->getHead()->getDecl());
390 }
391 
392 void FieldChainInfo::printTail(
393     llvm::raw_ostream &Out,
394     const llvm::ImmutableListImpl<const FieldRegion *> *L) {
395   if (!L)
396     return;
397 
398   printTail(Out, L->getTail());
399   const FieldDecl *Field = L->getHead()->getDecl();
400   Out << getVariableName(Field);
401   Out << (Field->getType()->isPointerType() ? "->" : ".");
402 }
403 
404 //===----------------------------------------------------------------------===//
405 //                           Utility functions.
406 //===----------------------------------------------------------------------===//
407 
408 static Optional<nonloc::LazyCompoundVal>
409 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context) {
410 
411   Loc ThisLoc = Context.getSValBuilder().getCXXThis(CtorDecl->getParent(),
412                                                     Context.getStackFrame());
413   // Getting the value for 'this'.
414   SVal This = Context.getState()->getSVal(ThisLoc);
415 
416   // Getting the value for '*this'.
417   SVal Object = Context.getState()->getSVal(This.castAs<Loc>());
418 
419   return Object.getAs<nonloc::LazyCompoundVal>();
420 }
421 
422 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
423                                CheckerContext &Context) {
424 
425   Optional<nonloc::LazyCompoundVal> CurrentObject = getObjectVal(Ctor, Context);
426   if (!CurrentObject)
427     return false;
428 
429   const LocationContext *LC = Context.getLocationContext();
430   while ((LC = LC->getParent())) {
431 
432     // If \p Ctor was called by another constructor.
433     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
434     if (!OtherCtor)
435       continue;
436 
437     Optional<nonloc::LazyCompoundVal> OtherObject =
438         getObjectVal(OtherCtor, Context);
439     if (!OtherObject)
440       continue;
441 
442     // If the CurrentObject is a subregion of OtherObject, it will be analyzed
443     // during the analysis of OtherObject.
444     if (CurrentObject->getRegion()->isSubRegionOf(OtherObject->getRegion()))
445       return true;
446   }
447 
448   return false;
449 }
450 
451 static void printNoteMessage(llvm::raw_ostream &Out,
452                              const FieldChainInfo &Chain) {
453   if (Chain.isPointer()) {
454     if (Chain.isDereferenced())
455       Out << "uninitialized pointee 'this->";
456     else
457       Out << "uninitialized pointer 'this->";
458   } else
459     Out << "uninitialized field 'this->";
460   Chain.print(Out);
461   Out << "'";
462 }
463 
464 static StringRef getVariableName(const FieldDecl *Field) {
465   // If Field is a captured lambda variable, Field->getName() will return with
466   // an empty string. We can however acquire it's name from the lambda's
467   // captures.
468   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
469 
470   if (CXXParent && CXXParent->isLambda()) {
471     assert(CXXParent->captures_begin());
472     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
473     return It->getCapturedVar()->getName();
474   }
475 
476   return Field->getName();
477 }
478 
479 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
480   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
481   Chk->IsPedantic = Mgr.getAnalyzerOptions().getBooleanOption(
482       "Pedantic", /*DefaultVal*/ false, Chk);
483   Chk->ShouldConvertNotesToWarnings = Mgr.getAnalyzerOptions().getBooleanOption(
484       "NotesAsWarnings", /*DefaultVal*/ false, Chk);
485   Chk->CheckPointeeInitialization = Mgr.getAnalyzerOptions().getBooleanOption(
486       "CheckPointeeInitialization", /*DefaultVal*/ false, Chk);
487 }
488