10b57cec5SDimitry Andric //===----- UninitializedPointee.cpp ------------------------------*- C++ -*-==//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines functions and methods for handling pointers and references
100b57cec5SDimitry Andric // to reduce the size and complexity of UninitializedObjectChecker.cpp.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // To read about command line options and documentation about how the checker
130b57cec5SDimitry Andric // works, refer to UninitializedObjectChecker.h.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "UninitializedObject.h"
180b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
190b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
200b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21a7dea167SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
22*bdd1243dSDimitry Andric #include <optional>
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric using namespace clang;
250b57cec5SDimitry Andric using namespace clang::ento;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric namespace {
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric /// Represents a pointer or a reference field.
300b57cec5SDimitry Andric class LocField final : public FieldNode {
310b57cec5SDimitry Andric   /// We'll store whether the pointee or the pointer itself is uninitialited.
320b57cec5SDimitry Andric   const bool IsDereferenced;
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric public:
LocField(const FieldRegion * FR,const bool IsDereferenced=true)350b57cec5SDimitry Andric   LocField(const FieldRegion *FR, const bool IsDereferenced = true)
360b57cec5SDimitry Andric       : FieldNode(FR), IsDereferenced(IsDereferenced) {}
370b57cec5SDimitry Andric 
printNoteMsg(llvm::raw_ostream & Out) const38972a253aSDimitry Andric   void printNoteMsg(llvm::raw_ostream &Out) const override {
390b57cec5SDimitry Andric     if (IsDereferenced)
400b57cec5SDimitry Andric       Out << "uninitialized pointee ";
410b57cec5SDimitry Andric     else
420b57cec5SDimitry Andric       Out << "uninitialized pointer ";
430b57cec5SDimitry Andric   }
440b57cec5SDimitry Andric 
printPrefix(llvm::raw_ostream & Out) const45972a253aSDimitry Andric   void printPrefix(llvm::raw_ostream &Out) const override {}
460b57cec5SDimitry Andric 
printNode(llvm::raw_ostream & Out) const47972a253aSDimitry Andric   void printNode(llvm::raw_ostream &Out) const override {
480b57cec5SDimitry Andric     Out << getVariableName(getDecl());
490b57cec5SDimitry Andric   }
500b57cec5SDimitry Andric 
printSeparator(llvm::raw_ostream & Out) const51972a253aSDimitry Andric   void printSeparator(llvm::raw_ostream &Out) const override {
520b57cec5SDimitry Andric     if (getDecl()->getType()->isPointerType())
530b57cec5SDimitry Andric       Out << "->";
540b57cec5SDimitry Andric     else
550b57cec5SDimitry Andric       Out << '.';
560b57cec5SDimitry Andric   }
570b57cec5SDimitry Andric };
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric /// Represents a nonloc::LocAsInteger or void* field, that point to objects, but
600b57cec5SDimitry Andric /// needs to be casted back to its dynamic type for a correct note message.
610b57cec5SDimitry Andric class NeedsCastLocField final : public FieldNode {
620b57cec5SDimitry Andric   QualType CastBackType;
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric public:
NeedsCastLocField(const FieldRegion * FR,const QualType & T)650b57cec5SDimitry Andric   NeedsCastLocField(const FieldRegion *FR, const QualType &T)
660b57cec5SDimitry Andric       : FieldNode(FR), CastBackType(T) {}
670b57cec5SDimitry Andric 
printNoteMsg(llvm::raw_ostream & Out) const68972a253aSDimitry Andric   void printNoteMsg(llvm::raw_ostream &Out) const override {
690b57cec5SDimitry Andric     Out << "uninitialized pointee ";
700b57cec5SDimitry Andric   }
710b57cec5SDimitry Andric 
printPrefix(llvm::raw_ostream & Out) const72972a253aSDimitry Andric   void printPrefix(llvm::raw_ostream &Out) const override {
730b57cec5SDimitry Andric     // If this object is a nonloc::LocAsInteger.
740b57cec5SDimitry Andric     if (getDecl()->getType()->isIntegerType())
750b57cec5SDimitry Andric       Out << "reinterpret_cast";
760b57cec5SDimitry Andric     // If this pointer's dynamic type is different then it's static type.
770b57cec5SDimitry Andric     else
780b57cec5SDimitry Andric       Out << "static_cast";
790b57cec5SDimitry Andric     Out << '<' << CastBackType.getAsString() << ">(";
800b57cec5SDimitry Andric   }
810b57cec5SDimitry Andric 
printNode(llvm::raw_ostream & Out) const82972a253aSDimitry Andric   void printNode(llvm::raw_ostream &Out) const override {
830b57cec5SDimitry Andric     Out << getVariableName(getDecl()) << ')';
840b57cec5SDimitry Andric   }
850b57cec5SDimitry Andric 
printSeparator(llvm::raw_ostream & Out) const86972a253aSDimitry Andric   void printSeparator(llvm::raw_ostream &Out) const override { Out << "->"; }
870b57cec5SDimitry Andric };
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric /// Represents a Loc field that points to itself.
900b57cec5SDimitry Andric class CyclicLocField final : public FieldNode {
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric public:
CyclicLocField(const FieldRegion * FR)930b57cec5SDimitry Andric   CyclicLocField(const FieldRegion *FR) : FieldNode(FR) {}
940b57cec5SDimitry Andric 
printNoteMsg(llvm::raw_ostream & Out) const95972a253aSDimitry Andric   void printNoteMsg(llvm::raw_ostream &Out) const override {
960b57cec5SDimitry Andric     Out << "object references itself ";
970b57cec5SDimitry Andric   }
980b57cec5SDimitry Andric 
printPrefix(llvm::raw_ostream & Out) const99972a253aSDimitry Andric   void printPrefix(llvm::raw_ostream &Out) const override {}
1000b57cec5SDimitry Andric 
printNode(llvm::raw_ostream & Out) const101972a253aSDimitry Andric   void printNode(llvm::raw_ostream &Out) const override {
1020b57cec5SDimitry Andric     Out << getVariableName(getDecl());
1030b57cec5SDimitry Andric   }
1040b57cec5SDimitry Andric 
printSeparator(llvm::raw_ostream & Out) const105972a253aSDimitry Andric   void printSeparator(llvm::raw_ostream &Out) const override {
1060b57cec5SDimitry Andric     llvm_unreachable("CyclicLocField objects must be the last node of the "
1070b57cec5SDimitry Andric                      "fieldchain!");
1080b57cec5SDimitry Andric   }
1090b57cec5SDimitry Andric };
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric } // end of anonymous namespace
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric // Utility function declarations.
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric struct DereferenceInfo {
1160b57cec5SDimitry Andric   const TypedValueRegion *R;
1170b57cec5SDimitry Andric   const bool NeedsCastBack;
1180b57cec5SDimitry Andric   const bool IsCyclic;
DereferenceInfoDereferenceInfo1190b57cec5SDimitry Andric   DereferenceInfo(const TypedValueRegion *R, bool NCB, bool IC)
1200b57cec5SDimitry Andric       : R(R), NeedsCastBack(NCB), IsCyclic(IC) {}
1210b57cec5SDimitry Andric };
1220b57cec5SDimitry Andric 
/// Dereferences \p FR and returns the pointee's region, along with whether it
/// needs to be cast back to its location type. If for whatever reason
/// dereferencing fails, returns std::nullopt.
126*bdd1243dSDimitry Andric static std::optional<DereferenceInfo> dereference(ProgramStateRef State,
1270b57cec5SDimitry Andric                                                   const FieldRegion *FR);
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric /// Returns whether \p T can be (transitively) dereferenced to a void pointer
1300b57cec5SDimitry Andric /// type (void*, void**, ...).
1310b57cec5SDimitry Andric static bool isVoidPointer(QualType T);
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1340b57cec5SDimitry Andric //                   Methods for FindUninitializedFields.
1350b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1360b57cec5SDimitry Andric 
isDereferencableUninit(const FieldRegion * FR,FieldChainInfo LocalChain)1370b57cec5SDimitry Andric bool FindUninitializedFields::isDereferencableUninit(
1380b57cec5SDimitry Andric     const FieldRegion *FR, FieldChainInfo LocalChain) {
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric   SVal V = State->getSVal(FR);
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric   assert((isDereferencableType(FR->getDecl()->getType()) ||
14381ad6265SDimitry Andric           isa<nonloc::LocAsInteger>(V)) &&
1440b57cec5SDimitry Andric          "This method only checks dereferenceable objects!");
1450b57cec5SDimitry Andric 
14681ad6265SDimitry Andric   if (V.isUnknown() || isa<loc::ConcreteInt>(V)) {
1470b57cec5SDimitry Andric     IsAnyFieldInitialized = true;
1480b57cec5SDimitry Andric     return false;
1490b57cec5SDimitry Andric   }
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric   if (V.isUndef()) {
1520b57cec5SDimitry Andric     return addFieldToUninits(
1530b57cec5SDimitry Andric         LocalChain.add(LocField(FR, /*IsDereferenced*/ false)), FR);
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric   if (!Opts.CheckPointeeInitialization) {
1570b57cec5SDimitry Andric     IsAnyFieldInitialized = true;
1580b57cec5SDimitry Andric     return false;
1590b57cec5SDimitry Andric   }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   // At this point the pointer itself is initialized and points to a valid
1620b57cec5SDimitry Andric   // location, we'll now check the pointee.
163*bdd1243dSDimitry Andric   std::optional<DereferenceInfo> DerefInfo = dereference(State, FR);
1640b57cec5SDimitry Andric   if (!DerefInfo) {
1650b57cec5SDimitry Andric     IsAnyFieldInitialized = true;
1660b57cec5SDimitry Andric     return false;
1670b57cec5SDimitry Andric   }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   if (DerefInfo->IsCyclic)
1700b57cec5SDimitry Andric     return addFieldToUninits(LocalChain.add(CyclicLocField(FR)), FR);
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   const TypedValueRegion *R = DerefInfo->R;
1730b57cec5SDimitry Andric   const bool NeedsCastBack = DerefInfo->NeedsCastBack;
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   QualType DynT = R->getLocationType();
1760b57cec5SDimitry Andric   QualType PointeeT = DynT->getPointeeType();
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric   if (PointeeT->isStructureOrClassType()) {
1790b57cec5SDimitry Andric     if (NeedsCastBack)
1800b57cec5SDimitry Andric       return isNonUnionUninit(R, LocalChain.add(NeedsCastLocField(FR, DynT)));
1810b57cec5SDimitry Andric     return isNonUnionUninit(R, LocalChain.add(LocField(FR)));
1820b57cec5SDimitry Andric   }
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   if (PointeeT->isUnionType()) {
1850b57cec5SDimitry Andric     if (isUnionUninit(R)) {
1860b57cec5SDimitry Andric       if (NeedsCastBack)
1870b57cec5SDimitry Andric         return addFieldToUninits(LocalChain.add(NeedsCastLocField(FR, DynT)),
1880b57cec5SDimitry Andric                                  R);
1890b57cec5SDimitry Andric       return addFieldToUninits(LocalChain.add(LocField(FR)), R);
1900b57cec5SDimitry Andric     } else {
1910b57cec5SDimitry Andric       IsAnyFieldInitialized = true;
1920b57cec5SDimitry Andric       return false;
1930b57cec5SDimitry Andric     }
1940b57cec5SDimitry Andric   }
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric   if (PointeeT->isArrayType()) {
1970b57cec5SDimitry Andric     IsAnyFieldInitialized = true;
1980b57cec5SDimitry Andric     return false;
1990b57cec5SDimitry Andric   }
2000b57cec5SDimitry Andric 
2010b57cec5SDimitry Andric   assert((isPrimitiveType(PointeeT) || isDereferencableType(PointeeT)) &&
2020b57cec5SDimitry Andric          "At this point FR must either have a primitive dynamic type, or it "
2030b57cec5SDimitry Andric          "must be a null, undefined, unknown or concrete pointer!");
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric   SVal PointeeV = State->getSVal(R);
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric   if (isPrimitiveUninit(PointeeV)) {
2080b57cec5SDimitry Andric     if (NeedsCastBack)
2090b57cec5SDimitry Andric       return addFieldToUninits(LocalChain.add(NeedsCastLocField(FR, DynT)), R);
2100b57cec5SDimitry Andric     return addFieldToUninits(LocalChain.add(LocField(FR)), R);
2110b57cec5SDimitry Andric   }
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric   IsAnyFieldInitialized = true;
2140b57cec5SDimitry Andric   return false;
2150b57cec5SDimitry Andric }
2160b57cec5SDimitry Andric 
2170b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2180b57cec5SDimitry Andric //                           Utility functions.
2190b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2200b57cec5SDimitry Andric 
dereference(ProgramStateRef State,const FieldRegion * FR)221*bdd1243dSDimitry Andric static std::optional<DereferenceInfo> dereference(ProgramStateRef State,
2220b57cec5SDimitry Andric                                                   const FieldRegion *FR) {
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric   llvm::SmallSet<const TypedValueRegion *, 5> VisitedRegions;
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   SVal V = State->getSVal(FR);
2270b57cec5SDimitry Andric   assert(V.getAsRegion() && "V must have an underlying region!");
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric   // If the static type of the field is a void pointer, or it is a
2300b57cec5SDimitry Andric   // nonloc::LocAsInteger, we need to cast it back to the dynamic type before
2310b57cec5SDimitry Andric   // dereferencing.
23281ad6265SDimitry Andric   bool NeedsCastBack =
23381ad6265SDimitry Andric       isVoidPointer(FR->getDecl()->getType()) || isa<nonloc::LocAsInteger>(V);
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   // The region we'd like to acquire.
2360b57cec5SDimitry Andric   const auto *R = V.getAsRegion()->getAs<TypedValueRegion>();
2370b57cec5SDimitry Andric   if (!R)
238*bdd1243dSDimitry Andric     return std::nullopt;
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   VisitedRegions.insert(R);
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric   // We acquire the dynamic type of R,
2430b57cec5SDimitry Andric   QualType DynT = R->getLocationType();
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   while (const MemRegion *Tmp = State->getSVal(R, DynT).getAsRegion()) {
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric     R = Tmp->getAs<TypedValueRegion>();
2480b57cec5SDimitry Andric     if (!R)
249*bdd1243dSDimitry Andric       return std::nullopt;
2500b57cec5SDimitry Andric 
2510b57cec5SDimitry Andric     // We found a cyclic pointer, like int *ptr = (int *)&ptr.
2520b57cec5SDimitry Andric     if (!VisitedRegions.insert(R).second)
2530b57cec5SDimitry Andric       return DereferenceInfo{R, NeedsCastBack, /*IsCyclic*/ true};
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric     DynT = R->getLocationType();
2560b57cec5SDimitry Andric     // In order to ensure that this loop terminates, we're also checking the
2570b57cec5SDimitry Andric     // dynamic type of R, since type hierarchy is finite.
2580b57cec5SDimitry Andric     if (isDereferencableType(DynT->getPointeeType()))
2590b57cec5SDimitry Andric       break;
2600b57cec5SDimitry Andric   }
2610b57cec5SDimitry Andric 
262a7dea167SDimitry Andric   while (isa<CXXBaseObjectRegion>(R)) {
2630b57cec5SDimitry Andric     NeedsCastBack = true;
264a7dea167SDimitry Andric     const auto *SuperR = dyn_cast<TypedValueRegion>(R->getSuperRegion());
265a7dea167SDimitry Andric     if (!SuperR)
2660b57cec5SDimitry Andric       break;
267a7dea167SDimitry Andric 
268a7dea167SDimitry Andric     R = SuperR;
2690b57cec5SDimitry Andric   }
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric   return DereferenceInfo{R, NeedsCastBack, /*IsCyclic*/ false};
2720b57cec5SDimitry Andric }
2730b57cec5SDimitry Andric 
isVoidPointer(QualType T)2740b57cec5SDimitry Andric static bool isVoidPointer(QualType T) {
2750b57cec5SDimitry Andric   while (!T.isNull()) {
2760b57cec5SDimitry Andric     if (T->isVoidPointerType())
2770b57cec5SDimitry Andric       return true;
2780b57cec5SDimitry Andric     T = T->getPointeeType();
2790b57cec5SDimitry Andric   }
2800b57cec5SDimitry Andric   return false;
2810b57cec5SDimitry Andric }
282