xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp (revision 8a5cfdf7851dcdb4e16c510b133d7d0e79e43fc4)
1 //=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines PointerArithChecker, a builtin checker that checks for
10 // pointer arithmetic on locations other than array elements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/DeclCXX.h"
15 #include "clang/AST/ExprCXX.h"
16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21 #include "llvm/ADT/StringRef.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
namespace {
/// Classification this checker records for a memory region. The enumerator
/// order is significant: the integer value is what gets profiled.
enum class AllocKind {
  /// Result of a non-array, non-overloaded, non-placement `new`.
  SingleObject,
  /// Array `new`, the result of a recognised C allocation function, or a
  /// region that went through an array-to-pointer decay.
  Array,
  /// Member, placement or variadic `operator new`; nothing is assumed.
  Unknown,
  /// Single object interpreted as an array (recorded after a bit cast).
  Reinterpreted
};
} // end anonymous namespace
34 
35 namespace llvm {
36 template <> struct FoldingSetTrait<AllocKind> {
37   static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
38     ID.AddInteger(static_cast<int>(X));
39   }
40 };
41 } // end namespace llvm
42 
43 namespace {
44 class PointerArithChecker
45     : public Checker<
46           check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
47           check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
48           check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
49           check::PostStmt<CallExpr>, check::DeadSymbols> {
50   AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
51   const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
52                                   AllocKind &AKind, CheckerContext &C) const;
53   const MemRegion *getPointedRegion(const MemRegion *Region,
54                                     CheckerContext &C) const;
55   void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
56                                 bool PointedNeeded = false) const;
57   void initAllocIdentifiers(ASTContext &C) const;
58 
59   mutable std::unique_ptr<BugType> BT_pointerArith;
60   mutable std::unique_ptr<BugType> BT_polyArray;
61   mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
62 
63 public:
64   void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
65   void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
66   void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
67   void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
68   void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
69   void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
70   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
71   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
72 };
73 } // end namespace
74 
// Program-state map named RegionState: keys are memory regions, values are
// the AllocKind recorded for that region.
REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
76 
77 void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
78                                            CheckerContext &C) const {
79   // TODO: intentional leak. Some information is garbage collected too early,
80   // see http://reviews.llvm.org/D14203 for further information.
81   /*ProgramStateRef State = C.getState();
82   RegionStateTy RegionStates = State->get<RegionState>();
83   for (const MemRegion *Reg: llvm::make_first_range(RegionStates)) {
84     if (!SR.isLiveRegion(Reg))
85       State = State->remove<RegionState>(Reg);
86   }
87   C.addTransition(State);*/
88 }
89 
90 AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
91                                               const FunctionDecl *FD) const {
92   // This checker try not to assume anything about placement and overloaded
93   // new to avoid false positives.
94   if (isa<CXXMethodDecl>(FD))
95     return AllocKind::Unknown;
96   if (FD->getNumParams() != 1 || FD->isVariadic())
97     return AllocKind::Unknown;
98   if (NE->isArray())
99     return AllocKind::Array;
100 
101   return AllocKind::SingleObject;
102 }
103 
104 const MemRegion *
105 PointerArithChecker::getPointedRegion(const MemRegion *Region,
106                                       CheckerContext &C) const {
107   assert(Region);
108   ProgramStateRef State = C.getState();
109   SVal S = State->getSVal(Region);
110   return S.getAsRegion();
111 }
112 
113 /// Checks whether a region is the part of an array.
114 /// In case there is a derived to base cast above the array element, the
115 /// Polymorphic output value is set to true. AKind output value is set to the
116 /// allocation kind of the inspected region.
117 const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
118                                                      bool &Polymorphic,
119                                                      AllocKind &AKind,
120                                                      CheckerContext &C) const {
121   assert(Region);
122   while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Region)) {
123     Region = BaseRegion->getSuperRegion();
124     Polymorphic = true;
125   }
126   if (const auto *ElemRegion = dyn_cast<ElementRegion>(Region)) {
127     Region = ElemRegion->getSuperRegion();
128   }
129 
130   ProgramStateRef State = C.getState();
131   if (const AllocKind *Kind = State->get<RegionState>(Region)) {
132     AKind = *Kind;
133     if (*Kind == AllocKind::Array)
134       return Region;
135     else
136       return nullptr;
137   }
138   // When the region is symbolic and we do not have any information about it,
139   // assume that this is an array to avoid false positives.
140   if (isa<SymbolicRegion>(Region))
141     return Region;
142 
143   // No AllocKind stored and not symbolic, assume that it points to a single
144   // object.
145   return nullptr;
146 }
147 
148 void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
149                                                    CheckerContext &C,
150                                                    bool PointedNeeded) const {
151   SourceRange SR = E->getSourceRange();
152   if (SR.isInvalid())
153     return;
154 
155   ProgramStateRef State = C.getState();
156   const MemRegion *Region = C.getSVal(E).getAsRegion();
157   if (!Region)
158     return;
159   if (PointedNeeded)
160     Region = getPointedRegion(Region, C);
161   if (!Region)
162     return;
163 
164   bool IsPolymorphic = false;
165   AllocKind Kind = AllocKind::Unknown;
166   if (const MemRegion *ArrayRegion =
167           getArrayRegion(Region, IsPolymorphic, Kind, C)) {
168     if (!IsPolymorphic)
169       return;
170     if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
171       if (!BT_polyArray)
172         BT_polyArray.reset(new BugType(this, "Dangerous pointer arithmetic"));
173       constexpr llvm::StringLiteral Msg =
174           "Pointer arithmetic on a pointer to base class is dangerous "
175           "because derived and base class may have different size.";
176       auto R = std::make_unique<PathSensitiveBugReport>(*BT_polyArray, Msg, N);
177       R->addRange(E->getSourceRange());
178       R->markInteresting(ArrayRegion);
179       C.emitReport(std::move(R));
180     }
181     return;
182   }
183 
184   if (Kind == AllocKind::Reinterpreted)
185     return;
186 
187   // We might not have enough information about symbolic regions.
188   if (Kind != AllocKind::SingleObject &&
189       Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
190     return;
191 
192   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
193     if (!BT_pointerArith)
194       BT_pointerArith.reset(new BugType(this, "Dangerous pointer arithmetic"));
195     constexpr llvm::StringLiteral Msg =
196         "Pointer arithmetic on non-array variables relies on memory layout, "
197         "which is dangerous.";
198     auto R = std::make_unique<PathSensitiveBugReport>(*BT_pointerArith, Msg, N);
199     R->addRange(SR);
200     R->markInteresting(Region);
201     C.emitReport(std::move(R));
202   }
203 }
204 
205 void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
206   if (!AllocFunctions.empty())
207     return;
208   AllocFunctions.insert(&C.Idents.get("alloca"));
209   AllocFunctions.insert(&C.Idents.get("malloc"));
210   AllocFunctions.insert(&C.Idents.get("realloc"));
211   AllocFunctions.insert(&C.Idents.get("calloc"));
212   AllocFunctions.insert(&C.Idents.get("valloc"));
213 }
214 
215 void PointerArithChecker::checkPostStmt(const CallExpr *CE,
216                                         CheckerContext &C) const {
217   ProgramStateRef State = C.getState();
218   const FunctionDecl *FD = C.getCalleeDecl(CE);
219   if (!FD)
220     return;
221   IdentifierInfo *FunI = FD->getIdentifier();
222   initAllocIdentifiers(C.getASTContext());
223   if (AllocFunctions.count(FunI) == 0)
224     return;
225 
226   SVal SV = C.getSVal(CE);
227   const MemRegion *Region = SV.getAsRegion();
228   if (!Region)
229     return;
230   // Assume that C allocation functions allocate arrays to avoid false
231   // positives.
232   // TODO: Add heuristics to distinguish alloc calls that allocates single
233   // objecs.
234   State = State->set<RegionState>(Region, AllocKind::Array);
235   C.addTransition(State);
236 }
237 
238 void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
239                                         CheckerContext &C) const {
240   const FunctionDecl *FD = NE->getOperatorNew();
241   if (!FD)
242     return;
243 
244   AllocKind Kind = getKindOfNewOp(NE, FD);
245 
246   ProgramStateRef State = C.getState();
247   SVal AllocedVal = C.getSVal(NE);
248   const MemRegion *Region = AllocedVal.getAsRegion();
249   if (!Region)
250     return;
251   State = State->set<RegionState>(Region, Kind);
252   C.addTransition(State);
253 }
254 
255 void PointerArithChecker::checkPostStmt(const CastExpr *CE,
256                                         CheckerContext &C) const {
257   if (CE->getCastKind() != CastKind::CK_BitCast)
258     return;
259 
260   const Expr *CastedExpr = CE->getSubExpr();
261   ProgramStateRef State = C.getState();
262   SVal CastedVal = C.getSVal(CastedExpr);
263 
264   const MemRegion *Region = CastedVal.getAsRegion();
265   if (!Region)
266     return;
267 
268   // Suppress reinterpret casted hits.
269   State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
270   C.addTransition(State);
271 }
272 
273 void PointerArithChecker::checkPreStmt(const CastExpr *CE,
274                                        CheckerContext &C) const {
275   if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
276     return;
277 
278   const Expr *CastedExpr = CE->getSubExpr();
279   ProgramStateRef State = C.getState();
280   SVal CastedVal = C.getSVal(CastedExpr);
281 
282   const MemRegion *Region = CastedVal.getAsRegion();
283   if (!Region)
284     return;
285 
286   if (const AllocKind *Kind = State->get<RegionState>(Region)) {
287     if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
288       return;
289   }
290   State = State->set<RegionState>(Region, AllocKind::Array);
291   C.addTransition(State);
292 }
293 
294 void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
295                                        CheckerContext &C) const {
296   if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
297     return;
298   reportPointerArithMisuse(UOp->getSubExpr(), C, true);
299 }
300 
301 void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
302                                        CheckerContext &C) const {
303   SVal Idx = C.getSVal(SubsExpr->getIdx());
304 
305   // Indexing with 0 is OK.
306   if (Idx.isZeroConstant())
307     return;
308 
309   // Indexing vector-type expressions is also OK.
310   if (SubsExpr->getBase()->getType()->isVectorType())
311     return;
312   reportPointerArithMisuse(SubsExpr->getBase(), C);
313 }
314 
315 void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
316                                        CheckerContext &C) const {
317   BinaryOperatorKind OpKind = BOp->getOpcode();
318   if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
319     return;
320 
321   const Expr *Lhs = BOp->getLHS();
322   const Expr *Rhs = BOp->getRHS();
323   ProgramStateRef State = C.getState();
324 
325   if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
326     SVal RHSVal = C.getSVal(Rhs);
327     if (State->isNull(RHSVal).isConstrainedTrue())
328       return;
329     reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
330   }
331   // The int += ptr; case is not valid C++.
332   if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
333     SVal LHSVal = C.getSVal(Lhs);
334     if (State->isNull(LHSVal).isConstrainedTrue())
335       return;
336     reportPointerArithMisuse(Rhs, C);
337   }
338 }
339 
340 void ento::registerPointerArithChecker(CheckerManager &mgr) {
341   mgr.registerChecker<PointerArithChecker>();
342 }
343 
344 bool ento::shouldRegisterPointerArithChecker(const CheckerManager &mgr) {
345   return true;
346 }
347