xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 455bd58d4e76c12031825d3077edb91ca92de0bc)
1 //= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "clang/StaticAnalyzer/Core/Checker.h"
17 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h"
21 #include "llvm/ADT/StringSwitch.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
26 namespace {
27 class CStringChecker : public Checker< eval::Call,
28                                          check::PreStmt<DeclStmt>,
29                                          check::LiveSymbols,
30                                          check::DeadSymbols,
31                                          check::RegionChanges
32                                          > {
33   mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite,
34                                    BT_Overlap, BT_NotCString,
35                                    BT_AdditionOverflow;
36 public:
37   static void *getTag() { static int tag; return &tag; }
38 
39   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
40   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
41   void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const;
42   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
43   bool wantsRegionChangeUpdate(const GRState *state) const;
44 
45   const GRState *checkRegionChanges(const GRState *state,
46                                     const StoreManager::InvalidatedSymbols *,
47                                     const MemRegion * const *Begin,
48                                     const MemRegion * const *End) const;
49 
50   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
51                                           const CallExpr *) const;
52 
53   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
54   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
55   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
56   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
57   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
58                       const GRState *state,
59                       const Expr *Size, const Expr *Source, const Expr *Dest,
60                       bool Restricted = false,
61                       bool IsMempcpy = false) const;
62 
63   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
64 
65   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
66   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
67   void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
68                            bool IsStrnlen = false) const;
69 
70   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
71   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
72   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
73   void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd,
74                         bool isBounded, bool isAppending) const;
75 
76   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
77   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
78 
79   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
80   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
81   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
82   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
83   void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
84                         bool isBounded = false, bool ignoreCase = false) const;
85 
86   // Utility methods
87   std::pair<const GRState*, const GRState*>
88   static assumeZero(CheckerContext &C,
89                     const GRState *state, SVal V, QualType Ty);
90 
91   static const GRState *setCStringLength(const GRState *state,
92                                          const MemRegion *MR, SVal strLength);
93   static SVal getCStringLengthForRegion(CheckerContext &C,
94                                         const GRState *&state,
95                                         const Expr *Ex, const MemRegion *MR,
96                                         bool hypothetical);
97   SVal getCStringLength(CheckerContext &C, const GRState *&state,
98                         const Expr *Ex, SVal Buf,
99                         bool hypothetical = false) const;
100 
101   const StringLiteral *getCStringLiteral(CheckerContext &C,
102                                          const GRState *&state,
103                                          const Expr *expr,
104                                          SVal val) const;
105 
106   static const GRState *InvalidateBuffer(CheckerContext &C,
107                                          const GRState *state,
108                                          const Expr *Ex, SVal V);
109 
110   static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
111                               const MemRegion *MR);
112 
113   // Re-usable checks
114   const GRState *checkNonNull(CheckerContext &C, const GRState *state,
115                                const Expr *S, SVal l) const;
116   const GRState *CheckLocation(CheckerContext &C, const GRState *state,
117                                const Expr *S, SVal l,
118                                bool IsDestination = false) const;
119   const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
120                                    const Expr *Size,
121                                    const Expr *FirstBuf,
122                                    const Expr *SecondBuf = NULL,
123                                    bool FirstIsDestination = false) const;
124   const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
125                               const Expr *Size, const Expr *First,
126                               const Expr *Second) const;
127   void emitOverlapBug(CheckerContext &C, const GRState *state,
128                       const Stmt *First, const Stmt *Second) const;
129   const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state,
130                                        NonLoc left, NonLoc right) const;
131 };
132 
133 class CStringLength {
134 public:
135   typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
136 };
137 } //end anonymous namespace
138 
139 namespace clang {
140 namespace ento {
141   template <>
142   struct GRStateTrait<CStringLength>
143     : public GRStatePartialTrait<CStringLength::EntryMap> {
144     static void *GDMIndex() { return CStringChecker::getTag(); }
145   };
146 }
147 }
148 
149 //===----------------------------------------------------------------------===//
150 // Individual checks and utility methods.
151 //===----------------------------------------------------------------------===//
152 
153 std::pair<const GRState*, const GRState*>
154 CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V,
155                            QualType Ty) {
156   DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
157   if (!val)
158     return std::pair<const GRState*, const GRState *>(state, state);
159 
160   SValBuilder &svalBuilder = C.getSValBuilder();
161   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
162   return state->assume(svalBuilder.evalEQ(state, *val, zero));
163 }
164 
165 const GRState *CStringChecker::checkNonNull(CheckerContext &C,
166                                             const GRState *state,
167                                             const Expr *S, SVal l) const {
168   // If a previous check has failed, propagate the failure.
169   if (!state)
170     return NULL;
171 
172   const GRState *stateNull, *stateNonNull;
173   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
174 
175   if (stateNull && !stateNonNull) {
176     ExplodedNode *N = C.generateSink(stateNull);
177     if (!N)
178       return NULL;
179 
180     if (!BT_Null)
181       BT_Null.reset(new BuiltinBug("API",
182         "Null pointer argument in call to byte string function"));
183 
184     // Generate a report for this bug.
185     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
186     EnhancedBugReport *report = new EnhancedBugReport(*BT,
187                                                       BT->getDescription(), N);
188 
189     report->addRange(S->getSourceRange());
190     report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S);
191     C.EmitReport(report);
192     return NULL;
193   }
194 
195   // From here on, assume that the value is non-null.
196   assert(stateNonNull);
197   return stateNonNull;
198 }
199 
200 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
201 const GRState *CStringChecker::CheckLocation(CheckerContext &C,
202                                              const GRState *state,
203                                              const Expr *S, SVal l,
204                                              bool IsDestination) const {
205   // If a previous check has failed, propagate the failure.
206   if (!state)
207     return NULL;
208 
209   // Check for out of bound array element access.
210   const MemRegion *R = l.getAsRegion();
211   if (!R)
212     return state;
213 
214   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
215   if (!ER)
216     return state;
217 
218   assert(ER->getValueType() == C.getASTContext().CharTy &&
219     "CheckLocation should only be called with char* ElementRegions");
220 
221   // Get the size of the array.
222   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
223   SValBuilder &svalBuilder = C.getSValBuilder();
224   SVal Extent =
225     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
226   DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
227 
228   // Get the index of the accessed element.
229   DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
230 
231   const GRState *StInBound = state->assumeInBound(Idx, Size, true);
232   const GRState *StOutBound = state->assumeInBound(Idx, Size, false);
233   if (StOutBound && !StInBound) {
234     ExplodedNode *N = C.generateSink(StOutBound);
235     if (!N)
236       return NULL;
237 
238     BuiltinBug *BT;
239     if (IsDestination) {
240       if (!BT_BoundsWrite) {
241         BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access",
242           "Byte string function overflows destination buffer"));
243       }
244       BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get());
245     } else {
246       if (!BT_Bounds) {
247         BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
248           "Byte string function accesses out-of-bound array element"));
249       }
250       BT = static_cast<BuiltinBug*>(BT_Bounds.get());
251     }
252 
253     // FIXME: It would be nice to eventually make this diagnostic more clear,
254     // e.g., by referencing the original declaration or by saying *why* this
255     // reference is outside the range.
256 
257     // Generate a report for this bug.
258     RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
259 
260     report->addRange(S->getSourceRange());
261     C.EmitReport(report);
262     return NULL;
263   }
264 
265   // Array bound check succeeded.  From this point forward the array bound
266   // should always succeed.
267   return StInBound;
268 }
269 
270 const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
271                                                  const GRState *state,
272                                                  const Expr *Size,
273                                                  const Expr *FirstBuf,
274                                                  const Expr *SecondBuf,
275                                                 bool FirstIsDestination) const {
276   // If a previous check has failed, propagate the failure.
277   if (!state)
278     return NULL;
279 
280   SValBuilder &svalBuilder = C.getSValBuilder();
281   ASTContext &Ctx = svalBuilder.getContext();
282 
283   QualType sizeTy = Size->getType();
284   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
285 
286   // Check that the first buffer is non-null.
287   SVal BufVal = state->getSVal(FirstBuf);
288   state = checkNonNull(C, state, FirstBuf, BufVal);
289   if (!state)
290     return NULL;
291 
292   // Get the access length and make sure it is known.
293   SVal LengthVal = state->getSVal(Size);
294   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
295   if (!Length)
296     return state;
297 
298   // Compute the offset of the last element to be accessed: size-1.
299   NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
300   NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
301                                                     *Length, One, sizeTy));
302 
303   // Check that the first buffer is sufficiently long.
304   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
305   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
306     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
307                                           LastOffset, PtrTy);
308     state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
309 
310     // If the buffer isn't large enough, abort.
311     if (!state)
312       return NULL;
313   }
314 
315   // If there's a second buffer, check it as well.
316   if (SecondBuf) {
317     BufVal = state->getSVal(SecondBuf);
318     state = checkNonNull(C, state, SecondBuf, BufVal);
319     if (!state)
320       return NULL;
321 
322     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
323     if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
324       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
325                                             LastOffset, PtrTy);
326       state = CheckLocation(C, state, SecondBuf, BufEnd);
327     }
328   }
329 
330   // Large enough or not, return this state!
331   return state;
332 }
333 
334 const GRState *CStringChecker::CheckOverlap(CheckerContext &C,
335                                             const GRState *state,
336                                             const Expr *Size,
337                                             const Expr *First,
338                                             const Expr *Second) const {
339   // Do a simple check for overlap: if the two arguments are from the same
340   // buffer, see if the end of the first is greater than the start of the second
341   // or vice versa.
342 
343   // If a previous check has failed, propagate the failure.
344   if (!state)
345     return NULL;
346 
347   const GRState *stateTrue, *stateFalse;
348 
349   // Get the buffer values and make sure they're known locations.
350   SVal firstVal = state->getSVal(First);
351   SVal secondVal = state->getSVal(Second);
352 
353   Loc *firstLoc = dyn_cast<Loc>(&firstVal);
354   if (!firstLoc)
355     return state;
356 
357   Loc *secondLoc = dyn_cast<Loc>(&secondVal);
358   if (!secondLoc)
359     return state;
360 
361   // Are the two values the same?
362   SValBuilder &svalBuilder = C.getSValBuilder();
363   llvm::tie(stateTrue, stateFalse) =
364     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
365 
366   if (stateTrue && !stateFalse) {
367     // If the values are known to be equal, that's automatically an overlap.
368     emitOverlapBug(C, stateTrue, First, Second);
369     return NULL;
370   }
371 
372   // assume the two expressions are not equal.
373   assert(stateFalse);
374   state = stateFalse;
375 
376   // Which value comes first?
377   QualType cmpTy = svalBuilder.getComparisonType();
378   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
379                                          *firstLoc, *secondLoc, cmpTy);
380   DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
381   if (!reverseTest)
382     return state;
383 
384   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
385   if (stateTrue) {
386     if (stateFalse) {
387       // If we don't know which one comes first, we can't perform this test.
388       return state;
389     } else {
390       // Switch the values so that firstVal is before secondVal.
391       Loc *tmpLoc = firstLoc;
392       firstLoc = secondLoc;
393       secondLoc = tmpLoc;
394 
395       // Switch the Exprs as well, so that they still correspond.
396       const Expr *tmpExpr = First;
397       First = Second;
398       Second = tmpExpr;
399     }
400   }
401 
402   // Get the length, and make sure it too is known.
403   SVal LengthVal = state->getSVal(Size);
404   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
405   if (!Length)
406     return state;
407 
408   // Convert the first buffer's start address to char*.
409   // Bail out if the cast fails.
410   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
411   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
412                                          First->getType());
413   Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
414   if (!FirstStartLoc)
415     return state;
416 
417   // Compute the end of the first buffer. Bail out if THAT fails.
418   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
419                                  *FirstStartLoc, *Length, CharPtrTy);
420   Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
421   if (!FirstEndLoc)
422     return state;
423 
424   // Is the end of the first buffer past the start of the second buffer?
425   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
426                                 *FirstEndLoc, *secondLoc, cmpTy);
427   DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
428   if (!OverlapTest)
429     return state;
430 
431   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
432 
433   if (stateTrue && !stateFalse) {
434     // Overlap!
435     emitOverlapBug(C, stateTrue, First, Second);
436     return NULL;
437   }
438 
439   // assume the two expressions don't overlap.
440   assert(stateFalse);
441   return stateFalse;
442 }
443 
444 void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state,
445                                   const Stmt *First, const Stmt *Second) const {
446   ExplodedNode *N = C.generateSink(state);
447   if (!N)
448     return;
449 
450   if (!BT_Overlap)
451     BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
452 
453   // Generate a report for this bug.
454   RangedBugReport *report =
455     new RangedBugReport(*BT_Overlap,
456       "Arguments must not be overlapping buffers", N);
457   report->addRange(First->getSourceRange());
458   report->addRange(Second->getSourceRange());
459 
460   C.EmitReport(report);
461 }
462 
463 const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C,
464                                                      const GRState *state,
465                                                      NonLoc left,
466                                                      NonLoc right) const {
467   // If a previous check has failed, propagate the failure.
468   if (!state)
469     return NULL;
470 
471   SValBuilder &svalBuilder = C.getSValBuilder();
472   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
473 
474   QualType sizeTy = svalBuilder.getContext().getSizeType();
475   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
476   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
477 
478   SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
479                                                sizeTy);
480 
481   if (maxMinusRight.isUnknownOrUndef()) {
482     // Try switching the operands. (The order of these two assignments is
483     // important!)
484     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
485                                             sizeTy);
486     left = right;
487   }
488 
489   if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
490     QualType cmpTy = svalBuilder.getConditionType();
491     // If left > max - right, we have an overflow.
492     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
493                                                 *maxMinusRightNL, cmpTy);
494 
495     const GRState *stateOverflow, *stateOkay;
496     llvm::tie(stateOverflow, stateOkay) =
497       state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
498 
499     if (stateOverflow && !stateOkay) {
500       // We have an overflow. Emit a bug report.
501       ExplodedNode *N = C.generateSink(stateOverflow);
502       if (!N)
503         return NULL;
504 
505       if (!BT_AdditionOverflow)
506         BT_AdditionOverflow.reset(new BuiltinBug("API",
507           "Sum of expressions causes overflow"));
508 
509       llvm::SmallString<120> buf;
510       llvm::raw_svector_ostream os(buf);
511       // This isn't a great error message, but this should never occur in real
512       // code anyway -- you'd have to create a buffer longer than a size_t can
513       // represent, which is sort of a contradiction.
514       os << "This expression will create a string whose length is too big to "
515          << "be represented as a size_t";
516 
517       // Generate a report for this bug.
518       BugReport *report = new BugReport(*BT_AdditionOverflow, os.str(), N);
519       C.EmitReport(report);
520 
521       return NULL;
522     }
523 
524     // From now on, assume an overflow didn't occur.
525     assert(stateOkay);
526     state = stateOkay;
527   }
528 
529   return state;
530 }
531 
532 const GRState *CStringChecker::setCStringLength(const GRState *state,
533                                                 const MemRegion *MR,
534                                                 SVal strLength) {
535   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
536 
537   MR = MR->StripCasts();
538 
539   switch (MR->getKind()) {
540   case MemRegion::StringRegionKind:
541     // FIXME: This can happen if we strcpy() into a string region. This is
542     // undefined [C99 6.4.5p6], but we should still warn about it.
543     return state;
544 
545   case MemRegion::SymbolicRegionKind:
546   case MemRegion::AllocaRegionKind:
547   case MemRegion::VarRegionKind:
548   case MemRegion::FieldRegionKind:
549   case MemRegion::ObjCIvarRegionKind:
550     // These are the types we can currently track string lengths for.
551     break;
552 
553   case MemRegion::ElementRegionKind:
554     // FIXME: Handle element regions by upper-bounding the parent region's
555     // string length.
556     return state;
557 
558   default:
559     // Other regions (mostly non-data) can't have a reliable C string length.
560     // For now, just ignore the change.
561     // FIXME: These are rare but not impossible. We should output some kind of
562     // warning for things like strcpy((char[]){'a', 0}, "b");
563     return state;
564   }
565 
566   if (strLength.isUnknown())
567     return state->remove<CStringLength>(MR);
568 
569   return state->set<CStringLength>(MR, strLength);
570 }
571 
572 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
573                                                const GRState *&state,
574                                                const Expr *Ex,
575                                                const MemRegion *MR,
576                                                bool hypothetical) {
577   if (!hypothetical) {
578     // If there's a recorded length, go ahead and return it.
579     const SVal *Recorded = state->get<CStringLength>(MR);
580     if (Recorded)
581       return *Recorded;
582   }
583 
584   // Otherwise, get a new symbol and update the state.
585   unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
586   SValBuilder &svalBuilder = C.getSValBuilder();
587   QualType sizeTy = svalBuilder.getContext().getSizeType();
588   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
589                                                     MR, Ex, sizeTy, Count);
590 
591   if (!hypothetical)
592     state = state->set<CStringLength>(MR, strLength);
593 
594   return strLength;
595 }
596 
597 SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state,
598                                       const Expr *Ex, SVal Buf,
599                                       bool hypothetical) const {
600   const MemRegion *MR = Buf.getAsRegion();
601   if (!MR) {
602     // If we can't get a region, see if it's something we /know/ isn't a
603     // C string. In the context of locations, the only time we can issue such
604     // a warning is for labels.
605     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
606       if (ExplodedNode *N = C.generateNode(state)) {
607         if (!BT_NotCString)
608           BT_NotCString.reset(new BuiltinBug("API",
609             "Argument is not a null-terminated string."));
610 
611         llvm::SmallString<120> buf;
612         llvm::raw_svector_ostream os(buf);
613         os << "Argument to byte string function is the address of the label '"
614            << Label->getLabel()->getName()
615            << "', which is not a null-terminated string";
616 
617         // Generate a report for this bug.
618         EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
619                                                           os.str(), N);
620 
621         report->addRange(Ex->getSourceRange());
622         C.EmitReport(report);
623       }
624 
625       return UndefinedVal();
626     }
627 
628     // If it's not a region and not a label, give up.
629     return UnknownVal();
630   }
631 
632   // If we have a region, strip casts from it and see if we can figure out
633   // its length. For anything we can't figure out, just return UnknownVal.
634   MR = MR->StripCasts();
635 
636   switch (MR->getKind()) {
637   case MemRegion::StringRegionKind: {
638     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
639     // so we can assume that the byte length is the correct C string length.
640     SValBuilder &svalBuilder = C.getSValBuilder();
641     QualType sizeTy = svalBuilder.getContext().getSizeType();
642     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
643     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
644   }
645   case MemRegion::SymbolicRegionKind:
646   case MemRegion::AllocaRegionKind:
647   case MemRegion::VarRegionKind:
648   case MemRegion::FieldRegionKind:
649   case MemRegion::ObjCIvarRegionKind:
650     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
651   case MemRegion::CompoundLiteralRegionKind:
652     // FIXME: Can we track this? Is it necessary?
653     return UnknownVal();
654   case MemRegion::ElementRegionKind:
655     // FIXME: How can we handle this? It's not good enough to subtract the
656     // offset from the base string length; consider "123\x00567" and &a[5].
657     return UnknownVal();
658   default:
659     // Other regions (mostly non-data) can't have a reliable C string length.
660     // In this case, an error is emitted and UndefinedVal is returned.
661     // The caller should always be prepared to handle this case.
662     if (ExplodedNode *N = C.generateNode(state)) {
663       if (!BT_NotCString)
664         BT_NotCString.reset(new BuiltinBug("API",
665           "Argument is not a null-terminated string."));
666 
667       llvm::SmallString<120> buf;
668       llvm::raw_svector_ostream os(buf);
669 
670       os << "Argument to byte string function is ";
671 
672       if (SummarizeRegion(os, C.getASTContext(), MR))
673         os << ", which is not a null-terminated string";
674       else
675         os << "not a null-terminated string";
676 
677       // Generate a report for this bug.
678       EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
679                                                         os.str(), N);
680 
681       report->addRange(Ex->getSourceRange());
682       C.EmitReport(report);
683     }
684 
685     return UndefinedVal();
686   }
687 }
688 
689 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
690   const GRState *&state, const Expr *expr, SVal val) const {
691 
692   // Get the memory region pointed to by the val.
693   const MemRegion *bufRegion = val.getAsRegion();
694   if (!bufRegion)
695     return NULL;
696 
697   // Strip casts off the memory region.
698   bufRegion = bufRegion->StripCasts();
699 
700   // Cast the memory region to a string region.
701   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
702   if (!strRegion)
703     return NULL;
704 
705   // Return the actual string in the string region.
706   return strRegion->getStringLiteral();
707 }
708 
709 const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
710                                                 const GRState *state,
711                                                 const Expr *E, SVal V) {
712   Loc *L = dyn_cast<Loc>(&V);
713   if (!L)
714     return state;
715 
716   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
717   // some assumptions about the value that CFRefCount can't. Even so, it should
718   // probably be refactored.
719   if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
720     const MemRegion *R = MR->getRegion()->StripCasts();
721 
722     // Are we dealing with an ElementRegion?  If so, we should be invalidating
723     // the super-region.
724     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
725       R = ER->getSuperRegion();
726       // FIXME: What about layers of ElementRegions?
727     }
728 
729     // Invalidate this region.
730     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
731     return state->invalidateRegion(R, E, Count, NULL);
732   }
733 
734   // If we have a non-region value by chance, just remove the binding.
735   // FIXME: is this necessary or correct? This handles the non-Region
736   //  cases.  Is it ever valid to store to these?
737   return state->unbindLoc(*L);
738 }
739 
740 bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
741                                      const MemRegion *MR) {
742   const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
743   if (!TR)
744     return false;
745 
746   switch (TR->getKind()) {
747   case MemRegion::FunctionTextRegionKind: {
748     const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
749     if (FD)
750       os << "the address of the function '" << FD << "'";
751     else
752       os << "the address of a function";
753     return true;
754   }
755   case MemRegion::BlockTextRegionKind:
756     os << "block text";
757     return true;
758   case MemRegion::BlockDataRegionKind:
759     os << "a block";
760     return true;
761   case MemRegion::CXXThisRegionKind:
762   case MemRegion::CXXTempObjectRegionKind:
763     os << "a C++ temp object of type " << TR->getValueType().getAsString();
764     return true;
765   case MemRegion::VarRegionKind:
766     os << "a variable of type" << TR->getValueType().getAsString();
767     return true;
768   case MemRegion::FieldRegionKind:
769     os << "a field of type " << TR->getValueType().getAsString();
770     return true;
771   case MemRegion::ObjCIvarRegionKind:
772     os << "an instance variable of type " << TR->getValueType().getAsString();
773     return true;
774   default:
775     return false;
776   }
777 }
778 
779 //===----------------------------------------------------------------------===//
780 // evaluation of individual function calls.
781 //===----------------------------------------------------------------------===//
782 
783 void CStringChecker::evalCopyCommon(CheckerContext &C,
784                                     const CallExpr *CE,
785                                     const GRState *state,
786                                     const Expr *Size, const Expr *Dest,
787                                     const Expr *Source, bool Restricted,
788                                     bool IsMempcpy) const {
789   // See if the size argument is zero.
790   SVal sizeVal = state->getSVal(Size);
791   QualType sizeTy = Size->getType();
792 
793   const GRState *stateZeroSize, *stateNonZeroSize;
794   llvm::tie(stateZeroSize, stateNonZeroSize) =
795     assumeZero(C, state, sizeVal, sizeTy);
796 
797   // Get the value of the Dest.
798   SVal destVal = state->getSVal(Dest);
799 
800   // If the size is zero, there won't be any actual memory access, so
801   // just bind the return value to the destination buffer and return.
802   if (stateZeroSize) {
803     stateZeroSize = stateZeroSize->BindExpr(CE, destVal);
804     C.addTransition(stateZeroSize);
805   }
806 
807   // If the size can be nonzero, we have to check the other arguments.
808   if (stateNonZeroSize) {
809     state = stateNonZeroSize;
810 
811     // Ensure the destination is not null. If it is NULL there will be a
812     // NULL pointer dereference.
813     state = checkNonNull(C, state, Dest, destVal);
814     if (!state)
815       return;
816 
817     // Get the value of the Src.
818     SVal srcVal = state->getSVal(Source);
819 
820     // Ensure the source is not null. If it is NULL there will be a
821     // NULL pointer dereference.
822     state = checkNonNull(C, state, Source, srcVal);
823     if (!state)
824       return;
825 
826     // Ensure the accesses are valid and that the buffers do not overlap.
827     state = CheckBufferAccess(C, state, Size, Dest, Source,
828                               /* FirstIsDst = */ true);
829     if (Restricted)
830       state = CheckOverlap(C, state, Size, Dest, Source);
831 
832     if (!state)
833       return;
834 
835     // If this is mempcpy, get the byte after the last byte copied and
836     // bind the expr.
837     if (IsMempcpy) {
838       loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
839       assert(destRegVal && "Destination should be a known MemRegionVal here");
840 
841       // Get the length to copy.
842       NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
843 
844       if (lenValNonLoc) {
845         // Get the byte after the last byte copied.
846         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
847                                                           *destRegVal,
848                                                           *lenValNonLoc,
849                                                           Dest->getType());
850 
851         // The byte after the last byte copied is the return value.
852         state = state->BindExpr(CE, lastElement);
853       } else {
854         // If we don't know how much we copied, we can at least
855         // conjure a return value for later.
856         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
857         SVal result =
858           C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
859         state = state->BindExpr(CE, result);
860       }
861 
862     } else {
863       // All other copies return the destination buffer.
864       // (Well, bcopy() has a void return type, but this won't hurt.)
865       state = state->BindExpr(CE, destVal);
866     }
867 
868     // Invalidate the destination.
869     // FIXME: Even if we can't perfectly model the copy, we should see if we
870     // can use LazyCompoundVals to copy the source values into the destination.
871     // This would probably remove any existing bindings past the end of the
872     // copied region, but that's still an improvement over blank invalidation.
873     state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
874     C.addTransition(state);
875   }
876 }
877 
878 
879 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
880   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
881   // The return value is the address of the destination buffer.
882   const Expr *Dest = CE->getArg(0);
883   const GRState *state = C.getState();
884 
885   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
886 }
887 
888 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
889   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
890   // The return value is a pointer to the byte following the last written byte.
891   const Expr *Dest = CE->getArg(0);
892   const GRState *state = C.getState();
893 
894   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
895 }
896 
897 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
898   // void *memmove(void *dst, const void *src, size_t n);
899   // The return value is the address of the destination buffer.
900   const Expr *Dest = CE->getArg(0);
901   const GRState *state = C.getState();
902 
903   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
904 }
905 
906 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
907   // void bcopy(const void *src, void *dst, size_t n);
908   evalCopyCommon(C, CE, C.getState(),
909                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
910 }
911 
912 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
913   // int memcmp(const void *s1, const void *s2, size_t n);
914   const Expr *Left = CE->getArg(0);
915   const Expr *Right = CE->getArg(1);
916   const Expr *Size = CE->getArg(2);
917 
918   const GRState *state = C.getState();
919   SValBuilder &svalBuilder = C.getSValBuilder();
920 
921   // See if the size argument is zero.
922   SVal sizeVal = state->getSVal(Size);
923   QualType sizeTy = Size->getType();
924 
925   const GRState *stateZeroSize, *stateNonZeroSize;
926   llvm::tie(stateZeroSize, stateNonZeroSize) =
927     assumeZero(C, state, sizeVal, sizeTy);
928 
929   // If the size can be zero, the result will be 0 in that case, and we don't
930   // have to check either of the buffers.
931   if (stateZeroSize) {
932     state = stateZeroSize;
933     state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
934     C.addTransition(state);
935   }
936 
937   // If the size can be nonzero, we have to check the other arguments.
938   if (stateNonZeroSize) {
939     state = stateNonZeroSize;
940     // If we know the two buffers are the same, we know the result is 0.
941     // First, get the two buffers' addresses. Another checker will have already
942     // made sure they're not undefined.
943     DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left));
944     DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right));
945 
946     // See if they are the same.
947     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
948     const GRState *StSameBuf, *StNotSameBuf;
949     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
950 
951     // If the two arguments might be the same buffer, we know the result is 0,
952     // and we only need to check one size.
953     if (StSameBuf) {
954       state = StSameBuf;
955       state = CheckBufferAccess(C, state, Size, Left);
956       if (state) {
957         state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
958         C.addTransition(state);
959       }
960     }
961 
962     // If the two arguments might be different buffers, we have to check the
963     // size of both of them.
964     if (StNotSameBuf) {
965       state = StNotSameBuf;
966       state = CheckBufferAccess(C, state, Size, Left, Right);
967       if (state) {
968         // The return value is the comparison result, which we don't know.
969         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
970         SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
971         state = state->BindExpr(CE, CmpV);
972         C.addTransition(state);
973       }
974     }
975   }
976 }
977 
978 void CStringChecker::evalstrLength(CheckerContext &C,
979                                    const CallExpr *CE) const {
980   // size_t strlen(const char *s);
981   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
982 }
983 
984 void CStringChecker::evalstrnLength(CheckerContext &C,
985                                     const CallExpr *CE) const {
986   // size_t strnlen(const char *s, size_t maxlen);
987   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
988 }
989 
990 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
991                                          bool IsStrnlen) const {
992   const GRState *state = C.getState();
993 
994   if (IsStrnlen) {
995     const Expr *maxlenExpr = CE->getArg(1);
996     SVal maxlenVal = state->getSVal(maxlenExpr);
997 
998     const GRState *stateZeroSize, *stateNonZeroSize;
999     llvm::tie(stateZeroSize, stateNonZeroSize) =
1000       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1001 
1002     // If the size can be zero, the result will be 0 in that case, and we don't
1003     // have to check the string itself.
1004     if (stateZeroSize) {
1005       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1006       stateZeroSize = stateZeroSize->BindExpr(CE, zero);
1007       C.addTransition(stateZeroSize);
1008     }
1009 
1010     // If the size is GUARANTEED to be zero, we're done!
1011     if (!stateNonZeroSize)
1012       return;
1013 
1014     // Otherwise, record the assumption that the size is nonzero.
1015     state = stateNonZeroSize;
1016   }
1017 
1018   // Check that the string argument is non-null.
1019   const Expr *Arg = CE->getArg(0);
1020   SVal ArgVal = state->getSVal(Arg);
1021 
1022   state = checkNonNull(C, state, Arg, ArgVal);
1023 
1024   if (!state)
1025     return;
1026 
1027   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1028 
1029   // If the argument isn't a valid C string, there's no valid state to
1030   // transition to.
1031   if (strLength.isUndef())
1032     return;
1033 
1034   DefinedOrUnknownSVal result = UnknownVal();
1035 
1036   // If the check is for strnlen() then bind the return value to no more than
1037   // the maxlen value.
1038   if (IsStrnlen) {
1039     QualType cmpTy = C.getSValBuilder().getComparisonType();
1040 
1041     // It's a little unfortunate to be getting this again,
1042     // but it's not that expensive...
1043     const Expr *maxlenExpr = CE->getArg(1);
1044     SVal maxlenVal = state->getSVal(maxlenExpr);
1045 
1046     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1047     NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1048 
1049     if (strLengthNL && maxlenValNL) {
1050       const GRState *stateStringTooLong, *stateStringNotTooLong;
1051 
1052       // Check if the strLength is greater than the maxlen.
1053       llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1054         state->assume(cast<DefinedOrUnknownSVal>
1055                       (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1056                                                       *strLengthNL,
1057                                                       *maxlenValNL,
1058                                                       cmpTy)));
1059 
1060       if (stateStringTooLong && !stateStringNotTooLong) {
1061         // If the string is longer than maxlen, return maxlen.
1062         result = *maxlenValNL;
1063       } else if (stateStringNotTooLong && !stateStringTooLong) {
1064         // If the string is shorter than maxlen, return its length.
1065         result = *strLengthNL;
1066       }
1067     }
1068 
1069     if (result.isUnknown()) {
1070       // If we don't have enough information for a comparison, there's
1071       // no guarantee the full string length will actually be returned.
1072       // All we know is the return value is the min of the string length
1073       // and the limit. This is better than nothing.
1074       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1075       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1076       NonLoc *resultNL = cast<NonLoc>(&result);
1077 
1078       if (strLengthNL) {
1079         state = state->assume(cast<DefinedOrUnknownSVal>
1080                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1081                                                               *resultNL,
1082                                                               *strLengthNL,
1083                                                               cmpTy)), true);
1084       }
1085 
1086       if (maxlenValNL) {
1087         state = state->assume(cast<DefinedOrUnknownSVal>
1088                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1089                                                               *resultNL,
1090                                                               *maxlenValNL,
1091                                                               cmpTy)), true);
1092       }
1093     }
1094 
1095   } else {
1096     // This is a plain strlen(), not strnlen().
1097     result = cast<DefinedOrUnknownSVal>(strLength);
1098 
1099     // If we don't know the length of the string, conjure a return
1100     // value, so it can be used in constraints, at least.
1101     if (result.isUnknown()) {
1102       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1103       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1104     }
1105   }
1106 
1107   // Bind the return value.
1108   assert(!result.isUnknown() && "Should have conjured a value by now");
1109   state = state->BindExpr(CE, result);
1110   C.addTransition(state);
1111 }
1112 
1113 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1114   // char *strcpy(char *restrict dst, const char *restrict src);
1115   evalStrcpyCommon(C, CE,
1116                    /* returnEnd = */ false,
1117                    /* isBounded = */ false,
1118                    /* isAppending = */ false);
1119 }
1120 
1121 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1122   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1123   evalStrcpyCommon(C, CE,
1124                    /* returnEnd = */ false,
1125                    /* isBounded = */ true,
1126                    /* isAppending = */ false);
1127 }
1128 
1129 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1130   // char *stpcpy(char *restrict dst, const char *restrict src);
1131   evalStrcpyCommon(C, CE,
1132                    /* returnEnd = */ true,
1133                    /* isBounded = */ false,
1134                    /* isAppending = */ false);
1135 }
1136 
1137 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1138   //char *strcat(char *restrict s1, const char *restrict s2);
1139   evalStrcpyCommon(C, CE,
1140                    /* returnEnd = */ false,
1141                    /* isBounded = */ false,
1142                    /* isAppending = */ true);
1143 }
1144 
1145 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1146   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1147   evalStrcpyCommon(C, CE,
1148                    /* returnEnd = */ false,
1149                    /* isBounded = */ true,
1150                    /* isAppending = */ true);
1151 }
1152 
1153 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1154                                       bool returnEnd, bool isBounded,
1155                                       bool isAppending) const {
1156   const GRState *state = C.getState();
1157 
1158   // Check that the destination is non-null.
1159   const Expr *Dst = CE->getArg(0);
1160   SVal DstVal = state->getSVal(Dst);
1161 
1162   state = checkNonNull(C, state, Dst, DstVal);
1163   if (!state)
1164     return;
1165 
1166   // Check that the source is non-null.
1167   const Expr *srcExpr = CE->getArg(1);
1168   SVal srcVal = state->getSVal(srcExpr);
1169   state = checkNonNull(C, state, srcExpr, srcVal);
1170   if (!state)
1171     return;
1172 
1173   // Get the string length of the source.
1174   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1175 
1176   // If the source isn't a valid C string, give up.
1177   if (strLength.isUndef())
1178     return;
1179 
1180   SValBuilder &svalBuilder = C.getSValBuilder();
1181   QualType cmpTy = svalBuilder.getConditionType();
1182 
1183   SVal amountCopied = UnknownVal();
1184 
1185   // If the function is strncpy, strncat, etc... it is bounded.
1186   if (isBounded) {
1187     // Get the max number of characters to copy.
1188     const Expr *lenExpr = CE->getArg(2);
1189     SVal lenVal = state->getSVal(lenExpr);
1190 
1191     // Protect against misdeclared strncpy().
1192     lenVal = svalBuilder.evalCast(lenVal,
1193                                   svalBuilder.getContext().getSizeType(),
1194                                   lenExpr->getType());
1195 
1196     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1197     NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1198 
1199     // If we know both values, we might be able to figure out how much
1200     // we're copying.
1201     if (strLengthNL && lenValNL) {
1202       const GRState *stateSourceTooLong, *stateSourceNotTooLong;
1203 
1204       // Check if the max number to copy is less than the length of the src.
1205       llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1206         state->assume(cast<DefinedOrUnknownSVal>
1207                       (svalBuilder.evalBinOpNN(state, BO_GT, *strLengthNL,
1208                                                *lenValNL, cmpTy)));
1209 
1210       if (stateSourceTooLong && !stateSourceNotTooLong) {
1211         // Max number to copy is less than the length of the src, so the actual
1212         // strLength copied is the max number arg.
1213         state = stateSourceTooLong;
1214         amountCopied = lenVal;
1215 
1216       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1217         // The source buffer entirely fits in the bound.
1218         state = stateSourceNotTooLong;
1219         amountCopied = strLength;
1220       }
1221     }
1222 
1223     // If we couldn't pin down the copy length, at least bound it.
1224     if (amountCopied.isUnknown()) {
1225       // Try to get a "hypothetical" string length symbol, which we can later
1226       // set as a real value if that turns out to be the case.
1227       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1228       assert(!amountCopied.isUndef());
1229 
1230       if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1231         if (lenValNL) {
1232           // amountCopied <= lenVal
1233           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1234                                                              *amountCopiedNL,
1235                                                              *lenValNL,
1236                                                              cmpTy);
1237           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1238                                 true);
1239           if (!state)
1240             return;
1241         }
1242 
1243         if (strLengthNL) {
1244           // amountCopied <= strlen(source)
1245           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1246                                                            *amountCopiedNL,
1247                                                            *strLengthNL,
1248                                                            cmpTy);
1249           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1250                                 true);
1251           if (!state)
1252             return;
1253         }
1254       }
1255     }
1256 
1257   } else {
1258     // The function isn't bounded. The amount copied should match the length
1259     // of the source buffer.
1260     amountCopied = strLength;
1261   }
1262 
1263   assert(state);
1264 
1265   // This represents the number of characters copied into the destination
1266   // buffer. (It may not actually be the strlen if the destination buffer
1267   // is not terminated.)
1268   SVal finalStrLength = UnknownVal();
1269 
1270   // If this is an appending function (strcat, strncat...) then set the
1271   // string length to strlen(src) + strlen(dst) since the buffer will
1272   // ultimately contain both.
1273   if (isAppending) {
1274     // Get the string length of the destination. If the destination is memory
1275     // that can't have a string length, we shouldn't be copying into it anyway.
1276     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1277     if (dstStrLength.isUndef())
1278       return;
1279 
1280     QualType sizeTy = svalBuilder.getContext().getSizeType();
1281 
1282     NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1283     NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1284 
1285     // If we know both string lengths, we might know the final string length.
1286     if (srcStrLengthNL && dstStrLengthNL) {
1287       // Make sure the two lengths together don't overflow a size_t.
1288       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1289       if (!state)
1290         return;
1291 
1292       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1293                                                *dstStrLengthNL, sizeTy);
1294     }
1295 
1296     // If we couldn't get a single value for the final string length,
1297     // we can at least bound it by the individual lengths.
1298     if (finalStrLength.isUnknown()) {
1299       // Try to get a "hypothetical" string length symbol, which we can later
1300       // set as a real value if that turns out to be the case.
1301       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1302       assert(!finalStrLength.isUndef());
1303 
1304       if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1305         if (srcStrLengthNL) {
1306           // finalStrLength >= srcStrLength
1307           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1308                                                         *finalStrLengthNL,
1309                                                         *srcStrLengthNL,
1310                                                         cmpTy);
1311           state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1312                                 true);
1313           if (!state)
1314             return;
1315         }
1316 
1317         if (dstStrLengthNL) {
1318           // finalStrLength >= dstStrLength
1319           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1320                                                       *finalStrLengthNL,
1321                                                       *dstStrLengthNL,
1322                                                       cmpTy);
1323           state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1324                                 true);
1325           if (!state)
1326             return;
1327         }
1328       }
1329     }
1330 
1331   } else {
1332     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1333     // the final string length will match the input string length.
1334     finalStrLength = amountCopied;
1335   }
1336 
1337   // The final result of the function will either be a pointer past the last
1338   // copied element, or a pointer to the start of the destination buffer.
1339   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1340 
1341   assert(state);
1342 
1343   // If the destination is a MemRegion, try to check for a buffer overflow and
1344   // record the new string length.
1345   if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1346     // If the final length is known, we can check for an overflow.
1347     if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1348       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1349                                                  *knownStrLength,
1350                                                  Dst->getType());
1351 
1352       state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true);
1353       if (!state)
1354         return;
1355 
1356       // If this is a stpcpy-style copy, the last element is the return value.
1357       if (returnEnd)
1358         Result = lastElement;
1359     }
1360 
1361     // Invalidate the destination. This must happen before we set the C string
1362     // length because invalidation will clear the length.
1363     // FIXME: Even if we can't perfectly model the copy, we should see if we
1364     // can use LazyCompoundVals to copy the source values into the destination.
1365     // This would probably remove any existing bindings past the end of the
1366     // string, but that's still an improvement over blank invalidation.
1367     state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1368 
1369     // Set the C string length of the destination.
1370     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1371   }
1372 
1373   assert(state);
1374 
1375   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1376   // overflow, we still need a result. Conjure a return value.
1377   if (returnEnd && Result.isUnknown()) {
1378     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1379     Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
1380   }
1381 
1382   // Set the return value.
1383   state = state->BindExpr(CE, Result);
1384   C.addTransition(state);
1385 }
1386 
1387 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1388   //int strcmp(const char *restrict s1, const char *restrict s2);
1389   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1390 }
1391 
1392 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1393   //int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
1394   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1395 }
1396 
1397 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1398                                     const CallExpr *CE) const {
1399   //int strcasecmp(const char *restrict s1, const char *restrict s2);
1400   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1401 }
1402 
1403 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1404                                      const CallExpr *CE) const {
1405   //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n);
1406   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1407 }
1408 
1409 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1410                                       bool isBounded, bool ignoreCase) const {
1411   const GRState *state = C.getState();
1412 
1413   // Check that the first string is non-null
1414   const Expr *s1 = CE->getArg(0);
1415   SVal s1Val = state->getSVal(s1);
1416   state = checkNonNull(C, state, s1, s1Val);
1417   if (!state)
1418     return;
1419 
1420   // Check that the second string is non-null.
1421   const Expr *s2 = CE->getArg(1);
1422   SVal s2Val = state->getSVal(s2);
1423   state = checkNonNull(C, state, s2, s2Val);
1424   if (!state)
1425     return;
1426 
1427   // Get the string length of the first string or give up.
1428   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1429   if (s1Length.isUndef())
1430     return;
1431 
1432   // Get the string length of the second string or give up.
1433   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1434   if (s2Length.isUndef())
1435     return;
1436 
1437   // Get the string literal of the first string.
1438   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1439   if (!s1StrLiteral)
1440     return;
1441   llvm::StringRef s1StrRef = s1StrLiteral->getString();
1442 
1443   // Get the string literal of the second string.
1444   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1445   if (!s2StrLiteral)
1446     return;
1447   llvm::StringRef s2StrRef = s2StrLiteral->getString();
1448 
1449   int result;
1450   if (isBounded) {
1451     // Get the max number of characters to compare.
1452     const Expr *lenExpr = CE->getArg(2);
1453     SVal lenVal = state->getSVal(lenExpr);
1454 
1455     // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt
1456     // then give up, otherwise get the value and use it as the bounds.
1457     nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal);
1458     if (!CI)
1459       return;
1460     llvm::APSInt lenInt(CI->getValue());
1461 
1462     // Create substrings of each to compare the prefix.
1463     s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1464     s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1465   }
1466 
1467   if (ignoreCase) {
1468     // Compare string 1 to string 2 the same way strcasecmp() does.
1469     result = s1StrRef.compare_lower(s2StrRef);
1470   } else {
1471     // Compare string 1 to string 2 the same way strcmp() does.
1472     result = s1StrRef.compare(s2StrRef);
1473   }
1474 
1475   // Build the SVal of the comparison to bind the return value.
1476   SValBuilder &svalBuilder = C.getSValBuilder();
1477   QualType intTy = svalBuilder.getContext().IntTy;
1478   SVal resultVal = svalBuilder.makeIntVal(result, intTy);
1479 
1480   // Bind the return value of the expression.
1481   // Set the return value.
1482   state = state->BindExpr(CE, resultVal);
1483   C.addTransition(state);
1484 }
1485 
1486 //===----------------------------------------------------------------------===//
1487 // The driver method, and other Checker callbacks.
1488 //===----------------------------------------------------------------------===//
1489 
1490 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1491   // Get the callee.  All the functions we care about are C functions
1492   // with simple identifiers.
1493   const GRState *state = C.getState();
1494   const Expr *Callee = CE->getCallee();
1495   const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl();
1496 
1497   if (!FD)
1498     return false;
1499 
1500   // Get the name of the callee. If it's a builtin, strip off the prefix.
1501   IdentifierInfo *II = FD->getIdentifier();
1502   if (!II)   // if no identifier, not a simple C function
1503     return false;
1504   llvm::StringRef Name = II->getName();
1505   if (Name.startswith("__builtin_"))
1506     Name = Name.substr(10);
1507 
1508   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
1509     .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy)
1510     .Cases("mempcpy", "__mempcpy_chk", &CStringChecker::evalMempcpy)
1511     .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp)
1512     .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove)
1513     .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy)
1514     //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy)
1515     .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy)
1516     .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat)
1517     .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat)
1518     .Case("strlen", &CStringChecker::evalstrLength)
1519     .Case("strnlen", &CStringChecker::evalstrnLength)
1520     .Case("strcmp", &CStringChecker::evalStrcmp)
1521     .Case("strncmp", &CStringChecker::evalStrncmp)
1522     .Case("strcasecmp", &CStringChecker::evalStrcasecmp)
1523     .Case("strncasecmp", &CStringChecker::evalStrncasecmp)
1524     .Case("bcopy", &CStringChecker::evalBcopy)
1525     .Default(NULL);
1526 
1527   // If the callee isn't a string function, let another checker handle it.
1528   if (!evalFunction)
1529     return false;
1530 
1531   // Check and evaluate the call.
1532   (this->*evalFunction)(C, CE);
1533   return true;
1534 }
1535 
1536 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1537   // Record string length for char a[] = "abc";
1538   const GRState *state = C.getState();
1539 
1540   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1541        I != E; ++I) {
1542     const VarDecl *D = dyn_cast<VarDecl>(*I);
1543     if (!D)
1544       continue;
1545 
1546     // FIXME: Handle array fields of structs.
1547     if (!D->getType()->isArrayType())
1548       continue;
1549 
1550     const Expr *Init = D->getInit();
1551     if (!Init)
1552       continue;
1553     if (!isa<StringLiteral>(Init))
1554       continue;
1555 
1556     Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext());
1557     const MemRegion *MR = VarLoc.getAsRegion();
1558     if (!MR)
1559       continue;
1560 
1561     SVal StrVal = state->getSVal(Init);
1562     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1563     DefinedOrUnknownSVal strLength
1564       = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1565 
1566     state = state->set<CStringLength>(MR, strLength);
1567   }
1568 
1569   C.addTransition(state);
1570 }
1571 
1572 bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const {
1573   CStringLength::EntryMap Entries = state->get<CStringLength>();
1574   return !Entries.isEmpty();
1575 }
1576 
1577 const GRState *
1578 CStringChecker::checkRegionChanges(const GRState *state,
1579                                    const StoreManager::InvalidatedSymbols *,
1580                                    const MemRegion * const *Begin,
1581                                    const MemRegion * const *End) const {
1582   CStringLength::EntryMap Entries = state->get<CStringLength>();
1583   if (Entries.isEmpty())
1584     return state;
1585 
1586   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1587   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1588 
1589   // First build sets for the changed regions and their super-regions.
1590   for ( ; Begin != End; ++Begin) {
1591     const MemRegion *MR = *Begin;
1592     Invalidated.insert(MR);
1593 
1594     SuperRegions.insert(MR);
1595     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1596       MR = SR->getSuperRegion();
1597       SuperRegions.insert(MR);
1598     }
1599   }
1600 
1601   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1602 
1603   // Then loop over the entries in the current state.
1604   for (CStringLength::EntryMap::iterator I = Entries.begin(),
1605        E = Entries.end(); I != E; ++I) {
1606     const MemRegion *MR = I.getKey();
1607 
1608     // Is this entry for a super-region of a changed region?
1609     if (SuperRegions.count(MR)) {
1610       Entries = F.remove(Entries, MR);
1611       continue;
1612     }
1613 
1614     // Is this entry for a sub-region of a changed region?
1615     const MemRegion *Super = MR;
1616     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1617       Super = SR->getSuperRegion();
1618       if (Invalidated.count(Super)) {
1619         Entries = F.remove(Entries, MR);
1620         break;
1621       }
1622     }
1623   }
1624 
1625   return state->set<CStringLength>(Entries);
1626 }
1627 
1628 void CStringChecker::checkLiveSymbols(const GRState *state,
1629                                       SymbolReaper &SR) const {
1630   // Mark all symbols in our string length map as valid.
1631   CStringLength::EntryMap Entries = state->get<CStringLength>();
1632 
1633   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1634        I != E; ++I) {
1635     SVal Len = I.getData();
1636 
1637     for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end();
1638          si != se; ++si)
1639       SR.markInUse(*si);
1640   }
1641 }
1642 
1643 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1644                                       CheckerContext &C) const {
1645   if (!SR.hasDeadSymbols())
1646     return;
1647 
1648   const GRState *state = C.getState();
1649   CStringLength::EntryMap Entries = state->get<CStringLength>();
1650   if (Entries.isEmpty())
1651     return;
1652 
1653   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1654   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1655        I != E; ++I) {
1656     SVal Len = I.getData();
1657     if (SymbolRef Sym = Len.getAsSymbol()) {
1658       if (SR.isDead(Sym))
1659         Entries = F.remove(Entries, I.getKey());
1660     }
1661   }
1662 
1663   state = state->set<CStringLength>(Entries);
1664   C.generateNode(state);
1665 }
1666 
1667 void ento::registerCStringChecker(CheckerManager &mgr) {
1668   mgr.registerChecker<CStringChecker>();
1669 }
1670