xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 634c12d23f690d4b1947154d7b4945bd0f6bfa4f)
1 //= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "clang/StaticAnalyzer/Core/Checker.h"
17 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h"
21 #include "llvm/ADT/StringSwitch.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
26 namespace {
27 class CStringChecker : public Checker< eval::Call,
28                                          check::PreStmt<DeclStmt>,
29                                          check::LiveSymbols,
30                                          check::DeadSymbols,
31                                          check::RegionChanges
32                                          > {
33   mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite,
34                                    BT_Overlap, BT_NotCString,
35                                    BT_AdditionOverflow;
36 public:
37   static void *getTag() { static int tag; return &tag; }
38 
39   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
40   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
41   void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const;
42   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
43   bool wantsRegionChangeUpdate(const GRState *state) const;
44 
45   const GRState *checkRegionChanges(const GRState *state,
46                                     const StoreManager::InvalidatedSymbols *,
47                                     const MemRegion * const *Begin,
48                                     const MemRegion * const *End) const;
49 
50   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
51                                           const CallExpr *) const;
52 
53   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
54   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
55   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
56   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
57   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
58                       const GRState *state,
59                       const Expr *Size, const Expr *Source, const Expr *Dest,
60                       bool Restricted = false,
61                       bool IsMempcpy = false) const;
62 
63   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
64 
65   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
66   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
67   void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
68                            bool IsStrnlen = false) const;
69 
70   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
71   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
72   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
73   void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd,
74                         bool isBounded, bool isAppending) const;
75 
76   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
77   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
78 
79   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
80   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
81   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
82   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
83   void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
84                         bool isBounded = false, bool ignoreCase = false) const;
85 
86   // Utility methods
87   std::pair<const GRState*, const GRState*>
88   static assumeZero(CheckerContext &C,
89                     const GRState *state, SVal V, QualType Ty);
90 
91   static const GRState *setCStringLength(const GRState *state,
92                                          const MemRegion *MR, SVal strLength);
93   static SVal getCStringLengthForRegion(CheckerContext &C,
94                                         const GRState *&state,
95                                         const Expr *Ex, const MemRegion *MR,
96                                         bool hypothetical);
97   SVal getCStringLength(CheckerContext &C, const GRState *&state,
98                         const Expr *Ex, SVal Buf,
99                         bool hypothetical = false) const;
100 
101   const StringLiteral *getCStringLiteral(CheckerContext &C,
102                                          const GRState *&state,
103                                          const Expr *expr,
104                                          SVal val) const;
105 
106   static const GRState *InvalidateBuffer(CheckerContext &C,
107                                          const GRState *state,
108                                          const Expr *Ex, SVal V);
109 
110   static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
111                               const MemRegion *MR);
112 
113   // Re-usable checks
114   const GRState *checkNonNull(CheckerContext &C, const GRState *state,
115                                const Expr *S, SVal l) const;
116   const GRState *CheckLocation(CheckerContext &C, const GRState *state,
117                                const Expr *S, SVal l,
118                                bool IsDestination = false) const;
119   const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
120                                    const Expr *Size,
121                                    const Expr *FirstBuf,
122                                    const Expr *SecondBuf = NULL,
123                                    bool FirstIsDestination = false) const;
124   const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
125                               const Expr *Size, const Expr *First,
126                               const Expr *Second) const;
127   void emitOverlapBug(CheckerContext &C, const GRState *state,
128                       const Stmt *First, const Stmt *Second) const;
129   const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state,
130                                        NonLoc left, NonLoc right) const;
131 };
132 
133 class CStringLength {
134 public:
135   typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
136 };
137 } //end anonymous namespace
138 
139 namespace clang {
140 namespace ento {
141   template <>
142   struct GRStateTrait<CStringLength>
143     : public GRStatePartialTrait<CStringLength::EntryMap> {
144     static void *GDMIndex() { return CStringChecker::getTag(); }
145   };
146 }
147 }
148 
149 //===----------------------------------------------------------------------===//
150 // Individual checks and utility methods.
151 //===----------------------------------------------------------------------===//
152 
153 std::pair<const GRState*, const GRState*>
154 CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V,
155                            QualType Ty) {
156   DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
157   if (!val)
158     return std::pair<const GRState*, const GRState *>(state, state);
159 
160   SValBuilder &svalBuilder = C.getSValBuilder();
161   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
162   return state->assume(svalBuilder.evalEQ(state, *val, zero));
163 }
164 
165 const GRState *CStringChecker::checkNonNull(CheckerContext &C,
166                                             const GRState *state,
167                                             const Expr *S, SVal l) const {
168   // If a previous check has failed, propagate the failure.
169   if (!state)
170     return NULL;
171 
172   const GRState *stateNull, *stateNonNull;
173   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
174 
175   if (stateNull && !stateNonNull) {
176     ExplodedNode *N = C.generateSink(stateNull);
177     if (!N)
178       return NULL;
179 
180     if (!BT_Null)
181       BT_Null.reset(new BuiltinBug("API",
182         "Null pointer argument in call to byte string function"));
183 
184     // Generate a report for this bug.
185     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
186     EnhancedBugReport *report = new EnhancedBugReport(*BT,
187                                                       BT->getDescription(), N);
188 
189     report->addRange(S->getSourceRange());
190     report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S);
191     C.EmitReport(report);
192     return NULL;
193   }
194 
195   // From here on, assume that the value is non-null.
196   assert(stateNonNull);
197   return stateNonNull;
198 }
199 
200 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
201 const GRState *CStringChecker::CheckLocation(CheckerContext &C,
202                                              const GRState *state,
203                                              const Expr *S, SVal l,
204                                              bool IsDestination) const {
205   // If a previous check has failed, propagate the failure.
206   if (!state)
207     return NULL;
208 
209   // Check for out of bound array element access.
210   const MemRegion *R = l.getAsRegion();
211   if (!R)
212     return state;
213 
214   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
215   if (!ER)
216     return state;
217 
218   assert(ER->getValueType() == C.getASTContext().CharTy &&
219     "CheckLocation should only be called with char* ElementRegions");
220 
221   // Get the size of the array.
222   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
223   SValBuilder &svalBuilder = C.getSValBuilder();
224   SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
225   DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
226 
227   // Get the index of the accessed element.
228   DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
229 
230   const GRState *StInBound = state->assumeInBound(Idx, Size, true);
231   const GRState *StOutBound = state->assumeInBound(Idx, Size, false);
232   if (StOutBound && !StInBound) {
233     ExplodedNode *N = C.generateSink(StOutBound);
234     if (!N)
235       return NULL;
236 
237     BuiltinBug *BT;
238     if (IsDestination) {
239       if (!BT_BoundsWrite) {
240         BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access",
241           "Byte string function overflows destination buffer"));
242       }
243       BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get());
244     } else {
245       if (!BT_Bounds) {
246         BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
247           "Byte string function accesses out-of-bound array element"));
248       }
249       BT = static_cast<BuiltinBug*>(BT_Bounds.get());
250     }
251 
252     // FIXME: It would be nice to eventually make this diagnostic more clear,
253     // e.g., by referencing the original declaration or by saying *why* this
254     // reference is outside the range.
255 
256     // Generate a report for this bug.
257     RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
258 
259     report->addRange(S->getSourceRange());
260     C.EmitReport(report);
261     return NULL;
262   }
263 
264   // Array bound check succeeded.  From this point forward the array bound
265   // should always succeed.
266   return StInBound;
267 }
268 
269 const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
270                                                  const GRState *state,
271                                                  const Expr *Size,
272                                                  const Expr *FirstBuf,
273                                                  const Expr *SecondBuf,
274                                                 bool FirstIsDestination) const {
275   // If a previous check has failed, propagate the failure.
276   if (!state)
277     return NULL;
278 
279   SValBuilder &svalBuilder = C.getSValBuilder();
280   ASTContext &Ctx = C.getASTContext();
281 
282   QualType sizeTy = Size->getType();
283   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
284 
285   // Check that the first buffer is non-null.
286   SVal BufVal = state->getSVal(FirstBuf);
287   state = checkNonNull(C, state, FirstBuf, BufVal);
288   if (!state)
289     return NULL;
290 
291   // Get the access length and make sure it is known.
292   SVal LengthVal = state->getSVal(Size);
293   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
294   if (!Length)
295     return state;
296 
297   // Compute the offset of the last element to be accessed: size-1.
298   NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
299   NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
300                                                     *Length, One, sizeTy));
301 
302   // Check that the first buffer is sufficiently long.
303   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
304   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
305     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
306                                           LastOffset, PtrTy);
307     state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
308 
309     // If the buffer isn't large enough, abort.
310     if (!state)
311       return NULL;
312   }
313 
314   // If there's a second buffer, check it as well.
315   if (SecondBuf) {
316     BufVal = state->getSVal(SecondBuf);
317     state = checkNonNull(C, state, SecondBuf, BufVal);
318     if (!state)
319       return NULL;
320 
321     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
322     if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
323       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
324                                             LastOffset, PtrTy);
325       state = CheckLocation(C, state, SecondBuf, BufEnd);
326     }
327   }
328 
329   // Large enough or not, return this state!
330   return state;
331 }
332 
333 const GRState *CStringChecker::CheckOverlap(CheckerContext &C,
334                                             const GRState *state,
335                                             const Expr *Size,
336                                             const Expr *First,
337                                             const Expr *Second) const {
338   // Do a simple check for overlap: if the two arguments are from the same
339   // buffer, see if the end of the first is greater than the start of the second
340   // or vice versa.
341 
342   // If a previous check has failed, propagate the failure.
343   if (!state)
344     return NULL;
345 
346   const GRState *stateTrue, *stateFalse;
347 
348   // Get the buffer values and make sure they're known locations.
349   SVal firstVal = state->getSVal(First);
350   SVal secondVal = state->getSVal(Second);
351 
352   Loc *firstLoc = dyn_cast<Loc>(&firstVal);
353   if (!firstLoc)
354     return state;
355 
356   Loc *secondLoc = dyn_cast<Loc>(&secondVal);
357   if (!secondLoc)
358     return state;
359 
360   // Are the two values the same?
361   SValBuilder &svalBuilder = C.getSValBuilder();
362   llvm::tie(stateTrue, stateFalse) =
363     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
364 
365   if (stateTrue && !stateFalse) {
366     // If the values are known to be equal, that's automatically an overlap.
367     emitOverlapBug(C, stateTrue, First, Second);
368     return NULL;
369   }
370 
371   // assume the two expressions are not equal.
372   assert(stateFalse);
373   state = stateFalse;
374 
375   // Which value comes first?
376   ASTContext &Ctx = svalBuilder.getContext();
377   QualType cmpTy = Ctx.IntTy;
378   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
379                                          *firstLoc, *secondLoc, cmpTy);
380   DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
381   if (!reverseTest)
382     return state;
383 
384   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
385   if (stateTrue) {
386     if (stateFalse) {
387       // If we don't know which one comes first, we can't perform this test.
388       return state;
389     } else {
390       // Switch the values so that firstVal is before secondVal.
391       Loc *tmpLoc = firstLoc;
392       firstLoc = secondLoc;
393       secondLoc = tmpLoc;
394 
395       // Switch the Exprs as well, so that they still correspond.
396       const Expr *tmpExpr = First;
397       First = Second;
398       Second = tmpExpr;
399     }
400   }
401 
402   // Get the length, and make sure it too is known.
403   SVal LengthVal = state->getSVal(Size);
404   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
405   if (!Length)
406     return state;
407 
408   // Convert the first buffer's start address to char*.
409   // Bail out if the cast fails.
410   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
411   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType());
412   Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
413   if (!FirstStartLoc)
414     return state;
415 
416   // Compute the end of the first buffer. Bail out if THAT fails.
417   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
418                                  *FirstStartLoc, *Length, CharPtrTy);
419   Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
420   if (!FirstEndLoc)
421     return state;
422 
423   // Is the end of the first buffer past the start of the second buffer?
424   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
425                                 *FirstEndLoc, *secondLoc, cmpTy);
426   DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
427   if (!OverlapTest)
428     return state;
429 
430   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
431 
432   if (stateTrue && !stateFalse) {
433     // Overlap!
434     emitOverlapBug(C, stateTrue, First, Second);
435     return NULL;
436   }
437 
438   // assume the two expressions don't overlap.
439   assert(stateFalse);
440   return stateFalse;
441 }
442 
443 void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state,
444                                   const Stmt *First, const Stmt *Second) const {
445   ExplodedNode *N = C.generateSink(state);
446   if (!N)
447     return;
448 
449   if (!BT_Overlap)
450     BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
451 
452   // Generate a report for this bug.
453   RangedBugReport *report =
454     new RangedBugReport(*BT_Overlap,
455       "Arguments must not be overlapping buffers", N);
456   report->addRange(First->getSourceRange());
457   report->addRange(Second->getSourceRange());
458 
459   C.EmitReport(report);
460 }
461 
462 const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C,
463                                                      const GRState *state,
464                                                      NonLoc left,
465                                                      NonLoc right) const {
466   // If a previous check has failed, propagate the failure.
467   if (!state)
468     return NULL;
469 
470   SValBuilder &svalBuilder = C.getSValBuilder();
471   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
472 
473   QualType sizeTy = svalBuilder.getContext().getSizeType();
474   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
475   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
476 
477   SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
478                                                sizeTy);
479 
480   if (maxMinusRight.isUnknownOrUndef()) {
481     // Try switching the operands. (The order of these two assignments is
482     // important!)
483     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
484                                             sizeTy);
485     left = right;
486   }
487 
488   if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
489     QualType cmpTy = svalBuilder.getConditionType();
490     // If left > max - right, we have an overflow.
491     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
492                                                 *maxMinusRightNL, cmpTy);
493 
494     const GRState *stateOverflow, *stateOkay;
495     llvm::tie(stateOverflow, stateOkay) =
496       state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
497 
498     if (stateOverflow && !stateOkay) {
499       // We have an overflow. Emit a bug report.
500       ExplodedNode *N = C.generateSink(stateOverflow);
501       if (!N)
502         return NULL;
503 
504       if (!BT_AdditionOverflow)
505         BT_AdditionOverflow.reset(new BuiltinBug("API",
506           "Sum of expressions causes overflow"));
507 
508       llvm::SmallString<120> buf;
509       llvm::raw_svector_ostream os(buf);
510       // This isn't a great error message, but this should never occur in real
511       // code anyway -- you'd have to create a buffer longer than a size_t can
512       // represent, which is sort of a contradiction.
513       os << "This expression will create a string whose length is too big to "
514          << "be represented as a size_t";
515 
516       // Generate a report for this bug.
517       BugReport *report = new BugReport(*BT_AdditionOverflow, os.str(), N);
518       C.EmitReport(report);
519 
520       return NULL;
521     }
522 
523     // From now on, assume an overflow didn't occur.
524     assert(stateOkay);
525     state = stateOkay;
526   }
527 
528   return state;
529 }
530 
531 const GRState *CStringChecker::setCStringLength(const GRState *state,
532                                                 const MemRegion *MR,
533                                                 SVal strLength) {
534   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
535 
536   MR = MR->StripCasts();
537 
538   switch (MR->getKind()) {
539   case MemRegion::StringRegionKind:
540     // FIXME: This can happen if we strcpy() into a string region. This is
541     // undefined [C99 6.4.5p6], but we should still warn about it.
542     return state;
543 
544   case MemRegion::SymbolicRegionKind:
545   case MemRegion::AllocaRegionKind:
546   case MemRegion::VarRegionKind:
547   case MemRegion::FieldRegionKind:
548   case MemRegion::ObjCIvarRegionKind:
549     // These are the types we can currently track string lengths for.
550     break;
551 
552   case MemRegion::ElementRegionKind:
553     // FIXME: Handle element regions by upper-bounding the parent region's
554     // string length.
555     return state;
556 
557   default:
558     // Other regions (mostly non-data) can't have a reliable C string length.
559     // For now, just ignore the change.
560     // FIXME: These are rare but not impossible. We should output some kind of
561     // warning for things like strcpy((char[]){'a', 0}, "b");
562     return state;
563   }
564 
565   if (strLength.isUnknown())
566     return state->remove<CStringLength>(MR);
567 
568   return state->set<CStringLength>(MR, strLength);
569 }
570 
571 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
572                                                const GRState *&state,
573                                                const Expr *Ex,
574                                                const MemRegion *MR,
575                                                bool hypothetical) {
576   if (!hypothetical) {
577     // If there's a recorded length, go ahead and return it.
578     const SVal *Recorded = state->get<CStringLength>(MR);
579     if (Recorded)
580       return *Recorded;
581   }
582 
583   // Otherwise, get a new symbol and update the state.
584   unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
585   SValBuilder &svalBuilder = C.getSValBuilder();
586   QualType sizeTy = svalBuilder.getContext().getSizeType();
587   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
588                                                     MR, Ex, sizeTy, Count);
589 
590   if (!hypothetical)
591     state = state->set<CStringLength>(MR, strLength);
592 
593   return strLength;
594 }
595 
596 SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state,
597                                       const Expr *Ex, SVal Buf,
598                                       bool hypothetical) const {
599   const MemRegion *MR = Buf.getAsRegion();
600   if (!MR) {
601     // If we can't get a region, see if it's something we /know/ isn't a
602     // C string. In the context of locations, the only time we can issue such
603     // a warning is for labels.
604     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
605       if (ExplodedNode *N = C.generateNode(state)) {
606         if (!BT_NotCString)
607           BT_NotCString.reset(new BuiltinBug("API",
608             "Argument is not a null-terminated string."));
609 
610         llvm::SmallString<120> buf;
611         llvm::raw_svector_ostream os(buf);
612         os << "Argument to byte string function is the address of the label '"
613            << Label->getLabel()->getName()
614            << "', which is not a null-terminated string";
615 
616         // Generate a report for this bug.
617         EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
618                                                           os.str(), N);
619 
620         report->addRange(Ex->getSourceRange());
621         C.EmitReport(report);
622       }
623 
624       return UndefinedVal();
625     }
626 
627     // If it's not a region and not a label, give up.
628     return UnknownVal();
629   }
630 
631   // If we have a region, strip casts from it and see if we can figure out
632   // its length. For anything we can't figure out, just return UnknownVal.
633   MR = MR->StripCasts();
634 
635   switch (MR->getKind()) {
636   case MemRegion::StringRegionKind: {
637     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
638     // so we can assume that the byte length is the correct C string length.
639     SValBuilder &svalBuilder = C.getSValBuilder();
640     QualType sizeTy = svalBuilder.getContext().getSizeType();
641     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
642     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
643   }
644   case MemRegion::SymbolicRegionKind:
645   case MemRegion::AllocaRegionKind:
646   case MemRegion::VarRegionKind:
647   case MemRegion::FieldRegionKind:
648   case MemRegion::ObjCIvarRegionKind:
649     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
650   case MemRegion::CompoundLiteralRegionKind:
651     // FIXME: Can we track this? Is it necessary?
652     return UnknownVal();
653   case MemRegion::ElementRegionKind:
654     // FIXME: How can we handle this? It's not good enough to subtract the
655     // offset from the base string length; consider "123\x00567" and &a[5].
656     return UnknownVal();
657   default:
658     // Other regions (mostly non-data) can't have a reliable C string length.
659     // In this case, an error is emitted and UndefinedVal is returned.
660     // The caller should always be prepared to handle this case.
661     if (ExplodedNode *N = C.generateNode(state)) {
662       if (!BT_NotCString)
663         BT_NotCString.reset(new BuiltinBug("API",
664           "Argument is not a null-terminated string."));
665 
666       llvm::SmallString<120> buf;
667       llvm::raw_svector_ostream os(buf);
668 
669       os << "Argument to byte string function is ";
670 
671       if (SummarizeRegion(os, C.getASTContext(), MR))
672         os << ", which is not a null-terminated string";
673       else
674         os << "not a null-terminated string";
675 
676       // Generate a report for this bug.
677       EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
678                                                         os.str(), N);
679 
680       report->addRange(Ex->getSourceRange());
681       C.EmitReport(report);
682     }
683 
684     return UndefinedVal();
685   }
686 }
687 
688 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
689   const GRState *&state, const Expr *expr, SVal val) const {
690 
691   // Get the memory region pointed to by the val.
692   const MemRegion *bufRegion = val.getAsRegion();
693   if (!bufRegion)
694     return NULL;
695 
696   // Strip casts off the memory region.
697   bufRegion = bufRegion->StripCasts();
698 
699   // Cast the memory region to a string region.
700   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
701   if (!strRegion)
702     return NULL;
703 
704   // Return the actual string in the string region.
705   return strRegion->getStringLiteral();
706 }
707 
708 const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
709                                                 const GRState *state,
710                                                 const Expr *E, SVal V) {
711   Loc *L = dyn_cast<Loc>(&V);
712   if (!L)
713     return state;
714 
715   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
716   // some assumptions about the value that CFRefCount can't. Even so, it should
717   // probably be refactored.
718   if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
719     const MemRegion *R = MR->getRegion()->StripCasts();
720 
721     // Are we dealing with an ElementRegion?  If so, we should be invalidating
722     // the super-region.
723     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
724       R = ER->getSuperRegion();
725       // FIXME: What about layers of ElementRegions?
726     }
727 
728     // Invalidate this region.
729     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
730     return state->invalidateRegion(R, E, Count, NULL);
731   }
732 
733   // If we have a non-region value by chance, just remove the binding.
734   // FIXME: is this necessary or correct? This handles the non-Region
735   //  cases.  Is it ever valid to store to these?
736   return state->unbindLoc(*L);
737 }
738 
739 bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
740                                      const MemRegion *MR) {
741   const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
742   if (!TR)
743     return false;
744 
745   switch (TR->getKind()) {
746   case MemRegion::FunctionTextRegionKind: {
747     const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
748     if (FD)
749       os << "the address of the function '" << FD << "'";
750     else
751       os << "the address of a function";
752     return true;
753   }
754   case MemRegion::BlockTextRegionKind:
755     os << "block text";
756     return true;
757   case MemRegion::BlockDataRegionKind:
758     os << "a block";
759     return true;
760   case MemRegion::CXXThisRegionKind:
761   case MemRegion::CXXTempObjectRegionKind:
762     os << "a C++ temp object of type " << TR->getValueType().getAsString();
763     return true;
764   case MemRegion::VarRegionKind:
765     os << "a variable of type" << TR->getValueType().getAsString();
766     return true;
767   case MemRegion::FieldRegionKind:
768     os << "a field of type " << TR->getValueType().getAsString();
769     return true;
770   case MemRegion::ObjCIvarRegionKind:
771     os << "an instance variable of type " << TR->getValueType().getAsString();
772     return true;
773   default:
774     return false;
775   }
776 }
777 
778 //===----------------------------------------------------------------------===//
779 // evaluation of individual function calls.
780 //===----------------------------------------------------------------------===//
781 
782 void CStringChecker::evalCopyCommon(CheckerContext &C,
783                                     const CallExpr *CE,
784                                     const GRState *state,
785                                     const Expr *Size, const Expr *Dest,
786                                     const Expr *Source, bool Restricted,
787                                     bool IsMempcpy) const {
788   // See if the size argument is zero.
789   SVal sizeVal = state->getSVal(Size);
790   QualType sizeTy = Size->getType();
791 
792   const GRState *stateZeroSize, *stateNonZeroSize;
793   llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy);
794 
795   // Get the value of the Dest.
796   SVal destVal = state->getSVal(Dest);
797 
798   // If the size is zero, there won't be any actual memory access, so
799   // just bind the return value to the destination buffer and return.
800   if (stateZeroSize) {
801     stateZeroSize = stateZeroSize->BindExpr(CE, destVal);
802     C.addTransition(stateZeroSize);
803   }
804 
805   // If the size can be nonzero, we have to check the other arguments.
806   if (stateNonZeroSize) {
807     state = stateNonZeroSize;
808 
809     // Ensure the destination is not null. If it is NULL there will be a
810     // NULL pointer dereference.
811     state = checkNonNull(C, state, Dest, destVal);
812     if (!state)
813       return;
814 
815     // Get the value of the Src.
816     SVal srcVal = state->getSVal(Source);
817 
818     // Ensure the source is not null. If it is NULL there will be a
819     // NULL pointer dereference.
820     state = checkNonNull(C, state, Source, srcVal);
821     if (!state)
822       return;
823 
824     // Ensure the accesses are valid and that the buffers do not overlap.
825     state = CheckBufferAccess(C, state, Size, Dest, Source,
826                               /* FirstIsDst = */ true);
827     if (Restricted)
828       state = CheckOverlap(C, state, Size, Dest, Source);
829 
830     if (!state)
831       return;
832 
833     // If this is mempcpy, get the byte after the last byte copied and
834     // bind the expr.
835     if (IsMempcpy) {
836       loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
837       assert(destRegVal && "Destination should be a known MemRegionVal here");
838 
839       // Get the length to copy.
840       NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
841 
842       if (lenValNonLoc) {
843         // Get the byte after the last byte copied.
844         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
845                                                           *destRegVal,
846                                                           *lenValNonLoc,
847                                                           Dest->getType());
848 
849         // The byte after the last byte copied is the return value.
850         state = state->BindExpr(CE, lastElement);
851       } else {
852         // If we don't know how much we copied, we can at least
853         // conjure a return value for later.
854         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
855         SVal result =
856           C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
857         state = state->BindExpr(CE, result);
858       }
859 
860     } else {
861       // All other copies return the destination buffer.
862       // (Well, bcopy() has a void return type, but this won't hurt.)
863       state = state->BindExpr(CE, destVal);
864     }
865 
866     // Invalidate the destination.
867     // FIXME: Even if we can't perfectly model the copy, we should see if we
868     // can use LazyCompoundVals to copy the source values into the destination.
869     // This would probably remove any existing bindings past the end of the
870     // copied region, but that's still an improvement over blank invalidation.
871     state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
872     C.addTransition(state);
873   }
874 }
875 
876 
877 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
878   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
879   // The return value is the address of the destination buffer.
880   const Expr *Dest = CE->getArg(0);
881   const GRState *state = C.getState();
882 
883   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
884 }
885 
886 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
887   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
888   // The return value is a pointer to the byte following the last written byte.
889   const Expr *Dest = CE->getArg(0);
890   const GRState *state = C.getState();
891 
892   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
893 }
894 
895 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
896   // void *memmove(void *dst, const void *src, size_t n);
897   // The return value is the address of the destination buffer.
898   const Expr *Dest = CE->getArg(0);
899   const GRState *state = C.getState();
900 
901   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
902 }
903 
904 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
905   // void bcopy(const void *src, void *dst, size_t n);
906   evalCopyCommon(C, CE, C.getState(),
907                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
908 }
909 
910 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
911   // int memcmp(const void *s1, const void *s2, size_t n);
912   const Expr *Left = CE->getArg(0);
913   const Expr *Right = CE->getArg(1);
914   const Expr *Size = CE->getArg(2);
915 
916   const GRState *state = C.getState();
917   SValBuilder &svalBuilder = C.getSValBuilder();
918 
919   // See if the size argument is zero.
920   SVal sizeVal = state->getSVal(Size);
921   QualType sizeTy = Size->getType();
922 
923   const GRState *stateZeroSize, *stateNonZeroSize;
924   llvm::tie(stateZeroSize, stateNonZeroSize) =
925     assumeZero(C, state, sizeVal, sizeTy);
926 
927   // If the size can be zero, the result will be 0 in that case, and we don't
928   // have to check either of the buffers.
929   if (stateZeroSize) {
930     state = stateZeroSize;
931     state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
932     C.addTransition(state);
933   }
934 
935   // If the size can be nonzero, we have to check the other arguments.
936   if (stateNonZeroSize) {
937     state = stateNonZeroSize;
938     // If we know the two buffers are the same, we know the result is 0.
939     // First, get the two buffers' addresses. Another checker will have already
940     // made sure they're not undefined.
941     DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left));
942     DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right));
943 
944     // See if they are the same.
945     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
946     const GRState *StSameBuf, *StNotSameBuf;
947     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
948 
949     // If the two arguments might be the same buffer, we know the result is zero,
950     // and we only need to check one size.
951     if (StSameBuf) {
952       state = StSameBuf;
953       state = CheckBufferAccess(C, state, Size, Left);
954       if (state) {
955         state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
956         C.addTransition(state);
957       }
958     }
959 
960     // If the two arguments might be different buffers, we have to check the
961     // size of both of them.
962     if (StNotSameBuf) {
963       state = StNotSameBuf;
964       state = CheckBufferAccess(C, state, Size, Left, Right);
965       if (state) {
966         // The return value is the comparison result, which we don't know.
967         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
968         SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
969         state = state->BindExpr(CE, CmpV);
970         C.addTransition(state);
971       }
972     }
973   }
974 }
975 
976 void CStringChecker::evalstrLength(CheckerContext &C,
977                                    const CallExpr *CE) const {
978   // size_t strlen(const char *s);
979   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
980 }
981 
982 void CStringChecker::evalstrnLength(CheckerContext &C,
983                                     const CallExpr *CE) const {
984   // size_t strnlen(const char *s, size_t maxlen);
985   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
986 }
987 
988 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
989                                          bool IsStrnlen) const {
990   const GRState *state = C.getState();
991 
992   if (IsStrnlen) {
993     const Expr *maxlenExpr = CE->getArg(1);
994     SVal maxlenVal = state->getSVal(maxlenExpr);
995 
996     const GRState *stateZeroSize, *stateNonZeroSize;
997     llvm::tie(stateZeroSize, stateNonZeroSize) =
998       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
999 
1000     // If the size can be zero, the result will be 0 in that case, and we don't
1001     // have to check the string itself.
1002     if (stateZeroSize) {
1003       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1004       stateZeroSize = stateZeroSize->BindExpr(CE, zero);
1005       C.addTransition(stateZeroSize);
1006     }
1007 
1008     // If the size is GUARANTEED to be zero, we're done!
1009     if (!stateNonZeroSize)
1010       return;
1011 
1012     // Otherwise, record the assumption that the size is nonzero.
1013     state = stateNonZeroSize;
1014   }
1015 
1016   // Check that the string argument is non-null.
1017   const Expr *Arg = CE->getArg(0);
1018   SVal ArgVal = state->getSVal(Arg);
1019 
1020   state = checkNonNull(C, state, Arg, ArgVal);
1021 
1022   if (!state)
1023     return;
1024 
1025   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1026 
1027   // If the argument isn't a valid C string, there's no valid state to
1028   // transition to.
1029   if (strLength.isUndef())
1030     return;
1031 
1032   DefinedOrUnknownSVal result = UnknownVal();
1033 
1034   // If the check is for strnlen() then bind the return value to no more than
1035   // the maxlen value.
1036   if (IsStrnlen) {
1037     QualType cmpTy = C.getSValBuilder().getContext().IntTy;
1038 
1039     // It's a little unfortunate to be getting this again,
1040     // but it's not that expensive...
1041     const Expr *maxlenExpr = CE->getArg(1);
1042     SVal maxlenVal = state->getSVal(maxlenExpr);
1043 
1044     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1045     NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1046 
1047     if (strLengthNL && maxlenValNL) {
1048       const GRState *stateStringTooLong, *stateStringNotTooLong;
1049 
1050       // Check if the strLength is greater than the maxlen.
1051       llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1052         state->assume(cast<DefinedOrUnknownSVal>
1053                       (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1054                                                       *strLengthNL,
1055                                                       *maxlenValNL,
1056                                                       cmpTy)));
1057 
1058       if (stateStringTooLong && !stateStringNotTooLong) {
1059         // If the string is longer than maxlen, return maxlen.
1060         result = *maxlenValNL;
1061       } else if (stateStringNotTooLong && !stateStringTooLong) {
1062         // If the string is shorter than maxlen, return its length.
1063         result = *strLengthNL;
1064       }
1065     }
1066 
1067     if (result.isUnknown()) {
1068       // If we don't have enough information for a comparison, there's
1069       // no guarantee the full string length will actually be returned.
1070       // All we know is the return value is the min of the string length
1071       // and the limit. This is better than nothing.
1072       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1073       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1074       NonLoc *resultNL = cast<NonLoc>(&result);
1075 
1076       if (strLengthNL) {
1077         state = state->assume(cast<DefinedOrUnknownSVal>
1078                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1079                                                               *resultNL,
1080                                                               *strLengthNL,
1081                                                               cmpTy)), true);
1082       }
1083 
1084       if (maxlenValNL) {
1085         state = state->assume(cast<DefinedOrUnknownSVal>
1086                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1087                                                               *resultNL,
1088                                                               *maxlenValNL,
1089                                                               cmpTy)), true);
1090       }
1091     }
1092 
1093   } else {
1094     // This is a plain strlen(), not strnlen().
1095     result = cast<DefinedOrUnknownSVal>(strLength);
1096 
1097     // If we don't know the length of the string, conjure a return
1098     // value, so it can be used in constraints, at least.
1099     if (result.isUnknown()) {
1100       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1101       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1102     }
1103   }
1104 
1105   // Bind the return value.
1106   assert(!result.isUnknown() && "Should have conjured a value by now");
1107   state = state->BindExpr(CE, result);
1108   C.addTransition(state);
1109 }
1110 
1111 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1112   // char *strcpy(char *restrict dst, const char *restrict src);
1113   evalStrcpyCommon(C, CE,
1114                    /* returnEnd = */ false,
1115                    /* isBounded = */ false,
1116                    /* isAppending = */ false);
1117 }
1118 
1119 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1120   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1121   evalStrcpyCommon(C, CE,
1122                    /* returnEnd = */ false,
1123                    /* isBounded = */ true,
1124                    /* isAppending = */ false);
1125 }
1126 
1127 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1128   // char *stpcpy(char *restrict dst, const char *restrict src);
1129   evalStrcpyCommon(C, CE,
1130                    /* returnEnd = */ true,
1131                    /* isBounded = */ false,
1132                    /* isAppending = */ false);
1133 }
1134 
1135 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1136   //char *strcat(char *restrict s1, const char *restrict s2);
1137   evalStrcpyCommon(C, CE,
1138                    /* returnEnd = */ false,
1139                    /* isBounded = */ false,
1140                    /* isAppending = */ true);
1141 }
1142 
1143 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1144   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1145   evalStrcpyCommon(C, CE,
1146                    /* returnEnd = */ false,
1147                    /* isBounded = */ true,
1148                    /* isAppending = */ true);
1149 }
1150 
1151 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1152                                       bool returnEnd, bool isBounded,
1153                                       bool isAppending) const {
1154   const GRState *state = C.getState();
1155 
1156   // Check that the destination is non-null.
1157   const Expr *Dst = CE->getArg(0);
1158   SVal DstVal = state->getSVal(Dst);
1159 
1160   state = checkNonNull(C, state, Dst, DstVal);
1161   if (!state)
1162     return;
1163 
1164   // Check that the source is non-null.
1165   const Expr *srcExpr = CE->getArg(1);
1166   SVal srcVal = state->getSVal(srcExpr);
1167   state = checkNonNull(C, state, srcExpr, srcVal);
1168   if (!state)
1169     return;
1170 
1171   // Get the string length of the source.
1172   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1173 
1174   // If the source isn't a valid C string, give up.
1175   if (strLength.isUndef())
1176     return;
1177 
1178   SValBuilder &svalBuilder = C.getSValBuilder();
1179   QualType cmpTy = svalBuilder.getConditionType();
1180 
1181   SVal amountCopied = UnknownVal();
1182 
1183   // If the function is strncpy, strncat, etc... it is bounded.
1184   if (isBounded) {
1185     // Get the max number of characters to copy.
1186     const Expr *lenExpr = CE->getArg(2);
1187     SVal lenVal = state->getSVal(lenExpr);
1188 
1189     // Protect against misdeclared strncpy().
1190     lenVal = svalBuilder.evalCast(lenVal,
1191                                   svalBuilder.getContext().getSizeType(),
1192                                   lenExpr->getType());
1193 
1194     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1195     NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1196 
1197     // If we know both values, we might be able to figure out how much
1198     // we're copying.
1199     if (strLengthNL && lenValNL) {
1200       const GRState *stateSourceTooLong, *stateSourceNotTooLong;
1201 
1202       // Check if the max number to copy is less than the length of the src.
1203       llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1204         state->assume(cast<DefinedOrUnknownSVal>
1205                       (svalBuilder.evalBinOpNN(state, BO_GT, *strLengthNL,
1206                                                *lenValNL, cmpTy)));
1207 
1208       if (stateSourceTooLong && !stateSourceNotTooLong) {
1209         // Max number to copy is less than the length of the src, so the actual
1210         // strLength copied is the max number arg.
1211         state = stateSourceTooLong;
1212         amountCopied = lenVal;
1213 
1214       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1215         // The source buffer entirely fits in the bound.
1216         state = stateSourceNotTooLong;
1217         amountCopied = strLength;
1218       }
1219     }
1220 
1221     // If we couldn't pin down the copy length, at least bound it.
1222     if (amountCopied.isUnknown()) {
1223       // Try to get a "hypothetical" string length symbol, which we can later
1224       // set as a real value if that turns out to be the case.
1225       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1226       assert(!amountCopied.isUndef());
1227 
1228       if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1229         if (lenValNL) {
1230           // amountCopied <= lenVal
1231           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1232                                                              *amountCopiedNL,
1233                                                              *lenValNL,
1234                                                              cmpTy);
1235           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1236                                 true);
1237           if (!state)
1238             return;
1239         }
1240 
1241         if (strLengthNL) {
1242           // amountCopied <= strlen(source)
1243           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1244                                                            *amountCopiedNL,
1245                                                            *strLengthNL,
1246                                                            cmpTy);
1247           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1248                                 true);
1249           if (!state)
1250             return;
1251         }
1252       }
1253     }
1254 
1255   } else {
1256     // The function isn't bounded. The amount copied should match the length
1257     // of the source buffer.
1258     amountCopied = strLength;
1259   }
1260 
1261   assert(state);
1262 
1263   // This represents the number of characters copied into the destination
1264   // buffer. (It may not actually be the strlen if the destination buffer
1265   // is not terminated.)
1266   SVal finalStrLength = UnknownVal();
1267 
1268   // If this is an appending function (strcat, strncat...) then set the
1269   // string length to strlen(src) + strlen(dst) since the buffer will
1270   // ultimately contain both.
1271   if (isAppending) {
1272     // Get the string length of the destination, or give up.
1273     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1274     if (dstStrLength.isUndef())
1275       return;
1276 
1277     QualType sizeTy = svalBuilder.getContext().getSizeType();
1278 
1279     NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1280     NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1281 
1282     // If we know both string lengths, we might know the final string length.
1283     if (srcStrLengthNL && dstStrLengthNL) {
1284       // Make sure the two lengths together don't overflow a size_t.
1285       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1286       if (!state)
1287         return;
1288 
1289       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1290                                                *dstStrLengthNL, sizeTy);
1291     }
1292 
1293     // If we couldn't get a single value for the final string length,
1294     // we can at least bound it by the individual lengths.
1295     if (finalStrLength.isUnknown()) {
1296       // Try to get a "hypothetical" string length symbol, which we can later
1297       // set as a real value if that turns out to be the case.
1298       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1299       assert(!finalStrLength.isUndef());
1300 
1301       if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1302         if (srcStrLengthNL) {
1303           // finalStrLength >= srcStrLength
1304           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1305                                                         *finalStrLengthNL,
1306                                                         *srcStrLengthNL,
1307                                                         cmpTy);
1308           state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1309                                 true);
1310           if (!state)
1311             return;
1312         }
1313 
1314         if (dstStrLengthNL) {
1315           // finalStrLength >= dstStrLength
1316           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1317                                                       *finalStrLengthNL,
1318                                                       *dstStrLengthNL,
1319                                                       cmpTy);
1320           state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1321                                 true);
1322           if (!state)
1323             return;
1324         }
1325       }
1326     }
1327 
1328   } else {
1329     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1330     // the final string length will match the input string length.
1331     finalStrLength = amountCopied;
1332   }
1333 
1334   // The final result of the function will either be a pointer past the last
1335   // copied element, or a pointer to the start of the destination buffer.
1336   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1337 
1338   assert(state);
1339 
1340   // If the destination is a MemRegion, try to check for a buffer overflow and
1341   // record the new string length.
1342   if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1343     // If the final length is known, we can check for an overflow.
1344     if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1345       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1346                                                  *knownStrLength,
1347                                                  Dst->getType());
1348 
1349       state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true);
1350       if (!state)
1351         return;
1352 
1353       // If this is a stpcpy-style copy, the last element is the return value.
1354       if (returnEnd)
1355         Result = lastElement;
1356     }
1357 
1358     // Invalidate the destination. This must happen before we set the C string
1359     // length because invalidation will clear the length.
1360     // FIXME: Even if we can't perfectly model the copy, we should see if we
1361     // can use LazyCompoundVals to copy the source values into the destination.
1362     // This would probably remove any existing bindings past the end of the
1363     // string, but that's still an improvement over blank invalidation.
1364     state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1365 
1366     // Set the C string length of the destination.
1367     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1368   }
1369 
1370   assert(state);
1371 
1372   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1373   // overflow, we still need a result. Conjure a return value.
1374   if (returnEnd && Result.isUnknown()) {
1375     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1376     Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
1377   }
1378 
1379   // Set the return value.
1380   state = state->BindExpr(CE, Result);
1381   C.addTransition(state);
1382 }
1383 
1384 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1385   //int strcmp(const char *restrict s1, const char *restrict s2);
1386   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1387 }
1388 
1389 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1390   //int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
1391   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1392 }
1393 
1394 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1395                                     const CallExpr *CE) const {
1396   //int strcasecmp(const char *restrict s1, const char *restrict s2);
1397   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1398 }
1399 
1400 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1401                                      const CallExpr *CE) const {
1402   //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n);
1403   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1404 }
1405 
1406 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1407                                       bool isBounded, bool ignoreCase) const {
1408   const GRState *state = C.getState();
1409 
1410   // Check that the first string is non-null
1411   const Expr *s1 = CE->getArg(0);
1412   SVal s1Val = state->getSVal(s1);
1413   state = checkNonNull(C, state, s1, s1Val);
1414   if (!state)
1415     return;
1416 
1417   // Check that the second string is non-null.
1418   const Expr *s2 = CE->getArg(1);
1419   SVal s2Val = state->getSVal(s2);
1420   state = checkNonNull(C, state, s2, s2Val);
1421   if (!state)
1422     return;
1423 
1424   // Get the string length of the first string or give up.
1425   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1426   if (s1Length.isUndef())
1427     return;
1428 
1429   // Get the string length of the second string or give up.
1430   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1431   if (s2Length.isUndef())
1432     return;
1433 
1434   // Get the string literal of the first string.
1435   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1436   if (!s1StrLiteral)
1437     return;
1438   llvm::StringRef s1StrRef = s1StrLiteral->getString();
1439 
1440   // Get the string literal of the second string.
1441   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1442   if (!s2StrLiteral)
1443     return;
1444   llvm::StringRef s2StrRef = s2StrLiteral->getString();
1445 
1446   int result;
1447   if (isBounded) {
1448     // Get the max number of characters to compare.
1449     const Expr *lenExpr = CE->getArg(2);
1450     SVal lenVal = state->getSVal(lenExpr);
1451 
1452     // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt
1453     // then give up, otherwise get the value and use it as the bounds.
1454     nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal);
1455     if (!CI)
1456       return;
1457     llvm::APSInt lenInt(CI->getValue());
1458 
1459     // Create substrings of each to compare the prefix.
1460     s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1461     s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1462   }
1463 
1464   if (ignoreCase) {
1465     // Compare string 1 to string 2 the same way strcasecmp() does.
1466     result = s1StrRef.compare_lower(s2StrRef);
1467   } else {
1468     // Compare string 1 to string 2 the same way strcmp() does.
1469     result = s1StrRef.compare(s2StrRef);
1470   }
1471 
1472   // Build the SVal of the comparison to bind the return value.
1473   SValBuilder &svalBuilder = C.getSValBuilder();
1474   QualType intTy = svalBuilder.getContext().IntTy;
1475   SVal resultVal = svalBuilder.makeIntVal(result, intTy);
1476 
1477   // Bind the return value of the expression.
1478   // Set the return value.
1479   state = state->BindExpr(CE, resultVal);
1480   C.addTransition(state);
1481 }
1482 
1483 //===----------------------------------------------------------------------===//
1484 // The driver method, and other Checker callbacks.
1485 //===----------------------------------------------------------------------===//
1486 
1487 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1488   // Get the callee.  All the functions we care about are C functions
1489   // with simple identifiers.
1490   const GRState *state = C.getState();
1491   const Expr *Callee = CE->getCallee();
1492   const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl();
1493 
1494   if (!FD)
1495     return false;
1496 
1497   // Get the name of the callee. If it's a builtin, strip off the prefix.
1498   IdentifierInfo *II = FD->getIdentifier();
1499   if (!II)   // if no identifier, not a simple C function
1500     return false;
1501   llvm::StringRef Name = II->getName();
1502   if (Name.startswith("__builtin_"))
1503     Name = Name.substr(10);
1504 
1505   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
1506     .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy)
1507     .Cases("mempcpy", "__mempcpy_chk", &CStringChecker::evalMempcpy)
1508     .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp)
1509     .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove)
1510     .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy)
1511     //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy)
1512     .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy)
1513     .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat)
1514     .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat)
1515     .Case("strlen", &CStringChecker::evalstrLength)
1516     .Case("strnlen", &CStringChecker::evalstrnLength)
1517     .Case("strcmp", &CStringChecker::evalStrcmp)
1518     .Case("strncmp", &CStringChecker::evalStrncmp)
1519     .Case("strcasecmp", &CStringChecker::evalStrcasecmp)
1520     .Case("strncasecmp", &CStringChecker::evalStrncasecmp)
1521     .Case("bcopy", &CStringChecker::evalBcopy)
1522     .Default(NULL);
1523 
1524   // If the callee isn't a string function, let another checker handle it.
1525   if (!evalFunction)
1526     return false;
1527 
1528   // Check and evaluate the call.
1529   (this->*evalFunction)(C, CE);
1530   return true;
1531 }
1532 
1533 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1534   // Record string length for char a[] = "abc";
1535   const GRState *state = C.getState();
1536 
1537   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1538        I != E; ++I) {
1539     const VarDecl *D = dyn_cast<VarDecl>(*I);
1540     if (!D)
1541       continue;
1542 
1543     // FIXME: Handle array fields of structs.
1544     if (!D->getType()->isArrayType())
1545       continue;
1546 
1547     const Expr *Init = D->getInit();
1548     if (!Init)
1549       continue;
1550     if (!isa<StringLiteral>(Init))
1551       continue;
1552 
1553     Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext());
1554     const MemRegion *MR = VarLoc.getAsRegion();
1555     if (!MR)
1556       continue;
1557 
1558     SVal StrVal = state->getSVal(Init);
1559     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1560     DefinedOrUnknownSVal strLength
1561       = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1562 
1563     state = state->set<CStringLength>(MR, strLength);
1564   }
1565 
1566   C.addTransition(state);
1567 }
1568 
1569 bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const {
1570   CStringLength::EntryMap Entries = state->get<CStringLength>();
1571   return !Entries.isEmpty();
1572 }
1573 
1574 const GRState *
1575 CStringChecker::checkRegionChanges(const GRState *state,
1576                                    const StoreManager::InvalidatedSymbols *,
1577                                    const MemRegion * const *Begin,
1578                                    const MemRegion * const *End) const {
1579   CStringLength::EntryMap Entries = state->get<CStringLength>();
1580   if (Entries.isEmpty())
1581     return state;
1582 
1583   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1584   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1585 
1586   // First build sets for the changed regions and their super-regions.
1587   for ( ; Begin != End; ++Begin) {
1588     const MemRegion *MR = *Begin;
1589     Invalidated.insert(MR);
1590 
1591     SuperRegions.insert(MR);
1592     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1593       MR = SR->getSuperRegion();
1594       SuperRegions.insert(MR);
1595     }
1596   }
1597 
1598   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1599 
1600   // Then loop over the entries in the current state.
1601   for (CStringLength::EntryMap::iterator I = Entries.begin(),
1602        E = Entries.end(); I != E; ++I) {
1603     const MemRegion *MR = I.getKey();
1604 
1605     // Is this entry for a super-region of a changed region?
1606     if (SuperRegions.count(MR)) {
1607       Entries = F.remove(Entries, MR);
1608       continue;
1609     }
1610 
1611     // Is this entry for a sub-region of a changed region?
1612     const MemRegion *Super = MR;
1613     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1614       Super = SR->getSuperRegion();
1615       if (Invalidated.count(Super)) {
1616         Entries = F.remove(Entries, MR);
1617         break;
1618       }
1619     }
1620   }
1621 
1622   return state->set<CStringLength>(Entries);
1623 }
1624 
1625 void CStringChecker::checkLiveSymbols(const GRState *state,
1626                                       SymbolReaper &SR) const {
1627   // Mark all symbols in our string length map as valid.
1628   CStringLength::EntryMap Entries = state->get<CStringLength>();
1629 
1630   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1631        I != E; ++I) {
1632     SVal Len = I.getData();
1633 
1634     for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end();
1635          si != se; ++si)
1636       SR.markInUse(*si);
1637   }
1638 }
1639 
1640 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1641                                       CheckerContext &C) const {
1642   if (!SR.hasDeadSymbols())
1643     return;
1644 
1645   const GRState *state = C.getState();
1646   CStringLength::EntryMap Entries = state->get<CStringLength>();
1647   if (Entries.isEmpty())
1648     return;
1649 
1650   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1651   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1652        I != E; ++I) {
1653     SVal Len = I.getData();
1654     if (SymbolRef Sym = Len.getAsSymbol()) {
1655       if (SR.isDead(Sym))
1656         Entries = F.remove(Entries, I.getKey());
1657     }
1658   }
1659 
1660   state = state->set<CStringLength>(Entries);
1661   C.generateNode(state);
1662 }
1663 
1664 void ento::registerCStringChecker(CheckerManager &mgr) {
1665   mgr.registerChecker<CStringChecker>();
1666 }
1667