xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision c026370858e31e99a8190e4c945676fb2f3e941f)
1 //= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "clang/StaticAnalyzer/Core/Checker.h"
17 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h"
21 #include "llvm/ADT/StringSwitch.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
26 namespace {
27 class CStringChecker : public Checker< eval::Call,
28                                          check::PreStmt<DeclStmt>,
29                                          check::LiveSymbols,
30                                          check::DeadSymbols,
31                                          check::RegionChanges
32                                          > {
33   mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite,
34                                    BT_Overlap, BT_NotCString,
35                                    BT_AdditionOverflow;
36 public:
37   static void *getTag() { static int tag; return &tag; }
38 
39   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
40   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
41   void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const;
42   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
43   bool wantsRegionChangeUpdate(const GRState *state) const;
44 
45   const GRState *checkRegionChanges(const GRState *state,
46                                     const StoreManager::InvalidatedSymbols *,
47                                     const MemRegion * const *Begin,
48                                     const MemRegion * const *End) const;
49 
50   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
51                                           const CallExpr *) const;
52 
53   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
54   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
55   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
56   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
57   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
58                       const GRState *state,
59                       const Expr *Size, const Expr *Source, const Expr *Dest,
60                       bool Restricted = false,
61                       bool IsMempcpy = false) const;
62 
63   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
64 
65   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
66   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
67   void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
68                            bool IsStrnlen = false) const;
69 
70   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
71   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
72   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
73   void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd,
74                         bool isBounded, bool isAppending) const;
75 
76   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
77   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
78 
79   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
80   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
81   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
82   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
83   void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
84                         bool isBounded = false, bool ignoreCase = false) const;
85 
86   // Utility methods
87   std::pair<const GRState*, const GRState*>
88   static assumeZero(CheckerContext &C,
89                     const GRState *state, SVal V, QualType Ty);
90 
91   static const GRState *setCStringLength(const GRState *state,
92                                          const MemRegion *MR, SVal strLength);
93   static SVal getCStringLengthForRegion(CheckerContext &C,
94                                         const GRState *&state,
95                                         const Expr *Ex, const MemRegion *MR,
96                                         bool hypothetical);
97   SVal getCStringLength(CheckerContext &C, const GRState *&state,
98                         const Expr *Ex, SVal Buf,
99                         bool hypothetical = false) const;
100 
101   const StringLiteral *getCStringLiteral(CheckerContext &C,
102                                          const GRState *&state,
103                                          const Expr *expr,
104                                          SVal val) const;
105 
106   static const GRState *InvalidateBuffer(CheckerContext &C,
107                                          const GRState *state,
108                                          const Expr *Ex, SVal V);
109 
110   static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
111                               const MemRegion *MR);
112 
113   // Re-usable checks
114   const GRState *checkNonNull(CheckerContext &C, const GRState *state,
115                                const Expr *S, SVal l) const;
116   const GRState *CheckLocation(CheckerContext &C, const GRState *state,
117                                const Expr *S, SVal l,
118                                bool IsDestination = false) const;
119   const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
120                                    const Expr *Size,
121                                    const Expr *FirstBuf,
122                                    const Expr *SecondBuf = NULL,
123                                    bool FirstIsDestination = false) const;
124   const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
125                               const Expr *Size, const Expr *First,
126                               const Expr *Second) const;
127   void emitOverlapBug(CheckerContext &C, const GRState *state,
128                       const Stmt *First, const Stmt *Second) const;
129   const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state,
130                                        NonLoc left, NonLoc right) const;
131 };
132 
133 class CStringLength {
134 public:
135   typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
136 };
137 } //end anonymous namespace
138 
139 namespace clang {
140 namespace ento {
141   template <>
142   struct GRStateTrait<CStringLength>
143     : public GRStatePartialTrait<CStringLength::EntryMap> {
144     static void *GDMIndex() { return CStringChecker::getTag(); }
145   };
146 }
147 }
148 
149 //===----------------------------------------------------------------------===//
150 // Individual checks and utility methods.
151 //===----------------------------------------------------------------------===//
152 
153 std::pair<const GRState*, const GRState*>
154 CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V,
155                            QualType Ty) {
156   DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
157   if (!val)
158     return std::pair<const GRState*, const GRState *>(state, state);
159 
160   SValBuilder &svalBuilder = C.getSValBuilder();
161   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
162   return state->assume(svalBuilder.evalEQ(state, *val, zero));
163 }
164 
165 const GRState *CStringChecker::checkNonNull(CheckerContext &C,
166                                             const GRState *state,
167                                             const Expr *S, SVal l) const {
168   // If a previous check has failed, propagate the failure.
169   if (!state)
170     return NULL;
171 
172   const GRState *stateNull, *stateNonNull;
173   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
174 
175   if (stateNull && !stateNonNull) {
176     ExplodedNode *N = C.generateSink(stateNull);
177     if (!N)
178       return NULL;
179 
180     if (!BT_Null)
181       BT_Null.reset(new BuiltinBug("API",
182         "Null pointer argument in call to byte string function"));
183 
184     // Generate a report for this bug.
185     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
186     EnhancedBugReport *report = new EnhancedBugReport(*BT,
187                                                       BT->getDescription(), N);
188 
189     report->addRange(S->getSourceRange());
190     report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S);
191     C.EmitReport(report);
192     return NULL;
193   }
194 
195   // From here on, assume that the value is non-null.
196   assert(stateNonNull);
197   return stateNonNull;
198 }
199 
200 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
201 const GRState *CStringChecker::CheckLocation(CheckerContext &C,
202                                              const GRState *state,
203                                              const Expr *S, SVal l,
204                                              bool IsDestination) const {
205   // If a previous check has failed, propagate the failure.
206   if (!state)
207     return NULL;
208 
209   // Check for out of bound array element access.
210   const MemRegion *R = l.getAsRegion();
211   if (!R)
212     return state;
213 
214   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
215   if (!ER)
216     return state;
217 
218   assert(ER->getValueType() == C.getASTContext().CharTy &&
219     "CheckLocation should only be called with char* ElementRegions");
220 
221   // Get the size of the array.
222   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
223   SValBuilder &svalBuilder = C.getSValBuilder();
224   SVal Extent =
225     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
226   DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
227 
228   // Get the index of the accessed element.
229   DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
230 
231   const GRState *StInBound = state->assumeInBound(Idx, Size, true);
232   const GRState *StOutBound = state->assumeInBound(Idx, Size, false);
233   if (StOutBound && !StInBound) {
234     ExplodedNode *N = C.generateSink(StOutBound);
235     if (!N)
236       return NULL;
237 
238     BuiltinBug *BT;
239     if (IsDestination) {
240       if (!BT_BoundsWrite) {
241         BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access",
242           "Byte string function overflows destination buffer"));
243       }
244       BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get());
245     } else {
246       if (!BT_Bounds) {
247         BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
248           "Byte string function accesses out-of-bound array element"));
249       }
250       BT = static_cast<BuiltinBug*>(BT_Bounds.get());
251     }
252 
253     // FIXME: It would be nice to eventually make this diagnostic more clear,
254     // e.g., by referencing the original declaration or by saying *why* this
255     // reference is outside the range.
256 
257     // Generate a report for this bug.
258     RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
259 
260     report->addRange(S->getSourceRange());
261     C.EmitReport(report);
262     return NULL;
263   }
264 
265   // Array bound check succeeded.  From this point forward the array bound
266   // should always succeed.
267   return StInBound;
268 }
269 
270 const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
271                                                  const GRState *state,
272                                                  const Expr *Size,
273                                                  const Expr *FirstBuf,
274                                                  const Expr *SecondBuf,
275                                                 bool FirstIsDestination) const {
276   // If a previous check has failed, propagate the failure.
277   if (!state)
278     return NULL;
279 
280   SValBuilder &svalBuilder = C.getSValBuilder();
281   ASTContext &Ctx = svalBuilder.getContext();
282 
283   QualType sizeTy = Size->getType();
284   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
285 
286   // Check that the first buffer is non-null.
287   SVal BufVal = state->getSVal(FirstBuf);
288   state = checkNonNull(C, state, FirstBuf, BufVal);
289   if (!state)
290     return NULL;
291 
292   // Get the access length and make sure it is known.
293   SVal LengthVal = state->getSVal(Size);
294   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
295   if (!Length)
296     return state;
297 
298   // Compute the offset of the last element to be accessed: size-1.
299   NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
300   NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
301                                                     *Length, One, sizeTy));
302 
303   // Check that the first buffer is sufficiently long.
304   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
305   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
306     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
307                                           LastOffset, PtrTy);
308     state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
309 
310     // If the buffer isn't large enough, abort.
311     if (!state)
312       return NULL;
313   }
314 
315   // If there's a second buffer, check it as well.
316   if (SecondBuf) {
317     BufVal = state->getSVal(SecondBuf);
318     state = checkNonNull(C, state, SecondBuf, BufVal);
319     if (!state)
320       return NULL;
321 
322     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
323     if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
324       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
325                                             LastOffset, PtrTy);
326       state = CheckLocation(C, state, SecondBuf, BufEnd);
327     }
328   }
329 
330   // Large enough or not, return this state!
331   return state;
332 }
333 
334 const GRState *CStringChecker::CheckOverlap(CheckerContext &C,
335                                             const GRState *state,
336                                             const Expr *Size,
337                                             const Expr *First,
338                                             const Expr *Second) const {
339   // Do a simple check for overlap: if the two arguments are from the same
340   // buffer, see if the end of the first is greater than the start of the second
341   // or vice versa.
342 
343   // If a previous check has failed, propagate the failure.
344   if (!state)
345     return NULL;
346 
347   const GRState *stateTrue, *stateFalse;
348 
349   // Get the buffer values and make sure they're known locations.
350   SVal firstVal = state->getSVal(First);
351   SVal secondVal = state->getSVal(Second);
352 
353   Loc *firstLoc = dyn_cast<Loc>(&firstVal);
354   if (!firstLoc)
355     return state;
356 
357   Loc *secondLoc = dyn_cast<Loc>(&secondVal);
358   if (!secondLoc)
359     return state;
360 
361   // Are the two values the same?
362   SValBuilder &svalBuilder = C.getSValBuilder();
363   llvm::tie(stateTrue, stateFalse) =
364     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
365 
366   if (stateTrue && !stateFalse) {
367     // If the values are known to be equal, that's automatically an overlap.
368     emitOverlapBug(C, stateTrue, First, Second);
369     return NULL;
370   }
371 
372   // assume the two expressions are not equal.
373   assert(stateFalse);
374   state = stateFalse;
375 
376   // Which value comes first?
377   QualType cmpTy = svalBuilder.getConditionType();
378   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
379                                          *firstLoc, *secondLoc, cmpTy);
380   DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
381   if (!reverseTest)
382     return state;
383 
384   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
385   if (stateTrue) {
386     if (stateFalse) {
387       // If we don't know which one comes first, we can't perform this test.
388       return state;
389     } else {
390       // Switch the values so that firstVal is before secondVal.
391       Loc *tmpLoc = firstLoc;
392       firstLoc = secondLoc;
393       secondLoc = tmpLoc;
394 
395       // Switch the Exprs as well, so that they still correspond.
396       const Expr *tmpExpr = First;
397       First = Second;
398       Second = tmpExpr;
399     }
400   }
401 
402   // Get the length, and make sure it too is known.
403   SVal LengthVal = state->getSVal(Size);
404   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
405   if (!Length)
406     return state;
407 
408   // Convert the first buffer's start address to char*.
409   // Bail out if the cast fails.
410   ASTContext &Ctx = svalBuilder.getContext();
411   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
412   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
413                                          First->getType());
414   Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
415   if (!FirstStartLoc)
416     return state;
417 
418   // Compute the end of the first buffer. Bail out if THAT fails.
419   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
420                                  *FirstStartLoc, *Length, CharPtrTy);
421   Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
422   if (!FirstEndLoc)
423     return state;
424 
425   // Is the end of the first buffer past the start of the second buffer?
426   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
427                                 *FirstEndLoc, *secondLoc, cmpTy);
428   DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
429   if (!OverlapTest)
430     return state;
431 
432   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
433 
434   if (stateTrue && !stateFalse) {
435     // Overlap!
436     emitOverlapBug(C, stateTrue, First, Second);
437     return NULL;
438   }
439 
440   // assume the two expressions don't overlap.
441   assert(stateFalse);
442   return stateFalse;
443 }
444 
445 void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state,
446                                   const Stmt *First, const Stmt *Second) const {
447   ExplodedNode *N = C.generateSink(state);
448   if (!N)
449     return;
450 
451   if (!BT_Overlap)
452     BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
453 
454   // Generate a report for this bug.
455   RangedBugReport *report =
456     new RangedBugReport(*BT_Overlap,
457       "Arguments must not be overlapping buffers", N);
458   report->addRange(First->getSourceRange());
459   report->addRange(Second->getSourceRange());
460 
461   C.EmitReport(report);
462 }
463 
464 const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C,
465                                                      const GRState *state,
466                                                      NonLoc left,
467                                                      NonLoc right) const {
468   // If a previous check has failed, propagate the failure.
469   if (!state)
470     return NULL;
471 
472   SValBuilder &svalBuilder = C.getSValBuilder();
473   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
474 
475   QualType sizeTy = svalBuilder.getContext().getSizeType();
476   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
477   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
478 
479   SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
480                                                sizeTy);
481 
482   if (maxMinusRight.isUnknownOrUndef()) {
483     // Try switching the operands. (The order of these two assignments is
484     // important!)
485     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
486                                             sizeTy);
487     left = right;
488   }
489 
490   if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
491     QualType cmpTy = svalBuilder.getConditionType();
492     // If left > max - right, we have an overflow.
493     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
494                                                 *maxMinusRightNL, cmpTy);
495 
496     const GRState *stateOverflow, *stateOkay;
497     llvm::tie(stateOverflow, stateOkay) =
498       state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
499 
500     if (stateOverflow && !stateOkay) {
501       // We have an overflow. Emit a bug report.
502       ExplodedNode *N = C.generateSink(stateOverflow);
503       if (!N)
504         return NULL;
505 
506       if (!BT_AdditionOverflow)
507         BT_AdditionOverflow.reset(new BuiltinBug("API",
508           "Sum of expressions causes overflow"));
509 
510       llvm::SmallString<120> buf;
511       llvm::raw_svector_ostream os(buf);
512       // This isn't a great error message, but this should never occur in real
513       // code anyway -- you'd have to create a buffer longer than a size_t can
514       // represent, which is sort of a contradiction.
515       os << "This expression will create a string whose length is too big to "
516          << "be represented as a size_t";
517 
518       // Generate a report for this bug.
519       BugReport *report = new BugReport(*BT_AdditionOverflow, os.str(), N);
520       C.EmitReport(report);
521 
522       return NULL;
523     }
524 
525     // From now on, assume an overflow didn't occur.
526     assert(stateOkay);
527     state = stateOkay;
528   }
529 
530   return state;
531 }
532 
533 const GRState *CStringChecker::setCStringLength(const GRState *state,
534                                                 const MemRegion *MR,
535                                                 SVal strLength) {
536   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
537 
538   MR = MR->StripCasts();
539 
540   switch (MR->getKind()) {
541   case MemRegion::StringRegionKind:
542     // FIXME: This can happen if we strcpy() into a string region. This is
543     // undefined [C99 6.4.5p6], but we should still warn about it.
544     return state;
545 
546   case MemRegion::SymbolicRegionKind:
547   case MemRegion::AllocaRegionKind:
548   case MemRegion::VarRegionKind:
549   case MemRegion::FieldRegionKind:
550   case MemRegion::ObjCIvarRegionKind:
551     // These are the types we can currently track string lengths for.
552     break;
553 
554   case MemRegion::ElementRegionKind:
555     // FIXME: Handle element regions by upper-bounding the parent region's
556     // string length.
557     return state;
558 
559   default:
560     // Other regions (mostly non-data) can't have a reliable C string length.
561     // For now, just ignore the change.
562     // FIXME: These are rare but not impossible. We should output some kind of
563     // warning for things like strcpy((char[]){'a', 0}, "b");
564     return state;
565   }
566 
567   if (strLength.isUnknown())
568     return state->remove<CStringLength>(MR);
569 
570   return state->set<CStringLength>(MR, strLength);
571 }
572 
573 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
574                                                const GRState *&state,
575                                                const Expr *Ex,
576                                                const MemRegion *MR,
577                                                bool hypothetical) {
578   if (!hypothetical) {
579     // If there's a recorded length, go ahead and return it.
580     const SVal *Recorded = state->get<CStringLength>(MR);
581     if (Recorded)
582       return *Recorded;
583   }
584 
585   // Otherwise, get a new symbol and update the state.
586   unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
587   SValBuilder &svalBuilder = C.getSValBuilder();
588   QualType sizeTy = svalBuilder.getContext().getSizeType();
589   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
590                                                     MR, Ex, sizeTy, Count);
591 
592   if (!hypothetical)
593     state = state->set<CStringLength>(MR, strLength);
594 
595   return strLength;
596 }
597 
598 SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state,
599                                       const Expr *Ex, SVal Buf,
600                                       bool hypothetical) const {
601   const MemRegion *MR = Buf.getAsRegion();
602   if (!MR) {
603     // If we can't get a region, see if it's something we /know/ isn't a
604     // C string. In the context of locations, the only time we can issue such
605     // a warning is for labels.
606     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
607       if (ExplodedNode *N = C.generateNode(state)) {
608         if (!BT_NotCString)
609           BT_NotCString.reset(new BuiltinBug("API",
610             "Argument is not a null-terminated string."));
611 
612         llvm::SmallString<120> buf;
613         llvm::raw_svector_ostream os(buf);
614         os << "Argument to byte string function is the address of the label '"
615            << Label->getLabel()->getName()
616            << "', which is not a null-terminated string";
617 
618         // Generate a report for this bug.
619         EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
620                                                           os.str(), N);
621 
622         report->addRange(Ex->getSourceRange());
623         C.EmitReport(report);
624       }
625 
626       return UndefinedVal();
627     }
628 
629     // If it's not a region and not a label, give up.
630     return UnknownVal();
631   }
632 
633   // If we have a region, strip casts from it and see if we can figure out
634   // its length. For anything we can't figure out, just return UnknownVal.
635   MR = MR->StripCasts();
636 
637   switch (MR->getKind()) {
638   case MemRegion::StringRegionKind: {
639     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
640     // so we can assume that the byte length is the correct C string length.
641     SValBuilder &svalBuilder = C.getSValBuilder();
642     QualType sizeTy = svalBuilder.getContext().getSizeType();
643     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
644     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
645   }
646   case MemRegion::SymbolicRegionKind:
647   case MemRegion::AllocaRegionKind:
648   case MemRegion::VarRegionKind:
649   case MemRegion::FieldRegionKind:
650   case MemRegion::ObjCIvarRegionKind:
651     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
652   case MemRegion::CompoundLiteralRegionKind:
653     // FIXME: Can we track this? Is it necessary?
654     return UnknownVal();
655   case MemRegion::ElementRegionKind:
656     // FIXME: How can we handle this? It's not good enough to subtract the
657     // offset from the base string length; consider "123\x00567" and &a[5].
658     return UnknownVal();
659   default:
660     // Other regions (mostly non-data) can't have a reliable C string length.
661     // In this case, an error is emitted and UndefinedVal is returned.
662     // The caller should always be prepared to handle this case.
663     if (ExplodedNode *N = C.generateNode(state)) {
664       if (!BT_NotCString)
665         BT_NotCString.reset(new BuiltinBug("API",
666           "Argument is not a null-terminated string."));
667 
668       llvm::SmallString<120> buf;
669       llvm::raw_svector_ostream os(buf);
670 
671       os << "Argument to byte string function is ";
672 
673       if (SummarizeRegion(os, C.getASTContext(), MR))
674         os << ", which is not a null-terminated string";
675       else
676         os << "not a null-terminated string";
677 
678       // Generate a report for this bug.
679       EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
680                                                         os.str(), N);
681 
682       report->addRange(Ex->getSourceRange());
683       C.EmitReport(report);
684     }
685 
686     return UndefinedVal();
687   }
688 }
689 
690 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
691   const GRState *&state, const Expr *expr, SVal val) const {
692 
693   // Get the memory region pointed to by the val.
694   const MemRegion *bufRegion = val.getAsRegion();
695   if (!bufRegion)
696     return NULL;
697 
698   // Strip casts off the memory region.
699   bufRegion = bufRegion->StripCasts();
700 
701   // Cast the memory region to a string region.
702   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
703   if (!strRegion)
704     return NULL;
705 
706   // Return the actual string in the string region.
707   return strRegion->getStringLiteral();
708 }
709 
710 const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
711                                                 const GRState *state,
712                                                 const Expr *E, SVal V) {
713   Loc *L = dyn_cast<Loc>(&V);
714   if (!L)
715     return state;
716 
717   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
718   // some assumptions about the value that CFRefCount can't. Even so, it should
719   // probably be refactored.
720   if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
721     const MemRegion *R = MR->getRegion()->StripCasts();
722 
723     // Are we dealing with an ElementRegion?  If so, we should be invalidating
724     // the super-region.
725     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
726       R = ER->getSuperRegion();
727       // FIXME: What about layers of ElementRegions?
728     }
729 
730     // Invalidate this region.
731     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
732     return state->invalidateRegion(R, E, Count, NULL);
733   }
734 
735   // If we have a non-region value by chance, just remove the binding.
736   // FIXME: is this necessary or correct? This handles the non-Region
737   //  cases.  Is it ever valid to store to these?
738   return state->unbindLoc(*L);
739 }
740 
741 bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
742                                      const MemRegion *MR) {
743   const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
744   if (!TR)
745     return false;
746 
747   switch (TR->getKind()) {
748   case MemRegion::FunctionTextRegionKind: {
749     const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
750     if (FD)
751       os << "the address of the function '" << FD << "'";
752     else
753       os << "the address of a function";
754     return true;
755   }
756   case MemRegion::BlockTextRegionKind:
757     os << "block text";
758     return true;
759   case MemRegion::BlockDataRegionKind:
760     os << "a block";
761     return true;
762   case MemRegion::CXXThisRegionKind:
763   case MemRegion::CXXTempObjectRegionKind:
764     os << "a C++ temp object of type " << TR->getValueType().getAsString();
765     return true;
766   case MemRegion::VarRegionKind:
767     os << "a variable of type" << TR->getValueType().getAsString();
768     return true;
769   case MemRegion::FieldRegionKind:
770     os << "a field of type " << TR->getValueType().getAsString();
771     return true;
772   case MemRegion::ObjCIvarRegionKind:
773     os << "an instance variable of type " << TR->getValueType().getAsString();
774     return true;
775   default:
776     return false;
777   }
778 }
779 
780 //===----------------------------------------------------------------------===//
781 // evaluation of individual function calls.
782 //===----------------------------------------------------------------------===//
783 
784 void CStringChecker::evalCopyCommon(CheckerContext &C,
785                                     const CallExpr *CE,
786                                     const GRState *state,
787                                     const Expr *Size, const Expr *Dest,
788                                     const Expr *Source, bool Restricted,
789                                     bool IsMempcpy) const {
790   // See if the size argument is zero.
791   SVal sizeVal = state->getSVal(Size);
792   QualType sizeTy = Size->getType();
793 
794   const GRState *stateZeroSize, *stateNonZeroSize;
795   llvm::tie(stateZeroSize, stateNonZeroSize) =
796     assumeZero(C, state, sizeVal, sizeTy);
797 
798   // Get the value of the Dest.
799   SVal destVal = state->getSVal(Dest);
800 
801   // If the size is zero, there won't be any actual memory access, so
802   // just bind the return value to the destination buffer and return.
803   if (stateZeroSize) {
804     stateZeroSize = stateZeroSize->BindExpr(CE, destVal);
805     C.addTransition(stateZeroSize);
806   }
807 
808   // If the size can be nonzero, we have to check the other arguments.
809   if (stateNonZeroSize) {
810     state = stateNonZeroSize;
811 
812     // Ensure the destination is not null. If it is NULL there will be a
813     // NULL pointer dereference.
814     state = checkNonNull(C, state, Dest, destVal);
815     if (!state)
816       return;
817 
818     // Get the value of the Src.
819     SVal srcVal = state->getSVal(Source);
820 
821     // Ensure the source is not null. If it is NULL there will be a
822     // NULL pointer dereference.
823     state = checkNonNull(C, state, Source, srcVal);
824     if (!state)
825       return;
826 
827     // Ensure the accesses are valid and that the buffers do not overlap.
828     state = CheckBufferAccess(C, state, Size, Dest, Source,
829                               /* FirstIsDst = */ true);
830     if (Restricted)
831       state = CheckOverlap(C, state, Size, Dest, Source);
832 
833     if (!state)
834       return;
835 
836     // If this is mempcpy, get the byte after the last byte copied and
837     // bind the expr.
838     if (IsMempcpy) {
839       loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
840       assert(destRegVal && "Destination should be a known MemRegionVal here");
841 
842       // Get the length to copy.
843       NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
844 
845       if (lenValNonLoc) {
846         // Get the byte after the last byte copied.
847         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
848                                                           *destRegVal,
849                                                           *lenValNonLoc,
850                                                           Dest->getType());
851 
852         // The byte after the last byte copied is the return value.
853         state = state->BindExpr(CE, lastElement);
854       } else {
855         // If we don't know how much we copied, we can at least
856         // conjure a return value for later.
857         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
858         SVal result =
859           C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
860         state = state->BindExpr(CE, result);
861       }
862 
863     } else {
864       // All other copies return the destination buffer.
865       // (Well, bcopy() has a void return type, but this won't hurt.)
866       state = state->BindExpr(CE, destVal);
867     }
868 
869     // Invalidate the destination.
870     // FIXME: Even if we can't perfectly model the copy, we should see if we
871     // can use LazyCompoundVals to copy the source values into the destination.
872     // This would probably remove any existing bindings past the end of the
873     // copied region, but that's still an improvement over blank invalidation.
874     state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
875     C.addTransition(state);
876   }
877 }
878 
879 
880 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
881   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
882   // The return value is the address of the destination buffer.
883   const Expr *Dest = CE->getArg(0);
884   const GRState *state = C.getState();
885 
886   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
887 }
888 
889 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
890   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
891   // The return value is a pointer to the byte following the last written byte.
892   const Expr *Dest = CE->getArg(0);
893   const GRState *state = C.getState();
894 
895   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
896 }
897 
898 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
899   // void *memmove(void *dst, const void *src, size_t n);
900   // The return value is the address of the destination buffer.
901   const Expr *Dest = CE->getArg(0);
902   const GRState *state = C.getState();
903 
904   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
905 }
906 
907 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
908   // void bcopy(const void *src, void *dst, size_t n);
909   evalCopyCommon(C, CE, C.getState(),
910                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
911 }
912 
913 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
914   // int memcmp(const void *s1, const void *s2, size_t n);
915   const Expr *Left = CE->getArg(0);
916   const Expr *Right = CE->getArg(1);
917   const Expr *Size = CE->getArg(2);
918 
919   const GRState *state = C.getState();
920   SValBuilder &svalBuilder = C.getSValBuilder();
921 
922   // See if the size argument is zero.
923   SVal sizeVal = state->getSVal(Size);
924   QualType sizeTy = Size->getType();
925 
926   const GRState *stateZeroSize, *stateNonZeroSize;
927   llvm::tie(stateZeroSize, stateNonZeroSize) =
928     assumeZero(C, state, sizeVal, sizeTy);
929 
930   // If the size can be zero, the result will be 0 in that case, and we don't
931   // have to check either of the buffers.
932   if (stateZeroSize) {
933     state = stateZeroSize;
934     state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
935     C.addTransition(state);
936   }
937 
938   // If the size can be nonzero, we have to check the other arguments.
939   if (stateNonZeroSize) {
940     state = stateNonZeroSize;
941     // If we know the two buffers are the same, we know the result is 0.
942     // First, get the two buffers' addresses. Another checker will have already
943     // made sure they're not undefined.
944     DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left));
945     DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right));
946 
947     // See if they are the same.
948     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
949     const GRState *StSameBuf, *StNotSameBuf;
950     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
951 
952     // If the two arguments might be the same buffer, we know the result is 0,
953     // and we only need to check one size.
954     if (StSameBuf) {
955       state = StSameBuf;
956       state = CheckBufferAccess(C, state, Size, Left);
957       if (state) {
958         state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
959         C.addTransition(state);
960       }
961     }
962 
963     // If the two arguments might be different buffers, we have to check the
964     // size of both of them.
965     if (StNotSameBuf) {
966       state = StNotSameBuf;
967       state = CheckBufferAccess(C, state, Size, Left, Right);
968       if (state) {
969         // The return value is the comparison result, which we don't know.
970         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
971         SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
972         state = state->BindExpr(CE, CmpV);
973         C.addTransition(state);
974       }
975     }
976   }
977 }
978 
979 void CStringChecker::evalstrLength(CheckerContext &C,
980                                    const CallExpr *CE) const {
981   // size_t strlen(const char *s);
982   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
983 }
984 
985 void CStringChecker::evalstrnLength(CheckerContext &C,
986                                     const CallExpr *CE) const {
987   // size_t strnlen(const char *s, size_t maxlen);
988   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
989 }
990 
991 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
992                                          bool IsStrnlen) const {
993   const GRState *state = C.getState();
994 
995   if (IsStrnlen) {
996     const Expr *maxlenExpr = CE->getArg(1);
997     SVal maxlenVal = state->getSVal(maxlenExpr);
998 
999     const GRState *stateZeroSize, *stateNonZeroSize;
1000     llvm::tie(stateZeroSize, stateNonZeroSize) =
1001       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1002 
1003     // If the size can be zero, the result will be 0 in that case, and we don't
1004     // have to check the string itself.
1005     if (stateZeroSize) {
1006       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1007       stateZeroSize = stateZeroSize->BindExpr(CE, zero);
1008       C.addTransition(stateZeroSize);
1009     }
1010 
1011     // If the size is GUARANTEED to be zero, we're done!
1012     if (!stateNonZeroSize)
1013       return;
1014 
1015     // Otherwise, record the assumption that the size is nonzero.
1016     state = stateNonZeroSize;
1017   }
1018 
1019   // Check that the string argument is non-null.
1020   const Expr *Arg = CE->getArg(0);
1021   SVal ArgVal = state->getSVal(Arg);
1022 
1023   state = checkNonNull(C, state, Arg, ArgVal);
1024 
1025   if (!state)
1026     return;
1027 
1028   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1029 
1030   // If the argument isn't a valid C string, there's no valid state to
1031   // transition to.
1032   if (strLength.isUndef())
1033     return;
1034 
1035   DefinedOrUnknownSVal result = UnknownVal();
1036 
1037   // If the check is for strnlen() then bind the return value to no more than
1038   // the maxlen value.
1039   if (IsStrnlen) {
1040     QualType cmpTy = C.getSValBuilder().getConditionType();
1041 
1042     // It's a little unfortunate to be getting this again,
1043     // but it's not that expensive...
1044     const Expr *maxlenExpr = CE->getArg(1);
1045     SVal maxlenVal = state->getSVal(maxlenExpr);
1046 
1047     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1048     NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1049 
1050     if (strLengthNL && maxlenValNL) {
1051       const GRState *stateStringTooLong, *stateStringNotTooLong;
1052 
1053       // Check if the strLength is greater than the maxlen.
1054       llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1055         state->assume(cast<DefinedOrUnknownSVal>
1056                       (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1057                                                       *strLengthNL,
1058                                                       *maxlenValNL,
1059                                                       cmpTy)));
1060 
1061       if (stateStringTooLong && !stateStringNotTooLong) {
1062         // If the string is longer than maxlen, return maxlen.
1063         result = *maxlenValNL;
1064       } else if (stateStringNotTooLong && !stateStringTooLong) {
1065         // If the string is shorter than maxlen, return its length.
1066         result = *strLengthNL;
1067       }
1068     }
1069 
1070     if (result.isUnknown()) {
1071       // If we don't have enough information for a comparison, there's
1072       // no guarantee the full string length will actually be returned.
1073       // All we know is the return value is the min of the string length
1074       // and the limit. This is better than nothing.
1075       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1076       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1077       NonLoc *resultNL = cast<NonLoc>(&result);
1078 
1079       if (strLengthNL) {
1080         state = state->assume(cast<DefinedOrUnknownSVal>
1081                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1082                                                               *resultNL,
1083                                                               *strLengthNL,
1084                                                               cmpTy)), true);
1085       }
1086 
1087       if (maxlenValNL) {
1088         state = state->assume(cast<DefinedOrUnknownSVal>
1089                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1090                                                               *resultNL,
1091                                                               *maxlenValNL,
1092                                                               cmpTy)), true);
1093       }
1094     }
1095 
1096   } else {
1097     // This is a plain strlen(), not strnlen().
1098     result = cast<DefinedOrUnknownSVal>(strLength);
1099 
1100     // If we don't know the length of the string, conjure a return
1101     // value, so it can be used in constraints, at least.
1102     if (result.isUnknown()) {
1103       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1104       result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1105     }
1106   }
1107 
1108   // Bind the return value.
1109   assert(!result.isUnknown() && "Should have conjured a value by now");
1110   state = state->BindExpr(CE, result);
1111   C.addTransition(state);
1112 }
1113 
1114 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1115   // char *strcpy(char *restrict dst, const char *restrict src);
1116   evalStrcpyCommon(C, CE,
1117                    /* returnEnd = */ false,
1118                    /* isBounded = */ false,
1119                    /* isAppending = */ false);
1120 }
1121 
1122 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1123   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1124   evalStrcpyCommon(C, CE,
1125                    /* returnEnd = */ false,
1126                    /* isBounded = */ true,
1127                    /* isAppending = */ false);
1128 }
1129 
1130 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1131   // char *stpcpy(char *restrict dst, const char *restrict src);
1132   evalStrcpyCommon(C, CE,
1133                    /* returnEnd = */ true,
1134                    /* isBounded = */ false,
1135                    /* isAppending = */ false);
1136 }
1137 
1138 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1139   //char *strcat(char *restrict s1, const char *restrict s2);
1140   evalStrcpyCommon(C, CE,
1141                    /* returnEnd = */ false,
1142                    /* isBounded = */ false,
1143                    /* isAppending = */ true);
1144 }
1145 
1146 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1147   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1148   evalStrcpyCommon(C, CE,
1149                    /* returnEnd = */ false,
1150                    /* isBounded = */ true,
1151                    /* isAppending = */ true);
1152 }
1153 
1154 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1155                                       bool returnEnd, bool isBounded,
1156                                       bool isAppending) const {
1157   const GRState *state = C.getState();
1158 
1159   // Check that the destination is non-null.
1160   const Expr *Dst = CE->getArg(0);
1161   SVal DstVal = state->getSVal(Dst);
1162 
1163   state = checkNonNull(C, state, Dst, DstVal);
1164   if (!state)
1165     return;
1166 
1167   // Check that the source is non-null.
1168   const Expr *srcExpr = CE->getArg(1);
1169   SVal srcVal = state->getSVal(srcExpr);
1170   state = checkNonNull(C, state, srcExpr, srcVal);
1171   if (!state)
1172     return;
1173 
1174   // Get the string length of the source.
1175   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1176 
1177   // If the source isn't a valid C string, give up.
1178   if (strLength.isUndef())
1179     return;
1180 
1181   SValBuilder &svalBuilder = C.getSValBuilder();
1182   QualType cmpTy = svalBuilder.getConditionType();
1183 
1184   SVal amountCopied = UnknownVal();
1185 
1186   // If the function is strncpy, strncat, etc... it is bounded.
1187   if (isBounded) {
1188     // Get the max number of characters to copy.
1189     const Expr *lenExpr = CE->getArg(2);
1190     SVal lenVal = state->getSVal(lenExpr);
1191 
1192     // Protect against misdeclared strncpy().
1193     lenVal = svalBuilder.evalCast(lenVal,
1194                                   svalBuilder.getContext().getSizeType(),
1195                                   lenExpr->getType());
1196 
1197     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1198     NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1199 
1200     // If we know both values, we might be able to figure out how much
1201     // we're copying.
1202     if (strLengthNL && lenValNL) {
1203       const GRState *stateSourceTooLong, *stateSourceNotTooLong;
1204 
1205       // Check if the max number to copy is less than the length of the src.
1206       llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1207         state->assume(cast<DefinedOrUnknownSVal>
1208                       (svalBuilder.evalBinOpNN(state, BO_GT, *strLengthNL,
1209                                                *lenValNL, cmpTy)));
1210 
1211       if (stateSourceTooLong && !stateSourceNotTooLong) {
1212         // Max number to copy is less than the length of the src, so the actual
1213         // strLength copied is the max number arg.
1214         state = stateSourceTooLong;
1215         amountCopied = lenVal;
1216 
1217       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1218         // The source buffer entirely fits in the bound.
1219         state = stateSourceNotTooLong;
1220         amountCopied = strLength;
1221       }
1222     }
1223 
1224     // If we couldn't pin down the copy length, at least bound it.
1225     if (amountCopied.isUnknown()) {
1226       // Try to get a "hypothetical" string length symbol, which we can later
1227       // set as a real value if that turns out to be the case.
1228       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1229       assert(!amountCopied.isUndef());
1230 
1231       if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1232         if (lenValNL) {
1233           // amountCopied <= lenVal
1234           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1235                                                              *amountCopiedNL,
1236                                                              *lenValNL,
1237                                                              cmpTy);
1238           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1239                                 true);
1240           if (!state)
1241             return;
1242         }
1243 
1244         if (strLengthNL) {
1245           // amountCopied <= strlen(source)
1246           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1247                                                            *amountCopiedNL,
1248                                                            *strLengthNL,
1249                                                            cmpTy);
1250           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1251                                 true);
1252           if (!state)
1253             return;
1254         }
1255       }
1256     }
1257 
1258   } else {
1259     // The function isn't bounded. The amount copied should match the length
1260     // of the source buffer.
1261     amountCopied = strLength;
1262   }
1263 
1264   assert(state);
1265 
1266   // This represents the number of characters copied into the destination
1267   // buffer. (It may not actually be the strlen if the destination buffer
1268   // is not terminated.)
1269   SVal finalStrLength = UnknownVal();
1270 
1271   // If this is an appending function (strcat, strncat...) then set the
1272   // string length to strlen(src) + strlen(dst) since the buffer will
1273   // ultimately contain both.
1274   if (isAppending) {
1275     // Get the string length of the destination. If the destination is memory
1276     // that can't have a string length, we shouldn't be copying into it anyway.
1277     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1278     if (dstStrLength.isUndef())
1279       return;
1280 
1281     QualType sizeTy = svalBuilder.getContext().getSizeType();
1282 
1283     NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1284     NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1285 
1286     // If we know both string lengths, we might know the final string length.
1287     if (srcStrLengthNL && dstStrLengthNL) {
1288       // Make sure the two lengths together don't overflow a size_t.
1289       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1290       if (!state)
1291         return;
1292 
1293       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1294                                                *dstStrLengthNL, sizeTy);
1295     }
1296 
1297     // If we couldn't get a single value for the final string length,
1298     // we can at least bound it by the individual lengths.
1299     if (finalStrLength.isUnknown()) {
1300       // Try to get a "hypothetical" string length symbol, which we can later
1301       // set as a real value if that turns out to be the case.
1302       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1303       assert(!finalStrLength.isUndef());
1304 
1305       if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1306         if (srcStrLengthNL) {
1307           // finalStrLength >= srcStrLength
1308           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1309                                                         *finalStrLengthNL,
1310                                                         *srcStrLengthNL,
1311                                                         cmpTy);
1312           state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1313                                 true);
1314           if (!state)
1315             return;
1316         }
1317 
1318         if (dstStrLengthNL) {
1319           // finalStrLength >= dstStrLength
1320           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1321                                                       *finalStrLengthNL,
1322                                                       *dstStrLengthNL,
1323                                                       cmpTy);
1324           state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1325                                 true);
1326           if (!state)
1327             return;
1328         }
1329       }
1330     }
1331 
1332   } else {
1333     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1334     // the final string length will match the input string length.
1335     finalStrLength = amountCopied;
1336   }
1337 
1338   // The final result of the function will either be a pointer past the last
1339   // copied element, or a pointer to the start of the destination buffer.
1340   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1341 
1342   assert(state);
1343 
1344   // If the destination is a MemRegion, try to check for a buffer overflow and
1345   // record the new string length.
1346   if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1347     // If the final length is known, we can check for an overflow.
1348     if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1349       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1350                                                  *knownStrLength,
1351                                                  Dst->getType());
1352 
1353       state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true);
1354       if (!state)
1355         return;
1356 
1357       // If this is a stpcpy-style copy, the last element is the return value.
1358       if (returnEnd)
1359         Result = lastElement;
1360     }
1361 
1362     // Invalidate the destination. This must happen before we set the C string
1363     // length because invalidation will clear the length.
1364     // FIXME: Even if we can't perfectly model the copy, we should see if we
1365     // can use LazyCompoundVals to copy the source values into the destination.
1366     // This would probably remove any existing bindings past the end of the
1367     // string, but that's still an improvement over blank invalidation.
1368     state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1369 
1370     // Set the C string length of the destination.
1371     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1372   }
1373 
1374   assert(state);
1375 
1376   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1377   // overflow, we still need a result. Conjure a return value.
1378   if (returnEnd && Result.isUnknown()) {
1379     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1380     Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
1381   }
1382 
1383   // Set the return value.
1384   state = state->BindExpr(CE, Result);
1385   C.addTransition(state);
1386 }
1387 
1388 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1389   //int strcmp(const char *s1, const char *s2);
1390   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1391 }
1392 
1393 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1394   //int strncmp(const char *s1, const char *s2, size_t n);
1395   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1396 }
1397 
1398 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1399                                     const CallExpr *CE) const {
1400   //int strcasecmp(const char *s1, const char *s2);
1401   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1402 }
1403 
1404 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1405                                      const CallExpr *CE) const {
1406   //int strncasecmp(const char *s1, const char *s2, size_t n);
1407   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1408 }
1409 
1410 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1411                                       bool isBounded, bool ignoreCase) const {
1412   const GRState *state = C.getState();
1413 
1414   // Check that the first string is non-null
1415   const Expr *s1 = CE->getArg(0);
1416   SVal s1Val = state->getSVal(s1);
1417   state = checkNonNull(C, state, s1, s1Val);
1418   if (!state)
1419     return;
1420 
1421   // Check that the second string is non-null.
1422   const Expr *s2 = CE->getArg(1);
1423   SVal s2Val = state->getSVal(s2);
1424   state = checkNonNull(C, state, s2, s2Val);
1425   if (!state)
1426     return;
1427 
1428   // Get the string length of the first string or give up.
1429   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1430   if (s1Length.isUndef())
1431     return;
1432 
1433   // Get the string length of the second string or give up.
1434   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1435   if (s2Length.isUndef())
1436     return;
1437 
1438   // If we know the two buffers are the same, we know the result is 0.
1439   // First, get the two buffers' addresses. Another checker will have already
1440   // made sure they're not undefined.
1441   DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
1442   DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
1443 
1444   // See if they are the same.
1445   SValBuilder &svalBuilder = C.getSValBuilder();
1446   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1447   const GRState *StSameBuf, *StNotSameBuf;
1448   llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1449 
1450   // If the two arguments might be the same buffer, we know the result is 0,
1451   // and we only need to check one size.
1452   if (StSameBuf) {
1453     StSameBuf = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
1454     C.addTransition(StSameBuf);
1455 
1456     // If the two arguments are GUARANTEED to be the same, we're done!
1457     if (!StNotSameBuf)
1458       return;
1459   }
1460 
1461   assert(StNotSameBuf);
1462   state = StNotSameBuf;
1463 
1464   // At this point we can go about comparing the two buffers.
1465   // For now, we only do this if they're both known string literals.
1466 
1467   // Attempt to extract string literals from both expressions.
1468   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1469   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1470   bool canComputeResult = false;
1471 
1472   if (s1StrLiteral && s2StrLiteral) {
1473     llvm::StringRef s1StrRef = s1StrLiteral->getString();
1474     llvm::StringRef s2StrRef = s2StrLiteral->getString();
1475 
1476     if (isBounded) {
1477       // Get the max number of characters to compare.
1478       const Expr *lenExpr = CE->getArg(2);
1479       SVal lenVal = state->getSVal(lenExpr);
1480 
1481       // If the length is known, we can get the right substrings.
1482       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1483         // Create substrings of each to compare the prefix.
1484         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1485         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1486         canComputeResult = true;
1487       }
1488     } else {
1489       // This is a normal, unbounded strcmp.
1490       canComputeResult = true;
1491     }
1492 
1493     if (canComputeResult) {
1494       // Real strcmp stops at null characters.
1495       size_t s1Term = s1StrRef.find('\0');
1496       if (s1Term != llvm::StringRef::npos)
1497         s1StrRef = s1StrRef.substr(0, s1Term);
1498 
1499       size_t s2Term = s2StrRef.find('\0');
1500       if (s2Term != llvm::StringRef::npos)
1501         s2StrRef = s2StrRef.substr(0, s2Term);
1502 
1503       // Use StringRef's comparison methods to compute the actual result.
1504       int result;
1505 
1506       if (ignoreCase) {
1507         // Compare string 1 to string 2 the same way strcasecmp() does.
1508         result = s1StrRef.compare_lower(s2StrRef);
1509       } else {
1510         // Compare string 1 to string 2 the same way strcmp() does.
1511         result = s1StrRef.compare(s2StrRef);
1512       }
1513 
1514       // Build the SVal of the comparison and bind the return value.
1515       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1516       state = state->BindExpr(CE, resultVal);
1517     }
1518   }
1519 
1520   if (!canComputeResult) {
1521     // Conjure a symbolic value. It's the best we can do.
1522     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1523     SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
1524     state = state->BindExpr(CE, resultVal);
1525   }
1526 
1527   // Record this as a possible path.
1528   C.addTransition(state);
1529 }
1530 
1531 //===----------------------------------------------------------------------===//
1532 // The driver method, and other Checker callbacks.
1533 //===----------------------------------------------------------------------===//
1534 
1535 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1536   // Get the callee.  All the functions we care about are C functions
1537   // with simple identifiers.
1538   const GRState *state = C.getState();
1539   const Expr *Callee = CE->getCallee();
1540   const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl();
1541 
1542   if (!FD)
1543     return false;
1544 
1545   // Get the name of the callee. If it's a builtin, strip off the prefix.
1546   IdentifierInfo *II = FD->getIdentifier();
1547   if (!II)   // if no identifier, not a simple C function
1548     return false;
1549   llvm::StringRef Name = II->getName();
1550   if (Name.startswith("__builtin_"))
1551     Name = Name.substr(10);
1552 
1553   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
1554     .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy)
1555     .Cases("mempcpy", "__mempcpy_chk", &CStringChecker::evalMempcpy)
1556     .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp)
1557     .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove)
1558     .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy)
1559     //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy)
1560     .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy)
1561     .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat)
1562     .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat)
1563     .Case("strlen", &CStringChecker::evalstrLength)
1564     .Case("strnlen", &CStringChecker::evalstrnLength)
1565     .Case("strcmp", &CStringChecker::evalStrcmp)
1566     .Case("strncmp", &CStringChecker::evalStrncmp)
1567     .Case("strcasecmp", &CStringChecker::evalStrcasecmp)
1568     .Case("strncasecmp", &CStringChecker::evalStrncasecmp)
1569     .Case("bcopy", &CStringChecker::evalBcopy)
1570     .Default(NULL);
1571 
1572   // If the callee isn't a string function, let another checker handle it.
1573   if (!evalFunction)
1574     return false;
1575 
1576   // Check and evaluate the call.
1577   (this->*evalFunction)(C, CE);
1578   return true;
1579 }
1580 
1581 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1582   // Record string length for char a[] = "abc";
1583   const GRState *state = C.getState();
1584 
1585   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1586        I != E; ++I) {
1587     const VarDecl *D = dyn_cast<VarDecl>(*I);
1588     if (!D)
1589       continue;
1590 
1591     // FIXME: Handle array fields of structs.
1592     if (!D->getType()->isArrayType())
1593       continue;
1594 
1595     const Expr *Init = D->getInit();
1596     if (!Init)
1597       continue;
1598     if (!isa<StringLiteral>(Init))
1599       continue;
1600 
1601     Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext());
1602     const MemRegion *MR = VarLoc.getAsRegion();
1603     if (!MR)
1604       continue;
1605 
1606     SVal StrVal = state->getSVal(Init);
1607     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1608     DefinedOrUnknownSVal strLength
1609       = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1610 
1611     state = state->set<CStringLength>(MR, strLength);
1612   }
1613 
1614   C.addTransition(state);
1615 }
1616 
1617 bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const {
1618   CStringLength::EntryMap Entries = state->get<CStringLength>();
1619   return !Entries.isEmpty();
1620 }
1621 
1622 const GRState *
1623 CStringChecker::checkRegionChanges(const GRState *state,
1624                                    const StoreManager::InvalidatedSymbols *,
1625                                    const MemRegion * const *Begin,
1626                                    const MemRegion * const *End) const {
1627   CStringLength::EntryMap Entries = state->get<CStringLength>();
1628   if (Entries.isEmpty())
1629     return state;
1630 
1631   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1632   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1633 
1634   // First build sets for the changed regions and their super-regions.
1635   for ( ; Begin != End; ++Begin) {
1636     const MemRegion *MR = *Begin;
1637     Invalidated.insert(MR);
1638 
1639     SuperRegions.insert(MR);
1640     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1641       MR = SR->getSuperRegion();
1642       SuperRegions.insert(MR);
1643     }
1644   }
1645 
1646   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1647 
1648   // Then loop over the entries in the current state.
1649   for (CStringLength::EntryMap::iterator I = Entries.begin(),
1650        E = Entries.end(); I != E; ++I) {
1651     const MemRegion *MR = I.getKey();
1652 
1653     // Is this entry for a super-region of a changed region?
1654     if (SuperRegions.count(MR)) {
1655       Entries = F.remove(Entries, MR);
1656       continue;
1657     }
1658 
1659     // Is this entry for a sub-region of a changed region?
1660     const MemRegion *Super = MR;
1661     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1662       Super = SR->getSuperRegion();
1663       if (Invalidated.count(Super)) {
1664         Entries = F.remove(Entries, MR);
1665         break;
1666       }
1667     }
1668   }
1669 
1670   return state->set<CStringLength>(Entries);
1671 }
1672 
1673 void CStringChecker::checkLiveSymbols(const GRState *state,
1674                                       SymbolReaper &SR) const {
1675   // Mark all symbols in our string length map as valid.
1676   CStringLength::EntryMap Entries = state->get<CStringLength>();
1677 
1678   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1679        I != E; ++I) {
1680     SVal Len = I.getData();
1681 
1682     for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end();
1683          si != se; ++si)
1684       SR.markInUse(*si);
1685   }
1686 }
1687 
1688 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1689                                       CheckerContext &C) const {
1690   if (!SR.hasDeadSymbols())
1691     return;
1692 
1693   const GRState *state = C.getState();
1694   CStringLength::EntryMap Entries = state->get<CStringLength>();
1695   if (Entries.isEmpty())
1696     return;
1697 
1698   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1699   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1700        I != E; ++I) {
1701     SVal Len = I.getData();
1702     if (SymbolRef Sym = Len.getAsSymbol()) {
1703       if (SR.isDead(Sym))
1704         Entries = F.remove(Entries, I.getKey());
1705     }
1706   }
1707 
1708   state = state->set<CStringLength>(Entries);
1709   C.generateNode(state);
1710 }
1711 
1712 void ento::registerCStringChecker(CheckerManager &mgr) {
1713   mgr.registerChecker<CStringChecker>();
1714 }
1715