xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 0c34c1a25f61e88e10eeef5a237bae72a724613f)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
#include "ClangSACheckers.h"
#include "InterCheckerAPI.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class CStringChecker : public Checker< eval::Call,
32                                          check::PreStmt<DeclStmt>,
33                                          check::LiveSymbols,
34                                          check::DeadSymbols,
35                                          check::RegionChanges
36                                          > {
37   mutable OwningPtr<BugType> BT_Null,
38                              BT_Bounds,
39                              BT_Overlap,
40                              BT_NotCString,
41                              BT_AdditionOverflow;
42 
43   mutable const char *CurrentFunctionDescription;
44 
45 public:
46   /// The filter is used to filter out the diagnostics which are not enabled by
47   /// the user.
48   struct CStringChecksFilter {
49     DefaultBool CheckCStringNullArg;
50     DefaultBool CheckCStringOutOfBounds;
51     DefaultBool CheckCStringBufferOverlap;
52     DefaultBool CheckCStringNotNullTerm;
53   };
54 
55   CStringChecksFilter Filter;
56 
57   static void *getTag() { static int tag; return &tag; }
58 
59   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
60   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
61   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
62   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
63   bool wantsRegionChangeUpdate(ProgramStateRef state) const;
64 
65   ProgramStateRef
66     checkRegionChanges(ProgramStateRef state,
67                        const InvalidatedSymbols *,
68                        ArrayRef<const MemRegion *> ExplicitRegions,
69                        ArrayRef<const MemRegion *> Regions,
70                        const CallEvent *Call) const;
71 
72   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
73                                           const CallExpr *) const;
74 
75   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
76   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
77   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
78   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
79   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
80                       ProgramStateRef state,
81                       const Expr *Size,
82                       const Expr *Source,
83                       const Expr *Dest,
84                       bool Restricted = false,
85                       bool IsMempcpy = false) const;
86 
87   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
88 
89   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
90   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
91   void evalstrLengthCommon(CheckerContext &C,
92                            const CallExpr *CE,
93                            bool IsStrnlen = false) const;
94 
95   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
96   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
97   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
98   void evalStrcpyCommon(CheckerContext &C,
99                         const CallExpr *CE,
100                         bool returnEnd,
101                         bool isBounded,
102                         bool isAppending) const;
103 
104   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
105   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
106 
107   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
108   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
109   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
110   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
111   void evalStrcmpCommon(CheckerContext &C,
112                         const CallExpr *CE,
113                         bool isBounded = false,
114                         bool ignoreCase = false) const;
115 
116   // Utility methods
117   std::pair<ProgramStateRef , ProgramStateRef >
118   static assumeZero(CheckerContext &C,
119                     ProgramStateRef state, SVal V, QualType Ty);
120 
121   static ProgramStateRef setCStringLength(ProgramStateRef state,
122                                               const MemRegion *MR,
123                                               SVal strLength);
124   static SVal getCStringLengthForRegion(CheckerContext &C,
125                                         ProgramStateRef &state,
126                                         const Expr *Ex,
127                                         const MemRegion *MR,
128                                         bool hypothetical);
129   SVal getCStringLength(CheckerContext &C,
130                         ProgramStateRef &state,
131                         const Expr *Ex,
132                         SVal Buf,
133                         bool hypothetical = false) const;
134 
135   const StringLiteral *getCStringLiteral(CheckerContext &C,
136                                          ProgramStateRef &state,
137                                          const Expr *expr,
138                                          SVal val) const;
139 
140   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
141                                               ProgramStateRef state,
142                                               const Expr *Ex, SVal V);
143 
144   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
145                               const MemRegion *MR);
146 
147   // Re-usable checks
148   ProgramStateRef checkNonNull(CheckerContext &C,
149                                    ProgramStateRef state,
150                                    const Expr *S,
151                                    SVal l) const;
152   ProgramStateRef CheckLocation(CheckerContext &C,
153                                     ProgramStateRef state,
154                                     const Expr *S,
155                                     SVal l,
156                                     const char *message = NULL) const;
157   ProgramStateRef CheckBufferAccess(CheckerContext &C,
158                                         ProgramStateRef state,
159                                         const Expr *Size,
160                                         const Expr *FirstBuf,
161                                         const Expr *SecondBuf,
162                                         const char *firstMessage = NULL,
163                                         const char *secondMessage = NULL,
164                                         bool WarnAboutSize = false) const;
165 
166   ProgramStateRef CheckBufferAccess(CheckerContext &C,
167                                         ProgramStateRef state,
168                                         const Expr *Size,
169                                         const Expr *Buf,
170                                         const char *message = NULL,
171                                         bool WarnAboutSize = false) const {
172     // This is a convenience override.
173     return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
174                              WarnAboutSize);
175   }
176   ProgramStateRef CheckOverlap(CheckerContext &C,
177                                    ProgramStateRef state,
178                                    const Expr *Size,
179                                    const Expr *First,
180                                    const Expr *Second) const;
181   void emitOverlapBug(CheckerContext &C,
182                       ProgramStateRef state,
183                       const Stmt *First,
184                       const Stmt *Second) const;
185 
186   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
187                                             ProgramStateRef state,
188                                             NonLoc left,
189                                             NonLoc right) const;
190 };
191 
192 } //end anonymous namespace
193 
// Program-state map from a memory region to the SVal recorded as the length
// of the C string stored in that region.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
195 
196 //===----------------------------------------------------------------------===//
197 // Individual checks and utility methods.
198 //===----------------------------------------------------------------------===//
199 
200 std::pair<ProgramStateRef , ProgramStateRef >
201 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
202                            QualType Ty) {
203   DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
204   if (!val)
205     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
206 
207   SValBuilder &svalBuilder = C.getSValBuilder();
208   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
209   return state->assume(svalBuilder.evalEQ(state, *val, zero));
210 }
211 
212 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
213                                             ProgramStateRef state,
214                                             const Expr *S, SVal l) const {
215   // If a previous check has failed, propagate the failure.
216   if (!state)
217     return NULL;
218 
219   ProgramStateRef stateNull, stateNonNull;
220   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
221 
222   if (stateNull && !stateNonNull) {
223     if (!Filter.CheckCStringNullArg)
224       return NULL;
225 
226     ExplodedNode *N = C.generateSink(stateNull);
227     if (!N)
228       return NULL;
229 
230     if (!BT_Null)
231       BT_Null.reset(new BuiltinBug("Unix API",
232         "Null pointer argument in call to byte string function"));
233 
234     SmallString<80> buf;
235     llvm::raw_svector_ostream os(buf);
236     assert(CurrentFunctionDescription);
237     os << "Null pointer argument in call to " << CurrentFunctionDescription;
238 
239     // Generate a report for this bug.
240     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
241     BugReport *report = new BugReport(*BT, os.str(), N);
242 
243     report->addRange(S->getSourceRange());
244     bugreporter::trackNullOrUndefValue(N, S, *report);
245     C.emitReport(report);
246     return NULL;
247   }
248 
249   // From here on, assume that the value is non-null.
250   assert(stateNonNull);
251   return stateNonNull;
252 }
253 
254 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
255 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
256                                              ProgramStateRef state,
257                                              const Expr *S, SVal l,
258                                              const char *warningMsg) const {
259   // If a previous check has failed, propagate the failure.
260   if (!state)
261     return NULL;
262 
263   // Check for out of bound array element access.
264   const MemRegion *R = l.getAsRegion();
265   if (!R)
266     return state;
267 
268   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
269   if (!ER)
270     return state;
271 
272   assert(ER->getValueType() == C.getASTContext().CharTy &&
273     "CheckLocation should only be called with char* ElementRegions");
274 
275   // Get the size of the array.
276   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
277   SValBuilder &svalBuilder = C.getSValBuilder();
278   SVal Extent =
279     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
280   DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
281 
282   // Get the index of the accessed element.
283   DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
284 
285   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
286   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
287   if (StOutBound && !StInBound) {
288     ExplodedNode *N = C.generateSink(StOutBound);
289     if (!N)
290       return NULL;
291 
292     if (!BT_Bounds) {
293       BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
294         "Byte string function accesses out-of-bound array element"));
295     }
296     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
297 
298     // Generate a report for this bug.
299     BugReport *report;
300     if (warningMsg) {
301       report = new BugReport(*BT, warningMsg, N);
302     } else {
303       assert(CurrentFunctionDescription);
304       assert(CurrentFunctionDescription[0] != '\0');
305 
306       SmallString<80> buf;
307       llvm::raw_svector_ostream os(buf);
308       os << (char)toupper(CurrentFunctionDescription[0])
309          << &CurrentFunctionDescription[1]
310          << " accesses out-of-bound array element";
311       report = new BugReport(*BT, os.str(), N);
312     }
313 
314     // FIXME: It would be nice to eventually make this diagnostic more clear,
315     // e.g., by referencing the original declaration or by saying *why* this
316     // reference is outside the range.
317 
318     report->addRange(S->getSourceRange());
319     C.emitReport(report);
320     return NULL;
321   }
322 
323   // Array bound check succeeded.  From this point forward the array bound
324   // should always succeed.
325   return StInBound;
326 }
327 
328 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
329                                                  ProgramStateRef state,
330                                                  const Expr *Size,
331                                                  const Expr *FirstBuf,
332                                                  const Expr *SecondBuf,
333                                                  const char *firstMessage,
334                                                  const char *secondMessage,
335                                                  bool WarnAboutSize) const {
336   // If a previous check has failed, propagate the failure.
337   if (!state)
338     return NULL;
339 
340   SValBuilder &svalBuilder = C.getSValBuilder();
341   ASTContext &Ctx = svalBuilder.getContext();
342   const LocationContext *LCtx = C.getLocationContext();
343 
344   QualType sizeTy = Size->getType();
345   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
346 
347   // Check that the first buffer is non-null.
348   SVal BufVal = state->getSVal(FirstBuf, LCtx);
349   state = checkNonNull(C, state, FirstBuf, BufVal);
350   if (!state)
351     return NULL;
352 
353   // If out-of-bounds checking is turned off, skip the rest.
354   if (!Filter.CheckCStringOutOfBounds)
355     return state;
356 
357   // Get the access length and make sure it is known.
358   // FIXME: This assumes the caller has already checked that the access length
359   // is positive. And that it's unsigned.
360   SVal LengthVal = state->getSVal(Size, LCtx);
361   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
362   if (!Length)
363     return state;
364 
365   // Compute the offset of the last element to be accessed: size-1.
366   NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
367   NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
368                                                     *Length, One, sizeTy));
369 
370   // Check that the first buffer is sufficiently long.
371   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
372   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
373     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
374 
375     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
376                                           LastOffset, PtrTy);
377     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
378 
379     // If the buffer isn't large enough, abort.
380     if (!state)
381       return NULL;
382   }
383 
384   // If there's a second buffer, check it as well.
385   if (SecondBuf) {
386     BufVal = state->getSVal(SecondBuf, LCtx);
387     state = checkNonNull(C, state, SecondBuf, BufVal);
388     if (!state)
389       return NULL;
390 
391     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
392     if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
393       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
394 
395       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
396                                             LastOffset, PtrTy);
397       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
398     }
399   }
400 
401   // Large enough or not, return this state!
402   return state;
403 }
404 
405 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
406                                             ProgramStateRef state,
407                                             const Expr *Size,
408                                             const Expr *First,
409                                             const Expr *Second) const {
410   if (!Filter.CheckCStringBufferOverlap)
411     return state;
412 
413   // Do a simple check for overlap: if the two arguments are from the same
414   // buffer, see if the end of the first is greater than the start of the second
415   // or vice versa.
416 
417   // If a previous check has failed, propagate the failure.
418   if (!state)
419     return NULL;
420 
421   ProgramStateRef stateTrue, stateFalse;
422 
423   // Get the buffer values and make sure they're known locations.
424   const LocationContext *LCtx = C.getLocationContext();
425   SVal firstVal = state->getSVal(First, LCtx);
426   SVal secondVal = state->getSVal(Second, LCtx);
427 
428   Loc *firstLoc = dyn_cast<Loc>(&firstVal);
429   if (!firstLoc)
430     return state;
431 
432   Loc *secondLoc = dyn_cast<Loc>(&secondVal);
433   if (!secondLoc)
434     return state;
435 
436   // Are the two values the same?
437   SValBuilder &svalBuilder = C.getSValBuilder();
438   llvm::tie(stateTrue, stateFalse) =
439     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
440 
441   if (stateTrue && !stateFalse) {
442     // If the values are known to be equal, that's automatically an overlap.
443     emitOverlapBug(C, stateTrue, First, Second);
444     return NULL;
445   }
446 
447   // assume the two expressions are not equal.
448   assert(stateFalse);
449   state = stateFalse;
450 
451   // Which value comes first?
452   QualType cmpTy = svalBuilder.getConditionType();
453   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
454                                          *firstLoc, *secondLoc, cmpTy);
455   DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
456   if (!reverseTest)
457     return state;
458 
459   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
460   if (stateTrue) {
461     if (stateFalse) {
462       // If we don't know which one comes first, we can't perform this test.
463       return state;
464     } else {
465       // Switch the values so that firstVal is before secondVal.
466       Loc *tmpLoc = firstLoc;
467       firstLoc = secondLoc;
468       secondLoc = tmpLoc;
469 
470       // Switch the Exprs as well, so that they still correspond.
471       const Expr *tmpExpr = First;
472       First = Second;
473       Second = tmpExpr;
474     }
475   }
476 
477   // Get the length, and make sure it too is known.
478   SVal LengthVal = state->getSVal(Size, LCtx);
479   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
480   if (!Length)
481     return state;
482 
483   // Convert the first buffer's start address to char*.
484   // Bail out if the cast fails.
485   ASTContext &Ctx = svalBuilder.getContext();
486   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
487   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
488                                          First->getType());
489   Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
490   if (!FirstStartLoc)
491     return state;
492 
493   // Compute the end of the first buffer. Bail out if THAT fails.
494   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
495                                  *FirstStartLoc, *Length, CharPtrTy);
496   Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
497   if (!FirstEndLoc)
498     return state;
499 
500   // Is the end of the first buffer past the start of the second buffer?
501   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
502                                 *FirstEndLoc, *secondLoc, cmpTy);
503   DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
504   if (!OverlapTest)
505     return state;
506 
507   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
508 
509   if (stateTrue && !stateFalse) {
510     // Overlap!
511     emitOverlapBug(C, stateTrue, First, Second);
512     return NULL;
513   }
514 
515   // assume the two expressions don't overlap.
516   assert(stateFalse);
517   return stateFalse;
518 }
519 
520 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
521                                   const Stmt *First, const Stmt *Second) const {
522   ExplodedNode *N = C.generateSink(state);
523   if (!N)
524     return;
525 
526   if (!BT_Overlap)
527     BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
528 
529   // Generate a report for this bug.
530   BugReport *report =
531     new BugReport(*BT_Overlap,
532       "Arguments must not be overlapping buffers", N);
533   report->addRange(First->getSourceRange());
534   report->addRange(Second->getSourceRange());
535 
536   C.emitReport(report);
537 }
538 
539 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
540                                                      ProgramStateRef state,
541                                                      NonLoc left,
542                                                      NonLoc right) const {
543   // If out-of-bounds checking is turned off, skip the rest.
544   if (!Filter.CheckCStringOutOfBounds)
545     return state;
546 
547   // If a previous check has failed, propagate the failure.
548   if (!state)
549     return NULL;
550 
551   SValBuilder &svalBuilder = C.getSValBuilder();
552   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
553 
554   QualType sizeTy = svalBuilder.getContext().getSizeType();
555   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
556   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
557 
558   SVal maxMinusRight;
559   if (isa<nonloc::ConcreteInt>(right)) {
560     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
561                                                  sizeTy);
562   } else {
563     // Try switching the operands. (The order of these two assignments is
564     // important!)
565     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
566                                             sizeTy);
567     left = right;
568   }
569 
570   if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
571     QualType cmpTy = svalBuilder.getConditionType();
572     // If left > max - right, we have an overflow.
573     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
574                                                 *maxMinusRightNL, cmpTy);
575 
576     ProgramStateRef stateOverflow, stateOkay;
577     llvm::tie(stateOverflow, stateOkay) =
578       state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
579 
580     if (stateOverflow && !stateOkay) {
581       // We have an overflow. Emit a bug report.
582       ExplodedNode *N = C.generateSink(stateOverflow);
583       if (!N)
584         return NULL;
585 
586       if (!BT_AdditionOverflow)
587         BT_AdditionOverflow.reset(new BuiltinBug("API",
588           "Sum of expressions causes overflow"));
589 
590       // This isn't a great error message, but this should never occur in real
591       // code anyway -- you'd have to create a buffer longer than a size_t can
592       // represent, which is sort of a contradiction.
593       const char *warning =
594         "This expression will create a string whose length is too big to "
595         "be represented as a size_t";
596 
597       // Generate a report for this bug.
598       BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
599       C.emitReport(report);
600 
601       return NULL;
602     }
603 
604     // From now on, assume an overflow didn't occur.
605     assert(stateOkay);
606     state = stateOkay;
607   }
608 
609   return state;
610 }
611 
612 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
613                                                 const MemRegion *MR,
614                                                 SVal strLength) {
615   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
616 
617   MR = MR->StripCasts();
618 
619   switch (MR->getKind()) {
620   case MemRegion::StringRegionKind:
621     // FIXME: This can happen if we strcpy() into a string region. This is
622     // undefined [C99 6.4.5p6], but we should still warn about it.
623     return state;
624 
625   case MemRegion::SymbolicRegionKind:
626   case MemRegion::AllocaRegionKind:
627   case MemRegion::VarRegionKind:
628   case MemRegion::FieldRegionKind:
629   case MemRegion::ObjCIvarRegionKind:
630     // These are the types we can currently track string lengths for.
631     break;
632 
633   case MemRegion::ElementRegionKind:
634     // FIXME: Handle element regions by upper-bounding the parent region's
635     // string length.
636     return state;
637 
638   default:
639     // Other regions (mostly non-data) can't have a reliable C string length.
640     // For now, just ignore the change.
641     // FIXME: These are rare but not impossible. We should output some kind of
642     // warning for things like strcpy((char[]){'a', 0}, "b");
643     return state;
644   }
645 
646   if (strLength.isUnknown())
647     return state->remove<CStringLength>(MR);
648 
649   return state->set<CStringLength>(MR, strLength);
650 }
651 
652 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
653                                                ProgramStateRef &state,
654                                                const Expr *Ex,
655                                                const MemRegion *MR,
656                                                bool hypothetical) {
657   if (!hypothetical) {
658     // If there's a recorded length, go ahead and return it.
659     const SVal *Recorded = state->get<CStringLength>(MR);
660     if (Recorded)
661       return *Recorded;
662   }
663 
664   // Otherwise, get a new symbol and update the state.
665   SValBuilder &svalBuilder = C.getSValBuilder();
666   QualType sizeTy = svalBuilder.getContext().getSizeType();
667   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
668                                                     MR, Ex, sizeTy,
669                                                     C.blockCount());
670 
671   if (!hypothetical)
672     state = state->set<CStringLength>(MR, strLength);
673 
674   return strLength;
675 }
676 
677 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
678                                       const Expr *Ex, SVal Buf,
679                                       bool hypothetical) const {
680   const MemRegion *MR = Buf.getAsRegion();
681   if (!MR) {
682     // If we can't get a region, see if it's something we /know/ isn't a
683     // C string. In the context of locations, the only time we can issue such
684     // a warning is for labels.
685     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
686       if (!Filter.CheckCStringNotNullTerm)
687         return UndefinedVal();
688 
689       if (ExplodedNode *N = C.addTransition(state)) {
690         if (!BT_NotCString)
691           BT_NotCString.reset(new BuiltinBug("Unix API",
692             "Argument is not a null-terminated string."));
693 
694         SmallString<120> buf;
695         llvm::raw_svector_ostream os(buf);
696         assert(CurrentFunctionDescription);
697         os << "Argument to " << CurrentFunctionDescription
698            << " is the address of the label '" << Label->getLabel()->getName()
699            << "', which is not a null-terminated string";
700 
701         // Generate a report for this bug.
702         BugReport *report = new BugReport(*BT_NotCString,
703                                                           os.str(), N);
704 
705         report->addRange(Ex->getSourceRange());
706         C.emitReport(report);
707       }
708       return UndefinedVal();
709 
710     }
711 
712     // If it's not a region and not a label, give up.
713     return UnknownVal();
714   }
715 
716   // If we have a region, strip casts from it and see if we can figure out
717   // its length. For anything we can't figure out, just return UnknownVal.
718   MR = MR->StripCasts();
719 
720   switch (MR->getKind()) {
721   case MemRegion::StringRegionKind: {
722     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
723     // so we can assume that the byte length is the correct C string length.
724     SValBuilder &svalBuilder = C.getSValBuilder();
725     QualType sizeTy = svalBuilder.getContext().getSizeType();
726     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
727     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
728   }
729   case MemRegion::SymbolicRegionKind:
730   case MemRegion::AllocaRegionKind:
731   case MemRegion::VarRegionKind:
732   case MemRegion::FieldRegionKind:
733   case MemRegion::ObjCIvarRegionKind:
734     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
735   case MemRegion::CompoundLiteralRegionKind:
736     // FIXME: Can we track this? Is it necessary?
737     return UnknownVal();
738   case MemRegion::ElementRegionKind:
739     // FIXME: How can we handle this? It's not good enough to subtract the
740     // offset from the base string length; consider "123\x00567" and &a[5].
741     return UnknownVal();
742   default:
743     // Other regions (mostly non-data) can't have a reliable C string length.
744     // In this case, an error is emitted and UndefinedVal is returned.
745     // The caller should always be prepared to handle this case.
746     if (!Filter.CheckCStringNotNullTerm)
747       return UndefinedVal();
748 
749     if (ExplodedNode *N = C.addTransition(state)) {
750       if (!BT_NotCString)
751         BT_NotCString.reset(new BuiltinBug("Unix API",
752           "Argument is not a null-terminated string."));
753 
754       SmallString<120> buf;
755       llvm::raw_svector_ostream os(buf);
756 
757       assert(CurrentFunctionDescription);
758       os << "Argument to " << CurrentFunctionDescription << " is ";
759 
760       if (SummarizeRegion(os, C.getASTContext(), MR))
761         os << ", which is not a null-terminated string";
762       else
763         os << "not a null-terminated string";
764 
765       // Generate a report for this bug.
766       BugReport *report = new BugReport(*BT_NotCString,
767                                                         os.str(), N);
768 
769       report->addRange(Ex->getSourceRange());
770       C.emitReport(report);
771     }
772 
773     return UndefinedVal();
774   }
775 }
776 
777 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
778   ProgramStateRef &state, const Expr *expr, SVal val) const {
779 
780   // Get the memory region pointed to by the val.
781   const MemRegion *bufRegion = val.getAsRegion();
782   if (!bufRegion)
783     return NULL;
784 
785   // Strip casts off the memory region.
786   bufRegion = bufRegion->StripCasts();
787 
788   // Cast the memory region to a string region.
789   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
790   if (!strRegion)
791     return NULL;
792 
793   // Return the actual string in the string region.
794   return strRegion->getStringLiteral();
795 }
796 
797 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
798                                                 ProgramStateRef state,
799                                                 const Expr *E, SVal V) {
800   Loc *L = dyn_cast<Loc>(&V);
801   if (!L)
802     return state;
803 
804   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
805   // some assumptions about the value that CFRefCount can't. Even so, it should
806   // probably be refactored.
807   if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
808     const MemRegion *R = MR->getRegion()->StripCasts();
809 
810     // Are we dealing with an ElementRegion?  If so, we should be invalidating
811     // the super-region.
812     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
813       R = ER->getSuperRegion();
814       // FIXME: What about layers of ElementRegions?
815     }
816 
817     // Invalidate this region.
818     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
819     return state->invalidateRegions(R, E, C.blockCount(), LCtx,
820                                     /*CausesPointerEscape*/ false);
821   }
822 
823   // If we have a non-region value by chance, just remove the binding.
824   // FIXME: is this necessary or correct? This handles the non-Region
825   //  cases.  Is it ever valid to store to these?
826   return state->killBinding(*L);
827 }
828 
829 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
830                                      const MemRegion *MR) {
831   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
832 
833   switch (MR->getKind()) {
834   case MemRegion::FunctionTextRegionKind: {
835     const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
836     if (FD)
837       os << "the address of the function '" << *FD << '\'';
838     else
839       os << "the address of a function";
840     return true;
841   }
842   case MemRegion::BlockTextRegionKind:
843     os << "block text";
844     return true;
845   case MemRegion::BlockDataRegionKind:
846     os << "a block";
847     return true;
848   case MemRegion::CXXThisRegionKind:
849   case MemRegion::CXXTempObjectRegionKind:
850     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
851     return true;
852   case MemRegion::VarRegionKind:
853     os << "a variable of type" << TVR->getValueType().getAsString();
854     return true;
855   case MemRegion::FieldRegionKind:
856     os << "a field of type " << TVR->getValueType().getAsString();
857     return true;
858   case MemRegion::ObjCIvarRegionKind:
859     os << "an instance variable of type " << TVR->getValueType().getAsString();
860     return true;
861   default:
862     return false;
863   }
864 }
865 
866 //===----------------------------------------------------------------------===//
867 // evaluation of individual function calls.
868 //===----------------------------------------------------------------------===//
869 
870 void CStringChecker::evalCopyCommon(CheckerContext &C,
871                                     const CallExpr *CE,
872                                     ProgramStateRef state,
873                                     const Expr *Size, const Expr *Dest,
874                                     const Expr *Source, bool Restricted,
875                                     bool IsMempcpy) const {
876   CurrentFunctionDescription = "memory copy function";
877 
878   // See if the size argument is zero.
879   const LocationContext *LCtx = C.getLocationContext();
880   SVal sizeVal = state->getSVal(Size, LCtx);
881   QualType sizeTy = Size->getType();
882 
883   ProgramStateRef stateZeroSize, stateNonZeroSize;
884   llvm::tie(stateZeroSize, stateNonZeroSize) =
885     assumeZero(C, state, sizeVal, sizeTy);
886 
887   // Get the value of the Dest.
888   SVal destVal = state->getSVal(Dest, LCtx);
889 
890   // If the size is zero, there won't be any actual memory access, so
891   // just bind the return value to the destination buffer and return.
892   if (stateZeroSize && !stateNonZeroSize) {
893     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
894     C.addTransition(stateZeroSize);
895     return;
896   }
897 
898   // If the size can be nonzero, we have to check the other arguments.
899   if (stateNonZeroSize) {
900     state = stateNonZeroSize;
901 
902     // Ensure the destination is not null. If it is NULL there will be a
903     // NULL pointer dereference.
904     state = checkNonNull(C, state, Dest, destVal);
905     if (!state)
906       return;
907 
908     // Get the value of the Src.
909     SVal srcVal = state->getSVal(Source, LCtx);
910 
911     // Ensure the source is not null. If it is NULL there will be a
912     // NULL pointer dereference.
913     state = checkNonNull(C, state, Source, srcVal);
914     if (!state)
915       return;
916 
917     // Ensure the accesses are valid and that the buffers do not overlap.
918     const char * const writeWarning =
919       "Memory copy function overflows destination buffer";
920     state = CheckBufferAccess(C, state, Size, Dest, Source,
921                               writeWarning, /* sourceWarning = */ NULL);
922     if (Restricted)
923       state = CheckOverlap(C, state, Size, Dest, Source);
924 
925     if (!state)
926       return;
927 
928     // If this is mempcpy, get the byte after the last byte copied and
929     // bind the expr.
930     if (IsMempcpy) {
931       loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
932       assert(destRegVal && "Destination should be a known MemRegionVal here");
933 
934       // Get the length to copy.
935       NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
936 
937       if (lenValNonLoc) {
938         // Get the byte after the last byte copied.
939         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
940                                                           *destRegVal,
941                                                           *lenValNonLoc,
942                                                           Dest->getType());
943 
944         // The byte after the last byte copied is the return value.
945         state = state->BindExpr(CE, LCtx, lastElement);
946       } else {
947         // If we don't know how much we copied, we can at least
948         // conjure a return value for later.
949         SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx,
950                                                           C.blockCount());
951         state = state->BindExpr(CE, LCtx, result);
952       }
953 
954     } else {
955       // All other copies return the destination buffer.
956       // (Well, bcopy() has a void return type, but this won't hurt.)
957       state = state->BindExpr(CE, LCtx, destVal);
958     }
959 
960     // Invalidate the destination.
961     // FIXME: Even if we can't perfectly model the copy, we should see if we
962     // can use LazyCompoundVals to copy the source values into the destination.
963     // This would probably remove any existing bindings past the end of the
964     // copied region, but that's still an improvement over blank invalidation.
965     state = InvalidateBuffer(C, state, Dest,
966                              state->getSVal(Dest, C.getLocationContext()));
967     C.addTransition(state);
968   }
969 }
970 
971 
972 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
973   if (CE->getNumArgs() < 3)
974     return;
975 
976   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
977   // The return value is the address of the destination buffer.
978   const Expr *Dest = CE->getArg(0);
979   ProgramStateRef state = C.getState();
980 
981   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
982 }
983 
984 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
985   if (CE->getNumArgs() < 3)
986     return;
987 
988   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
989   // The return value is a pointer to the byte following the last written byte.
990   const Expr *Dest = CE->getArg(0);
991   ProgramStateRef state = C.getState();
992 
993   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
994 }
995 
996 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
997   if (CE->getNumArgs() < 3)
998     return;
999 
1000   // void *memmove(void *dst, const void *src, size_t n);
1001   // The return value is the address of the destination buffer.
1002   const Expr *Dest = CE->getArg(0);
1003   ProgramStateRef state = C.getState();
1004 
1005   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1006 }
1007 
1008 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1009   if (CE->getNumArgs() < 3)
1010     return;
1011 
1012   // void bcopy(const void *src, void *dst, size_t n);
1013   evalCopyCommon(C, CE, C.getState(),
1014                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1015 }
1016 
1017 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1018   if (CE->getNumArgs() < 3)
1019     return;
1020 
1021   // int memcmp(const void *s1, const void *s2, size_t n);
1022   CurrentFunctionDescription = "memory comparison function";
1023 
1024   const Expr *Left = CE->getArg(0);
1025   const Expr *Right = CE->getArg(1);
1026   const Expr *Size = CE->getArg(2);
1027 
1028   ProgramStateRef state = C.getState();
1029   SValBuilder &svalBuilder = C.getSValBuilder();
1030 
1031   // See if the size argument is zero.
1032   const LocationContext *LCtx = C.getLocationContext();
1033   SVal sizeVal = state->getSVal(Size, LCtx);
1034   QualType sizeTy = Size->getType();
1035 
1036   ProgramStateRef stateZeroSize, stateNonZeroSize;
1037   llvm::tie(stateZeroSize, stateNonZeroSize) =
1038     assumeZero(C, state, sizeVal, sizeTy);
1039 
1040   // If the size can be zero, the result will be 0 in that case, and we don't
1041   // have to check either of the buffers.
1042   if (stateZeroSize) {
1043     state = stateZeroSize;
1044     state = state->BindExpr(CE, LCtx,
1045                             svalBuilder.makeZeroVal(CE->getType()));
1046     C.addTransition(state);
1047   }
1048 
1049   // If the size can be nonzero, we have to check the other arguments.
1050   if (stateNonZeroSize) {
1051     state = stateNonZeroSize;
1052     // If we know the two buffers are the same, we know the result is 0.
1053     // First, get the two buffers' addresses. Another checker will have already
1054     // made sure they're not undefined.
1055     DefinedOrUnknownSVal LV =
1056       cast<DefinedOrUnknownSVal>(state->getSVal(Left, LCtx));
1057     DefinedOrUnknownSVal RV =
1058       cast<DefinedOrUnknownSVal>(state->getSVal(Right, LCtx));
1059 
1060     // See if they are the same.
1061     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1062     ProgramStateRef StSameBuf, StNotSameBuf;
1063     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1064 
1065     // If the two arguments might be the same buffer, we know the result is 0,
1066     // and we only need to check one size.
1067     if (StSameBuf) {
1068       state = StSameBuf;
1069       state = CheckBufferAccess(C, state, Size, Left);
1070       if (state) {
1071         state = StSameBuf->BindExpr(CE, LCtx,
1072                                     svalBuilder.makeZeroVal(CE->getType()));
1073         C.addTransition(state);
1074       }
1075     }
1076 
1077     // If the two arguments might be different buffers, we have to check the
1078     // size of both of them.
1079     if (StNotSameBuf) {
1080       state = StNotSameBuf;
1081       state = CheckBufferAccess(C, state, Size, Left, Right);
1082       if (state) {
1083         // The return value is the comparison result, which we don't know.
1084         SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1085         state = state->BindExpr(CE, LCtx, CmpV);
1086         C.addTransition(state);
1087       }
1088     }
1089   }
1090 }
1091 
1092 void CStringChecker::evalstrLength(CheckerContext &C,
1093                                    const CallExpr *CE) const {
1094   if (CE->getNumArgs() < 1)
1095     return;
1096 
1097   // size_t strlen(const char *s);
1098   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1099 }
1100 
1101 void CStringChecker::evalstrnLength(CheckerContext &C,
1102                                     const CallExpr *CE) const {
1103   if (CE->getNumArgs() < 2)
1104     return;
1105 
1106   // size_t strnlen(const char *s, size_t maxlen);
1107   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1108 }
1109 
1110 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1111                                          bool IsStrnlen) const {
1112   CurrentFunctionDescription = "string length function";
1113   ProgramStateRef state = C.getState();
1114   const LocationContext *LCtx = C.getLocationContext();
1115 
1116   if (IsStrnlen) {
1117     const Expr *maxlenExpr = CE->getArg(1);
1118     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1119 
1120     ProgramStateRef stateZeroSize, stateNonZeroSize;
1121     llvm::tie(stateZeroSize, stateNonZeroSize) =
1122       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1123 
1124     // If the size can be zero, the result will be 0 in that case, and we don't
1125     // have to check the string itself.
1126     if (stateZeroSize) {
1127       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1128       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1129       C.addTransition(stateZeroSize);
1130     }
1131 
1132     // If the size is GUARANTEED to be zero, we're done!
1133     if (!stateNonZeroSize)
1134       return;
1135 
1136     // Otherwise, record the assumption that the size is nonzero.
1137     state = stateNonZeroSize;
1138   }
1139 
1140   // Check that the string argument is non-null.
1141   const Expr *Arg = CE->getArg(0);
1142   SVal ArgVal = state->getSVal(Arg, LCtx);
1143 
1144   state = checkNonNull(C, state, Arg, ArgVal);
1145 
1146   if (!state)
1147     return;
1148 
1149   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1150 
1151   // If the argument isn't a valid C string, there's no valid state to
1152   // transition to.
1153   if (strLength.isUndef())
1154     return;
1155 
1156   DefinedOrUnknownSVal result = UnknownVal();
1157 
1158   // If the check is for strnlen() then bind the return value to no more than
1159   // the maxlen value.
1160   if (IsStrnlen) {
1161     QualType cmpTy = C.getSValBuilder().getConditionType();
1162 
1163     // It's a little unfortunate to be getting this again,
1164     // but it's not that expensive...
1165     const Expr *maxlenExpr = CE->getArg(1);
1166     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1167 
1168     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1169     NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1170 
1171     if (strLengthNL && maxlenValNL) {
1172       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1173 
1174       // Check if the strLength is greater than the maxlen.
1175       llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1176         state->assume(cast<DefinedOrUnknownSVal>
1177                       (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1178                                                       *strLengthNL,
1179                                                       *maxlenValNL,
1180                                                       cmpTy)));
1181 
1182       if (stateStringTooLong && !stateStringNotTooLong) {
1183         // If the string is longer than maxlen, return maxlen.
1184         result = *maxlenValNL;
1185       } else if (stateStringNotTooLong && !stateStringTooLong) {
1186         // If the string is shorter than maxlen, return its length.
1187         result = *strLengthNL;
1188       }
1189     }
1190 
1191     if (result.isUnknown()) {
1192       // If we don't have enough information for a comparison, there's
1193       // no guarantee the full string length will actually be returned.
1194       // All we know is the return value is the min of the string length
1195       // and the limit. This is better than nothing.
1196       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1197       NonLoc *resultNL = cast<NonLoc>(&result);
1198 
1199       if (strLengthNL) {
1200         state = state->assume(cast<DefinedOrUnknownSVal>
1201                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1202                                                               *resultNL,
1203                                                               *strLengthNL,
1204                                                               cmpTy)), true);
1205       }
1206 
1207       if (maxlenValNL) {
1208         state = state->assume(cast<DefinedOrUnknownSVal>
1209                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1210                                                               *resultNL,
1211                                                               *maxlenValNL,
1212                                                               cmpTy)), true);
1213       }
1214     }
1215 
1216   } else {
1217     // This is a plain strlen(), not strnlen().
1218     result = cast<DefinedOrUnknownSVal>(strLength);
1219 
1220     // If we don't know the length of the string, conjure a return
1221     // value, so it can be used in constraints, at least.
1222     if (result.isUnknown()) {
1223       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1224     }
1225   }
1226 
1227   // Bind the return value.
1228   assert(!result.isUnknown() && "Should have conjured a value by now");
1229   state = state->BindExpr(CE, LCtx, result);
1230   C.addTransition(state);
1231 }
1232 
1233 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1234   if (CE->getNumArgs() < 2)
1235     return;
1236 
1237   // char *strcpy(char *restrict dst, const char *restrict src);
1238   evalStrcpyCommon(C, CE,
1239                    /* returnEnd = */ false,
1240                    /* isBounded = */ false,
1241                    /* isAppending = */ false);
1242 }
1243 
1244 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1245   if (CE->getNumArgs() < 3)
1246     return;
1247 
1248   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1249   evalStrcpyCommon(C, CE,
1250                    /* returnEnd = */ false,
1251                    /* isBounded = */ true,
1252                    /* isAppending = */ false);
1253 }
1254 
1255 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1256   if (CE->getNumArgs() < 2)
1257     return;
1258 
1259   // char *stpcpy(char *restrict dst, const char *restrict src);
1260   evalStrcpyCommon(C, CE,
1261                    /* returnEnd = */ true,
1262                    /* isBounded = */ false,
1263                    /* isAppending = */ false);
1264 }
1265 
1266 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1267   if (CE->getNumArgs() < 2)
1268     return;
1269 
1270   //char *strcat(char *restrict s1, const char *restrict s2);
1271   evalStrcpyCommon(C, CE,
1272                    /* returnEnd = */ false,
1273                    /* isBounded = */ false,
1274                    /* isAppending = */ true);
1275 }
1276 
1277 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1278   if (CE->getNumArgs() < 3)
1279     return;
1280 
1281   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1282   evalStrcpyCommon(C, CE,
1283                    /* returnEnd = */ false,
1284                    /* isBounded = */ true,
1285                    /* isAppending = */ true);
1286 }
1287 
1288 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1289                                       bool returnEnd, bool isBounded,
1290                                       bool isAppending) const {
1291   CurrentFunctionDescription = "string copy function";
1292   ProgramStateRef state = C.getState();
1293   const LocationContext *LCtx = C.getLocationContext();
1294 
1295   // Check that the destination is non-null.
1296   const Expr *Dst = CE->getArg(0);
1297   SVal DstVal = state->getSVal(Dst, LCtx);
1298 
1299   state = checkNonNull(C, state, Dst, DstVal);
1300   if (!state)
1301     return;
1302 
1303   // Check that the source is non-null.
1304   const Expr *srcExpr = CE->getArg(1);
1305   SVal srcVal = state->getSVal(srcExpr, LCtx);
1306   state = checkNonNull(C, state, srcExpr, srcVal);
1307   if (!state)
1308     return;
1309 
1310   // Get the string length of the source.
1311   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1312 
1313   // If the source isn't a valid C string, give up.
1314   if (strLength.isUndef())
1315     return;
1316 
1317   SValBuilder &svalBuilder = C.getSValBuilder();
1318   QualType cmpTy = svalBuilder.getConditionType();
1319   QualType sizeTy = svalBuilder.getContext().getSizeType();
1320 
1321   // These two values allow checking two kinds of errors:
1322   // - actual overflows caused by a source that doesn't fit in the destination
1323   // - potential overflows caused by a bound that could exceed the destination
1324   SVal amountCopied = UnknownVal();
1325   SVal maxLastElementIndex = UnknownVal();
1326   const char *boundWarning = NULL;
1327 
1328   // If the function is strncpy, strncat, etc... it is bounded.
1329   if (isBounded) {
1330     // Get the max number of characters to copy.
1331     const Expr *lenExpr = CE->getArg(2);
1332     SVal lenVal = state->getSVal(lenExpr, LCtx);
1333 
1334     // Protect against misdeclared strncpy().
1335     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1336 
1337     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1338     NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1339 
1340     // If we know both values, we might be able to figure out how much
1341     // we're copying.
1342     if (strLengthNL && lenValNL) {
1343       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1344 
1345       // Check if the max number to copy is less than the length of the src.
1346       // If the bound is equal to the source length, strncpy won't null-
1347       // terminate the result!
1348       llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1349         state->assume(cast<DefinedOrUnknownSVal>
1350                       (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL,
1351                                                *lenValNL, cmpTy)));
1352 
1353       if (stateSourceTooLong && !stateSourceNotTooLong) {
1354         // Max number to copy is less than the length of the src, so the actual
1355         // strLength copied is the max number arg.
1356         state = stateSourceTooLong;
1357         amountCopied = lenVal;
1358 
1359       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1360         // The source buffer entirely fits in the bound.
1361         state = stateSourceNotTooLong;
1362         amountCopied = strLength;
1363       }
1364     }
1365 
1366     // We still want to know if the bound is known to be too large.
1367     if (lenValNL) {
1368       if (isAppending) {
1369         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1370 
1371         // Get the string length of the destination. If the destination is
1372         // memory that can't have a string length, we shouldn't be copying
1373         // into it anyway.
1374         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1375         if (dstStrLength.isUndef())
1376           return;
1377 
1378         if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) {
1379           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1380                                                         *lenValNL,
1381                                                         *dstStrLengthNL,
1382                                                         sizeTy);
1383           boundWarning = "Size argument is greater than the free space in the "
1384                          "destination buffer";
1385         }
1386 
1387       } else {
1388         // For strncpy, this is just checking that lenVal <= sizeof(dst)
1389         // (Yes, strncpy and strncat differ in how they treat termination.
1390         // strncat ALWAYS terminates, but strncpy doesn't.)
1391 
1392         // We need a special case for when the copy size is zero, in which
1393         // case strncpy will do no work at all. Our bounds check uses n-1
1394         // as the last element accessed, so n == 0 is problematic.
1395         ProgramStateRef StateZeroSize, StateNonZeroSize;
1396         llvm::tie(StateZeroSize, StateNonZeroSize) =
1397           assumeZero(C, state, *lenValNL, sizeTy);
1398 
1399         // If the size is known to be zero, we're done.
1400         if (StateZeroSize && !StateNonZeroSize) {
1401           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1402           C.addTransition(StateZeroSize);
1403           return;
1404         }
1405 
1406         // Otherwise, go ahead and figure out the last element we'll touch.
1407         // We don't record the non-zero assumption here because we can't
1408         // be sure. We won't warn on a possible zero.
1409         NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
1410         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1411                                                       one, sizeTy);
1412         boundWarning = "Size argument is greater than the length of the "
1413                        "destination buffer";
1414       }
1415     }
1416 
1417     // If we couldn't pin down the copy length, at least bound it.
1418     // FIXME: We should actually run this code path for append as well, but
1419     // right now it creates problems with constraints (since we can end up
1420     // trying to pass constraints from symbol to symbol).
1421     if (amountCopied.isUnknown() && !isAppending) {
1422       // Try to get a "hypothetical" string length symbol, which we can later
1423       // set as a real value if that turns out to be the case.
1424       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1425       assert(!amountCopied.isUndef());
1426 
1427       if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1428         if (lenValNL) {
1429           // amountCopied <= lenVal
1430           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1431                                                              *amountCopiedNL,
1432                                                              *lenValNL,
1433                                                              cmpTy);
1434           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1435                                 true);
1436           if (!state)
1437             return;
1438         }
1439 
1440         if (strLengthNL) {
1441           // amountCopied <= strlen(source)
1442           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1443                                                            *amountCopiedNL,
1444                                                            *strLengthNL,
1445                                                            cmpTy);
1446           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1447                                 true);
1448           if (!state)
1449             return;
1450         }
1451       }
1452     }
1453 
1454   } else {
1455     // The function isn't bounded. The amount copied should match the length
1456     // of the source buffer.
1457     amountCopied = strLength;
1458   }
1459 
1460   assert(state);
1461 
1462   // This represents the number of characters copied into the destination
1463   // buffer. (It may not actually be the strlen if the destination buffer
1464   // is not terminated.)
1465   SVal finalStrLength = UnknownVal();
1466 
1467   // If this is an appending function (strcat, strncat...) then set the
1468   // string length to strlen(src) + strlen(dst) since the buffer will
1469   // ultimately contain both.
1470   if (isAppending) {
1471     // Get the string length of the destination. If the destination is memory
1472     // that can't have a string length, we shouldn't be copying into it anyway.
1473     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1474     if (dstStrLength.isUndef())
1475       return;
1476 
1477     NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1478     NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1479 
1480     // If we know both string lengths, we might know the final string length.
1481     if (srcStrLengthNL && dstStrLengthNL) {
1482       // Make sure the two lengths together don't overflow a size_t.
1483       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1484       if (!state)
1485         return;
1486 
1487       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1488                                                *dstStrLengthNL, sizeTy);
1489     }
1490 
1491     // If we couldn't get a single value for the final string length,
1492     // we can at least bound it by the individual lengths.
1493     if (finalStrLength.isUnknown()) {
1494       // Try to get a "hypothetical" string length symbol, which we can later
1495       // set as a real value if that turns out to be the case.
1496       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1497       assert(!finalStrLength.isUndef());
1498 
1499       if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1500         if (srcStrLengthNL) {
1501           // finalStrLength >= srcStrLength
1502           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1503                                                         *finalStrLengthNL,
1504                                                         *srcStrLengthNL,
1505                                                         cmpTy);
1506           state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1507                                 true);
1508           if (!state)
1509             return;
1510         }
1511 
1512         if (dstStrLengthNL) {
1513           // finalStrLength >= dstStrLength
1514           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1515                                                       *finalStrLengthNL,
1516                                                       *dstStrLengthNL,
1517                                                       cmpTy);
1518           state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1519                                 true);
1520           if (!state)
1521             return;
1522         }
1523       }
1524     }
1525 
1526   } else {
1527     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1528     // the final string length will match the input string length.
1529     finalStrLength = amountCopied;
1530   }
1531 
1532   // The final result of the function will either be a pointer past the last
1533   // copied element, or a pointer to the start of the destination buffer.
1534   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1535 
1536   assert(state);
1537 
1538   // If the destination is a MemRegion, try to check for a buffer overflow and
1539   // record the new string length.
1540   if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1541     QualType ptrTy = Dst->getType();
1542 
1543     // If we have an exact value on a bounded copy, use that to check for
1544     // overflows, rather than our estimate about how much is actually copied.
1545     if (boundWarning) {
1546       if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) {
1547         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1548                                                       *maxLastNL, ptrTy);
1549         state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1550                               boundWarning);
1551         if (!state)
1552           return;
1553       }
1554     }
1555 
1556     // Then, if the final length is known...
1557     if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1558       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1559                                                  *knownStrLength, ptrTy);
1560 
1561       // ...and we haven't checked the bound, we'll check the actual copy.
1562       if (!boundWarning) {
1563         const char * const warningMsg =
1564           "String copy function overflows destination buffer";
1565         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1566         if (!state)
1567           return;
1568       }
1569 
1570       // If this is a stpcpy-style copy, the last element is the return value.
1571       if (returnEnd)
1572         Result = lastElement;
1573     }
1574 
1575     // Invalidate the destination. This must happen before we set the C string
1576     // length because invalidation will clear the length.
1577     // FIXME: Even if we can't perfectly model the copy, we should see if we
1578     // can use LazyCompoundVals to copy the source values into the destination.
1579     // This would probably remove any existing bindings past the end of the
1580     // string, but that's still an improvement over blank invalidation.
1581     state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1582 
1583     // Set the C string length of the destination, if we know it.
1584     if (isBounded && !isAppending) {
1585       // strncpy is annoying in that it doesn't guarantee to null-terminate
1586       // the result string. If the original string didn't fit entirely inside
1587       // the bound (including the null-terminator), we don't know how long the
1588       // result is.
1589       if (amountCopied != strLength)
1590         finalStrLength = UnknownVal();
1591     }
1592     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1593   }
1594 
1595   assert(state);
1596 
1597   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1598   // overflow, we still need a result. Conjure a return value.
1599   if (returnEnd && Result.isUnknown()) {
1600     Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1601   }
1602 
1603   // Set the return value.
1604   state = state->BindExpr(CE, LCtx, Result);
1605   C.addTransition(state);
1606 }
1607 
1608 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1609   if (CE->getNumArgs() < 2)
1610     return;
1611 
1612   //int strcmp(const char *s1, const char *s2);
1613   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1614 }
1615 
1616 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1617   if (CE->getNumArgs() < 3)
1618     return;
1619 
1620   //int strncmp(const char *s1, const char *s2, size_t n);
1621   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1622 }
1623 
1624 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1625                                     const CallExpr *CE) const {
1626   if (CE->getNumArgs() < 2)
1627     return;
1628 
1629   //int strcasecmp(const char *s1, const char *s2);
1630   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1631 }
1632 
1633 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1634                                      const CallExpr *CE) const {
1635   if (CE->getNumArgs() < 3)
1636     return;
1637 
1638   //int strncasecmp(const char *s1, const char *s2, size_t n);
1639   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1640 }
1641 
1642 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1643                                       bool isBounded, bool ignoreCase) const {
1644   CurrentFunctionDescription = "string comparison function";
1645   ProgramStateRef state = C.getState();
1646   const LocationContext *LCtx = C.getLocationContext();
1647 
1648   // Check that the first string is non-null
1649   const Expr *s1 = CE->getArg(0);
1650   SVal s1Val = state->getSVal(s1, LCtx);
1651   state = checkNonNull(C, state, s1, s1Val);
1652   if (!state)
1653     return;
1654 
1655   // Check that the second string is non-null.
1656   const Expr *s2 = CE->getArg(1);
1657   SVal s2Val = state->getSVal(s2, LCtx);
1658   state = checkNonNull(C, state, s2, s2Val);
1659   if (!state)
1660     return;
1661 
1662   // Get the string length of the first string or give up.
1663   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1664   if (s1Length.isUndef())
1665     return;
1666 
1667   // Get the string length of the second string or give up.
1668   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1669   if (s2Length.isUndef())
1670     return;
1671 
1672   // If we know the two buffers are the same, we know the result is 0.
1673   // First, get the two buffers' addresses. Another checker will have already
1674   // made sure they're not undefined.
1675   DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
1676   DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
1677 
1678   // See if they are the same.
1679   SValBuilder &svalBuilder = C.getSValBuilder();
1680   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1681   ProgramStateRef StSameBuf, StNotSameBuf;
1682   llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1683 
1684   // If the two arguments might be the same buffer, we know the result is 0,
1685   // and we only need to check one size.
1686   if (StSameBuf) {
1687     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1688                                     svalBuilder.makeZeroVal(CE->getType()));
1689     C.addTransition(StSameBuf);
1690 
1691     // If the two arguments are GUARANTEED to be the same, we're done!
1692     if (!StNotSameBuf)
1693       return;
1694   }
1695 
1696   assert(StNotSameBuf);
1697   state = StNotSameBuf;
1698 
1699   // At this point we can go about comparing the two buffers.
1700   // For now, we only do this if they're both known string literals.
1701 
1702   // Attempt to extract string literals from both expressions.
1703   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1704   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1705   bool canComputeResult = false;
1706 
1707   if (s1StrLiteral && s2StrLiteral) {
1708     StringRef s1StrRef = s1StrLiteral->getString();
1709     StringRef s2StrRef = s2StrLiteral->getString();
1710 
1711     if (isBounded) {
1712       // Get the max number of characters to compare.
1713       const Expr *lenExpr = CE->getArg(2);
1714       SVal lenVal = state->getSVal(lenExpr, LCtx);
1715 
1716       // If the length is known, we can get the right substrings.
1717       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1718         // Create substrings of each to compare the prefix.
1719         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1720         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1721         canComputeResult = true;
1722       }
1723     } else {
1724       // This is a normal, unbounded strcmp.
1725       canComputeResult = true;
1726     }
1727 
1728     if (canComputeResult) {
1729       // Real strcmp stops at null characters.
1730       size_t s1Term = s1StrRef.find('\0');
1731       if (s1Term != StringRef::npos)
1732         s1StrRef = s1StrRef.substr(0, s1Term);
1733 
1734       size_t s2Term = s2StrRef.find('\0');
1735       if (s2Term != StringRef::npos)
1736         s2StrRef = s2StrRef.substr(0, s2Term);
1737 
1738       // Use StringRef's comparison methods to compute the actual result.
1739       int result;
1740 
1741       if (ignoreCase) {
1742         // Compare string 1 to string 2 the same way strcasecmp() does.
1743         result = s1StrRef.compare_lower(s2StrRef);
1744       } else {
1745         // Compare string 1 to string 2 the same way strcmp() does.
1746         result = s1StrRef.compare(s2StrRef);
1747       }
1748 
1749       // Build the SVal of the comparison and bind the return value.
1750       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1751       state = state->BindExpr(CE, LCtx, resultVal);
1752     }
1753   }
1754 
1755   if (!canComputeResult) {
1756     // Conjure a symbolic value. It's the best we can do.
1757     SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1758     state = state->BindExpr(CE, LCtx, resultVal);
1759   }
1760 
1761   // Record this as a possible path.
1762   C.addTransition(state);
1763 }
1764 
1765 //===----------------------------------------------------------------------===//
1766 // The driver method, and other Checker callbacks.
1767 //===----------------------------------------------------------------------===//
1768 
1769 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1770   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1771 
1772   if (!FDecl)
1773     return false;
1774 
1775   FnCheck evalFunction = 0;
1776   if (C.isCLibraryFunction(FDecl, "memcpy"))
1777     evalFunction =  &CStringChecker::evalMemcpy;
1778   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1779     evalFunction =  &CStringChecker::evalMempcpy;
1780   else if (C.isCLibraryFunction(FDecl, "memcmp"))
1781     evalFunction =  &CStringChecker::evalMemcmp;
1782   else if (C.isCLibraryFunction(FDecl, "memmove"))
1783     evalFunction =  &CStringChecker::evalMemmove;
1784   else if (C.isCLibraryFunction(FDecl, "strcpy"))
1785     evalFunction =  &CStringChecker::evalStrcpy;
1786   else if (C.isCLibraryFunction(FDecl, "strncpy"))
1787     evalFunction =  &CStringChecker::evalStrncpy;
1788   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1789     evalFunction =  &CStringChecker::evalStpcpy;
1790   else if (C.isCLibraryFunction(FDecl, "strcat"))
1791     evalFunction =  &CStringChecker::evalStrcat;
1792   else if (C.isCLibraryFunction(FDecl, "strncat"))
1793     evalFunction =  &CStringChecker::evalStrncat;
1794   else if (C.isCLibraryFunction(FDecl, "strlen"))
1795     evalFunction =  &CStringChecker::evalstrLength;
1796   else if (C.isCLibraryFunction(FDecl, "strnlen"))
1797     evalFunction =  &CStringChecker::evalstrnLength;
1798   else if (C.isCLibraryFunction(FDecl, "strcmp"))
1799     evalFunction =  &CStringChecker::evalStrcmp;
1800   else if (C.isCLibraryFunction(FDecl, "strncmp"))
1801     evalFunction =  &CStringChecker::evalStrncmp;
1802   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1803     evalFunction =  &CStringChecker::evalStrcasecmp;
1804   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1805     evalFunction =  &CStringChecker::evalStrncasecmp;
1806   else if (C.isCLibraryFunction(FDecl, "bcopy"))
1807     evalFunction =  &CStringChecker::evalBcopy;
1808   else if (C.isCLibraryFunction(FDecl, "bcmp"))
1809     evalFunction =  &CStringChecker::evalMemcmp;
1810 
1811   // If the callee isn't a string function, let another checker handle it.
1812   if (!evalFunction)
1813     return false;
1814 
1815   // Make sure each function sets its own description.
1816   // (But don't bother in a release build.)
1817   assert(!(CurrentFunctionDescription = NULL));
1818 
1819   // Check and evaluate the call.
1820   (this->*evalFunction)(C, CE);
1821 
1822   // If the evaluate call resulted in no change, chain to the next eval call
1823   // handler.
1824   // Note, the custom CString evaluation calls assume that basic safety
1825   // properties are held. However, if the user chooses to turn off some of these
1826   // checks, we ignore the issues and leave the call evaluation to a generic
1827   // handler.
1828   if (!C.isDifferent())
1829     return false;
1830 
1831   return true;
1832 }
1833 
1834 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1835   // Record string length for char a[] = "abc";
1836   ProgramStateRef state = C.getState();
1837 
1838   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1839        I != E; ++I) {
1840     const VarDecl *D = dyn_cast<VarDecl>(*I);
1841     if (!D)
1842       continue;
1843 
1844     // FIXME: Handle array fields of structs.
1845     if (!D->getType()->isArrayType())
1846       continue;
1847 
1848     const Expr *Init = D->getInit();
1849     if (!Init)
1850       continue;
1851     if (!isa<StringLiteral>(Init))
1852       continue;
1853 
1854     Loc VarLoc = state->getLValue(D, C.getLocationContext());
1855     const MemRegion *MR = VarLoc.getAsRegion();
1856     if (!MR)
1857       continue;
1858 
1859     SVal StrVal = state->getSVal(Init, C.getLocationContext());
1860     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1861     DefinedOrUnknownSVal strLength
1862       = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1863 
1864     state = state->set<CStringLength>(MR, strLength);
1865   }
1866 
1867   C.addTransition(state);
1868 }
1869 
1870 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1871   CStringLengthTy Entries = state->get<CStringLength>();
1872   return !Entries.isEmpty();
1873 }
1874 
1875 ProgramStateRef
1876 CStringChecker::checkRegionChanges(ProgramStateRef state,
1877                                    const InvalidatedSymbols *,
1878                                    ArrayRef<const MemRegion *> ExplicitRegions,
1879                                    ArrayRef<const MemRegion *> Regions,
1880                                    const CallEvent *Call) const {
1881   CStringLengthTy Entries = state->get<CStringLength>();
1882   if (Entries.isEmpty())
1883     return state;
1884 
1885   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1886   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1887 
1888   // First build sets for the changed regions and their super-regions.
1889   for (ArrayRef<const MemRegion *>::iterator
1890        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1891     const MemRegion *MR = *I;
1892     Invalidated.insert(MR);
1893 
1894     SuperRegions.insert(MR);
1895     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1896       MR = SR->getSuperRegion();
1897       SuperRegions.insert(MR);
1898     }
1899   }
1900 
1901   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1902 
1903   // Then loop over the entries in the current state.
1904   for (CStringLengthTy::iterator I = Entries.begin(),
1905        E = Entries.end(); I != E; ++I) {
1906     const MemRegion *MR = I.getKey();
1907 
1908     // Is this entry for a super-region of a changed region?
1909     if (SuperRegions.count(MR)) {
1910       Entries = F.remove(Entries, MR);
1911       continue;
1912     }
1913 
1914     // Is this entry for a sub-region of a changed region?
1915     const MemRegion *Super = MR;
1916     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1917       Super = SR->getSuperRegion();
1918       if (Invalidated.count(Super)) {
1919         Entries = F.remove(Entries, MR);
1920         break;
1921       }
1922     }
1923   }
1924 
1925   return state->set<CStringLength>(Entries);
1926 }
1927 
1928 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
1929                                       SymbolReaper &SR) const {
1930   // Mark all symbols in our string length map as valid.
1931   CStringLengthTy Entries = state->get<CStringLength>();
1932 
1933   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1934        I != E; ++I) {
1935     SVal Len = I.getData();
1936 
1937     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
1938                                   se = Len.symbol_end(); si != se; ++si)
1939       SR.markInUse(*si);
1940   }
1941 }
1942 
1943 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1944                                       CheckerContext &C) const {
1945   if (!SR.hasDeadSymbols())
1946     return;
1947 
1948   ProgramStateRef state = C.getState();
1949   CStringLengthTy Entries = state->get<CStringLength>();
1950   if (Entries.isEmpty())
1951     return;
1952 
1953   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1954   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1955        I != E; ++I) {
1956     SVal Len = I.getData();
1957     if (SymbolRef Sym = Len.getAsSymbol()) {
1958       if (SR.isDead(Sym))
1959         Entries = F.remove(Entries, I.getKey());
1960     }
1961   }
1962 
1963   state = state->set<CStringLength>(Entries);
1964   C.addTransition(state);
1965 }
1966 
1967 #define REGISTER_CHECKER(name) \
1968 void ento::register##name(CheckerManager &mgr) {\
1969   static CStringChecker *TheChecker = 0; \
1970   if (TheChecker == 0) \
1971     TheChecker = mgr.registerChecker<CStringChecker>(); \
1972   TheChecker->Filter.Check##name = true; \
1973 }
1974 
1975 REGISTER_CHECKER(CStringNullArg)
1976 REGISTER_CHECKER(CStringOutOfBounds)
1977 REGISTER_CHECKER(CStringBufferOverlap)
1978 REGISTER_CHECKER(CStringNotNullTerm)
1979 
1980 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
1981   registerCStringNullArg(Mgr);
1982 }
1983