xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 4aca9b1cd852fcf4e11fa7ff26b73df6fbef8a4c)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
/// Checker for calls to byte-string functions (the <string.h> family).
///
/// It subscribes to call evaluation, DeclStmt pre-visits, live/dead symbol
/// tracking and region-change notifications. The individual diagnostics
/// (null argument, out-of-bounds access, overlapping buffers, non-terminated
/// string) are enabled independently through \c Filter.
class CStringChecker : public Checker< eval::Call,
                                         check::PreStmt<DeclStmt>,
                                         check::LiveSymbols,
                                         check::DeadSymbols,
                                         check::RegionChanges
                                         > {
  // Bug types for each diagnostic. Each one is created lazily, the first
  // time the corresponding report is about to be emitted.
  mutable OwningPtr<BugType> BT_Null,
                             BT_Bounds,
                             BT_Overlap,
                             BT_NotCString,
                             BT_AdditionOverflow;

  // Human-readable description of the function currently being modeled; it
  // is spliced into diagnostic messages ("... in call to <description>").
  // The reporting paths assert that it is non-null before using it.
  // NOTE(review): presumably set by the eval* entry points before they call
  // the shared checks -- confirm against the definitions.
  mutable const char *CurrentFunctionDescription;

public:
  /// The filter is used to filter out the diagnostics which are not enabled by
  /// the user.
  struct CStringChecksFilter {
    // One enable flag per diagnostic family.
    DefaultBool CheckCStringNullArg;
    DefaultBool CheckCStringOutOfBounds;
    DefaultBool CheckCStringBufferOverlap;
    DefaultBool CheckCStringNotNullTerm;

    // The check name attached to the bug types of each diagnostic family.
    CheckName CheckNameCStringNullArg;
    CheckName CheckNameCStringOutOfBounds;
    CheckName CheckNameCStringBufferOverlap;
    CheckName CheckNameCStringNotNullTerm;
  };

  CStringChecksFilter Filter;

  /// Returns the address of a function-local static, which serves as a
  /// unique tag for this checker (used, for example, when requesting
  /// metadata symbols for string lengths).
  static void *getTag() { static int tag; return &tag; }

  // Checker callbacks.
  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
  bool wantsRegionChangeUpdate(ProgramStateRef state) const;

  ProgramStateRef
    checkRegionChanges(ProgramStateRef state,
                       const InvalidatedSymbols *,
                       ArrayRef<const MemRegion *> ExplicitRegions,
                       ArrayRef<const MemRegion *> Regions,
                       const CallEvent *Call) const;

  /// Pointer-to-member type matching the signature shared by the eval*
  /// handlers declared below.
  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
                                          const CallExpr *) const;

  // Memory-copy family. evalCopyCommon holds the shared implementation.
  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
                      ProgramStateRef state,
                      const Expr *Size,
                      const Expr *Source,
                      const Expr *Dest,
                      bool Restricted = false,
                      bool IsMempcpy = false) const;

  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;

  // String-length family.
  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
  void evalstrLengthCommon(CheckerContext &C,
                           const CallExpr *CE,
                           bool IsStrnlen = false) const;

  // String-copy family. evalStrcpyCommon is parameterized by the flags below.
  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
  void evalStrcpyCommon(CheckerContext &C,
                        const CallExpr *CE,
                        bool returnEnd,
                        bool isBounded,
                        bool isAppending) const;

  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;

  // String-comparison family.
  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrcmpCommon(CheckerContext &C,
                        const CallExpr *CE,
                        bool isBounded = false,
                        bool ignoreCase = false) const;

  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;

  // Utility methods

  /// Splits \p state on whether \p V equals the zero value of type \p Ty.
  /// The first member of the result is the "is zero" state, the second the
  /// "is not zero" state.
  std::pair<ProgramStateRef , ProgramStateRef >
  static assumeZero(CheckerContext &C,
                    ProgramStateRef state, SVal V, QualType Ty);

  /// Records (or, for an unknown length, forgets) the C string length of
  /// \p MR in the program state.
  static ProgramStateRef setCStringLength(ProgramStateRef state,
                                              const MemRegion *MR,
                                              SVal strLength);
  /// Returns the recorded length for \p MR, or conjures a metadata symbol
  /// for it. When \p hypothetical is true the state is neither consulted
  /// nor updated.
  static SVal getCStringLengthForRegion(CheckerContext &C,
                                        ProgramStateRef &state,
                                        const Expr *Ex,
                                        const MemRegion *MR,
                                        bool hypothetical);
  /// Returns the C string length of the buffer value \p Buf. \p state is
  /// taken by reference because the callee may update it.
  SVal getCStringLength(CheckerContext &C,
                        ProgramStateRef &state,
                        const Expr *Ex,
                        SVal Buf,
                        bool hypothetical = false) const;

  const StringLiteral *getCStringLiteral(CheckerContext &C,
                                         ProgramStateRef &state,
                                         const Expr *expr,
                                         SVal val) const;

  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
                                          ProgramStateRef state,
                                          const Expr *Ex, SVal V,
                                          bool IsSourceBuffer);

  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
                              const MemRegion *MR);

  // Re-usable checks
  //
  // Each of these returns the state to continue with, or a null state when
  // the check failed. They also accept a null incoming state and propagate
  // it, so calls can be chained without testing in between.

  /// Reports (when enabled) if \p l is definitely null; otherwise returns
  /// the state in which it is non-null.
  ProgramStateRef checkNonNull(CheckerContext &C,
                                   ProgramStateRef state,
                                   const Expr *S,
                                   SVal l) const;
  /// Reports if the element location \p l is definitely outside its array.
  /// \p message, when non-null, replaces the generated diagnostic text.
  ProgramStateRef CheckLocation(CheckerContext &C,
                                    ProgramStateRef state,
                                    const Expr *S,
                                    SVal l,
                                    const char *message = NULL) const;
  /// Checks that \p FirstBuf and, when non-null, \p SecondBuf are non-null
  /// and can be accessed for \p Size bytes. With \p WarnAboutSize set, the
  /// bounds diagnostic highlights \p Size instead of the buffer.
  ProgramStateRef CheckBufferAccess(CheckerContext &C,
                                        ProgramStateRef state,
                                        const Expr *Size,
                                        const Expr *FirstBuf,
                                        const Expr *SecondBuf,
                                        const char *firstMessage = NULL,
                                        const char *secondMessage = NULL,
                                        bool WarnAboutSize = false) const;

  /// Single-buffer form of CheckBufferAccess.
  ProgramStateRef CheckBufferAccess(CheckerContext &C,
                                        ProgramStateRef state,
                                        const Expr *Size,
                                        const Expr *Buf,
                                        const char *message = NULL,
                                        bool WarnAboutSize = false) const {
    // This is a convenience override.
    return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
                             WarnAboutSize);
  }
  /// Reports if the \p Size -byte buffers at \p First and \p Second are
  /// known to overlap.
  ProgramStateRef CheckOverlap(CheckerContext &C,
                                   ProgramStateRef state,
                                   const Expr *Size,
                                   const Expr *First,
                                   const Expr *Second) const;
  /// Emits the overlapping-buffers report on a sink node built from
  /// \p state, highlighting both argument expressions.
  void emitOverlapBug(CheckerContext &C,
                      ProgramStateRef state,
                      const Stmt *First,
                      const Stmt *Second) const;

  /// Reports if \p left + \p right is known to exceed the maximum size_t
  /// value.
  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
                                            ProgramStateRef state,
                                            NonLoc left,
                                            NonLoc right) const;
};
200 
201 } //end anonymous namespace
202 
// Program-state map from a memory region to the SVal recorded as the length
// of the C string held in that region. It is read and written through
// state->get/set/remove<CStringLength>().
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
204 
205 //===----------------------------------------------------------------------===//
206 // Individual checks and utility methods.
207 //===----------------------------------------------------------------------===//
208 
209 std::pair<ProgramStateRef , ProgramStateRef >
210 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
211                            QualType Ty) {
212   Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
213   if (!val)
214     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
215 
216   SValBuilder &svalBuilder = C.getSValBuilder();
217   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
218   return state->assume(svalBuilder.evalEQ(state, *val, zero));
219 }
220 
221 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
222                                             ProgramStateRef state,
223                                             const Expr *S, SVal l) const {
224   // If a previous check has failed, propagate the failure.
225   if (!state)
226     return NULL;
227 
228   ProgramStateRef stateNull, stateNonNull;
229   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
230 
231   if (stateNull && !stateNonNull) {
232     if (!Filter.CheckCStringNullArg)
233       return NULL;
234 
235     ExplodedNode *N = C.generateSink(stateNull);
236     if (!N)
237       return NULL;
238 
239     if (!BT_Null)
240       BT_Null.reset(new BuiltinBug(
241           Filter.CheckNameCStringNullArg, categories::UnixAPI,
242           "Null pointer argument in call to byte string function"));
243 
244     SmallString<80> buf;
245     llvm::raw_svector_ostream os(buf);
246     assert(CurrentFunctionDescription);
247     os << "Null pointer argument in call to " << CurrentFunctionDescription;
248 
249     // Generate a report for this bug.
250     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
251     BugReport *report = new BugReport(*BT, os.str(), N);
252 
253     report->addRange(S->getSourceRange());
254     bugreporter::trackNullOrUndefValue(N, S, *report);
255     C.emitReport(report);
256     return NULL;
257   }
258 
259   // From here on, assume that the value is non-null.
260   assert(stateNonNull);
261   return stateNonNull;
262 }
263 
264 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
265 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
266                                              ProgramStateRef state,
267                                              const Expr *S, SVal l,
268                                              const char *warningMsg) const {
269   // If a previous check has failed, propagate the failure.
270   if (!state)
271     return NULL;
272 
273   // Check for out of bound array element access.
274   const MemRegion *R = l.getAsRegion();
275   if (!R)
276     return state;
277 
278   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
279   if (!ER)
280     return state;
281 
282   assert(ER->getValueType() == C.getASTContext().CharTy &&
283     "CheckLocation should only be called with char* ElementRegions");
284 
285   // Get the size of the array.
286   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
287   SValBuilder &svalBuilder = C.getSValBuilder();
288   SVal Extent =
289     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
290   DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
291 
292   // Get the index of the accessed element.
293   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
294 
295   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
296   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
297   if (StOutBound && !StInBound) {
298     ExplodedNode *N = C.generateSink(StOutBound);
299     if (!N)
300       return NULL;
301 
302     if (!BT_Bounds) {
303       BT_Bounds.reset(new BuiltinBug(
304           Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
305           "Byte string function accesses out-of-bound array element"));
306     }
307     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
308 
309     // Generate a report for this bug.
310     BugReport *report;
311     if (warningMsg) {
312       report = new BugReport(*BT, warningMsg, N);
313     } else {
314       assert(CurrentFunctionDescription);
315       assert(CurrentFunctionDescription[0] != '\0');
316 
317       SmallString<80> buf;
318       llvm::raw_svector_ostream os(buf);
319       os << toUppercase(CurrentFunctionDescription[0])
320          << &CurrentFunctionDescription[1]
321          << " accesses out-of-bound array element";
322       report = new BugReport(*BT, os.str(), N);
323     }
324 
325     // FIXME: It would be nice to eventually make this diagnostic more clear,
326     // e.g., by referencing the original declaration or by saying *why* this
327     // reference is outside the range.
328 
329     report->addRange(S->getSourceRange());
330     C.emitReport(report);
331     return NULL;
332   }
333 
334   // Array bound check succeeded.  From this point forward the array bound
335   // should always succeed.
336   return StInBound;
337 }
338 
339 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
340                                                  ProgramStateRef state,
341                                                  const Expr *Size,
342                                                  const Expr *FirstBuf,
343                                                  const Expr *SecondBuf,
344                                                  const char *firstMessage,
345                                                  const char *secondMessage,
346                                                  bool WarnAboutSize) const {
347   // If a previous check has failed, propagate the failure.
348   if (!state)
349     return NULL;
350 
351   SValBuilder &svalBuilder = C.getSValBuilder();
352   ASTContext &Ctx = svalBuilder.getContext();
353   const LocationContext *LCtx = C.getLocationContext();
354 
355   QualType sizeTy = Size->getType();
356   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
357 
358   // Check that the first buffer is non-null.
359   SVal BufVal = state->getSVal(FirstBuf, LCtx);
360   state = checkNonNull(C, state, FirstBuf, BufVal);
361   if (!state)
362     return NULL;
363 
364   // If out-of-bounds checking is turned off, skip the rest.
365   if (!Filter.CheckCStringOutOfBounds)
366     return state;
367 
368   // Get the access length and make sure it is known.
369   // FIXME: This assumes the caller has already checked that the access length
370   // is positive. And that it's unsigned.
371   SVal LengthVal = state->getSVal(Size, LCtx);
372   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
373   if (!Length)
374     return state;
375 
376   // Compute the offset of the last element to be accessed: size-1.
377   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
378   NonLoc LastOffset = svalBuilder
379       .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
380 
381   // Check that the first buffer is sufficiently long.
382   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
383   if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
384     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
385 
386     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
387                                           LastOffset, PtrTy);
388     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
389 
390     // If the buffer isn't large enough, abort.
391     if (!state)
392       return NULL;
393   }
394 
395   // If there's a second buffer, check it as well.
396   if (SecondBuf) {
397     BufVal = state->getSVal(SecondBuf, LCtx);
398     state = checkNonNull(C, state, SecondBuf, BufVal);
399     if (!state)
400       return NULL;
401 
402     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
403     if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
404       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
405 
406       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
407                                             LastOffset, PtrTy);
408       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
409     }
410   }
411 
412   // Large enough or not, return this state!
413   return state;
414 }
415 
416 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
417                                             ProgramStateRef state,
418                                             const Expr *Size,
419                                             const Expr *First,
420                                             const Expr *Second) const {
421   if (!Filter.CheckCStringBufferOverlap)
422     return state;
423 
424   // Do a simple check for overlap: if the two arguments are from the same
425   // buffer, see if the end of the first is greater than the start of the second
426   // or vice versa.
427 
428   // If a previous check has failed, propagate the failure.
429   if (!state)
430     return NULL;
431 
432   ProgramStateRef stateTrue, stateFalse;
433 
434   // Get the buffer values and make sure they're known locations.
435   const LocationContext *LCtx = C.getLocationContext();
436   SVal firstVal = state->getSVal(First, LCtx);
437   SVal secondVal = state->getSVal(Second, LCtx);
438 
439   Optional<Loc> firstLoc = firstVal.getAs<Loc>();
440   if (!firstLoc)
441     return state;
442 
443   Optional<Loc> secondLoc = secondVal.getAs<Loc>();
444   if (!secondLoc)
445     return state;
446 
447   // Are the two values the same?
448   SValBuilder &svalBuilder = C.getSValBuilder();
449   llvm::tie(stateTrue, stateFalse) =
450     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
451 
452   if (stateTrue && !stateFalse) {
453     // If the values are known to be equal, that's automatically an overlap.
454     emitOverlapBug(C, stateTrue, First, Second);
455     return NULL;
456   }
457 
458   // assume the two expressions are not equal.
459   assert(stateFalse);
460   state = stateFalse;
461 
462   // Which value comes first?
463   QualType cmpTy = svalBuilder.getConditionType();
464   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
465                                          *firstLoc, *secondLoc, cmpTy);
466   Optional<DefinedOrUnknownSVal> reverseTest =
467       reverse.getAs<DefinedOrUnknownSVal>();
468   if (!reverseTest)
469     return state;
470 
471   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
472   if (stateTrue) {
473     if (stateFalse) {
474       // If we don't know which one comes first, we can't perform this test.
475       return state;
476     } else {
477       // Switch the values so that firstVal is before secondVal.
478       std::swap(firstLoc, secondLoc);
479 
480       // Switch the Exprs as well, so that they still correspond.
481       std::swap(First, Second);
482     }
483   }
484 
485   // Get the length, and make sure it too is known.
486   SVal LengthVal = state->getSVal(Size, LCtx);
487   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
488   if (!Length)
489     return state;
490 
491   // Convert the first buffer's start address to char*.
492   // Bail out if the cast fails.
493   ASTContext &Ctx = svalBuilder.getContext();
494   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
495   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
496                                          First->getType());
497   Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
498   if (!FirstStartLoc)
499     return state;
500 
501   // Compute the end of the first buffer. Bail out if THAT fails.
502   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
503                                  *FirstStartLoc, *Length, CharPtrTy);
504   Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
505   if (!FirstEndLoc)
506     return state;
507 
508   // Is the end of the first buffer past the start of the second buffer?
509   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
510                                 *FirstEndLoc, *secondLoc, cmpTy);
511   Optional<DefinedOrUnknownSVal> OverlapTest =
512       Overlap.getAs<DefinedOrUnknownSVal>();
513   if (!OverlapTest)
514     return state;
515 
516   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
517 
518   if (stateTrue && !stateFalse) {
519     // Overlap!
520     emitOverlapBug(C, stateTrue, First, Second);
521     return NULL;
522   }
523 
524   // assume the two expressions don't overlap.
525   assert(stateFalse);
526   return stateFalse;
527 }
528 
529 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
530                                   const Stmt *First, const Stmt *Second) const {
531   ExplodedNode *N = C.generateSink(state);
532   if (!N)
533     return;
534 
535   if (!BT_Overlap)
536     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
537                                  categories::UnixAPI, "Improper arguments"));
538 
539   // Generate a report for this bug.
540   BugReport *report =
541     new BugReport(*BT_Overlap,
542       "Arguments must not be overlapping buffers", N);
543   report->addRange(First->getSourceRange());
544   report->addRange(Second->getSourceRange());
545 
546   C.emitReport(report);
547 }
548 
549 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
550                                                      ProgramStateRef state,
551                                                      NonLoc left,
552                                                      NonLoc right) const {
553   // If out-of-bounds checking is turned off, skip the rest.
554   if (!Filter.CheckCStringOutOfBounds)
555     return state;
556 
557   // If a previous check has failed, propagate the failure.
558   if (!state)
559     return NULL;
560 
561   SValBuilder &svalBuilder = C.getSValBuilder();
562   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
563 
564   QualType sizeTy = svalBuilder.getContext().getSizeType();
565   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
566   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
567 
568   SVal maxMinusRight;
569   if (right.getAs<nonloc::ConcreteInt>()) {
570     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
571                                                  sizeTy);
572   } else {
573     // Try switching the operands. (The order of these two assignments is
574     // important!)
575     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
576                                             sizeTy);
577     left = right;
578   }
579 
580   if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
581     QualType cmpTy = svalBuilder.getConditionType();
582     // If left > max - right, we have an overflow.
583     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
584                                                 *maxMinusRightNL, cmpTy);
585 
586     ProgramStateRef stateOverflow, stateOkay;
587     llvm::tie(stateOverflow, stateOkay) =
588       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
589 
590     if (stateOverflow && !stateOkay) {
591       // We have an overflow. Emit a bug report.
592       ExplodedNode *N = C.generateSink(stateOverflow);
593       if (!N)
594         return NULL;
595 
596       if (!BT_AdditionOverflow)
597         BT_AdditionOverflow.reset(
598             new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
599                            "Sum of expressions causes overflow"));
600 
601       // This isn't a great error message, but this should never occur in real
602       // code anyway -- you'd have to create a buffer longer than a size_t can
603       // represent, which is sort of a contradiction.
604       const char *warning =
605         "This expression will create a string whose length is too big to "
606         "be represented as a size_t";
607 
608       // Generate a report for this bug.
609       BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
610       C.emitReport(report);
611 
612       return NULL;
613     }
614 
615     // From now on, assume an overflow didn't occur.
616     assert(stateOkay);
617     state = stateOkay;
618   }
619 
620   return state;
621 }
622 
623 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
624                                                 const MemRegion *MR,
625                                                 SVal strLength) {
626   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
627 
628   MR = MR->StripCasts();
629 
630   switch (MR->getKind()) {
631   case MemRegion::StringRegionKind:
632     // FIXME: This can happen if we strcpy() into a string region. This is
633     // undefined [C99 6.4.5p6], but we should still warn about it.
634     return state;
635 
636   case MemRegion::SymbolicRegionKind:
637   case MemRegion::AllocaRegionKind:
638   case MemRegion::VarRegionKind:
639   case MemRegion::FieldRegionKind:
640   case MemRegion::ObjCIvarRegionKind:
641     // These are the types we can currently track string lengths for.
642     break;
643 
644   case MemRegion::ElementRegionKind:
645     // FIXME: Handle element regions by upper-bounding the parent region's
646     // string length.
647     return state;
648 
649   default:
650     // Other regions (mostly non-data) can't have a reliable C string length.
651     // For now, just ignore the change.
652     // FIXME: These are rare but not impossible. We should output some kind of
653     // warning for things like strcpy((char[]){'a', 0}, "b");
654     return state;
655   }
656 
657   if (strLength.isUnknown())
658     return state->remove<CStringLength>(MR);
659 
660   return state->set<CStringLength>(MR, strLength);
661 }
662 
663 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
664                                                ProgramStateRef &state,
665                                                const Expr *Ex,
666                                                const MemRegion *MR,
667                                                bool hypothetical) {
668   if (!hypothetical) {
669     // If there's a recorded length, go ahead and return it.
670     const SVal *Recorded = state->get<CStringLength>(MR);
671     if (Recorded)
672       return *Recorded;
673   }
674 
675   // Otherwise, get a new symbol and update the state.
676   SValBuilder &svalBuilder = C.getSValBuilder();
677   QualType sizeTy = svalBuilder.getContext().getSizeType();
678   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
679                                                     MR, Ex, sizeTy,
680                                                     C.blockCount());
681 
682   if (!hypothetical) {
683     if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
684       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
685       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
686       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
687       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
688       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
689                                                         fourInt);
690       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
691       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
692                                                 maxLength, sizeTy);
693       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
694     }
695     state = state->set<CStringLength>(MR, strLength);
696   }
697 
698   return strLength;
699 }
700 
701 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
702                                       const Expr *Ex, SVal Buf,
703                                       bool hypothetical) const {
704   const MemRegion *MR = Buf.getAsRegion();
705   if (!MR) {
706     // If we can't get a region, see if it's something we /know/ isn't a
707     // C string. In the context of locations, the only time we can issue such
708     // a warning is for labels.
709     if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
710       if (!Filter.CheckCStringNotNullTerm)
711         return UndefinedVal();
712 
713       if (ExplodedNode *N = C.addTransition(state)) {
714         if (!BT_NotCString)
715           BT_NotCString.reset(new BuiltinBug(
716               Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
717               "Argument is not a null-terminated string."));
718 
719         SmallString<120> buf;
720         llvm::raw_svector_ostream os(buf);
721         assert(CurrentFunctionDescription);
722         os << "Argument to " << CurrentFunctionDescription
723            << " is the address of the label '" << Label->getLabel()->getName()
724            << "', which is not a null-terminated string";
725 
726         // Generate a report for this bug.
727         BugReport *report = new BugReport(*BT_NotCString, os.str(), N);
728 
729         report->addRange(Ex->getSourceRange());
730         C.emitReport(report);
731       }
732       return UndefinedVal();
733 
734     }
735 
736     // If it's not a region and not a label, give up.
737     return UnknownVal();
738   }
739 
740   // If we have a region, strip casts from it and see if we can figure out
741   // its length. For anything we can't figure out, just return UnknownVal.
742   MR = MR->StripCasts();
743 
744   switch (MR->getKind()) {
745   case MemRegion::StringRegionKind: {
746     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
747     // so we can assume that the byte length is the correct C string length.
748     SValBuilder &svalBuilder = C.getSValBuilder();
749     QualType sizeTy = svalBuilder.getContext().getSizeType();
750     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
751     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
752   }
753   case MemRegion::SymbolicRegionKind:
754   case MemRegion::AllocaRegionKind:
755   case MemRegion::VarRegionKind:
756   case MemRegion::FieldRegionKind:
757   case MemRegion::ObjCIvarRegionKind:
758     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
759   case MemRegion::CompoundLiteralRegionKind:
760     // FIXME: Can we track this? Is it necessary?
761     return UnknownVal();
762   case MemRegion::ElementRegionKind:
763     // FIXME: How can we handle this? It's not good enough to subtract the
764     // offset from the base string length; consider "123\x00567" and &a[5].
765     return UnknownVal();
766   default:
767     // Other regions (mostly non-data) can't have a reliable C string length.
768     // In this case, an error is emitted and UndefinedVal is returned.
769     // The caller should always be prepared to handle this case.
770     if (!Filter.CheckCStringNotNullTerm)
771       return UndefinedVal();
772 
773     if (ExplodedNode *N = C.addTransition(state)) {
774       if (!BT_NotCString)
775         BT_NotCString.reset(new BuiltinBug(
776             Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
777             "Argument is not a null-terminated string."));
778 
779       SmallString<120> buf;
780       llvm::raw_svector_ostream os(buf);
781 
782       assert(CurrentFunctionDescription);
783       os << "Argument to " << CurrentFunctionDescription << " is ";
784 
785       if (SummarizeRegion(os, C.getASTContext(), MR))
786         os << ", which is not a null-terminated string";
787       else
788         os << "not a null-terminated string";
789 
790       // Generate a report for this bug.
791       BugReport *report = new BugReport(*BT_NotCString,
792                                                         os.str(), N);
793 
794       report->addRange(Ex->getSourceRange());
795       C.emitReport(report);
796     }
797 
798     return UndefinedVal();
799   }
800 }
801 
802 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
803   ProgramStateRef &state, const Expr *expr, SVal val) const {
804 
805   // Get the memory region pointed to by the val.
806   const MemRegion *bufRegion = val.getAsRegion();
807   if (!bufRegion)
808     return NULL;
809 
810   // Strip casts off the memory region.
811   bufRegion = bufRegion->StripCasts();
812 
813   // Cast the memory region to a string region.
814   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
815   if (!strRegion)
816     return NULL;
817 
818   // Return the actual string in the string region.
819   return strRegion->getStringLiteral();
820 }
821 
822 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
823                                                  ProgramStateRef state,
824                                                  const Expr *E, SVal V,
825                                                  bool IsSourceBuffer) {
826   Optional<Loc> L = V.getAs<Loc>();
827   if (!L)
828     return state;
829 
830   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
831   // some assumptions about the value that CFRefCount can't. Even so, it should
832   // probably be refactored.
833   if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
834     const MemRegion *R = MR->getRegion()->StripCasts();
835 
836     // Are we dealing with an ElementRegion?  If so, we should be invalidating
837     // the super-region.
838     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
839       R = ER->getSuperRegion();
840       // FIXME: What about layers of ElementRegions?
841     }
842 
843     // Invalidate this region.
844     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
845 
846     bool CausesPointerEscape = false;
847     RegionAndSymbolInvalidationTraits ITraits;
848     // Invalidate and escape only indirect regions accessible through the source
849     // buffer.
850     if (IsSourceBuffer) {
851       ITraits.setTrait(R,
852                        RegionAndSymbolInvalidationTraits::TK_PreserveContents);
853       ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
854       CausesPointerEscape = true;
855     }
856 
857     return state->invalidateRegions(R, E, C.blockCount(), LCtx,
858                                     CausesPointerEscape, 0, 0, &ITraits);
859   }
860 
861   // If we have a non-region value by chance, just remove the binding.
862   // FIXME: is this necessary or correct? This handles the non-Region
863   //  cases.  Is it ever valid to store to these?
864   return state->killBinding(*L);
865 }
866 
867 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
868                                      const MemRegion *MR) {
869   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
870 
871   switch (MR->getKind()) {
872   case MemRegion::FunctionTextRegionKind: {
873     const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
874     if (FD)
875       os << "the address of the function '" << *FD << '\'';
876     else
877       os << "the address of a function";
878     return true;
879   }
880   case MemRegion::BlockTextRegionKind:
881     os << "block text";
882     return true;
883   case MemRegion::BlockDataRegionKind:
884     os << "a block";
885     return true;
886   case MemRegion::CXXThisRegionKind:
887   case MemRegion::CXXTempObjectRegionKind:
888     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
889     return true;
890   case MemRegion::VarRegionKind:
891     os << "a variable of type" << TVR->getValueType().getAsString();
892     return true;
893   case MemRegion::FieldRegionKind:
894     os << "a field of type " << TVR->getValueType().getAsString();
895     return true;
896   case MemRegion::ObjCIvarRegionKind:
897     os << "an instance variable of type " << TVR->getValueType().getAsString();
898     return true;
899   default:
900     return false;
901   }
902 }
903 
904 //===----------------------------------------------------------------------===//
905 // evaluation of individual function calls.
906 //===----------------------------------------------------------------------===//
907 
908 void CStringChecker::evalCopyCommon(CheckerContext &C,
909                                     const CallExpr *CE,
910                                     ProgramStateRef state,
911                                     const Expr *Size, const Expr *Dest,
912                                     const Expr *Source, bool Restricted,
913                                     bool IsMempcpy) const {
914   CurrentFunctionDescription = "memory copy function";
915 
916   // See if the size argument is zero.
917   const LocationContext *LCtx = C.getLocationContext();
918   SVal sizeVal = state->getSVal(Size, LCtx);
919   QualType sizeTy = Size->getType();
920 
921   ProgramStateRef stateZeroSize, stateNonZeroSize;
922   llvm::tie(stateZeroSize, stateNonZeroSize) =
923     assumeZero(C, state, sizeVal, sizeTy);
924 
925   // Get the value of the Dest.
926   SVal destVal = state->getSVal(Dest, LCtx);
927 
928   // If the size is zero, there won't be any actual memory access, so
929   // just bind the return value to the destination buffer and return.
930   if (stateZeroSize && !stateNonZeroSize) {
931     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
932     C.addTransition(stateZeroSize);
933     return;
934   }
935 
936   // If the size can be nonzero, we have to check the other arguments.
937   if (stateNonZeroSize) {
938     state = stateNonZeroSize;
939 
940     // Ensure the destination is not null. If it is NULL there will be a
941     // NULL pointer dereference.
942     state = checkNonNull(C, state, Dest, destVal);
943     if (!state)
944       return;
945 
946     // Get the value of the Src.
947     SVal srcVal = state->getSVal(Source, LCtx);
948 
949     // Ensure the source is not null. If it is NULL there will be a
950     // NULL pointer dereference.
951     state = checkNonNull(C, state, Source, srcVal);
952     if (!state)
953       return;
954 
955     // Ensure the accesses are valid and that the buffers do not overlap.
956     const char * const writeWarning =
957       "Memory copy function overflows destination buffer";
958     state = CheckBufferAccess(C, state, Size, Dest, Source,
959                               writeWarning, /* sourceWarning = */ NULL);
960     if (Restricted)
961       state = CheckOverlap(C, state, Size, Dest, Source);
962 
963     if (!state)
964       return;
965 
966     // If this is mempcpy, get the byte after the last byte copied and
967     // bind the expr.
968     if (IsMempcpy) {
969       loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
970 
971       // Get the length to copy.
972       if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
973         // Get the byte after the last byte copied.
974         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
975                                                           destRegVal,
976                                                           *lenValNonLoc,
977                                                           Dest->getType());
978 
979         // The byte after the last byte copied is the return value.
980         state = state->BindExpr(CE, LCtx, lastElement);
981       } else {
982         // If we don't know how much we copied, we can at least
983         // conjure a return value for later.
984         SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx,
985                                                           C.blockCount());
986         state = state->BindExpr(CE, LCtx, result);
987       }
988 
989     } else {
990       // All other copies return the destination buffer.
991       // (Well, bcopy() has a void return type, but this won't hurt.)
992       state = state->BindExpr(CE, LCtx, destVal);
993     }
994 
995     // Invalidate the destination (regular invalidation without pointer-escaping
996     // the address of the top-level region).
997     // FIXME: Even if we can't perfectly model the copy, we should see if we
998     // can use LazyCompoundVals to copy the source values into the destination.
999     // This would probably remove any existing bindings past the end of the
1000     // copied region, but that's still an improvement over blank invalidation.
1001     state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1002                              /*IsSourceBuffer*/false);
1003 
1004     // Invalidate the source (const-invalidation without const-pointer-escaping
1005     // the address of the top-level region).
1006     state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1007                              /*IsSourceBuffer*/true);
1008 
1009     C.addTransition(state);
1010   }
1011 }
1012 
1013 
1014 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1015   if (CE->getNumArgs() < 3)
1016     return;
1017 
1018   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1019   // The return value is the address of the destination buffer.
1020   const Expr *Dest = CE->getArg(0);
1021   ProgramStateRef state = C.getState();
1022 
1023   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1024 }
1025 
1026 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1027   if (CE->getNumArgs() < 3)
1028     return;
1029 
1030   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1031   // The return value is a pointer to the byte following the last written byte.
1032   const Expr *Dest = CE->getArg(0);
1033   ProgramStateRef state = C.getState();
1034 
1035   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1036 }
1037 
1038 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1039   if (CE->getNumArgs() < 3)
1040     return;
1041 
1042   // void *memmove(void *dst, const void *src, size_t n);
1043   // The return value is the address of the destination buffer.
1044   const Expr *Dest = CE->getArg(0);
1045   ProgramStateRef state = C.getState();
1046 
1047   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1048 }
1049 
1050 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1051   if (CE->getNumArgs() < 3)
1052     return;
1053 
1054   // void bcopy(const void *src, void *dst, size_t n);
1055   evalCopyCommon(C, CE, C.getState(),
1056                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1057 }
1058 
1059 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1060   if (CE->getNumArgs() < 3)
1061     return;
1062 
1063   // int memcmp(const void *s1, const void *s2, size_t n);
1064   CurrentFunctionDescription = "memory comparison function";
1065 
1066   const Expr *Left = CE->getArg(0);
1067   const Expr *Right = CE->getArg(1);
1068   const Expr *Size = CE->getArg(2);
1069 
1070   ProgramStateRef state = C.getState();
1071   SValBuilder &svalBuilder = C.getSValBuilder();
1072 
1073   // See if the size argument is zero.
1074   const LocationContext *LCtx = C.getLocationContext();
1075   SVal sizeVal = state->getSVal(Size, LCtx);
1076   QualType sizeTy = Size->getType();
1077 
1078   ProgramStateRef stateZeroSize, stateNonZeroSize;
1079   llvm::tie(stateZeroSize, stateNonZeroSize) =
1080     assumeZero(C, state, sizeVal, sizeTy);
1081 
1082   // If the size can be zero, the result will be 0 in that case, and we don't
1083   // have to check either of the buffers.
1084   if (stateZeroSize) {
1085     state = stateZeroSize;
1086     state = state->BindExpr(CE, LCtx,
1087                             svalBuilder.makeZeroVal(CE->getType()));
1088     C.addTransition(state);
1089   }
1090 
1091   // If the size can be nonzero, we have to check the other arguments.
1092   if (stateNonZeroSize) {
1093     state = stateNonZeroSize;
1094     // If we know the two buffers are the same, we know the result is 0.
1095     // First, get the two buffers' addresses. Another checker will have already
1096     // made sure they're not undefined.
1097     DefinedOrUnknownSVal LV =
1098         state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1099     DefinedOrUnknownSVal RV =
1100         state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1101 
1102     // See if they are the same.
1103     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1104     ProgramStateRef StSameBuf, StNotSameBuf;
1105     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1106 
1107     // If the two arguments might be the same buffer, we know the result is 0,
1108     // and we only need to check one size.
1109     if (StSameBuf) {
1110       state = StSameBuf;
1111       state = CheckBufferAccess(C, state, Size, Left);
1112       if (state) {
1113         state = StSameBuf->BindExpr(CE, LCtx,
1114                                     svalBuilder.makeZeroVal(CE->getType()));
1115         C.addTransition(state);
1116       }
1117     }
1118 
1119     // If the two arguments might be different buffers, we have to check the
1120     // size of both of them.
1121     if (StNotSameBuf) {
1122       state = StNotSameBuf;
1123       state = CheckBufferAccess(C, state, Size, Left, Right);
1124       if (state) {
1125         // The return value is the comparison result, which we don't know.
1126         SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1127         state = state->BindExpr(CE, LCtx, CmpV);
1128         C.addTransition(state);
1129       }
1130     }
1131   }
1132 }
1133 
1134 void CStringChecker::evalstrLength(CheckerContext &C,
1135                                    const CallExpr *CE) const {
1136   if (CE->getNumArgs() < 1)
1137     return;
1138 
1139   // size_t strlen(const char *s);
1140   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1141 }
1142 
1143 void CStringChecker::evalstrnLength(CheckerContext &C,
1144                                     const CallExpr *CE) const {
1145   if (CE->getNumArgs() < 2)
1146     return;
1147 
1148   // size_t strnlen(const char *s, size_t maxlen);
1149   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1150 }
1151 
1152 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1153                                          bool IsStrnlen) const {
1154   CurrentFunctionDescription = "string length function";
1155   ProgramStateRef state = C.getState();
1156   const LocationContext *LCtx = C.getLocationContext();
1157 
1158   if (IsStrnlen) {
1159     const Expr *maxlenExpr = CE->getArg(1);
1160     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1161 
1162     ProgramStateRef stateZeroSize, stateNonZeroSize;
1163     llvm::tie(stateZeroSize, stateNonZeroSize) =
1164       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1165 
1166     // If the size can be zero, the result will be 0 in that case, and we don't
1167     // have to check the string itself.
1168     if (stateZeroSize) {
1169       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1170       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1171       C.addTransition(stateZeroSize);
1172     }
1173 
1174     // If the size is GUARANTEED to be zero, we're done!
1175     if (!stateNonZeroSize)
1176       return;
1177 
1178     // Otherwise, record the assumption that the size is nonzero.
1179     state = stateNonZeroSize;
1180   }
1181 
1182   // Check that the string argument is non-null.
1183   const Expr *Arg = CE->getArg(0);
1184   SVal ArgVal = state->getSVal(Arg, LCtx);
1185 
1186   state = checkNonNull(C, state, Arg, ArgVal);
1187 
1188   if (!state)
1189     return;
1190 
1191   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1192 
1193   // If the argument isn't a valid C string, there's no valid state to
1194   // transition to.
1195   if (strLength.isUndef())
1196     return;
1197 
1198   DefinedOrUnknownSVal result = UnknownVal();
1199 
1200   // If the check is for strnlen() then bind the return value to no more than
1201   // the maxlen value.
1202   if (IsStrnlen) {
1203     QualType cmpTy = C.getSValBuilder().getConditionType();
1204 
1205     // It's a little unfortunate to be getting this again,
1206     // but it's not that expensive...
1207     const Expr *maxlenExpr = CE->getArg(1);
1208     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1209 
1210     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1211     Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1212 
1213     if (strLengthNL && maxlenValNL) {
1214       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1215 
1216       // Check if the strLength is greater than the maxlen.
1217       llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1218           state->assume(C.getSValBuilder().evalBinOpNN(
1219               state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1220                             .castAs<DefinedOrUnknownSVal>());
1221 
1222       if (stateStringTooLong && !stateStringNotTooLong) {
1223         // If the string is longer than maxlen, return maxlen.
1224         result = *maxlenValNL;
1225       } else if (stateStringNotTooLong && !stateStringTooLong) {
1226         // If the string is shorter than maxlen, return its length.
1227         result = *strLengthNL;
1228       }
1229     }
1230 
1231     if (result.isUnknown()) {
1232       // If we don't have enough information for a comparison, there's
1233       // no guarantee the full string length will actually be returned.
1234       // All we know is the return value is the min of the string length
1235       // and the limit. This is better than nothing.
1236       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1237       NonLoc resultNL = result.castAs<NonLoc>();
1238 
1239       if (strLengthNL) {
1240         state = state->assume(C.getSValBuilder().evalBinOpNN(
1241                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1242                                   .castAs<DefinedOrUnknownSVal>(), true);
1243       }
1244 
1245       if (maxlenValNL) {
1246         state = state->assume(C.getSValBuilder().evalBinOpNN(
1247                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1248                                   .castAs<DefinedOrUnknownSVal>(), true);
1249       }
1250     }
1251 
1252   } else {
1253     // This is a plain strlen(), not strnlen().
1254     result = strLength.castAs<DefinedOrUnknownSVal>();
1255 
1256     // If we don't know the length of the string, conjure a return
1257     // value, so it can be used in constraints, at least.
1258     if (result.isUnknown()) {
1259       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1260     }
1261   }
1262 
1263   // Bind the return value.
1264   assert(!result.isUnknown() && "Should have conjured a value by now");
1265   state = state->BindExpr(CE, LCtx, result);
1266   C.addTransition(state);
1267 }
1268 
1269 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1270   if (CE->getNumArgs() < 2)
1271     return;
1272 
1273   // char *strcpy(char *restrict dst, const char *restrict src);
1274   evalStrcpyCommon(C, CE,
1275                    /* returnEnd = */ false,
1276                    /* isBounded = */ false,
1277                    /* isAppending = */ false);
1278 }
1279 
1280 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1281   if (CE->getNumArgs() < 3)
1282     return;
1283 
1284   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1285   evalStrcpyCommon(C, CE,
1286                    /* returnEnd = */ false,
1287                    /* isBounded = */ true,
1288                    /* isAppending = */ false);
1289 }
1290 
1291 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1292   if (CE->getNumArgs() < 2)
1293     return;
1294 
1295   // char *stpcpy(char *restrict dst, const char *restrict src);
1296   evalStrcpyCommon(C, CE,
1297                    /* returnEnd = */ true,
1298                    /* isBounded = */ false,
1299                    /* isAppending = */ false);
1300 }
1301 
1302 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1303   if (CE->getNumArgs() < 2)
1304     return;
1305 
1306   //char *strcat(char *restrict s1, const char *restrict s2);
1307   evalStrcpyCommon(C, CE,
1308                    /* returnEnd = */ false,
1309                    /* isBounded = */ false,
1310                    /* isAppending = */ true);
1311 }
1312 
1313 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1314   if (CE->getNumArgs() < 3)
1315     return;
1316 
1317   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1318   evalStrcpyCommon(C, CE,
1319                    /* returnEnd = */ false,
1320                    /* isBounded = */ true,
1321                    /* isAppending = */ true);
1322 }
1323 
1324 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1325                                       bool returnEnd, bool isBounded,
1326                                       bool isAppending) const {
1327   CurrentFunctionDescription = "string copy function";
1328   ProgramStateRef state = C.getState();
1329   const LocationContext *LCtx = C.getLocationContext();
1330 
1331   // Check that the destination is non-null.
1332   const Expr *Dst = CE->getArg(0);
1333   SVal DstVal = state->getSVal(Dst, LCtx);
1334 
1335   state = checkNonNull(C, state, Dst, DstVal);
1336   if (!state)
1337     return;
1338 
1339   // Check that the source is non-null.
1340   const Expr *srcExpr = CE->getArg(1);
1341   SVal srcVal = state->getSVal(srcExpr, LCtx);
1342   state = checkNonNull(C, state, srcExpr, srcVal);
1343   if (!state)
1344     return;
1345 
1346   // Get the string length of the source.
1347   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1348 
1349   // If the source isn't a valid C string, give up.
1350   if (strLength.isUndef())
1351     return;
1352 
1353   SValBuilder &svalBuilder = C.getSValBuilder();
1354   QualType cmpTy = svalBuilder.getConditionType();
1355   QualType sizeTy = svalBuilder.getContext().getSizeType();
1356 
1357   // These two values allow checking two kinds of errors:
1358   // - actual overflows caused by a source that doesn't fit in the destination
1359   // - potential overflows caused by a bound that could exceed the destination
1360   SVal amountCopied = UnknownVal();
1361   SVal maxLastElementIndex = UnknownVal();
1362   const char *boundWarning = NULL;
1363 
1364   // If the function is strncpy, strncat, etc... it is bounded.
1365   if (isBounded) {
1366     // Get the max number of characters to copy.
1367     const Expr *lenExpr = CE->getArg(2);
1368     SVal lenVal = state->getSVal(lenExpr, LCtx);
1369 
1370     // Protect against misdeclared strncpy().
1371     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1372 
1373     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1374     Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1375 
1376     // If we know both values, we might be able to figure out how much
1377     // we're copying.
1378     if (strLengthNL && lenValNL) {
1379       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1380 
1381       // Check if the max number to copy is less than the length of the src.
1382       // If the bound is equal to the source length, strncpy won't null-
1383       // terminate the result!
1384       llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1385           svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1386               .castAs<DefinedOrUnknownSVal>());
1387 
1388       if (stateSourceTooLong && !stateSourceNotTooLong) {
1389         // Max number to copy is less than the length of the src, so the actual
1390         // strLength copied is the max number arg.
1391         state = stateSourceTooLong;
1392         amountCopied = lenVal;
1393 
1394       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1395         // The source buffer entirely fits in the bound.
1396         state = stateSourceNotTooLong;
1397         amountCopied = strLength;
1398       }
1399     }
1400 
1401     // We still want to know if the bound is known to be too large.
1402     if (lenValNL) {
1403       if (isAppending) {
1404         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1405 
1406         // Get the string length of the destination. If the destination is
1407         // memory that can't have a string length, we shouldn't be copying
1408         // into it anyway.
1409         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1410         if (dstStrLength.isUndef())
1411           return;
1412 
1413         if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1414           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1415                                                         *lenValNL,
1416                                                         *dstStrLengthNL,
1417                                                         sizeTy);
1418           boundWarning = "Size argument is greater than the free space in the "
1419                          "destination buffer";
1420         }
1421 
1422       } else {
1423         // For strncpy, this is just checking that lenVal <= sizeof(dst)
1424         // (Yes, strncpy and strncat differ in how they treat termination.
1425         // strncat ALWAYS terminates, but strncpy doesn't.)
1426 
1427         // We need a special case for when the copy size is zero, in which
1428         // case strncpy will do no work at all. Our bounds check uses n-1
1429         // as the last element accessed, so n == 0 is problematic.
1430         ProgramStateRef StateZeroSize, StateNonZeroSize;
1431         llvm::tie(StateZeroSize, StateNonZeroSize) =
1432           assumeZero(C, state, *lenValNL, sizeTy);
1433 
1434         // If the size is known to be zero, we're done.
1435         if (StateZeroSize && !StateNonZeroSize) {
1436           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1437           C.addTransition(StateZeroSize);
1438           return;
1439         }
1440 
1441         // Otherwise, go ahead and figure out the last element we'll touch.
1442         // We don't record the non-zero assumption here because we can't
1443         // be sure. We won't warn on a possible zero.
1444         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1445         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1446                                                       one, sizeTy);
1447         boundWarning = "Size argument is greater than the length of the "
1448                        "destination buffer";
1449       }
1450     }
1451 
1452     // If we couldn't pin down the copy length, at least bound it.
1453     // FIXME: We should actually run this code path for append as well, but
1454     // right now it creates problems with constraints (since we can end up
1455     // trying to pass constraints from symbol to symbol).
1456     if (amountCopied.isUnknown() && !isAppending) {
1457       // Try to get a "hypothetical" string length symbol, which we can later
1458       // set as a real value if that turns out to be the case.
1459       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1460       assert(!amountCopied.isUndef());
1461 
1462       if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1463         if (lenValNL) {
1464           // amountCopied <= lenVal
1465           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1466                                                              *amountCopiedNL,
1467                                                              *lenValNL,
1468                                                              cmpTy);
1469           state = state->assume(
1470               copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1471           if (!state)
1472             return;
1473         }
1474 
1475         if (strLengthNL) {
1476           // amountCopied <= strlen(source)
1477           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1478                                                            *amountCopiedNL,
1479                                                            *strLengthNL,
1480                                                            cmpTy);
1481           state = state->assume(
1482               copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1483           if (!state)
1484             return;
1485         }
1486       }
1487     }
1488 
1489   } else {
1490     // The function isn't bounded. The amount copied should match the length
1491     // of the source buffer.
1492     amountCopied = strLength;
1493   }
1494 
1495   assert(state);
1496 
1497   // This represents the number of characters copied into the destination
1498   // buffer. (It may not actually be the strlen if the destination buffer
1499   // is not terminated.)
1500   SVal finalStrLength = UnknownVal();
1501 
1502   // If this is an appending function (strcat, strncat...) then set the
1503   // string length to strlen(src) + strlen(dst) since the buffer will
1504   // ultimately contain both.
1505   if (isAppending) {
1506     // Get the string length of the destination. If the destination is memory
1507     // that can't have a string length, we shouldn't be copying into it anyway.
1508     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1509     if (dstStrLength.isUndef())
1510       return;
1511 
1512     Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1513     Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1514 
1515     // If we know both string lengths, we might know the final string length.
1516     if (srcStrLengthNL && dstStrLengthNL) {
1517       // Make sure the two lengths together don't overflow a size_t.
1518       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1519       if (!state)
1520         return;
1521 
1522       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1523                                                *dstStrLengthNL, sizeTy);
1524     }
1525 
1526     // If we couldn't get a single value for the final string length,
1527     // we can at least bound it by the individual lengths.
1528     if (finalStrLength.isUnknown()) {
1529       // Try to get a "hypothetical" string length symbol, which we can later
1530       // set as a real value if that turns out to be the case.
1531       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1532       assert(!finalStrLength.isUndef());
1533 
1534       if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1535         if (srcStrLengthNL) {
1536           // finalStrLength >= srcStrLength
1537           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1538                                                         *finalStrLengthNL,
1539                                                         *srcStrLengthNL,
1540                                                         cmpTy);
1541           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1542                                 true);
1543           if (!state)
1544             return;
1545         }
1546 
1547         if (dstStrLengthNL) {
1548           // finalStrLength >= dstStrLength
1549           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1550                                                       *finalStrLengthNL,
1551                                                       *dstStrLengthNL,
1552                                                       cmpTy);
1553           state =
1554               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1555           if (!state)
1556             return;
1557         }
1558       }
1559     }
1560 
1561   } else {
1562     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1563     // the final string length will match the input string length.
1564     finalStrLength = amountCopied;
1565   }
1566 
1567   // The final result of the function will either be a pointer past the last
1568   // copied element, or a pointer to the start of the destination buffer.
1569   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1570 
1571   assert(state);
1572 
1573   // If the destination is a MemRegion, try to check for a buffer overflow and
1574   // record the new string length.
1575   if (Optional<loc::MemRegionVal> dstRegVal =
1576           DstVal.getAs<loc::MemRegionVal>()) {
1577     QualType ptrTy = Dst->getType();
1578 
1579     // If we have an exact value on a bounded copy, use that to check for
1580     // overflows, rather than our estimate about how much is actually copied.
1581     if (boundWarning) {
1582       if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1583         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1584                                                       *maxLastNL, ptrTy);
1585         state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1586                               boundWarning);
1587         if (!state)
1588           return;
1589       }
1590     }
1591 
1592     // Then, if the final length is known...
1593     if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1594       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1595                                                  *knownStrLength, ptrTy);
1596 
1597       // ...and we haven't checked the bound, we'll check the actual copy.
1598       if (!boundWarning) {
1599         const char * const warningMsg =
1600           "String copy function overflows destination buffer";
1601         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1602         if (!state)
1603           return;
1604       }
1605 
1606       // If this is a stpcpy-style copy, the last element is the return value.
1607       if (returnEnd)
1608         Result = lastElement;
1609     }
1610 
1611     // Invalidate the destination (regular invalidation without pointer-escaping
1612     // the address of the top-level region). This must happen before we set the
1613     // C string length because invalidation will clear the length.
1614     // FIXME: Even if we can't perfectly model the copy, we should see if we
1615     // can use LazyCompoundVals to copy the source values into the destination.
1616     // This would probably remove any existing bindings past the end of the
1617     // string, but that's still an improvement over blank invalidation.
1618     state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1619                              /*IsSourceBuffer*/false);
1620 
1621     // Invalidate the source (const-invalidation without const-pointer-escaping
1622     // the address of the top-level region).
1623     state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);
1624 
1625     // Set the C string length of the destination, if we know it.
1626     if (isBounded && !isAppending) {
1627       // strncpy is annoying in that it doesn't guarantee to null-terminate
1628       // the result string. If the original string didn't fit entirely inside
1629       // the bound (including the null-terminator), we don't know how long the
1630       // result is.
1631       if (amountCopied != strLength)
1632         finalStrLength = UnknownVal();
1633     }
1634     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1635   }
1636 
1637   assert(state);
1638 
1639   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1640   // overflow, we still need a result. Conjure a return value.
1641   if (returnEnd && Result.isUnknown()) {
1642     Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1643   }
1644 
1645   // Set the return value.
1646   state = state->BindExpr(CE, LCtx, Result);
1647   C.addTransition(state);
1648 }
1649 
1650 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1651   if (CE->getNumArgs() < 2)
1652     return;
1653 
1654   //int strcmp(const char *s1, const char *s2);
1655   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1656 }
1657 
1658 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1659   if (CE->getNumArgs() < 3)
1660     return;
1661 
1662   //int strncmp(const char *s1, const char *s2, size_t n);
1663   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1664 }
1665 
1666 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1667                                     const CallExpr *CE) const {
1668   if (CE->getNumArgs() < 2)
1669     return;
1670 
1671   //int strcasecmp(const char *s1, const char *s2);
1672   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1673 }
1674 
1675 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1676                                      const CallExpr *CE) const {
1677   if (CE->getNumArgs() < 3)
1678     return;
1679 
1680   //int strncasecmp(const char *s1, const char *s2, size_t n);
1681   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1682 }
1683 
1684 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1685                                       bool isBounded, bool ignoreCase) const {
1686   CurrentFunctionDescription = "string comparison function";
1687   ProgramStateRef state = C.getState();
1688   const LocationContext *LCtx = C.getLocationContext();
1689 
1690   // Check that the first string is non-null
1691   const Expr *s1 = CE->getArg(0);
1692   SVal s1Val = state->getSVal(s1, LCtx);
1693   state = checkNonNull(C, state, s1, s1Val);
1694   if (!state)
1695     return;
1696 
1697   // Check that the second string is non-null.
1698   const Expr *s2 = CE->getArg(1);
1699   SVal s2Val = state->getSVal(s2, LCtx);
1700   state = checkNonNull(C, state, s2, s2Val);
1701   if (!state)
1702     return;
1703 
1704   // Get the string length of the first string or give up.
1705   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1706   if (s1Length.isUndef())
1707     return;
1708 
1709   // Get the string length of the second string or give up.
1710   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1711   if (s2Length.isUndef())
1712     return;
1713 
1714   // If we know the two buffers are the same, we know the result is 0.
1715   // First, get the two buffers' addresses. Another checker will have already
1716   // made sure they're not undefined.
1717   DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1718   DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1719 
1720   // See if they are the same.
1721   SValBuilder &svalBuilder = C.getSValBuilder();
1722   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1723   ProgramStateRef StSameBuf, StNotSameBuf;
1724   llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1725 
1726   // If the two arguments might be the same buffer, we know the result is 0,
1727   // and we only need to check one size.
1728   if (StSameBuf) {
1729     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1730                                     svalBuilder.makeZeroVal(CE->getType()));
1731     C.addTransition(StSameBuf);
1732 
1733     // If the two arguments are GUARANTEED to be the same, we're done!
1734     if (!StNotSameBuf)
1735       return;
1736   }
1737 
1738   assert(StNotSameBuf);
1739   state = StNotSameBuf;
1740 
1741   // At this point we can go about comparing the two buffers.
1742   // For now, we only do this if they're both known string literals.
1743 
1744   // Attempt to extract string literals from both expressions.
1745   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1746   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1747   bool canComputeResult = false;
1748 
1749   if (s1StrLiteral && s2StrLiteral) {
1750     StringRef s1StrRef = s1StrLiteral->getString();
1751     StringRef s2StrRef = s2StrLiteral->getString();
1752 
1753     if (isBounded) {
1754       // Get the max number of characters to compare.
1755       const Expr *lenExpr = CE->getArg(2);
1756       SVal lenVal = state->getSVal(lenExpr, LCtx);
1757 
1758       // If the length is known, we can get the right substrings.
1759       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1760         // Create substrings of each to compare the prefix.
1761         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1762         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1763         canComputeResult = true;
1764       }
1765     } else {
1766       // This is a normal, unbounded strcmp.
1767       canComputeResult = true;
1768     }
1769 
1770     if (canComputeResult) {
1771       // Real strcmp stops at null characters.
1772       size_t s1Term = s1StrRef.find('\0');
1773       if (s1Term != StringRef::npos)
1774         s1StrRef = s1StrRef.substr(0, s1Term);
1775 
1776       size_t s2Term = s2StrRef.find('\0');
1777       if (s2Term != StringRef::npos)
1778         s2StrRef = s2StrRef.substr(0, s2Term);
1779 
1780       // Use StringRef's comparison methods to compute the actual result.
1781       int result;
1782 
1783       if (ignoreCase) {
1784         // Compare string 1 to string 2 the same way strcasecmp() does.
1785         result = s1StrRef.compare_lower(s2StrRef);
1786       } else {
1787         // Compare string 1 to string 2 the same way strcmp() does.
1788         result = s1StrRef.compare(s2StrRef);
1789       }
1790 
1791       // Build the SVal of the comparison and bind the return value.
1792       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1793       state = state->BindExpr(CE, LCtx, resultVal);
1794     }
1795   }
1796 
1797   if (!canComputeResult) {
1798     // Conjure a symbolic value. It's the best we can do.
1799     SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1800     state = state->BindExpr(CE, LCtx, resultVal);
1801   }
1802 
1803   // Record this as a possible path.
1804   C.addTransition(state);
1805 }
1806 
1807 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1808   //char *strsep(char **stringp, const char *delim);
1809   if (CE->getNumArgs() < 2)
1810     return;
1811 
1812   // Sanity: does the search string parameter match the return type?
1813   const Expr *SearchStrPtr = CE->getArg(0);
1814   QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1815   if (CharPtrTy.isNull() ||
1816       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1817     return;
1818 
1819   CurrentFunctionDescription = "strsep()";
1820   ProgramStateRef State = C.getState();
1821   const LocationContext *LCtx = C.getLocationContext();
1822 
1823   // Check that the search string pointer is non-null (though it may point to
1824   // a null string).
1825   SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1826   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1827   if (!State)
1828     return;
1829 
1830   // Check that the delimiter string is non-null.
1831   const Expr *DelimStr = CE->getArg(1);
1832   SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1833   State = checkNonNull(C, State, DelimStr, DelimStrVal);
1834   if (!State)
1835     return;
1836 
1837   SValBuilder &SVB = C.getSValBuilder();
1838   SVal Result;
1839   if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1840     // Get the current value of the search string pointer, as a char*.
1841     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1842 
1843     // Invalidate the search string, representing the change of one delimiter
1844     // character to NUL.
1845     State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1846                              /*IsSourceBuffer*/false);
1847 
1848     // Overwrite the search string pointer. The new value is either an address
1849     // further along in the same string, or NULL if there are no more tokens.
1850     State = State->bindLoc(*SearchStrLoc,
1851                            SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1852                                                 C.blockCount()));
1853   } else {
1854     assert(SearchStrVal.isUnknown());
1855     // Conjure a symbolic value. It's the best we can do.
1856     Result = SVB.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1857   }
1858 
1859   // Set the return value, and finish.
1860   State = State->BindExpr(CE, LCtx, Result);
1861   C.addTransition(State);
1862 }
1863 
1864 
1865 //===----------------------------------------------------------------------===//
1866 // The driver method, and other Checker callbacks.
1867 //===----------------------------------------------------------------------===//
1868 
1869 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1870   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1871 
1872   if (!FDecl)
1873     return false;
1874 
1875   // FIXME: Poorly-factored string switches are slow.
1876   FnCheck evalFunction = 0;
1877   if (C.isCLibraryFunction(FDecl, "memcpy"))
1878     evalFunction =  &CStringChecker::evalMemcpy;
1879   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1880     evalFunction =  &CStringChecker::evalMempcpy;
1881   else if (C.isCLibraryFunction(FDecl, "memcmp"))
1882     evalFunction =  &CStringChecker::evalMemcmp;
1883   else if (C.isCLibraryFunction(FDecl, "memmove"))
1884     evalFunction =  &CStringChecker::evalMemmove;
1885   else if (C.isCLibraryFunction(FDecl, "strcpy"))
1886     evalFunction =  &CStringChecker::evalStrcpy;
1887   else if (C.isCLibraryFunction(FDecl, "strncpy"))
1888     evalFunction =  &CStringChecker::evalStrncpy;
1889   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1890     evalFunction =  &CStringChecker::evalStpcpy;
1891   else if (C.isCLibraryFunction(FDecl, "strcat"))
1892     evalFunction =  &CStringChecker::evalStrcat;
1893   else if (C.isCLibraryFunction(FDecl, "strncat"))
1894     evalFunction =  &CStringChecker::evalStrncat;
1895   else if (C.isCLibraryFunction(FDecl, "strlen"))
1896     evalFunction =  &CStringChecker::evalstrLength;
1897   else if (C.isCLibraryFunction(FDecl, "strnlen"))
1898     evalFunction =  &CStringChecker::evalstrnLength;
1899   else if (C.isCLibraryFunction(FDecl, "strcmp"))
1900     evalFunction =  &CStringChecker::evalStrcmp;
1901   else if (C.isCLibraryFunction(FDecl, "strncmp"))
1902     evalFunction =  &CStringChecker::evalStrncmp;
1903   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1904     evalFunction =  &CStringChecker::evalStrcasecmp;
1905   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1906     evalFunction =  &CStringChecker::evalStrncasecmp;
1907   else if (C.isCLibraryFunction(FDecl, "strsep"))
1908     evalFunction =  &CStringChecker::evalStrsep;
1909   else if (C.isCLibraryFunction(FDecl, "bcopy"))
1910     evalFunction =  &CStringChecker::evalBcopy;
1911   else if (C.isCLibraryFunction(FDecl, "bcmp"))
1912     evalFunction =  &CStringChecker::evalMemcmp;
1913 
1914   // If the callee isn't a string function, let another checker handle it.
1915   if (!evalFunction)
1916     return false;
1917 
1918   // Make sure each function sets its own description.
1919   // (But don't bother in a release build.)
1920   assert(!(CurrentFunctionDescription = NULL));
1921 
1922   // Check and evaluate the call.
1923   (this->*evalFunction)(C, CE);
1924 
1925   // If the evaluate call resulted in no change, chain to the next eval call
1926   // handler.
1927   // Note, the custom CString evaluation calls assume that basic safety
1928   // properties are held. However, if the user chooses to turn off some of these
1929   // checks, we ignore the issues and leave the call evaluation to a generic
1930   // handler.
1931   if (!C.isDifferent())
1932     return false;
1933 
1934   return true;
1935 }
1936 
1937 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1938   // Record string length for char a[] = "abc";
1939   ProgramStateRef state = C.getState();
1940 
1941   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1942        I != E; ++I) {
1943     const VarDecl *D = dyn_cast<VarDecl>(*I);
1944     if (!D)
1945       continue;
1946 
1947     // FIXME: Handle array fields of structs.
1948     if (!D->getType()->isArrayType())
1949       continue;
1950 
1951     const Expr *Init = D->getInit();
1952     if (!Init)
1953       continue;
1954     if (!isa<StringLiteral>(Init))
1955       continue;
1956 
1957     Loc VarLoc = state->getLValue(D, C.getLocationContext());
1958     const MemRegion *MR = VarLoc.getAsRegion();
1959     if (!MR)
1960       continue;
1961 
1962     SVal StrVal = state->getSVal(Init, C.getLocationContext());
1963     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1964     DefinedOrUnknownSVal strLength =
1965         getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
1966 
1967     state = state->set<CStringLength>(MR, strLength);
1968   }
1969 
1970   C.addTransition(state);
1971 }
1972 
1973 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1974   CStringLengthTy Entries = state->get<CStringLength>();
1975   return !Entries.isEmpty();
1976 }
1977 
1978 ProgramStateRef
1979 CStringChecker::checkRegionChanges(ProgramStateRef state,
1980                                    const InvalidatedSymbols *,
1981                                    ArrayRef<const MemRegion *> ExplicitRegions,
1982                                    ArrayRef<const MemRegion *> Regions,
1983                                    const CallEvent *Call) const {
1984   CStringLengthTy Entries = state->get<CStringLength>();
1985   if (Entries.isEmpty())
1986     return state;
1987 
1988   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1989   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1990 
1991   // First build sets for the changed regions and their super-regions.
1992   for (ArrayRef<const MemRegion *>::iterator
1993        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1994     const MemRegion *MR = *I;
1995     Invalidated.insert(MR);
1996 
1997     SuperRegions.insert(MR);
1998     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1999       MR = SR->getSuperRegion();
2000       SuperRegions.insert(MR);
2001     }
2002   }
2003 
2004   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2005 
2006   // Then loop over the entries in the current state.
2007   for (CStringLengthTy::iterator I = Entries.begin(),
2008        E = Entries.end(); I != E; ++I) {
2009     const MemRegion *MR = I.getKey();
2010 
2011     // Is this entry for a super-region of a changed region?
2012     if (SuperRegions.count(MR)) {
2013       Entries = F.remove(Entries, MR);
2014       continue;
2015     }
2016 
2017     // Is this entry for a sub-region of a changed region?
2018     const MemRegion *Super = MR;
2019     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2020       Super = SR->getSuperRegion();
2021       if (Invalidated.count(Super)) {
2022         Entries = F.remove(Entries, MR);
2023         break;
2024       }
2025     }
2026   }
2027 
2028   return state->set<CStringLength>(Entries);
2029 }
2030 
2031 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2032                                       SymbolReaper &SR) const {
2033   // Mark all symbols in our string length map as valid.
2034   CStringLengthTy Entries = state->get<CStringLength>();
2035 
2036   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2037        I != E; ++I) {
2038     SVal Len = I.getData();
2039 
2040     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2041                                   se = Len.symbol_end(); si != se; ++si)
2042       SR.markInUse(*si);
2043   }
2044 }
2045 
2046 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2047                                       CheckerContext &C) const {
2048   if (!SR.hasDeadSymbols())
2049     return;
2050 
2051   ProgramStateRef state = C.getState();
2052   CStringLengthTy Entries = state->get<CStringLength>();
2053   if (Entries.isEmpty())
2054     return;
2055 
2056   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2057   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2058        I != E; ++I) {
2059     SVal Len = I.getData();
2060     if (SymbolRef Sym = Len.getAsSymbol()) {
2061       if (SR.isDead(Sym))
2062         Entries = F.remove(Entries, I.getKey());
2063     }
2064   }
2065 
2066   state = state->set<CStringLength>(Entries);
2067   C.addTransition(state);
2068 }
2069 
2070 #define REGISTER_CHECKER(name)                                                 \
2071   void ento::register##name(CheckerManager &mgr) {                             \
2072     CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
2073     checker->Filter.Check##name = true;                                        \
2074     checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
2075   }
2076 
2077 REGISTER_CHECKER(CStringNullArg)
2078 REGISTER_CHECKER(CStringOutOfBounds)
2079 REGISTER_CHECKER(CStringBufferOverlap)
2080 REGISTER_CHECKER(CStringNotNullTerm)
2081 
2082 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2083   registerCStringNullArg(Mgr);
2084 }
2085