xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 6fe0f035bd1dd1034b341e5a740c725044f65b90)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class CStringChecker : public Checker< eval::Call,
32                                          check::PreStmt<DeclStmt>,
33                                          check::LiveSymbols,
34                                          check::DeadSymbols,
35                                          check::RegionChanges
36                                          > {
37   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
38       BT_NotCString, BT_AdditionOverflow;
39 
40   mutable const char *CurrentFunctionDescription;
41 
42 public:
43   /// The filter is used to filter out the diagnostics which are not enabled by
44   /// the user.
45   struct CStringChecksFilter {
46     DefaultBool CheckCStringNullArg;
47     DefaultBool CheckCStringOutOfBounds;
48     DefaultBool CheckCStringBufferOverlap;
49     DefaultBool CheckCStringNotNullTerm;
50 
51     CheckName CheckNameCStringNullArg;
52     CheckName CheckNameCStringOutOfBounds;
53     CheckName CheckNameCStringBufferOverlap;
54     CheckName CheckNameCStringNotNullTerm;
55   };
56 
57   CStringChecksFilter Filter;
58 
59   static void *getTag() { static int tag; return &tag; }
60 
61   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
62   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
63   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
64   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
65 
66   ProgramStateRef
67     checkRegionChanges(ProgramStateRef state,
68                        const InvalidatedSymbols *,
69                        ArrayRef<const MemRegion *> ExplicitRegions,
70                        ArrayRef<const MemRegion *> Regions,
71                        const LocationContext *LCtx,
72                        const CallEvent *Call) const;
73 
74   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
75                                           const CallExpr *) const;
76 
77   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
78   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
79   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
80   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
81   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
82                       ProgramStateRef state,
83                       const Expr *Size,
84                       const Expr *Source,
85                       const Expr *Dest,
86                       bool Restricted = false,
87                       bool IsMempcpy = false) const;
88 
89   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
90 
91   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
92   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
93   void evalstrLengthCommon(CheckerContext &C,
94                            const CallExpr *CE,
95                            bool IsStrnlen = false) const;
96 
97   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
98   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
99   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
100   void evalStrcpyCommon(CheckerContext &C,
101                         const CallExpr *CE,
102                         bool returnEnd,
103                         bool isBounded,
104                         bool isAppending) const;
105 
106   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
107   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
108 
109   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
110   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
111   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
112   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
113   void evalStrcmpCommon(CheckerContext &C,
114                         const CallExpr *CE,
115                         bool isBounded = false,
116                         bool ignoreCase = false) const;
117 
118   void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
119 
120   void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
121   void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
122   void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
123   void evalMemset(CheckerContext &C, const CallExpr *CE) const;
124 
125   // Utility methods
126   std::pair<ProgramStateRef , ProgramStateRef >
127   static assumeZero(CheckerContext &C,
128                     ProgramStateRef state, SVal V, QualType Ty);
129 
130   static ProgramStateRef setCStringLength(ProgramStateRef state,
131                                               const MemRegion *MR,
132                                               SVal strLength);
133   static SVal getCStringLengthForRegion(CheckerContext &C,
134                                         ProgramStateRef &state,
135                                         const Expr *Ex,
136                                         const MemRegion *MR,
137                                         bool hypothetical);
138   SVal getCStringLength(CheckerContext &C,
139                         ProgramStateRef &state,
140                         const Expr *Ex,
141                         SVal Buf,
142                         bool hypothetical = false) const;
143 
144   const StringLiteral *getCStringLiteral(CheckerContext &C,
145                                          ProgramStateRef &state,
146                                          const Expr *expr,
147                                          SVal val) const;
148 
149   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
150                                           ProgramStateRef state,
151                                           const Expr *Ex, SVal V,
152                                           bool IsSourceBuffer,
153                                           const Expr *Size);
154 
155   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
156                               const MemRegion *MR);
157 
158   // Re-usable checks
159   ProgramStateRef checkNonNull(CheckerContext &C,
160                                    ProgramStateRef state,
161                                    const Expr *S,
162                                    SVal l) const;
163   ProgramStateRef CheckLocation(CheckerContext &C,
164                                     ProgramStateRef state,
165                                     const Expr *S,
166                                     SVal l,
167                                     const char *message = nullptr) const;
168   ProgramStateRef CheckBufferAccess(CheckerContext &C,
169                                         ProgramStateRef state,
170                                         const Expr *Size,
171                                         const Expr *FirstBuf,
172                                         const Expr *SecondBuf,
173                                         const char *firstMessage = nullptr,
174                                         const char *secondMessage = nullptr,
175                                         bool WarnAboutSize = false) const;
176 
177   ProgramStateRef CheckBufferAccess(CheckerContext &C,
178                                         ProgramStateRef state,
179                                         const Expr *Size,
180                                         const Expr *Buf,
181                                         const char *message = nullptr,
182                                         bool WarnAboutSize = false) const {
183     // This is a convenience override.
184     return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
185                              WarnAboutSize);
186   }
187   ProgramStateRef CheckOverlap(CheckerContext &C,
188                                    ProgramStateRef state,
189                                    const Expr *Size,
190                                    const Expr *First,
191                                    const Expr *Second) const;
192   void emitOverlapBug(CheckerContext &C,
193                       ProgramStateRef state,
194                       const Stmt *First,
195                       const Stmt *Second) const;
196 
197   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
198                                             ProgramStateRef state,
199                                             NonLoc left,
200                                             NonLoc right) const;
201 
202   // Return true if the destination buffer of the copy function may be in bound.
203   // Expects SVal of Size to be positive and unsigned.
204   // Expects SVal of FirstBuf to be a FieldRegion.
205   static bool IsFirstBufInBound(CheckerContext &C,
206                                 ProgramStateRef state,
207                                 const Expr *FirstBuf,
208                                 const Expr *Size);
209 };
210 
211 } //end anonymous namespace
212 
// Program-state map from a memory region to the SVal recorded as the length
// of the C string stored in that region. Entries are written by
// setCStringLength() and getCStringLengthForRegion().
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
214 
215 //===----------------------------------------------------------------------===//
216 // Individual checks and utility methods.
217 //===----------------------------------------------------------------------===//
218 
219 std::pair<ProgramStateRef , ProgramStateRef >
220 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
221                            QualType Ty) {
222   Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
223   if (!val)
224     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
225 
226   SValBuilder &svalBuilder = C.getSValBuilder();
227   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
228   return state->assume(svalBuilder.evalEQ(state, *val, zero));
229 }
230 
231 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
232                                             ProgramStateRef state,
233                                             const Expr *S, SVal l) const {
234   // If a previous check has failed, propagate the failure.
235   if (!state)
236     return nullptr;
237 
238   ProgramStateRef stateNull, stateNonNull;
239   std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
240 
241   if (stateNull && !stateNonNull) {
242     if (!Filter.CheckCStringNullArg)
243       return nullptr;
244 
245     ExplodedNode *N = C.generateErrorNode(stateNull);
246     if (!N)
247       return nullptr;
248 
249     if (!BT_Null)
250       BT_Null.reset(new BuiltinBug(
251           Filter.CheckNameCStringNullArg, categories::UnixAPI,
252           "Null pointer argument in call to byte string function"));
253 
254     SmallString<80> buf;
255     llvm::raw_svector_ostream os(buf);
256     assert(CurrentFunctionDescription);
257     os << "Null pointer argument in call to " << CurrentFunctionDescription;
258 
259     // Generate a report for this bug.
260     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
261     auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
262 
263     report->addRange(S->getSourceRange());
264     bugreporter::trackNullOrUndefValue(N, S, *report);
265     C.emitReport(std::move(report));
266     return nullptr;
267   }
268 
269   // From here on, assume that the value is non-null.
270   assert(stateNonNull);
271   return stateNonNull;
272 }
273 
274 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
275 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
276                                              ProgramStateRef state,
277                                              const Expr *S, SVal l,
278                                              const char *warningMsg) const {
279   // If a previous check has failed, propagate the failure.
280   if (!state)
281     return nullptr;
282 
283   // Check for out of bound array element access.
284   const MemRegion *R = l.getAsRegion();
285   if (!R)
286     return state;
287 
288   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
289   if (!ER)
290     return state;
291 
292   if (ER->getValueType() != C.getASTContext().CharTy)
293     return state;
294 
295   // Get the size of the array.
296   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
297   SValBuilder &svalBuilder = C.getSValBuilder();
298   SVal Extent =
299     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
300   DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
301 
302   // Get the index of the accessed element.
303   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
304 
305   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
306   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
307   if (StOutBound && !StInBound) {
308     ExplodedNode *N = C.generateErrorNode(StOutBound);
309     if (!N)
310       return nullptr;
311 
312     CheckName Name;
313     // These checks are either enabled by the CString out-of-bounds checker
314     // explicitly or the "basic" CStringNullArg checker support that Malloc
315     // checker enables.
316     assert(Filter.CheckCStringOutOfBounds || Filter.CheckCStringNullArg);
317     if (Filter.CheckCStringOutOfBounds)
318       Name = Filter.CheckNameCStringOutOfBounds;
319     else
320       Name = Filter.CheckNameCStringNullArg;
321 
322     if (!BT_Bounds) {
323       BT_Bounds.reset(new BuiltinBug(
324           Name, "Out-of-bound array access",
325           "Byte string function accesses out-of-bound array element"));
326     }
327     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
328 
329     // Generate a report for this bug.
330     std::unique_ptr<BugReport> report;
331     if (warningMsg) {
332       report = llvm::make_unique<BugReport>(*BT, warningMsg, N);
333     } else {
334       assert(CurrentFunctionDescription);
335       assert(CurrentFunctionDescription[0] != '\0');
336 
337       SmallString<80> buf;
338       llvm::raw_svector_ostream os(buf);
339       os << toUppercase(CurrentFunctionDescription[0])
340          << &CurrentFunctionDescription[1]
341          << " accesses out-of-bound array element";
342       report = llvm::make_unique<BugReport>(*BT, os.str(), N);
343     }
344 
345     // FIXME: It would be nice to eventually make this diagnostic more clear,
346     // e.g., by referencing the original declaration or by saying *why* this
347     // reference is outside the range.
348 
349     report->addRange(S->getSourceRange());
350     C.emitReport(std::move(report));
351     return nullptr;
352   }
353 
354   // Array bound check succeeded.  From this point forward the array bound
355   // should always succeed.
356   return StInBound;
357 }
358 
359 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
360                                                  ProgramStateRef state,
361                                                  const Expr *Size,
362                                                  const Expr *FirstBuf,
363                                                  const Expr *SecondBuf,
364                                                  const char *firstMessage,
365                                                  const char *secondMessage,
366                                                  bool WarnAboutSize) const {
367   // If a previous check has failed, propagate the failure.
368   if (!state)
369     return nullptr;
370 
371   SValBuilder &svalBuilder = C.getSValBuilder();
372   ASTContext &Ctx = svalBuilder.getContext();
373   const LocationContext *LCtx = C.getLocationContext();
374 
375   QualType sizeTy = Size->getType();
376   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
377 
378   // Check that the first buffer is non-null.
379   SVal BufVal = C.getSVal(FirstBuf);
380   state = checkNonNull(C, state, FirstBuf, BufVal);
381   if (!state)
382     return nullptr;
383 
384   // If out-of-bounds checking is turned off, skip the rest.
385   if (!Filter.CheckCStringOutOfBounds)
386     return state;
387 
388   // Get the access length and make sure it is known.
389   // FIXME: This assumes the caller has already checked that the access length
390   // is positive. And that it's unsigned.
391   SVal LengthVal = C.getSVal(Size);
392   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
393   if (!Length)
394     return state;
395 
396   // Compute the offset of the last element to be accessed: size-1.
397   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
398   SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy);
399   if (Offset.isUnknown())
400     return nullptr;
401   NonLoc LastOffset = Offset.castAs<NonLoc>();
402 
403   // Check that the first buffer is sufficiently long.
404   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
405   if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
406     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
407 
408     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
409                                           LastOffset, PtrTy);
410     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
411 
412     // If the buffer isn't large enough, abort.
413     if (!state)
414       return nullptr;
415   }
416 
417   // If there's a second buffer, check it as well.
418   if (SecondBuf) {
419     BufVal = state->getSVal(SecondBuf, LCtx);
420     state = checkNonNull(C, state, SecondBuf, BufVal);
421     if (!state)
422       return nullptr;
423 
424     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
425     if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
426       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
427 
428       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
429                                             LastOffset, PtrTy);
430       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
431     }
432   }
433 
434   // Large enough or not, return this state!
435   return state;
436 }
437 
438 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
439                                             ProgramStateRef state,
440                                             const Expr *Size,
441                                             const Expr *First,
442                                             const Expr *Second) const {
443   if (!Filter.CheckCStringBufferOverlap)
444     return state;
445 
446   // Do a simple check for overlap: if the two arguments are from the same
447   // buffer, see if the end of the first is greater than the start of the second
448   // or vice versa.
449 
450   // If a previous check has failed, propagate the failure.
451   if (!state)
452     return nullptr;
453 
454   ProgramStateRef stateTrue, stateFalse;
455 
456   // Get the buffer values and make sure they're known locations.
457   const LocationContext *LCtx = C.getLocationContext();
458   SVal firstVal = state->getSVal(First, LCtx);
459   SVal secondVal = state->getSVal(Second, LCtx);
460 
461   Optional<Loc> firstLoc = firstVal.getAs<Loc>();
462   if (!firstLoc)
463     return state;
464 
465   Optional<Loc> secondLoc = secondVal.getAs<Loc>();
466   if (!secondLoc)
467     return state;
468 
469   // Are the two values the same?
470   SValBuilder &svalBuilder = C.getSValBuilder();
471   std::tie(stateTrue, stateFalse) =
472     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
473 
474   if (stateTrue && !stateFalse) {
475     // If the values are known to be equal, that's automatically an overlap.
476     emitOverlapBug(C, stateTrue, First, Second);
477     return nullptr;
478   }
479 
480   // assume the two expressions are not equal.
481   assert(stateFalse);
482   state = stateFalse;
483 
484   // Which value comes first?
485   QualType cmpTy = svalBuilder.getConditionType();
486   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
487                                          *firstLoc, *secondLoc, cmpTy);
488   Optional<DefinedOrUnknownSVal> reverseTest =
489       reverse.getAs<DefinedOrUnknownSVal>();
490   if (!reverseTest)
491     return state;
492 
493   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
494   if (stateTrue) {
495     if (stateFalse) {
496       // If we don't know which one comes first, we can't perform this test.
497       return state;
498     } else {
499       // Switch the values so that firstVal is before secondVal.
500       std::swap(firstLoc, secondLoc);
501 
502       // Switch the Exprs as well, so that they still correspond.
503       std::swap(First, Second);
504     }
505   }
506 
507   // Get the length, and make sure it too is known.
508   SVal LengthVal = state->getSVal(Size, LCtx);
509   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
510   if (!Length)
511     return state;
512 
513   // Convert the first buffer's start address to char*.
514   // Bail out if the cast fails.
515   ASTContext &Ctx = svalBuilder.getContext();
516   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
517   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
518                                          First->getType());
519   Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
520   if (!FirstStartLoc)
521     return state;
522 
523   // Compute the end of the first buffer. Bail out if THAT fails.
524   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
525                                  *FirstStartLoc, *Length, CharPtrTy);
526   Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
527   if (!FirstEndLoc)
528     return state;
529 
530   // Is the end of the first buffer past the start of the second buffer?
531   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
532                                 *FirstEndLoc, *secondLoc, cmpTy);
533   Optional<DefinedOrUnknownSVal> OverlapTest =
534       Overlap.getAs<DefinedOrUnknownSVal>();
535   if (!OverlapTest)
536     return state;
537 
538   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
539 
540   if (stateTrue && !stateFalse) {
541     // Overlap!
542     emitOverlapBug(C, stateTrue, First, Second);
543     return nullptr;
544   }
545 
546   // assume the two expressions don't overlap.
547   assert(stateFalse);
548   return stateFalse;
549 }
550 
551 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
552                                   const Stmt *First, const Stmt *Second) const {
553   ExplodedNode *N = C.generateErrorNode(state);
554   if (!N)
555     return;
556 
557   if (!BT_Overlap)
558     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
559                                  categories::UnixAPI, "Improper arguments"));
560 
561   // Generate a report for this bug.
562   auto report = llvm::make_unique<BugReport>(
563       *BT_Overlap, "Arguments must not be overlapping buffers", N);
564   report->addRange(First->getSourceRange());
565   report->addRange(Second->getSourceRange());
566 
567   C.emitReport(std::move(report));
568 }
569 
570 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
571                                                      ProgramStateRef state,
572                                                      NonLoc left,
573                                                      NonLoc right) const {
574   // If out-of-bounds checking is turned off, skip the rest.
575   if (!Filter.CheckCStringOutOfBounds)
576     return state;
577 
578   // If a previous check has failed, propagate the failure.
579   if (!state)
580     return nullptr;
581 
582   SValBuilder &svalBuilder = C.getSValBuilder();
583   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
584 
585   QualType sizeTy = svalBuilder.getContext().getSizeType();
586   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
587   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
588 
589   SVal maxMinusRight;
590   if (right.getAs<nonloc::ConcreteInt>()) {
591     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
592                                                  sizeTy);
593   } else {
594     // Try switching the operands. (The order of these two assignments is
595     // important!)
596     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
597                                             sizeTy);
598     left = right;
599   }
600 
601   if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
602     QualType cmpTy = svalBuilder.getConditionType();
603     // If left > max - right, we have an overflow.
604     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
605                                                 *maxMinusRightNL, cmpTy);
606 
607     ProgramStateRef stateOverflow, stateOkay;
608     std::tie(stateOverflow, stateOkay) =
609       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
610 
611     if (stateOverflow && !stateOkay) {
612       // We have an overflow. Emit a bug report.
613       ExplodedNode *N = C.generateErrorNode(stateOverflow);
614       if (!N)
615         return nullptr;
616 
617       if (!BT_AdditionOverflow)
618         BT_AdditionOverflow.reset(
619             new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
620                            "Sum of expressions causes overflow"));
621 
622       // This isn't a great error message, but this should never occur in real
623       // code anyway -- you'd have to create a buffer longer than a size_t can
624       // represent, which is sort of a contradiction.
625       const char *warning =
626         "This expression will create a string whose length is too big to "
627         "be represented as a size_t";
628 
629       // Generate a report for this bug.
630       C.emitReport(
631           llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N));
632 
633       return nullptr;
634     }
635 
636     // From now on, assume an overflow didn't occur.
637     assert(stateOkay);
638     state = stateOkay;
639   }
640 
641   return state;
642 }
643 
644 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
645                                                 const MemRegion *MR,
646                                                 SVal strLength) {
647   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
648 
649   MR = MR->StripCasts();
650 
651   switch (MR->getKind()) {
652   case MemRegion::StringRegionKind:
653     // FIXME: This can happen if we strcpy() into a string region. This is
654     // undefined [C99 6.4.5p6], but we should still warn about it.
655     return state;
656 
657   case MemRegion::SymbolicRegionKind:
658   case MemRegion::AllocaRegionKind:
659   case MemRegion::VarRegionKind:
660   case MemRegion::FieldRegionKind:
661   case MemRegion::ObjCIvarRegionKind:
662     // These are the types we can currently track string lengths for.
663     break;
664 
665   case MemRegion::ElementRegionKind:
666     // FIXME: Handle element regions by upper-bounding the parent region's
667     // string length.
668     return state;
669 
670   default:
671     // Other regions (mostly non-data) can't have a reliable C string length.
672     // For now, just ignore the change.
673     // FIXME: These are rare but not impossible. We should output some kind of
674     // warning for things like strcpy((char[]){'a', 0}, "b");
675     return state;
676   }
677 
678   if (strLength.isUnknown())
679     return state->remove<CStringLength>(MR);
680 
681   return state->set<CStringLength>(MR, strLength);
682 }
683 
684 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
685                                                ProgramStateRef &state,
686                                                const Expr *Ex,
687                                                const MemRegion *MR,
688                                                bool hypothetical) {
689   if (!hypothetical) {
690     // If there's a recorded length, go ahead and return it.
691     const SVal *Recorded = state->get<CStringLength>(MR);
692     if (Recorded)
693       return *Recorded;
694   }
695 
696   // Otherwise, get a new symbol and update the state.
697   SValBuilder &svalBuilder = C.getSValBuilder();
698   QualType sizeTy = svalBuilder.getContext().getSizeType();
699   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
700                                                     MR, Ex, sizeTy,
701                                                     C.getLocationContext(),
702                                                     C.blockCount());
703 
704   if (!hypothetical) {
705     if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
706       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
707       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
708       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
709       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
710       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
711                                                         fourInt);
712       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
713       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
714                                                 maxLength, sizeTy);
715       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
716     }
717     state = state->set<CStringLength>(MR, strLength);
718   }
719 
720   return strLength;
721 }
722 
723 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
724                                       const Expr *Ex, SVal Buf,
725                                       bool hypothetical) const {
726   const MemRegion *MR = Buf.getAsRegion();
727   if (!MR) {
728     // If we can't get a region, see if it's something we /know/ isn't a
729     // C string. In the context of locations, the only time we can issue such
730     // a warning is for labels.
731     if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
732       if (!Filter.CheckCStringNotNullTerm)
733         return UndefinedVal();
734 
735       if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
736         if (!BT_NotCString)
737           BT_NotCString.reset(new BuiltinBug(
738               Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
739               "Argument is not a null-terminated string."));
740 
741         SmallString<120> buf;
742         llvm::raw_svector_ostream os(buf);
743         assert(CurrentFunctionDescription);
744         os << "Argument to " << CurrentFunctionDescription
745            << " is the address of the label '" << Label->getLabel()->getName()
746            << "', which is not a null-terminated string";
747 
748         // Generate a report for this bug.
749         auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
750 
751         report->addRange(Ex->getSourceRange());
752         C.emitReport(std::move(report));
753       }
754       return UndefinedVal();
755 
756     }
757 
758     // If it's not a region and not a label, give up.
759     return UnknownVal();
760   }
761 
762   // If we have a region, strip casts from it and see if we can figure out
763   // its length. For anything we can't figure out, just return UnknownVal.
764   MR = MR->StripCasts();
765 
766   switch (MR->getKind()) {
767   case MemRegion::StringRegionKind: {
768     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
769     // so we can assume that the byte length is the correct C string length.
770     SValBuilder &svalBuilder = C.getSValBuilder();
771     QualType sizeTy = svalBuilder.getContext().getSizeType();
772     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
773     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
774   }
775   case MemRegion::SymbolicRegionKind:
776   case MemRegion::AllocaRegionKind:
777   case MemRegion::VarRegionKind:
778   case MemRegion::FieldRegionKind:
779   case MemRegion::ObjCIvarRegionKind:
780     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
781   case MemRegion::CompoundLiteralRegionKind:
782     // FIXME: Can we track this? Is it necessary?
783     return UnknownVal();
784   case MemRegion::ElementRegionKind:
785     // FIXME: How can we handle this? It's not good enough to subtract the
786     // offset from the base string length; consider "123\x00567" and &a[5].
787     return UnknownVal();
788   default:
789     // Other regions (mostly non-data) can't have a reliable C string length.
790     // In this case, an error is emitted and UndefinedVal is returned.
791     // The caller should always be prepared to handle this case.
792     if (!Filter.CheckCStringNotNullTerm)
793       return UndefinedVal();
794 
795     if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
796       if (!BT_NotCString)
797         BT_NotCString.reset(new BuiltinBug(
798             Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
799             "Argument is not a null-terminated string."));
800 
801       SmallString<120> buf;
802       llvm::raw_svector_ostream os(buf);
803 
804       assert(CurrentFunctionDescription);
805       os << "Argument to " << CurrentFunctionDescription << " is ";
806 
807       if (SummarizeRegion(os, C.getASTContext(), MR))
808         os << ", which is not a null-terminated string";
809       else
810         os << "not a null-terminated string";
811 
812       // Generate a report for this bug.
813       auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
814 
815       report->addRange(Ex->getSourceRange());
816       C.emitReport(std::move(report));
817     }
818 
819     return UndefinedVal();
820   }
821 }
822 
823 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
824   ProgramStateRef &state, const Expr *expr, SVal val) const {
825 
826   // Get the memory region pointed to by the val.
827   const MemRegion *bufRegion = val.getAsRegion();
828   if (!bufRegion)
829     return nullptr;
830 
831   // Strip casts off the memory region.
832   bufRegion = bufRegion->StripCasts();
833 
834   // Cast the memory region to a string region.
835   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
836   if (!strRegion)
837     return nullptr;
838 
839   // Return the actual string in the string region.
840   return strRegion->getStringLiteral();
841 }
842 
843 bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
844                                        ProgramStateRef state,
845                                        const Expr *FirstBuf,
846                                        const Expr *Size) {
847   // If we do not know that the buffer is long enough we return 'true'.
848   // Otherwise the parent region of this field region would also get
849   // invalidated, which would lead to warnings based on an unknown state.
850 
851   // Originally copied from CheckBufferAccess and CheckLocation.
852   SValBuilder &svalBuilder = C.getSValBuilder();
853   ASTContext &Ctx = svalBuilder.getContext();
854   const LocationContext *LCtx = C.getLocationContext();
855 
856   QualType sizeTy = Size->getType();
857   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
858   SVal BufVal = state->getSVal(FirstBuf, LCtx);
859 
860   SVal LengthVal = state->getSVal(Size, LCtx);
861   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
862   if (!Length)
863     return true; // cf top comment.
864 
865   // Compute the offset of the last element to be accessed: size-1.
866   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
867   SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy);
868   if (Offset.isUnknown())
869     return true; // cf top comment
870   NonLoc LastOffset = Offset.castAs<NonLoc>();
871 
872   // Check that the first buffer is sufficiently long.
873   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
874   Optional<Loc> BufLoc = BufStart.getAs<Loc>();
875   if (!BufLoc)
876     return true; // cf top comment.
877 
878   SVal BufEnd =
879       svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy);
880 
881   // Check for out of bound array element access.
882   const MemRegion *R = BufEnd.getAsRegion();
883   if (!R)
884     return true; // cf top comment.
885 
886   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
887   if (!ER)
888     return true; // cf top comment.
889 
890   // FIXME: Does this crash when a non-standard definition
891   // of a library function is encountered?
892   assert(ER->getValueType() == C.getASTContext().CharTy &&
893          "IsFirstBufInBound should only be called with char* ElementRegions");
894 
895   // Get the size of the array.
896   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
897   SVal Extent =
898       svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
899   DefinedOrUnknownSVal ExtentSize = Extent.castAs<DefinedOrUnknownSVal>();
900 
901   // Get the index of the accessed element.
902   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
903 
904   ProgramStateRef StInBound = state->assumeInBound(Idx, ExtentSize, true);
905 
906   return static_cast<bool>(StInBound);
907 }
908 
909 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
910                                                  ProgramStateRef state,
911                                                  const Expr *E, SVal V,
912                                                  bool IsSourceBuffer,
913                                                  const Expr *Size) {
914   Optional<Loc> L = V.getAs<Loc>();
915   if (!L)
916     return state;
917 
918   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
919   // some assumptions about the value that CFRefCount can't. Even so, it should
920   // probably be refactored.
921   if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
922     const MemRegion *R = MR->getRegion()->StripCasts();
923 
924     // Are we dealing with an ElementRegion?  If so, we should be invalidating
925     // the super-region.
926     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
927       R = ER->getSuperRegion();
928       // FIXME: What about layers of ElementRegions?
929     }
930 
931     // Invalidate this region.
932     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
933 
934     bool CausesPointerEscape = false;
935     RegionAndSymbolInvalidationTraits ITraits;
936     // Invalidate and escape only indirect regions accessible through the source
937     // buffer.
938     if (IsSourceBuffer) {
939       ITraits.setTrait(R->getBaseRegion(),
940                        RegionAndSymbolInvalidationTraits::TK_PreserveContents);
941       ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
942       CausesPointerEscape = true;
943     } else {
944       const MemRegion::Kind& K = R->getKind();
945       if (K == MemRegion::FieldRegionKind)
946         if (Size && IsFirstBufInBound(C, state, E, Size)) {
947           // If destination buffer is a field region and access is in bound,
948           // do not invalidate its super region.
949           ITraits.setTrait(
950               R,
951               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
952         }
953     }
954 
955     return state->invalidateRegions(R, E, C.blockCount(), LCtx,
956                                     CausesPointerEscape, nullptr, nullptr,
957                                     &ITraits);
958   }
959 
960   // If we have a non-region value by chance, just remove the binding.
961   // FIXME: is this necessary or correct? This handles the non-Region
962   //  cases.  Is it ever valid to store to these?
963   return state->killBinding(*L);
964 }
965 
966 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
967                                      const MemRegion *MR) {
968   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
969 
970   switch (MR->getKind()) {
971   case MemRegion::FunctionCodeRegionKind: {
972     const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl();
973     if (FD)
974       os << "the address of the function '" << *FD << '\'';
975     else
976       os << "the address of a function";
977     return true;
978   }
979   case MemRegion::BlockCodeRegionKind:
980     os << "block text";
981     return true;
982   case MemRegion::BlockDataRegionKind:
983     os << "a block";
984     return true;
985   case MemRegion::CXXThisRegionKind:
986   case MemRegion::CXXTempObjectRegionKind:
987     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
988     return true;
989   case MemRegion::VarRegionKind:
990     os << "a variable of type" << TVR->getValueType().getAsString();
991     return true;
992   case MemRegion::FieldRegionKind:
993     os << "a field of type " << TVR->getValueType().getAsString();
994     return true;
995   case MemRegion::ObjCIvarRegionKind:
996     os << "an instance variable of type " << TVR->getValueType().getAsString();
997     return true;
998   default:
999     return false;
1000   }
1001 }
1002 
1003 //===----------------------------------------------------------------------===//
1004 // evaluation of individual function calls.
1005 //===----------------------------------------------------------------------===//
1006 
1007 void CStringChecker::evalCopyCommon(CheckerContext &C,
1008                                     const CallExpr *CE,
1009                                     ProgramStateRef state,
1010                                     const Expr *Size, const Expr *Dest,
1011                                     const Expr *Source, bool Restricted,
1012                                     bool IsMempcpy) const {
1013   CurrentFunctionDescription = "memory copy function";
1014 
1015   // See if the size argument is zero.
1016   const LocationContext *LCtx = C.getLocationContext();
1017   SVal sizeVal = state->getSVal(Size, LCtx);
1018   QualType sizeTy = Size->getType();
1019 
1020   ProgramStateRef stateZeroSize, stateNonZeroSize;
1021   std::tie(stateZeroSize, stateNonZeroSize) =
1022     assumeZero(C, state, sizeVal, sizeTy);
1023 
1024   // Get the value of the Dest.
1025   SVal destVal = state->getSVal(Dest, LCtx);
1026 
1027   // If the size is zero, there won't be any actual memory access, so
1028   // just bind the return value to the destination buffer and return.
1029   if (stateZeroSize && !stateNonZeroSize) {
1030     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
1031     C.addTransition(stateZeroSize);
1032     return;
1033   }
1034 
1035   // If the size can be nonzero, we have to check the other arguments.
1036   if (stateNonZeroSize) {
1037     state = stateNonZeroSize;
1038 
1039     // Ensure the destination is not null. If it is NULL there will be a
1040     // NULL pointer dereference.
1041     state = checkNonNull(C, state, Dest, destVal);
1042     if (!state)
1043       return;
1044 
1045     // Get the value of the Src.
1046     SVal srcVal = state->getSVal(Source, LCtx);
1047 
1048     // Ensure the source is not null. If it is NULL there will be a
1049     // NULL pointer dereference.
1050     state = checkNonNull(C, state, Source, srcVal);
1051     if (!state)
1052       return;
1053 
1054     // Ensure the accesses are valid and that the buffers do not overlap.
1055     const char * const writeWarning =
1056       "Memory copy function overflows destination buffer";
1057     state = CheckBufferAccess(C, state, Size, Dest, Source,
1058                               writeWarning, /* sourceWarning = */ nullptr);
1059     if (Restricted)
1060       state = CheckOverlap(C, state, Size, Dest, Source);
1061 
1062     if (!state)
1063       return;
1064 
1065     // If this is mempcpy, get the byte after the last byte copied and
1066     // bind the expr.
1067     if (IsMempcpy) {
1068       // Get the byte after the last byte copied.
1069       SValBuilder &SvalBuilder = C.getSValBuilder();
1070       ASTContext &Ctx = SvalBuilder.getContext();
1071       QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
1072       SVal DestRegCharVal =
1073           SvalBuilder.evalCast(destVal, CharPtrTy, Dest->getType());
1074       SVal lastElement = C.getSValBuilder().evalBinOp(
1075           state, BO_Add, DestRegCharVal, sizeVal, Dest->getType());
1076       // If we don't know how much we copied, we can at least
1077       // conjure a return value for later.
1078       if (lastElement.isUnknown())
1079         lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1080                                                           C.blockCount());
1081 
1082       // The byte after the last byte copied is the return value.
1083       state = state->BindExpr(CE, LCtx, lastElement);
1084     } else {
1085       // All other copies return the destination buffer.
1086       // (Well, bcopy() has a void return type, but this won't hurt.)
1087       state = state->BindExpr(CE, LCtx, destVal);
1088     }
1089 
1090     // Invalidate the destination (regular invalidation without pointer-escaping
1091     // the address of the top-level region).
1092     // FIXME: Even if we can't perfectly model the copy, we should see if we
1093     // can use LazyCompoundVals to copy the source values into the destination.
1094     // This would probably remove any existing bindings past the end of the
1095     // copied region, but that's still an improvement over blank invalidation.
1096     state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1097                              /*IsSourceBuffer*/false, Size);
1098 
1099     // Invalidate the source (const-invalidation without const-pointer-escaping
1100     // the address of the top-level region).
1101     state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1102                              /*IsSourceBuffer*/true, nullptr);
1103 
1104     C.addTransition(state);
1105   }
1106 }
1107 
1108 
1109 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1110   if (CE->getNumArgs() < 3)
1111     return;
1112 
1113   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1114   // The return value is the address of the destination buffer.
1115   const Expr *Dest = CE->getArg(0);
1116   ProgramStateRef state = C.getState();
1117 
1118   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1119 }
1120 
1121 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1122   if (CE->getNumArgs() < 3)
1123     return;
1124 
1125   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1126   // The return value is a pointer to the byte following the last written byte.
1127   const Expr *Dest = CE->getArg(0);
1128   ProgramStateRef state = C.getState();
1129 
1130   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1131 }
1132 
1133 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1134   if (CE->getNumArgs() < 3)
1135     return;
1136 
1137   // void *memmove(void *dst, const void *src, size_t n);
1138   // The return value is the address of the destination buffer.
1139   const Expr *Dest = CE->getArg(0);
1140   ProgramStateRef state = C.getState();
1141 
1142   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1143 }
1144 
1145 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1146   if (CE->getNumArgs() < 3)
1147     return;
1148 
1149   // void bcopy(const void *src, void *dst, size_t n);
1150   evalCopyCommon(C, CE, C.getState(),
1151                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1152 }
1153 
1154 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1155   if (CE->getNumArgs() < 3)
1156     return;
1157 
1158   // int memcmp(const void *s1, const void *s2, size_t n);
1159   CurrentFunctionDescription = "memory comparison function";
1160 
1161   const Expr *Left = CE->getArg(0);
1162   const Expr *Right = CE->getArg(1);
1163   const Expr *Size = CE->getArg(2);
1164 
1165   ProgramStateRef state = C.getState();
1166   SValBuilder &svalBuilder = C.getSValBuilder();
1167 
1168   // See if the size argument is zero.
1169   const LocationContext *LCtx = C.getLocationContext();
1170   SVal sizeVal = state->getSVal(Size, LCtx);
1171   QualType sizeTy = Size->getType();
1172 
1173   ProgramStateRef stateZeroSize, stateNonZeroSize;
1174   std::tie(stateZeroSize, stateNonZeroSize) =
1175     assumeZero(C, state, sizeVal, sizeTy);
1176 
1177   // If the size can be zero, the result will be 0 in that case, and we don't
1178   // have to check either of the buffers.
1179   if (stateZeroSize) {
1180     state = stateZeroSize;
1181     state = state->BindExpr(CE, LCtx,
1182                             svalBuilder.makeZeroVal(CE->getType()));
1183     C.addTransition(state);
1184   }
1185 
1186   // If the size can be nonzero, we have to check the other arguments.
1187   if (stateNonZeroSize) {
1188     state = stateNonZeroSize;
1189     // If we know the two buffers are the same, we know the result is 0.
1190     // First, get the two buffers' addresses. Another checker will have already
1191     // made sure they're not undefined.
1192     DefinedOrUnknownSVal LV =
1193         state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1194     DefinedOrUnknownSVal RV =
1195         state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1196 
1197     // See if they are the same.
1198     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1199     ProgramStateRef StSameBuf, StNotSameBuf;
1200     std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1201 
1202     // If the two arguments might be the same buffer, we know the result is 0,
1203     // and we only need to check one size.
1204     if (StSameBuf) {
1205       state = StSameBuf;
1206       state = CheckBufferAccess(C, state, Size, Left);
1207       if (state) {
1208         state = StSameBuf->BindExpr(CE, LCtx,
1209                                     svalBuilder.makeZeroVal(CE->getType()));
1210         C.addTransition(state);
1211       }
1212     }
1213 
1214     // If the two arguments might be different buffers, we have to check the
1215     // size of both of them.
1216     if (StNotSameBuf) {
1217       state = StNotSameBuf;
1218       state = CheckBufferAccess(C, state, Size, Left, Right);
1219       if (state) {
1220         // The return value is the comparison result, which we don't know.
1221         SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1222                                                  C.blockCount());
1223         state = state->BindExpr(CE, LCtx, CmpV);
1224         C.addTransition(state);
1225       }
1226     }
1227   }
1228 }
1229 
1230 void CStringChecker::evalstrLength(CheckerContext &C,
1231                                    const CallExpr *CE) const {
1232   if (CE->getNumArgs() < 1)
1233     return;
1234 
1235   // size_t strlen(const char *s);
1236   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1237 }
1238 
1239 void CStringChecker::evalstrnLength(CheckerContext &C,
1240                                     const CallExpr *CE) const {
1241   if (CE->getNumArgs() < 2)
1242     return;
1243 
1244   // size_t strnlen(const char *s, size_t maxlen);
1245   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1246 }
1247 
1248 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1249                                          bool IsStrnlen) const {
1250   CurrentFunctionDescription = "string length function";
1251   ProgramStateRef state = C.getState();
1252   const LocationContext *LCtx = C.getLocationContext();
1253 
1254   if (IsStrnlen) {
1255     const Expr *maxlenExpr = CE->getArg(1);
1256     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1257 
1258     ProgramStateRef stateZeroSize, stateNonZeroSize;
1259     std::tie(stateZeroSize, stateNonZeroSize) =
1260       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1261 
1262     // If the size can be zero, the result will be 0 in that case, and we don't
1263     // have to check the string itself.
1264     if (stateZeroSize) {
1265       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1266       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1267       C.addTransition(stateZeroSize);
1268     }
1269 
1270     // If the size is GUARANTEED to be zero, we're done!
1271     if (!stateNonZeroSize)
1272       return;
1273 
1274     // Otherwise, record the assumption that the size is nonzero.
1275     state = stateNonZeroSize;
1276   }
1277 
1278   // Check that the string argument is non-null.
1279   const Expr *Arg = CE->getArg(0);
1280   SVal ArgVal = state->getSVal(Arg, LCtx);
1281 
1282   state = checkNonNull(C, state, Arg, ArgVal);
1283 
1284   if (!state)
1285     return;
1286 
1287   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1288 
1289   // If the argument isn't a valid C string, there's no valid state to
1290   // transition to.
1291   if (strLength.isUndef())
1292     return;
1293 
1294   DefinedOrUnknownSVal result = UnknownVal();
1295 
1296   // If the check is for strnlen() then bind the return value to no more than
1297   // the maxlen value.
1298   if (IsStrnlen) {
1299     QualType cmpTy = C.getSValBuilder().getConditionType();
1300 
1301     // It's a little unfortunate to be getting this again,
1302     // but it's not that expensive...
1303     const Expr *maxlenExpr = CE->getArg(1);
1304     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1305 
1306     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1307     Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1308 
1309     if (strLengthNL && maxlenValNL) {
1310       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1311 
1312       // Check if the strLength is greater than the maxlen.
1313       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1314           C.getSValBuilder()
1315               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1316               .castAs<DefinedOrUnknownSVal>());
1317 
1318       if (stateStringTooLong && !stateStringNotTooLong) {
1319         // If the string is longer than maxlen, return maxlen.
1320         result = *maxlenValNL;
1321       } else if (stateStringNotTooLong && !stateStringTooLong) {
1322         // If the string is shorter than maxlen, return its length.
1323         result = *strLengthNL;
1324       }
1325     }
1326 
1327     if (result.isUnknown()) {
1328       // If we don't have enough information for a comparison, there's
1329       // no guarantee the full string length will actually be returned.
1330       // All we know is the return value is the min of the string length
1331       // and the limit. This is better than nothing.
1332       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1333                                                    C.blockCount());
1334       NonLoc resultNL = result.castAs<NonLoc>();
1335 
1336       if (strLengthNL) {
1337         state = state->assume(C.getSValBuilder().evalBinOpNN(
1338                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1339                                   .castAs<DefinedOrUnknownSVal>(), true);
1340       }
1341 
1342       if (maxlenValNL) {
1343         state = state->assume(C.getSValBuilder().evalBinOpNN(
1344                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1345                                   .castAs<DefinedOrUnknownSVal>(), true);
1346       }
1347     }
1348 
1349   } else {
1350     // This is a plain strlen(), not strnlen().
1351     result = strLength.castAs<DefinedOrUnknownSVal>();
1352 
1353     // If we don't know the length of the string, conjure a return
1354     // value, so it can be used in constraints, at least.
1355     if (result.isUnknown()) {
1356       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1357                                                    C.blockCount());
1358     }
1359   }
1360 
1361   // Bind the return value.
1362   assert(!result.isUnknown() && "Should have conjured a value by now");
1363   state = state->BindExpr(CE, LCtx, result);
1364   C.addTransition(state);
1365 }
1366 
1367 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1368   if (CE->getNumArgs() < 2)
1369     return;
1370 
1371   // char *strcpy(char *restrict dst, const char *restrict src);
1372   evalStrcpyCommon(C, CE,
1373                    /* returnEnd = */ false,
1374                    /* isBounded = */ false,
1375                    /* isAppending = */ false);
1376 }
1377 
1378 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1379   if (CE->getNumArgs() < 3)
1380     return;
1381 
1382   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1383   evalStrcpyCommon(C, CE,
1384                    /* returnEnd = */ false,
1385                    /* isBounded = */ true,
1386                    /* isAppending = */ false);
1387 }
1388 
1389 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1390   if (CE->getNumArgs() < 2)
1391     return;
1392 
1393   // char *stpcpy(char *restrict dst, const char *restrict src);
1394   evalStrcpyCommon(C, CE,
1395                    /* returnEnd = */ true,
1396                    /* isBounded = */ false,
1397                    /* isAppending = */ false);
1398 }
1399 
1400 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1401   if (CE->getNumArgs() < 2)
1402     return;
1403 
1404   //char *strcat(char *restrict s1, const char *restrict s2);
1405   evalStrcpyCommon(C, CE,
1406                    /* returnEnd = */ false,
1407                    /* isBounded = */ false,
1408                    /* isAppending = */ true);
1409 }
1410 
1411 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1412   if (CE->getNumArgs() < 3)
1413     return;
1414 
1415   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1416   evalStrcpyCommon(C, CE,
1417                    /* returnEnd = */ false,
1418                    /* isBounded = */ true,
1419                    /* isAppending = */ true);
1420 }
1421 
1422 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1423                                       bool returnEnd, bool isBounded,
1424                                       bool isAppending) const {
1425   CurrentFunctionDescription = "string copy function";
1426   ProgramStateRef state = C.getState();
1427   const LocationContext *LCtx = C.getLocationContext();
1428 
1429   // Check that the destination is non-null.
1430   const Expr *Dst = CE->getArg(0);
1431   SVal DstVal = state->getSVal(Dst, LCtx);
1432 
1433   state = checkNonNull(C, state, Dst, DstVal);
1434   if (!state)
1435     return;
1436 
1437   // Check that the source is non-null.
1438   const Expr *srcExpr = CE->getArg(1);
1439   SVal srcVal = state->getSVal(srcExpr, LCtx);
1440   state = checkNonNull(C, state, srcExpr, srcVal);
1441   if (!state)
1442     return;
1443 
1444   // Get the string length of the source.
1445   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1446 
1447   // If the source isn't a valid C string, give up.
1448   if (strLength.isUndef())
1449     return;
1450 
1451   SValBuilder &svalBuilder = C.getSValBuilder();
1452   QualType cmpTy = svalBuilder.getConditionType();
1453   QualType sizeTy = svalBuilder.getContext().getSizeType();
1454 
1455   // These two values allow checking two kinds of errors:
1456   // - actual overflows caused by a source that doesn't fit in the destination
1457   // - potential overflows caused by a bound that could exceed the destination
1458   SVal amountCopied = UnknownVal();
1459   SVal maxLastElementIndex = UnknownVal();
1460   const char *boundWarning = nullptr;
1461 
1462   // If the function is strncpy, strncat, etc... it is bounded.
1463   if (isBounded) {
1464     // Get the max number of characters to copy.
1465     const Expr *lenExpr = CE->getArg(2);
1466     SVal lenVal = state->getSVal(lenExpr, LCtx);
1467 
1468     // Protect against misdeclared strncpy().
1469     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1470 
1471     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1472     Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1473 
1474     // If we know both values, we might be able to figure out how much
1475     // we're copying.
1476     if (strLengthNL && lenValNL) {
1477       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1478 
1479       // Check if the max number to copy is less than the length of the src.
1480       // If the bound is equal to the source length, strncpy won't null-
1481       // terminate the result!
1482       std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1483           svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1484               .castAs<DefinedOrUnknownSVal>());
1485 
1486       if (stateSourceTooLong && !stateSourceNotTooLong) {
1487         // Max number to copy is less than the length of the src, so the actual
1488         // strLength copied is the max number arg.
1489         state = stateSourceTooLong;
1490         amountCopied = lenVal;
1491 
1492       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1493         // The source buffer entirely fits in the bound.
1494         state = stateSourceNotTooLong;
1495         amountCopied = strLength;
1496       }
1497     }
1498 
1499     // We still want to know if the bound is known to be too large.
1500     if (lenValNL) {
1501       if (isAppending) {
1502         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1503 
1504         // Get the string length of the destination. If the destination is
1505         // memory that can't have a string length, we shouldn't be copying
1506         // into it anyway.
1507         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1508         if (dstStrLength.isUndef())
1509           return;
1510 
1511         if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1512           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1513                                                         *lenValNL,
1514                                                         *dstStrLengthNL,
1515                                                         sizeTy);
1516           boundWarning = "Size argument is greater than the free space in the "
1517                          "destination buffer";
1518         }
1519 
1520       } else {
1521         // For strncpy, this is just checking that lenVal <= sizeof(dst)
1522         // (Yes, strncpy and strncat differ in how they treat termination.
1523         // strncat ALWAYS terminates, but strncpy doesn't.)
1524 
1525         // We need a special case for when the copy size is zero, in which
1526         // case strncpy will do no work at all. Our bounds check uses n-1
1527         // as the last element accessed, so n == 0 is problematic.
1528         ProgramStateRef StateZeroSize, StateNonZeroSize;
1529         std::tie(StateZeroSize, StateNonZeroSize) =
1530           assumeZero(C, state, *lenValNL, sizeTy);
1531 
1532         // If the size is known to be zero, we're done.
1533         if (StateZeroSize && !StateNonZeroSize) {
1534           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1535           C.addTransition(StateZeroSize);
1536           return;
1537         }
1538 
1539         // Otherwise, go ahead and figure out the last element we'll touch.
1540         // We don't record the non-zero assumption here because we can't
1541         // be sure. We won't warn on a possible zero.
1542         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1543         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1544                                                       one, sizeTy);
1545         boundWarning = "Size argument is greater than the length of the "
1546                        "destination buffer";
1547       }
1548     }
1549 
1550     // If we couldn't pin down the copy length, at least bound it.
1551     // FIXME: We should actually run this code path for append as well, but
1552     // right now it creates problems with constraints (since we can end up
1553     // trying to pass constraints from symbol to symbol).
1554     if (amountCopied.isUnknown() && !isAppending) {
1555       // Try to get a "hypothetical" string length symbol, which we can later
1556       // set as a real value if that turns out to be the case.
1557       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1558       assert(!amountCopied.isUndef());
1559 
1560       if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1561         if (lenValNL) {
1562           // amountCopied <= lenVal
1563           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1564                                                              *amountCopiedNL,
1565                                                              *lenValNL,
1566                                                              cmpTy);
1567           state = state->assume(
1568               copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1569           if (!state)
1570             return;
1571         }
1572 
1573         if (strLengthNL) {
1574           // amountCopied <= strlen(source)
1575           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1576                                                            *amountCopiedNL,
1577                                                            *strLengthNL,
1578                                                            cmpTy);
1579           state = state->assume(
1580               copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1581           if (!state)
1582             return;
1583         }
1584       }
1585     }
1586 
1587   } else {
1588     // The function isn't bounded. The amount copied should match the length
1589     // of the source buffer.
1590     amountCopied = strLength;
1591   }
1592 
1593   assert(state);
1594 
1595   // This represents the number of characters copied into the destination
1596   // buffer. (It may not actually be the strlen if the destination buffer
1597   // is not terminated.)
1598   SVal finalStrLength = UnknownVal();
1599 
1600   // If this is an appending function (strcat, strncat...) then set the
1601   // string length to strlen(src) + strlen(dst) since the buffer will
1602   // ultimately contain both.
1603   if (isAppending) {
1604     // Get the string length of the destination. If the destination is memory
1605     // that can't have a string length, we shouldn't be copying into it anyway.
1606     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1607     if (dstStrLength.isUndef())
1608       return;
1609 
1610     Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1611     Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1612 
1613     // If we know both string lengths, we might know the final string length.
1614     if (srcStrLengthNL && dstStrLengthNL) {
1615       // Make sure the two lengths together don't overflow a size_t.
1616       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1617       if (!state)
1618         return;
1619 
1620       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1621                                                *dstStrLengthNL, sizeTy);
1622     }
1623 
1624     // If we couldn't get a single value for the final string length,
1625     // we can at least bound it by the individual lengths.
1626     if (finalStrLength.isUnknown()) {
1627       // Try to get a "hypothetical" string length symbol, which we can later
1628       // set as a real value if that turns out to be the case.
1629       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1630       assert(!finalStrLength.isUndef());
1631 
1632       if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1633         if (srcStrLengthNL) {
1634           // finalStrLength >= srcStrLength
1635           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1636                                                         *finalStrLengthNL,
1637                                                         *srcStrLengthNL,
1638                                                         cmpTy);
1639           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1640                                 true);
1641           if (!state)
1642             return;
1643         }
1644 
1645         if (dstStrLengthNL) {
1646           // finalStrLength >= dstStrLength
1647           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1648                                                       *finalStrLengthNL,
1649                                                       *dstStrLengthNL,
1650                                                       cmpTy);
1651           state =
1652               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1653           if (!state)
1654             return;
1655         }
1656       }
1657     }
1658 
1659   } else {
1660     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1661     // the final string length will match the input string length.
1662     finalStrLength = amountCopied;
1663   }
1664 
1665   // The final result of the function will either be a pointer past the last
1666   // copied element, or a pointer to the start of the destination buffer.
1667   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1668 
1669   assert(state);
1670 
1671   // If the destination is a MemRegion, try to check for a buffer overflow and
1672   // record the new string length.
1673   if (Optional<loc::MemRegionVal> dstRegVal =
1674           DstVal.getAs<loc::MemRegionVal>()) {
1675     QualType ptrTy = Dst->getType();
1676 
1677     // If we have an exact value on a bounded copy, use that to check for
1678     // overflows, rather than our estimate about how much is actually copied.
1679     if (boundWarning) {
1680       if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1681         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1682                                                       *maxLastNL, ptrTy);
1683         state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1684                               boundWarning);
1685         if (!state)
1686           return;
1687       }
1688     }
1689 
1690     // Then, if the final length is known...
1691     if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1692       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1693                                                  *knownStrLength, ptrTy);
1694 
1695       // ...and we haven't checked the bound, we'll check the actual copy.
1696       if (!boundWarning) {
1697         const char * const warningMsg =
1698           "String copy function overflows destination buffer";
1699         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1700         if (!state)
1701           return;
1702       }
1703 
1704       // If this is a stpcpy-style copy, the last element is the return value.
1705       if (returnEnd)
1706         Result = lastElement;
1707     }
1708 
1709     // Invalidate the destination (regular invalidation without pointer-escaping
1710     // the address of the top-level region). This must happen before we set the
1711     // C string length because invalidation will clear the length.
1712     // FIXME: Even if we can't perfectly model the copy, we should see if we
1713     // can use LazyCompoundVals to copy the source values into the destination.
1714     // This would probably remove any existing bindings past the end of the
1715     // string, but that's still an improvement over blank invalidation.
1716     state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1717                              /*IsSourceBuffer*/false, nullptr);
1718 
1719     // Invalidate the source (const-invalidation without const-pointer-escaping
1720     // the address of the top-level region).
1721     state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true,
1722                              nullptr);
1723 
1724     // Set the C string length of the destination, if we know it.
1725     if (isBounded && !isAppending) {
1726       // strncpy is annoying in that it doesn't guarantee to null-terminate
1727       // the result string. If the original string didn't fit entirely inside
1728       // the bound (including the null-terminator), we don't know how long the
1729       // result is.
1730       if (amountCopied != strLength)
1731         finalStrLength = UnknownVal();
1732     }
1733     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1734   }
1735 
1736   assert(state);
1737 
1738   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1739   // overflow, we still need a result. Conjure a return value.
1740   if (returnEnd && Result.isUnknown()) {
1741     Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1742   }
1743 
1744   // Set the return value.
1745   state = state->BindExpr(CE, LCtx, Result);
1746   C.addTransition(state);
1747 }
1748 
1749 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1750   if (CE->getNumArgs() < 2)
1751     return;
1752 
1753   //int strcmp(const char *s1, const char *s2);
1754   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1755 }
1756 
1757 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1758   if (CE->getNumArgs() < 3)
1759     return;
1760 
1761   //int strncmp(const char *s1, const char *s2, size_t n);
1762   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1763 }
1764 
1765 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1766                                     const CallExpr *CE) const {
1767   if (CE->getNumArgs() < 2)
1768     return;
1769 
1770   //int strcasecmp(const char *s1, const char *s2);
1771   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1772 }
1773 
1774 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1775                                      const CallExpr *CE) const {
1776   if (CE->getNumArgs() < 3)
1777     return;
1778 
1779   //int strncasecmp(const char *s1, const char *s2, size_t n);
1780   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1781 }
1782 
1783 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1784                                       bool isBounded, bool ignoreCase) const {
1785   CurrentFunctionDescription = "string comparison function";
1786   ProgramStateRef state = C.getState();
1787   const LocationContext *LCtx = C.getLocationContext();
1788 
1789   // Check that the first string is non-null
1790   const Expr *s1 = CE->getArg(0);
1791   SVal s1Val = state->getSVal(s1, LCtx);
1792   state = checkNonNull(C, state, s1, s1Val);
1793   if (!state)
1794     return;
1795 
1796   // Check that the second string is non-null.
1797   const Expr *s2 = CE->getArg(1);
1798   SVal s2Val = state->getSVal(s2, LCtx);
1799   state = checkNonNull(C, state, s2, s2Val);
1800   if (!state)
1801     return;
1802 
1803   // Get the string length of the first string or give up.
1804   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1805   if (s1Length.isUndef())
1806     return;
1807 
1808   // Get the string length of the second string or give up.
1809   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1810   if (s2Length.isUndef())
1811     return;
1812 
1813   // If we know the two buffers are the same, we know the result is 0.
1814   // First, get the two buffers' addresses. Another checker will have already
1815   // made sure they're not undefined.
1816   DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1817   DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1818 
1819   // See if they are the same.
1820   SValBuilder &svalBuilder = C.getSValBuilder();
1821   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1822   ProgramStateRef StSameBuf, StNotSameBuf;
1823   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1824 
1825   // If the two arguments might be the same buffer, we know the result is 0,
1826   // and we only need to check one size.
1827   if (StSameBuf) {
1828     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1829                                     svalBuilder.makeZeroVal(CE->getType()));
1830     C.addTransition(StSameBuf);
1831 
1832     // If the two arguments are GUARANTEED to be the same, we're done!
1833     if (!StNotSameBuf)
1834       return;
1835   }
1836 
1837   assert(StNotSameBuf);
1838   state = StNotSameBuf;
1839 
1840   // At this point we can go about comparing the two buffers.
1841   // For now, we only do this if they're both known string literals.
1842 
1843   // Attempt to extract string literals from both expressions.
1844   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1845   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1846   bool canComputeResult = false;
1847   SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1848                                                 C.blockCount());
1849 
1850   if (s1StrLiteral && s2StrLiteral) {
1851     StringRef s1StrRef = s1StrLiteral->getString();
1852     StringRef s2StrRef = s2StrLiteral->getString();
1853 
1854     if (isBounded) {
1855       // Get the max number of characters to compare.
1856       const Expr *lenExpr = CE->getArg(2);
1857       SVal lenVal = state->getSVal(lenExpr, LCtx);
1858 
1859       // If the length is known, we can get the right substrings.
1860       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1861         // Create substrings of each to compare the prefix.
1862         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1863         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1864         canComputeResult = true;
1865       }
1866     } else {
1867       // This is a normal, unbounded strcmp.
1868       canComputeResult = true;
1869     }
1870 
1871     if (canComputeResult) {
1872       // Real strcmp stops at null characters.
1873       size_t s1Term = s1StrRef.find('\0');
1874       if (s1Term != StringRef::npos)
1875         s1StrRef = s1StrRef.substr(0, s1Term);
1876 
1877       size_t s2Term = s2StrRef.find('\0');
1878       if (s2Term != StringRef::npos)
1879         s2StrRef = s2StrRef.substr(0, s2Term);
1880 
1881       // Use StringRef's comparison methods to compute the actual result.
1882       int compareRes = ignoreCase ? s1StrRef.compare_lower(s2StrRef)
1883                                   : s1StrRef.compare(s2StrRef);
1884 
1885       // The strcmp function returns an integer greater than, equal to, or less
1886       // than zero, [c11, p7.24.4.2].
1887       if (compareRes == 0) {
1888         resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
1889       }
1890       else {
1891         DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
1892         // Constrain strcmp's result range based on the result of StringRef's
1893         // comparison methods.
1894         BinaryOperatorKind op = (compareRes == 1) ? BO_GT : BO_LT;
1895         SVal compareWithZero =
1896           svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
1897                                 svalBuilder.getConditionType());
1898         DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
1899         state = state->assume(compareWithZeroVal, true);
1900       }
1901     }
1902   }
1903 
1904   state = state->BindExpr(CE, LCtx, resultVal);
1905 
1906   // Record this as a possible path.
1907   C.addTransition(state);
1908 }
1909 
1910 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1911   //char *strsep(char **stringp, const char *delim);
1912   if (CE->getNumArgs() < 2)
1913     return;
1914 
1915   // Sanity: does the search string parameter match the return type?
1916   const Expr *SearchStrPtr = CE->getArg(0);
1917   QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1918   if (CharPtrTy.isNull() ||
1919       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1920     return;
1921 
1922   CurrentFunctionDescription = "strsep()";
1923   ProgramStateRef State = C.getState();
1924   const LocationContext *LCtx = C.getLocationContext();
1925 
1926   // Check that the search string pointer is non-null (though it may point to
1927   // a null string).
1928   SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1929   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1930   if (!State)
1931     return;
1932 
1933   // Check that the delimiter string is non-null.
1934   const Expr *DelimStr = CE->getArg(1);
1935   SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1936   State = checkNonNull(C, State, DelimStr, DelimStrVal);
1937   if (!State)
1938     return;
1939 
1940   SValBuilder &SVB = C.getSValBuilder();
1941   SVal Result;
1942   if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1943     // Get the current value of the search string pointer, as a char*.
1944     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1945 
1946     // Invalidate the search string, representing the change of one delimiter
1947     // character to NUL.
1948     State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1949                              /*IsSourceBuffer*/false, nullptr);
1950 
1951     // Overwrite the search string pointer. The new value is either an address
1952     // further along in the same string, or NULL if there are no more tokens.
1953     State = State->bindLoc(*SearchStrLoc,
1954                            SVB.conjureSymbolVal(getTag(),
1955                                                 CE,
1956                                                 LCtx,
1957                                                 CharPtrTy,
1958                                                 C.blockCount()),
1959                            LCtx);
1960   } else {
1961     assert(SearchStrVal.isUnknown());
1962     // Conjure a symbolic value. It's the best we can do.
1963     Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1964   }
1965 
1966   // Set the return value, and finish.
1967   State = State->BindExpr(CE, LCtx, Result);
1968   C.addTransition(State);
1969 }
1970 
1971 // These should probably be moved into a C++ standard library checker.
1972 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const {
1973   evalStdCopyCommon(C, CE);
1974 }
1975 
1976 void CStringChecker::evalStdCopyBackward(CheckerContext &C,
1977                                          const CallExpr *CE) const {
1978   evalStdCopyCommon(C, CE);
1979 }
1980 
1981 void CStringChecker::evalStdCopyCommon(CheckerContext &C,
1982                                        const CallExpr *CE) const {
1983   if (CE->getNumArgs() < 3)
1984     return;
1985 
1986   ProgramStateRef State = C.getState();
1987 
1988   const LocationContext *LCtx = C.getLocationContext();
1989 
1990   // template <class _InputIterator, class _OutputIterator>
1991   // _OutputIterator
1992   // copy(_InputIterator __first, _InputIterator __last,
1993   //        _OutputIterator __result)
1994 
1995   // Invalidate the destination buffer
1996   const Expr *Dst = CE->getArg(2);
1997   SVal DstVal = State->getSVal(Dst, LCtx);
1998   State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false,
1999                            /*Size=*/nullptr);
2000 
2001   SValBuilder &SVB = C.getSValBuilder();
2002 
2003   SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2004   State = State->BindExpr(CE, LCtx, ResultVal);
2005 
2006   C.addTransition(State);
2007 }
2008 
2009 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const {
2010   if (CE->getNumArgs() != 3)
2011     return;
2012 
2013   CurrentFunctionDescription = "memory set function";
2014 
2015   const Expr *Mem = CE->getArg(0);
2016   const Expr *Size = CE->getArg(2);
2017   ProgramStateRef State = C.getState();
2018 
2019   // See if the size argument is zero.
2020   const LocationContext *LCtx = C.getLocationContext();
2021   SVal SizeVal = State->getSVal(Size, LCtx);
2022   QualType SizeTy = Size->getType();
2023 
2024   ProgramStateRef StateZeroSize, StateNonZeroSize;
2025   std::tie(StateZeroSize, StateNonZeroSize) =
2026     assumeZero(C, State, SizeVal, SizeTy);
2027 
2028   // Get the value of the memory area.
2029   SVal MemVal = State->getSVal(Mem, LCtx);
2030 
2031   // If the size is zero, there won't be any actual memory access, so
2032   // just bind the return value to the Mem buffer and return.
2033   if (StateZeroSize && !StateNonZeroSize) {
2034     StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, MemVal);
2035     C.addTransition(StateZeroSize);
2036     return;
2037   }
2038 
2039   // Ensure the memory area is not null.
2040   // If it is NULL there will be a NULL pointer dereference.
2041   State = checkNonNull(C, StateNonZeroSize, Mem, MemVal);
2042   if (!State)
2043     return;
2044 
2045   State = CheckBufferAccess(C, State, Size, Mem);
2046   if (!State)
2047     return;
2048   State = InvalidateBuffer(C, State, Mem, C.getSVal(Mem),
2049                            /*IsSourceBuffer*/false, Size);
2050   if (!State)
2051     return;
2052 
2053   State = State->BindExpr(CE, LCtx, MemVal);
2054   C.addTransition(State);
2055 }
2056 
2057 static bool isCPPStdLibraryFunction(const FunctionDecl *FD, StringRef Name) {
2058   IdentifierInfo *II = FD->getIdentifier();
2059   if (!II)
2060     return false;
2061 
2062   if (!AnalysisDeclContext::isInStdNamespace(FD))
2063     return false;
2064 
2065   if (II->getName().equals(Name))
2066     return true;
2067 
2068   return false;
2069 }
2070 //===----------------------------------------------------------------------===//
2071 // The driver method, and other Checker callbacks.
2072 //===----------------------------------------------------------------------===//
2073 
2074 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
2075   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
2076 
2077   if (!FDecl)
2078     return false;
2079 
2080   // FIXME: Poorly-factored string switches are slow.
2081   FnCheck evalFunction = nullptr;
2082   if (C.isCLibraryFunction(FDecl, "memcpy"))
2083     evalFunction =  &CStringChecker::evalMemcpy;
2084   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
2085     evalFunction =  &CStringChecker::evalMempcpy;
2086   else if (C.isCLibraryFunction(FDecl, "memcmp"))
2087     evalFunction =  &CStringChecker::evalMemcmp;
2088   else if (C.isCLibraryFunction(FDecl, "memmove"))
2089     evalFunction =  &CStringChecker::evalMemmove;
2090   else if (C.isCLibraryFunction(FDecl, "memset"))
2091     evalFunction =  &CStringChecker::evalMemset;
2092   else if (C.isCLibraryFunction(FDecl, "strcpy"))
2093     evalFunction =  &CStringChecker::evalStrcpy;
2094   else if (C.isCLibraryFunction(FDecl, "strncpy"))
2095     evalFunction =  &CStringChecker::evalStrncpy;
2096   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
2097     evalFunction =  &CStringChecker::evalStpcpy;
2098   else if (C.isCLibraryFunction(FDecl, "strcat"))
2099     evalFunction =  &CStringChecker::evalStrcat;
2100   else if (C.isCLibraryFunction(FDecl, "strncat"))
2101     evalFunction =  &CStringChecker::evalStrncat;
2102   else if (C.isCLibraryFunction(FDecl, "strlen"))
2103     evalFunction =  &CStringChecker::evalstrLength;
2104   else if (C.isCLibraryFunction(FDecl, "strnlen"))
2105     evalFunction =  &CStringChecker::evalstrnLength;
2106   else if (C.isCLibraryFunction(FDecl, "strcmp"))
2107     evalFunction =  &CStringChecker::evalStrcmp;
2108   else if (C.isCLibraryFunction(FDecl, "strncmp"))
2109     evalFunction =  &CStringChecker::evalStrncmp;
2110   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
2111     evalFunction =  &CStringChecker::evalStrcasecmp;
2112   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
2113     evalFunction =  &CStringChecker::evalStrncasecmp;
2114   else if (C.isCLibraryFunction(FDecl, "strsep"))
2115     evalFunction =  &CStringChecker::evalStrsep;
2116   else if (C.isCLibraryFunction(FDecl, "bcopy"))
2117     evalFunction =  &CStringChecker::evalBcopy;
2118   else if (C.isCLibraryFunction(FDecl, "bcmp"))
2119     evalFunction =  &CStringChecker::evalMemcmp;
2120   else if (isCPPStdLibraryFunction(FDecl, "copy"))
2121     evalFunction =  &CStringChecker::evalStdCopy;
2122   else if (isCPPStdLibraryFunction(FDecl, "copy_backward"))
2123     evalFunction =  &CStringChecker::evalStdCopyBackward;
2124 
2125   // If the callee isn't a string function, let another checker handle it.
2126   if (!evalFunction)
2127     return false;
2128 
2129   // Check and evaluate the call.
2130   (this->*evalFunction)(C, CE);
2131 
2132   // If the evaluate call resulted in no change, chain to the next eval call
2133   // handler.
2134   // Note, the custom CString evaluation calls assume that basic safety
2135   // properties are held. However, if the user chooses to turn off some of these
2136   // checks, we ignore the issues and leave the call evaluation to a generic
2137   // handler.
2138   return C.isDifferent();
2139 }
2140 
2141 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2142   // Record string length for char a[] = "abc";
2143   ProgramStateRef state = C.getState();
2144 
2145   for (const auto *I : DS->decls()) {
2146     const VarDecl *D = dyn_cast<VarDecl>(I);
2147     if (!D)
2148       continue;
2149 
2150     // FIXME: Handle array fields of structs.
2151     if (!D->getType()->isArrayType())
2152       continue;
2153 
2154     const Expr *Init = D->getInit();
2155     if (!Init)
2156       continue;
2157     if (!isa<StringLiteral>(Init))
2158       continue;
2159 
2160     Loc VarLoc = state->getLValue(D, C.getLocationContext());
2161     const MemRegion *MR = VarLoc.getAsRegion();
2162     if (!MR)
2163       continue;
2164 
2165     SVal StrVal = C.getSVal(Init);
2166     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2167     DefinedOrUnknownSVal strLength =
2168         getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2169 
2170     state = state->set<CStringLength>(MR, strLength);
2171   }
2172 
2173   C.addTransition(state);
2174 }
2175 
2176 ProgramStateRef
2177 CStringChecker::checkRegionChanges(ProgramStateRef state,
2178                                    const InvalidatedSymbols *,
2179                                    ArrayRef<const MemRegion *> ExplicitRegions,
2180                                    ArrayRef<const MemRegion *> Regions,
2181                                    const LocationContext *LCtx,
2182                                    const CallEvent *Call) const {
2183   CStringLengthTy Entries = state->get<CStringLength>();
2184   if (Entries.isEmpty())
2185     return state;
2186 
2187   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2188   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2189 
2190   // First build sets for the changed regions and their super-regions.
2191   for (ArrayRef<const MemRegion *>::iterator
2192        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
2193     const MemRegion *MR = *I;
2194     Invalidated.insert(MR);
2195 
2196     SuperRegions.insert(MR);
2197     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2198       MR = SR->getSuperRegion();
2199       SuperRegions.insert(MR);
2200     }
2201   }
2202 
2203   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2204 
2205   // Then loop over the entries in the current state.
2206   for (CStringLengthTy::iterator I = Entries.begin(),
2207        E = Entries.end(); I != E; ++I) {
2208     const MemRegion *MR = I.getKey();
2209 
2210     // Is this entry for a super-region of a changed region?
2211     if (SuperRegions.count(MR)) {
2212       Entries = F.remove(Entries, MR);
2213       continue;
2214     }
2215 
2216     // Is this entry for a sub-region of a changed region?
2217     const MemRegion *Super = MR;
2218     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2219       Super = SR->getSuperRegion();
2220       if (Invalidated.count(Super)) {
2221         Entries = F.remove(Entries, MR);
2222         break;
2223       }
2224     }
2225   }
2226 
2227   return state->set<CStringLength>(Entries);
2228 }
2229 
2230 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2231                                       SymbolReaper &SR) const {
2232   // Mark all symbols in our string length map as valid.
2233   CStringLengthTy Entries = state->get<CStringLength>();
2234 
2235   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2236        I != E; ++I) {
2237     SVal Len = I.getData();
2238 
2239     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2240                                   se = Len.symbol_end(); si != se; ++si)
2241       SR.markInUse(*si);
2242   }
2243 }
2244 
2245 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2246                                       CheckerContext &C) const {
2247   if (!SR.hasDeadSymbols())
2248     return;
2249 
2250   ProgramStateRef state = C.getState();
2251   CStringLengthTy Entries = state->get<CStringLength>();
2252   if (Entries.isEmpty())
2253     return;
2254 
2255   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2256   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2257        I != E; ++I) {
2258     SVal Len = I.getData();
2259     if (SymbolRef Sym = Len.getAsSymbol()) {
2260       if (SR.isDead(Sym))
2261         Entries = F.remove(Entries, I.getKey());
2262     }
2263   }
2264 
2265   state = state->set<CStringLength>(Entries);
2266   C.addTransition(state);
2267 }
2268 
2269 #define REGISTER_CHECKER(name)                                                 \
2270   void ento::register##name(CheckerManager &mgr) {                             \
2271     CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
2272     checker->Filter.Check##name = true;                                        \
2273     checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
2274   }
2275 
2276 REGISTER_CHECKER(CStringNullArg)
2277 REGISTER_CHECKER(CStringOutOfBounds)
2278 REGISTER_CHECKER(CStringBufferOverlap)
2279 REGISTER_CHECKER(CStringNotNullTerm)
2280 
2281 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2282   registerCStringNullArg(Mgr);
2283 }
2284