xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 7af1c99024897c9c4b1255020afbe529c22ac20c)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class CStringChecker : public Checker< eval::Call,
32                                          check::PreStmt<DeclStmt>,
33                                          check::LiveSymbols,
34                                          check::DeadSymbols,
35                                          check::RegionChanges
36                                          > {
37   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
38       BT_NotCString, BT_AdditionOverflow;
39 
40   mutable const char *CurrentFunctionDescription;
41 
42 public:
43   /// The filter is used to filter out the diagnostics which are not enabled by
44   /// the user.
45   struct CStringChecksFilter {
46     DefaultBool CheckCStringNullArg;
47     DefaultBool CheckCStringOutOfBounds;
48     DefaultBool CheckCStringBufferOverlap;
49     DefaultBool CheckCStringNotNullTerm;
50 
51     CheckName CheckNameCStringNullArg;
52     CheckName CheckNameCStringOutOfBounds;
53     CheckName CheckNameCStringBufferOverlap;
54     CheckName CheckNameCStringNotNullTerm;
55   };
56 
57   CStringChecksFilter Filter;
58 
59   static void *getTag() { static int tag; return &tag; }
60 
61   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
62   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
63   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
64   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
65 
66   ProgramStateRef
67     checkRegionChanges(ProgramStateRef state,
68                        const InvalidatedSymbols *,
69                        ArrayRef<const MemRegion *> ExplicitRegions,
70                        ArrayRef<const MemRegion *> Regions,
71                        const LocationContext *LCtx,
72                        const CallEvent *Call) const;
73 
74   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
75                                           const CallExpr *) const;
76 
77   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
78   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
79   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
80   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
81   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
82                       ProgramStateRef state,
83                       const Expr *Size,
84                       const Expr *Source,
85                       const Expr *Dest,
86                       bool Restricted = false,
87                       bool IsMempcpy = false) const;
88 
89   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
90 
91   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
92   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
93   void evalstrLengthCommon(CheckerContext &C,
94                            const CallExpr *CE,
95                            bool IsStrnlen = false) const;
96 
97   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
98   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
99   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
100   void evalStrcpyCommon(CheckerContext &C,
101                         const CallExpr *CE,
102                         bool returnEnd,
103                         bool isBounded,
104                         bool isAppending) const;
105 
106   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
107   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
108 
109   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
110   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
111   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
112   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
113   void evalStrcmpCommon(CheckerContext &C,
114                         const CallExpr *CE,
115                         bool isBounded = false,
116                         bool ignoreCase = false) const;
117 
118   void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
119 
120   void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
121   void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
122   void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
123   void evalMemset(CheckerContext &C, const CallExpr *CE) const;
124 
125   // Utility methods
126   std::pair<ProgramStateRef , ProgramStateRef >
127   static assumeZero(CheckerContext &C,
128                     ProgramStateRef state, SVal V, QualType Ty);
129 
130   static ProgramStateRef setCStringLength(ProgramStateRef state,
131                                               const MemRegion *MR,
132                                               SVal strLength);
133   static SVal getCStringLengthForRegion(CheckerContext &C,
134                                         ProgramStateRef &state,
135                                         const Expr *Ex,
136                                         const MemRegion *MR,
137                                         bool hypothetical);
138   SVal getCStringLength(CheckerContext &C,
139                         ProgramStateRef &state,
140                         const Expr *Ex,
141                         SVal Buf,
142                         bool hypothetical = false) const;
143 
144   const StringLiteral *getCStringLiteral(CheckerContext &C,
145                                          ProgramStateRef &state,
146                                          const Expr *expr,
147                                          SVal val) const;
148 
149   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
150                                           ProgramStateRef state,
151                                           const Expr *Ex, SVal V,
152                                           bool IsSourceBuffer,
153                                           const Expr *Size);
154 
155   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
156                               const MemRegion *MR);
157 
158   // Re-usable checks
159   ProgramStateRef checkNonNull(CheckerContext &C,
160                                    ProgramStateRef state,
161                                    const Expr *S,
162                                    SVal l) const;
163   ProgramStateRef CheckLocation(CheckerContext &C,
164                                     ProgramStateRef state,
165                                     const Expr *S,
166                                     SVal l,
167                                     const char *message = nullptr) const;
168   ProgramStateRef CheckBufferAccess(CheckerContext &C,
169                                         ProgramStateRef state,
170                                         const Expr *Size,
171                                         const Expr *FirstBuf,
172                                         const Expr *SecondBuf,
173                                         const char *firstMessage = nullptr,
174                                         const char *secondMessage = nullptr,
175                                         bool WarnAboutSize = false) const;
176 
177   ProgramStateRef CheckBufferAccess(CheckerContext &C,
178                                         ProgramStateRef state,
179                                         const Expr *Size,
180                                         const Expr *Buf,
181                                         const char *message = nullptr,
182                                         bool WarnAboutSize = false) const {
183     // This is a convenience override.
184     return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
185                              WarnAboutSize);
186   }
187   ProgramStateRef CheckOverlap(CheckerContext &C,
188                                    ProgramStateRef state,
189                                    const Expr *Size,
190                                    const Expr *First,
191                                    const Expr *Second) const;
192   void emitOverlapBug(CheckerContext &C,
193                       ProgramStateRef state,
194                       const Stmt *First,
195                       const Stmt *Second) const;
196 
197   void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
198                       StringRef WarningMsg) const;
199   void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
200                           const Stmt *S, StringRef WarningMsg) const;
201   void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
202                          const Stmt *S, StringRef WarningMsg) const;
203   void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
204 
205   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
206                                             ProgramStateRef state,
207                                             NonLoc left,
208                                             NonLoc right) const;
209 
210   // Return true if the destination buffer of the copy function may be in bound.
211   // Expects SVal of Size to be positive and unsigned.
212   // Expects SVal of FirstBuf to be a FieldRegion.
213   static bool IsFirstBufInBound(CheckerContext &C,
214                                 ProgramStateRef state,
215                                 const Expr *FirstBuf,
216                                 const Expr *Size);
217 };
218 
219 } //end anonymous namespace
220 
// Program-state map from a memory region to the SVal recorded as that
// region's C string length. It is written by setCStringLength() and read and
// written by getCStringLengthForRegion().
221 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
222 
223 //===----------------------------------------------------------------------===//
224 // Individual checks and utility methods.
225 //===----------------------------------------------------------------------===//
226 
227 std::pair<ProgramStateRef , ProgramStateRef >
228 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
229                            QualType Ty) {
230   Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
231   if (!val)
232     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
233 
234   SValBuilder &svalBuilder = C.getSValBuilder();
235   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
236   return state->assume(svalBuilder.evalEQ(state, *val, zero));
237 }
238 
239 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
240                                             ProgramStateRef state,
241                                             const Expr *S, SVal l) const {
242   // If a previous check has failed, propagate the failure.
243   if (!state)
244     return nullptr;
245 
246   ProgramStateRef stateNull, stateNonNull;
247   std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
248 
249   if (stateNull && !stateNonNull) {
250     if (Filter.CheckCStringNullArg) {
251       SmallString<80> buf;
252       llvm::raw_svector_ostream os(buf);
253       assert(CurrentFunctionDescription);
254       os << "Null pointer argument in call to " << CurrentFunctionDescription;
255 
256       emitNullArgBug(C, stateNull, S, os.str());
257     }
258     return nullptr;
259   }
260 
261   // From here on, assume that the value is non-null.
262   assert(stateNonNull);
263   return stateNonNull;
264 }
265 
266 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
267 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
268                                              ProgramStateRef state,
269                                              const Expr *S, SVal l,
270                                              const char *warningMsg) const {
271   // If a previous check has failed, propagate the failure.
272   if (!state)
273     return nullptr;
274 
275   // Check for out of bound array element access.
276   const MemRegion *R = l.getAsRegion();
277   if (!R)
278     return state;
279 
280   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
281   if (!ER)
282     return state;
283 
284   if (ER->getValueType() != C.getASTContext().CharTy)
285     return state;
286 
287   // Get the size of the array.
288   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
289   SValBuilder &svalBuilder = C.getSValBuilder();
290   SVal Extent =
291     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
292   DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
293 
294   // Get the index of the accessed element.
295   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
296 
297   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
298   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
299   if (StOutBound && !StInBound) {
300     // These checks are either enabled by the CString out-of-bounds checker
301     // explicitly or the "basic" CStringNullArg checker support that Malloc
302     // checker enables.
303     assert(Filter.CheckCStringOutOfBounds || Filter.CheckCStringNullArg);
304 
305     // Emit a bug report.
306     if (warningMsg) {
307       emitOutOfBoundsBug(C, StOutBound, S, warningMsg);
308     } else {
309       assert(CurrentFunctionDescription);
310       assert(CurrentFunctionDescription[0] != '\0');
311 
312       SmallString<80> buf;
313       llvm::raw_svector_ostream os(buf);
314       os << toUppercase(CurrentFunctionDescription[0])
315          << &CurrentFunctionDescription[1]
316          << " accesses out-of-bound array element";
317       emitOutOfBoundsBug(C, StOutBound, S, os.str());
318     }
319     return nullptr;
320   }
321 
322   // Array bound check succeeded.  From this point forward the array bound
323   // should always succeed.
324   return StInBound;
325 }
326 
327 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
328                                                  ProgramStateRef state,
329                                                  const Expr *Size,
330                                                  const Expr *FirstBuf,
331                                                  const Expr *SecondBuf,
332                                                  const char *firstMessage,
333                                                  const char *secondMessage,
334                                                  bool WarnAboutSize) const {
335   // If a previous check has failed, propagate the failure.
336   if (!state)
337     return nullptr;
338 
339   SValBuilder &svalBuilder = C.getSValBuilder();
340   ASTContext &Ctx = svalBuilder.getContext();
341   const LocationContext *LCtx = C.getLocationContext();
342 
343   QualType sizeTy = Size->getType();
344   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
345 
346   // Check that the first buffer is non-null.
347   SVal BufVal = C.getSVal(FirstBuf);
348   state = checkNonNull(C, state, FirstBuf, BufVal);
349   if (!state)
350     return nullptr;
351 
352   // If out-of-bounds checking is turned off, skip the rest.
353   if (!Filter.CheckCStringOutOfBounds)
354     return state;
355 
356   // Get the access length and make sure it is known.
357   // FIXME: This assumes the caller has already checked that the access length
358   // is positive. And that it's unsigned.
359   SVal LengthVal = C.getSVal(Size);
360   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
361   if (!Length)
362     return state;
363 
364   // Compute the offset of the last element to be accessed: size-1.
365   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
366   SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy);
367   if (Offset.isUnknown())
368     return nullptr;
369   NonLoc LastOffset = Offset.castAs<NonLoc>();
370 
371   // Check that the first buffer is sufficiently long.
372   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
373   if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
374     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
375 
376     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
377                                           LastOffset, PtrTy);
378     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
379 
380     // If the buffer isn't large enough, abort.
381     if (!state)
382       return nullptr;
383   }
384 
385   // If there's a second buffer, check it as well.
386   if (SecondBuf) {
387     BufVal = state->getSVal(SecondBuf, LCtx);
388     state = checkNonNull(C, state, SecondBuf, BufVal);
389     if (!state)
390       return nullptr;
391 
392     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
393     if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
394       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
395 
396       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
397                                             LastOffset, PtrTy);
398       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
399     }
400   }
401 
402   // Large enough or not, return this state!
403   return state;
404 }
405 
406 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
407                                             ProgramStateRef state,
408                                             const Expr *Size,
409                                             const Expr *First,
410                                             const Expr *Second) const {
411   if (!Filter.CheckCStringBufferOverlap)
412     return state;
413 
414   // Do a simple check for overlap: if the two arguments are from the same
415   // buffer, see if the end of the first is greater than the start of the second
416   // or vice versa.
417 
418   // If a previous check has failed, propagate the failure.
419   if (!state)
420     return nullptr;
421 
422   ProgramStateRef stateTrue, stateFalse;
423 
424   // Get the buffer values and make sure they're known locations.
425   const LocationContext *LCtx = C.getLocationContext();
426   SVal firstVal = state->getSVal(First, LCtx);
427   SVal secondVal = state->getSVal(Second, LCtx);
428 
429   Optional<Loc> firstLoc = firstVal.getAs<Loc>();
430   if (!firstLoc)
431     return state;
432 
433   Optional<Loc> secondLoc = secondVal.getAs<Loc>();
434   if (!secondLoc)
435     return state;
436 
437   // Are the two values the same?
438   SValBuilder &svalBuilder = C.getSValBuilder();
439   std::tie(stateTrue, stateFalse) =
440     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
441 
442   if (stateTrue && !stateFalse) {
443     // If the values are known to be equal, that's automatically an overlap.
444     emitOverlapBug(C, stateTrue, First, Second);
445     return nullptr;
446   }
447 
448   // assume the two expressions are not equal.
449   assert(stateFalse);
450   state = stateFalse;
451 
452   // Which value comes first?
453   QualType cmpTy = svalBuilder.getConditionType();
454   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
455                                          *firstLoc, *secondLoc, cmpTy);
456   Optional<DefinedOrUnknownSVal> reverseTest =
457       reverse.getAs<DefinedOrUnknownSVal>();
458   if (!reverseTest)
459     return state;
460 
461   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
462   if (stateTrue) {
463     if (stateFalse) {
464       // If we don't know which one comes first, we can't perform this test.
465       return state;
466     } else {
467       // Switch the values so that firstVal is before secondVal.
468       std::swap(firstLoc, secondLoc);
469 
470       // Switch the Exprs as well, so that they still correspond.
471       std::swap(First, Second);
472     }
473   }
474 
475   // Get the length, and make sure it too is known.
476   SVal LengthVal = state->getSVal(Size, LCtx);
477   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
478   if (!Length)
479     return state;
480 
481   // Convert the first buffer's start address to char*.
482   // Bail out if the cast fails.
483   ASTContext &Ctx = svalBuilder.getContext();
484   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
485   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
486                                          First->getType());
487   Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
488   if (!FirstStartLoc)
489     return state;
490 
491   // Compute the end of the first buffer. Bail out if THAT fails.
492   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
493                                  *FirstStartLoc, *Length, CharPtrTy);
494   Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
495   if (!FirstEndLoc)
496     return state;
497 
498   // Is the end of the first buffer past the start of the second buffer?
499   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
500                                 *FirstEndLoc, *secondLoc, cmpTy);
501   Optional<DefinedOrUnknownSVal> OverlapTest =
502       Overlap.getAs<DefinedOrUnknownSVal>();
503   if (!OverlapTest)
504     return state;
505 
506   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
507 
508   if (stateTrue && !stateFalse) {
509     // Overlap!
510     emitOverlapBug(C, stateTrue, First, Second);
511     return nullptr;
512   }
513 
514   // assume the two expressions don't overlap.
515   assert(stateFalse);
516   return stateFalse;
517 }
518 
519 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
520                                   const Stmt *First, const Stmt *Second) const {
521   ExplodedNode *N = C.generateErrorNode(state);
522   if (!N)
523     return;
524 
525   if (!BT_Overlap)
526     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
527                                  categories::UnixAPI, "Improper arguments"));
528 
529   // Generate a report for this bug.
530   auto report = llvm::make_unique<BugReport>(
531       *BT_Overlap, "Arguments must not be overlapping buffers", N);
532   report->addRange(First->getSourceRange());
533   report->addRange(Second->getSourceRange());
534 
535   C.emitReport(std::move(report));
536 }
537 
538 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
539                                     const Stmt *S, StringRef WarningMsg) const {
540   if (ExplodedNode *N = C.generateErrorNode(State)) {
541     if (!BT_Null)
542       BT_Null.reset(new BuiltinBug(
543           Filter.CheckNameCStringNullArg, categories::UnixAPI,
544           "Null pointer argument in call to byte string function"));
545 
546     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Null.get());
547     auto Report = llvm::make_unique<BugReport>(*BT, WarningMsg, N);
548     bugreporter::trackNullOrUndefValue(N, S, *Report);
549     C.emitReport(std::move(Report));
550   }
551 }
552 
553 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
554                                         ProgramStateRef State, const Stmt *S,
555                                         StringRef WarningMsg) const {
556   if (ExplodedNode *N = C.generateErrorNode(State)) {
557     if (!BT_Bounds)
558       BT_Bounds.reset(new BuiltinBug(
559           Filter.CheckCStringOutOfBounds ? Filter.CheckNameCStringOutOfBounds
560                                          : Filter.CheckNameCStringNullArg,
561           "Out-of-bound array access",
562           "Byte string function accesses out-of-bound array element"));
563 
564     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Bounds.get());
565 
566     // FIXME: It would be nice to eventually make this diagnostic more clear,
567     // e.g., by referencing the original declaration or by saying *why* this
568     // reference is outside the range.
569     auto Report = llvm::make_unique<BugReport>(*BT, WarningMsg, N);
570     Report->addRange(S->getSourceRange());
571     C.emitReport(std::move(Report));
572   }
573 }
574 
575 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
576                                        const Stmt *S,
577                                        StringRef WarningMsg) const {
578   if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
579     if (!BT_NotCString)
580       BT_NotCString.reset(new BuiltinBug(
581           Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
582           "Argument is not a null-terminated string."));
583 
584     auto Report = llvm::make_unique<BugReport>(*BT_NotCString, WarningMsg, N);
585 
586     Report->addRange(S->getSourceRange());
587     C.emitReport(std::move(Report));
588   }
589 }
590 
591 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
592                                              ProgramStateRef State) const {
593   if (ExplodedNode *N = C.generateErrorNode(State)) {
594     if (!BT_NotCString)
595       BT_NotCString.reset(
596           new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
597                          "Sum of expressions causes overflow."));
598 
599     // This isn't a great error message, but this should never occur in real
600     // code anyway -- you'd have to create a buffer longer than a size_t can
601     // represent, which is sort of a contradiction.
602     const char *WarningMsg =
603         "This expression will create a string whose length is too big to "
604         "be represented as a size_t";
605 
606     auto Report = llvm::make_unique<BugReport>(*BT_NotCString, WarningMsg, N);
607     C.emitReport(std::move(Report));
608   }
609 }
610 
611 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
612                                                      ProgramStateRef state,
613                                                      NonLoc left,
614                                                      NonLoc right) const {
615   // If out-of-bounds checking is turned off, skip the rest.
616   if (!Filter.CheckCStringOutOfBounds)
617     return state;
618 
619   // If a previous check has failed, propagate the failure.
620   if (!state)
621     return nullptr;
622 
623   SValBuilder &svalBuilder = C.getSValBuilder();
624   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
625 
626   QualType sizeTy = svalBuilder.getContext().getSizeType();
627   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
628   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
629 
630   SVal maxMinusRight;
631   if (right.getAs<nonloc::ConcreteInt>()) {
632     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
633                                                  sizeTy);
634   } else {
635     // Try switching the operands. (The order of these two assignments is
636     // important!)
637     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
638                                             sizeTy);
639     left = right;
640   }
641 
642   if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
643     QualType cmpTy = svalBuilder.getConditionType();
644     // If left > max - right, we have an overflow.
645     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
646                                                 *maxMinusRightNL, cmpTy);
647 
648     ProgramStateRef stateOverflow, stateOkay;
649     std::tie(stateOverflow, stateOkay) =
650       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
651 
652     if (stateOverflow && !stateOkay) {
653       // We have an overflow. Emit a bug report.
654       emitAdditionOverflowBug(C, stateOverflow);
655       return nullptr;
656     }
657 
658     // From now on, assume an overflow didn't occur.
659     assert(stateOkay);
660     state = stateOkay;
661   }
662 
663   return state;
664 }
665 
666 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
667                                                 const MemRegion *MR,
668                                                 SVal strLength) {
669   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
670 
671   MR = MR->StripCasts();
672 
673   switch (MR->getKind()) {
674   case MemRegion::StringRegionKind:
675     // FIXME: This can happen if we strcpy() into a string region. This is
676     // undefined [C99 6.4.5p6], but we should still warn about it.
677     return state;
678 
679   case MemRegion::SymbolicRegionKind:
680   case MemRegion::AllocaRegionKind:
681   case MemRegion::VarRegionKind:
682   case MemRegion::FieldRegionKind:
683   case MemRegion::ObjCIvarRegionKind:
684     // These are the types we can currently track string lengths for.
685     break;
686 
687   case MemRegion::ElementRegionKind:
688     // FIXME: Handle element regions by upper-bounding the parent region's
689     // string length.
690     return state;
691 
692   default:
693     // Other regions (mostly non-data) can't have a reliable C string length.
694     // For now, just ignore the change.
695     // FIXME: These are rare but not impossible. We should output some kind of
696     // warning for things like strcpy((char[]){'a', 0}, "b");
697     return state;
698   }
699 
700   if (strLength.isUnknown())
701     return state->remove<CStringLength>(MR);
702 
703   return state->set<CStringLength>(MR, strLength);
704 }
705 
706 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
707                                                ProgramStateRef &state,
708                                                const Expr *Ex,
709                                                const MemRegion *MR,
710                                                bool hypothetical) {
711   if (!hypothetical) {
712     // If there's a recorded length, go ahead and return it.
713     const SVal *Recorded = state->get<CStringLength>(MR);
714     if (Recorded)
715       return *Recorded;
716   }
717 
718   // Otherwise, get a new symbol and update the state.
719   SValBuilder &svalBuilder = C.getSValBuilder();
720   QualType sizeTy = svalBuilder.getContext().getSizeType();
721   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
722                                                     MR, Ex, sizeTy,
723                                                     C.getLocationContext(),
724                                                     C.blockCount());
725 
726   if (!hypothetical) {
727     if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
728       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
729       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
730       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
731       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
732       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
733                                                         fourInt);
734       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
735       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
736                                                 maxLength, sizeTy);
737       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
738     }
739     state = state->set<CStringLength>(MR, strLength);
740   }
741 
742   return strLength;
743 }
744 
745 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
746                                       const Expr *Ex, SVal Buf,
747                                       bool hypothetical) const {
748   const MemRegion *MR = Buf.getAsRegion();
749   if (!MR) {
750     // If we can't get a region, see if it's something we /know/ isn't a
751     // C string. In the context of locations, the only time we can issue such
752     // a warning is for labels.
753     if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
754       if (Filter.CheckCStringNotNullTerm) {
755         SmallString<120> buf;
756         llvm::raw_svector_ostream os(buf);
757         assert(CurrentFunctionDescription);
758         os << "Argument to " << CurrentFunctionDescription
759            << " is the address of the label '" << Label->getLabel()->getName()
760            << "', which is not a null-terminated string";
761 
762         emitNotCStringBug(C, state, Ex, os.str());
763       }
764       return UndefinedVal();
765     }
766 
767     // If it's not a region and not a label, give up.
768     return UnknownVal();
769   }
770 
771   // If we have a region, strip casts from it and see if we can figure out
772   // its length. For anything we can't figure out, just return UnknownVal.
773   MR = MR->StripCasts();
774 
775   switch (MR->getKind()) {
776   case MemRegion::StringRegionKind: {
777     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
778     // so we can assume that the byte length is the correct C string length.
779     SValBuilder &svalBuilder = C.getSValBuilder();
780     QualType sizeTy = svalBuilder.getContext().getSizeType();
781     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
782     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
783   }
784   case MemRegion::SymbolicRegionKind:
785   case MemRegion::AllocaRegionKind:
786   case MemRegion::VarRegionKind:
787   case MemRegion::FieldRegionKind:
788   case MemRegion::ObjCIvarRegionKind:
789     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
790   case MemRegion::CompoundLiteralRegionKind:
791     // FIXME: Can we track this? Is it necessary?
792     return UnknownVal();
793   case MemRegion::ElementRegionKind:
794     // FIXME: How can we handle this? It's not good enough to subtract the
795     // offset from the base string length; consider "123\x00567" and &a[5].
796     return UnknownVal();
797   default:
798     // Other regions (mostly non-data) can't have a reliable C string length.
799     // In this case, an error is emitted and UndefinedVal is returned.
800     // The caller should always be prepared to handle this case.
801     if (Filter.CheckCStringNotNullTerm) {
802       SmallString<120> buf;
803       llvm::raw_svector_ostream os(buf);
804 
805       assert(CurrentFunctionDescription);
806       os << "Argument to " << CurrentFunctionDescription << " is ";
807 
808       if (SummarizeRegion(os, C.getASTContext(), MR))
809         os << ", which is not a null-terminated string";
810       else
811         os << "not a null-terminated string";
812 
813       emitNotCStringBug(C, state, Ex, os.str());
814     }
815     return UndefinedVal();
816   }
817 }
818 
819 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
820   ProgramStateRef &state, const Expr *expr, SVal val) const {
821 
822   // Get the memory region pointed to by the val.
823   const MemRegion *bufRegion = val.getAsRegion();
824   if (!bufRegion)
825     return nullptr;
826 
827   // Strip casts off the memory region.
828   bufRegion = bufRegion->StripCasts();
829 
830   // Cast the memory region to a string region.
831   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
832   if (!strRegion)
833     return nullptr;
834 
835   // Return the actual string in the string region.
836   return strRegion->getStringLiteral();
837 }
838 
839 bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
840                                        ProgramStateRef state,
841                                        const Expr *FirstBuf,
842                                        const Expr *Size) {
843   // If we do not know that the buffer is long enough we return 'true'.
844   // Otherwise the parent region of this field region would also get
845   // invalidated, which would lead to warnings based on an unknown state.
846 
847   // Originally copied from CheckBufferAccess and CheckLocation.
848   SValBuilder &svalBuilder = C.getSValBuilder();
849   ASTContext &Ctx = svalBuilder.getContext();
850   const LocationContext *LCtx = C.getLocationContext();
851 
852   QualType sizeTy = Size->getType();
853   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
854   SVal BufVal = state->getSVal(FirstBuf, LCtx);
855 
856   SVal LengthVal = state->getSVal(Size, LCtx);
857   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
858   if (!Length)
859     return true; // cf top comment.
860 
861   // Compute the offset of the last element to be accessed: size-1.
862   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
863   SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy);
864   if (Offset.isUnknown())
865     return true; // cf top comment
866   NonLoc LastOffset = Offset.castAs<NonLoc>();
867 
868   // Check that the first buffer is sufficiently long.
869   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
870   Optional<Loc> BufLoc = BufStart.getAs<Loc>();
871   if (!BufLoc)
872     return true; // cf top comment.
873 
874   SVal BufEnd =
875       svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy);
876 
877   // Check for out of bound array element access.
878   const MemRegion *R = BufEnd.getAsRegion();
879   if (!R)
880     return true; // cf top comment.
881 
882   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
883   if (!ER)
884     return true; // cf top comment.
885 
886   // FIXME: Does this crash when a non-standard definition
887   // of a library function is encountered?
888   assert(ER->getValueType() == C.getASTContext().CharTy &&
889          "IsFirstBufInBound should only be called with char* ElementRegions");
890 
891   // Get the size of the array.
892   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
893   SVal Extent =
894       svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
895   DefinedOrUnknownSVal ExtentSize = Extent.castAs<DefinedOrUnknownSVal>();
896 
897   // Get the index of the accessed element.
898   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
899 
900   ProgramStateRef StInBound = state->assumeInBound(Idx, ExtentSize, true);
901 
902   return static_cast<bool>(StInBound);
903 }
904 
905 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
906                                                  ProgramStateRef state,
907                                                  const Expr *E, SVal V,
908                                                  bool IsSourceBuffer,
909                                                  const Expr *Size) {
910   Optional<Loc> L = V.getAs<Loc>();
911   if (!L)
912     return state;
913 
914   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
915   // some assumptions about the value that CFRefCount can't. Even so, it should
916   // probably be refactored.
917   if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
918     const MemRegion *R = MR->getRegion()->StripCasts();
919 
920     // Are we dealing with an ElementRegion?  If so, we should be invalidating
921     // the super-region.
922     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
923       R = ER->getSuperRegion();
924       // FIXME: What about layers of ElementRegions?
925     }
926 
927     // Invalidate this region.
928     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
929 
930     bool CausesPointerEscape = false;
931     RegionAndSymbolInvalidationTraits ITraits;
932     // Invalidate and escape only indirect regions accessible through the source
933     // buffer.
934     if (IsSourceBuffer) {
935       ITraits.setTrait(R->getBaseRegion(),
936                        RegionAndSymbolInvalidationTraits::TK_PreserveContents);
937       ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
938       CausesPointerEscape = true;
939     } else {
940       const MemRegion::Kind& K = R->getKind();
941       if (K == MemRegion::FieldRegionKind)
942         if (Size && IsFirstBufInBound(C, state, E, Size)) {
943           // If destination buffer is a field region and access is in bound,
944           // do not invalidate its super region.
945           ITraits.setTrait(
946               R,
947               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
948         }
949     }
950 
951     return state->invalidateRegions(R, E, C.blockCount(), LCtx,
952                                     CausesPointerEscape, nullptr, nullptr,
953                                     &ITraits);
954   }
955 
956   // If we have a non-region value by chance, just remove the binding.
957   // FIXME: is this necessary or correct? This handles the non-Region
958   //  cases.  Is it ever valid to store to these?
959   return state->killBinding(*L);
960 }
961 
962 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
963                                      const MemRegion *MR) {
964   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
965 
966   switch (MR->getKind()) {
967   case MemRegion::FunctionCodeRegionKind: {
968     const NamedDecl *FD = cast<FunctionCodeRegion>(MR)->getDecl();
969     if (FD)
970       os << "the address of the function '" << *FD << '\'';
971     else
972       os << "the address of a function";
973     return true;
974   }
975   case MemRegion::BlockCodeRegionKind:
976     os << "block text";
977     return true;
978   case MemRegion::BlockDataRegionKind:
979     os << "a block";
980     return true;
981   case MemRegion::CXXThisRegionKind:
982   case MemRegion::CXXTempObjectRegionKind:
983     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
984     return true;
985   case MemRegion::VarRegionKind:
986     os << "a variable of type" << TVR->getValueType().getAsString();
987     return true;
988   case MemRegion::FieldRegionKind:
989     os << "a field of type " << TVR->getValueType().getAsString();
990     return true;
991   case MemRegion::ObjCIvarRegionKind:
992     os << "an instance variable of type " << TVR->getValueType().getAsString();
993     return true;
994   default:
995     return false;
996   }
997 }
998 
999 //===----------------------------------------------------------------------===//
1000 // evaluation of individual function calls.
1001 //===----------------------------------------------------------------------===//
1002 
1003 void CStringChecker::evalCopyCommon(CheckerContext &C,
1004                                     const CallExpr *CE,
1005                                     ProgramStateRef state,
1006                                     const Expr *Size, const Expr *Dest,
1007                                     const Expr *Source, bool Restricted,
1008                                     bool IsMempcpy) const {
1009   CurrentFunctionDescription = "memory copy function";
1010 
1011   // See if the size argument is zero.
1012   const LocationContext *LCtx = C.getLocationContext();
1013   SVal sizeVal = state->getSVal(Size, LCtx);
1014   QualType sizeTy = Size->getType();
1015 
1016   ProgramStateRef stateZeroSize, stateNonZeroSize;
1017   std::tie(stateZeroSize, stateNonZeroSize) =
1018     assumeZero(C, state, sizeVal, sizeTy);
1019 
1020   // Get the value of the Dest.
1021   SVal destVal = state->getSVal(Dest, LCtx);
1022 
1023   // If the size is zero, there won't be any actual memory access, so
1024   // just bind the return value to the destination buffer and return.
1025   if (stateZeroSize && !stateNonZeroSize) {
1026     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
1027     C.addTransition(stateZeroSize);
1028     return;
1029   }
1030 
1031   // If the size can be nonzero, we have to check the other arguments.
1032   if (stateNonZeroSize) {
1033     state = stateNonZeroSize;
1034 
1035     // Ensure the destination is not null. If it is NULL there will be a
1036     // NULL pointer dereference.
1037     state = checkNonNull(C, state, Dest, destVal);
1038     if (!state)
1039       return;
1040 
1041     // Get the value of the Src.
1042     SVal srcVal = state->getSVal(Source, LCtx);
1043 
1044     // Ensure the source is not null. If it is NULL there will be a
1045     // NULL pointer dereference.
1046     state = checkNonNull(C, state, Source, srcVal);
1047     if (!state)
1048       return;
1049 
1050     // Ensure the accesses are valid and that the buffers do not overlap.
1051     const char * const writeWarning =
1052       "Memory copy function overflows destination buffer";
1053     state = CheckBufferAccess(C, state, Size, Dest, Source,
1054                               writeWarning, /* sourceWarning = */ nullptr);
1055     if (Restricted)
1056       state = CheckOverlap(C, state, Size, Dest, Source);
1057 
1058     if (!state)
1059       return;
1060 
1061     // If this is mempcpy, get the byte after the last byte copied and
1062     // bind the expr.
1063     if (IsMempcpy) {
1064       // Get the byte after the last byte copied.
1065       SValBuilder &SvalBuilder = C.getSValBuilder();
1066       ASTContext &Ctx = SvalBuilder.getContext();
1067       QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
1068       SVal DestRegCharVal =
1069           SvalBuilder.evalCast(destVal, CharPtrTy, Dest->getType());
1070       SVal lastElement = C.getSValBuilder().evalBinOp(
1071           state, BO_Add, DestRegCharVal, sizeVal, Dest->getType());
1072       // If we don't know how much we copied, we can at least
1073       // conjure a return value for later.
1074       if (lastElement.isUnknown())
1075         lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1076                                                           C.blockCount());
1077 
1078       // The byte after the last byte copied is the return value.
1079       state = state->BindExpr(CE, LCtx, lastElement);
1080     } else {
1081       // All other copies return the destination buffer.
1082       // (Well, bcopy() has a void return type, but this won't hurt.)
1083       state = state->BindExpr(CE, LCtx, destVal);
1084     }
1085 
1086     // Invalidate the destination (regular invalidation without pointer-escaping
1087     // the address of the top-level region).
1088     // FIXME: Even if we can't perfectly model the copy, we should see if we
1089     // can use LazyCompoundVals to copy the source values into the destination.
1090     // This would probably remove any existing bindings past the end of the
1091     // copied region, but that's still an improvement over blank invalidation.
1092     state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1093                              /*IsSourceBuffer*/false, Size);
1094 
1095     // Invalidate the source (const-invalidation without const-pointer-escaping
1096     // the address of the top-level region).
1097     state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1098                              /*IsSourceBuffer*/true, nullptr);
1099 
1100     C.addTransition(state);
1101   }
1102 }
1103 
1104 
1105 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1106   if (CE->getNumArgs() < 3)
1107     return;
1108 
1109   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1110   // The return value is the address of the destination buffer.
1111   const Expr *Dest = CE->getArg(0);
1112   ProgramStateRef state = C.getState();
1113 
1114   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1115 }
1116 
1117 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1118   if (CE->getNumArgs() < 3)
1119     return;
1120 
1121   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1122   // The return value is a pointer to the byte following the last written byte.
1123   const Expr *Dest = CE->getArg(0);
1124   ProgramStateRef state = C.getState();
1125 
1126   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1127 }
1128 
1129 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1130   if (CE->getNumArgs() < 3)
1131     return;
1132 
1133   // void *memmove(void *dst, const void *src, size_t n);
1134   // The return value is the address of the destination buffer.
1135   const Expr *Dest = CE->getArg(0);
1136   ProgramStateRef state = C.getState();
1137 
1138   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1139 }
1140 
1141 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1142   if (CE->getNumArgs() < 3)
1143     return;
1144 
1145   // void bcopy(const void *src, void *dst, size_t n);
1146   evalCopyCommon(C, CE, C.getState(),
1147                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1148 }
1149 
1150 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1151   if (CE->getNumArgs() < 3)
1152     return;
1153 
1154   // int memcmp(const void *s1, const void *s2, size_t n);
1155   CurrentFunctionDescription = "memory comparison function";
1156 
1157   const Expr *Left = CE->getArg(0);
1158   const Expr *Right = CE->getArg(1);
1159   const Expr *Size = CE->getArg(2);
1160 
1161   ProgramStateRef state = C.getState();
1162   SValBuilder &svalBuilder = C.getSValBuilder();
1163 
1164   // See if the size argument is zero.
1165   const LocationContext *LCtx = C.getLocationContext();
1166   SVal sizeVal = state->getSVal(Size, LCtx);
1167   QualType sizeTy = Size->getType();
1168 
1169   ProgramStateRef stateZeroSize, stateNonZeroSize;
1170   std::tie(stateZeroSize, stateNonZeroSize) =
1171     assumeZero(C, state, sizeVal, sizeTy);
1172 
1173   // If the size can be zero, the result will be 0 in that case, and we don't
1174   // have to check either of the buffers.
1175   if (stateZeroSize) {
1176     state = stateZeroSize;
1177     state = state->BindExpr(CE, LCtx,
1178                             svalBuilder.makeZeroVal(CE->getType()));
1179     C.addTransition(state);
1180   }
1181 
1182   // If the size can be nonzero, we have to check the other arguments.
1183   if (stateNonZeroSize) {
1184     state = stateNonZeroSize;
1185     // If we know the two buffers are the same, we know the result is 0.
1186     // First, get the two buffers' addresses. Another checker will have already
1187     // made sure they're not undefined.
1188     DefinedOrUnknownSVal LV =
1189         state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1190     DefinedOrUnknownSVal RV =
1191         state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1192 
1193     // See if they are the same.
1194     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1195     ProgramStateRef StSameBuf, StNotSameBuf;
1196     std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1197 
1198     // If the two arguments might be the same buffer, we know the result is 0,
1199     // and we only need to check one size.
1200     if (StSameBuf) {
1201       state = StSameBuf;
1202       state = CheckBufferAccess(C, state, Size, Left);
1203       if (state) {
1204         state = StSameBuf->BindExpr(CE, LCtx,
1205                                     svalBuilder.makeZeroVal(CE->getType()));
1206         C.addTransition(state);
1207       }
1208     }
1209 
1210     // If the two arguments might be different buffers, we have to check the
1211     // size of both of them.
1212     if (StNotSameBuf) {
1213       state = StNotSameBuf;
1214       state = CheckBufferAccess(C, state, Size, Left, Right);
1215       if (state) {
1216         // The return value is the comparison result, which we don't know.
1217         SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1218                                                  C.blockCount());
1219         state = state->BindExpr(CE, LCtx, CmpV);
1220         C.addTransition(state);
1221       }
1222     }
1223   }
1224 }
1225 
1226 void CStringChecker::evalstrLength(CheckerContext &C,
1227                                    const CallExpr *CE) const {
1228   if (CE->getNumArgs() < 1)
1229     return;
1230 
1231   // size_t strlen(const char *s);
1232   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1233 }
1234 
1235 void CStringChecker::evalstrnLength(CheckerContext &C,
1236                                     const CallExpr *CE) const {
1237   if (CE->getNumArgs() < 2)
1238     return;
1239 
1240   // size_t strnlen(const char *s, size_t maxlen);
1241   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1242 }
1243 
1244 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1245                                          bool IsStrnlen) const {
1246   CurrentFunctionDescription = "string length function";
1247   ProgramStateRef state = C.getState();
1248   const LocationContext *LCtx = C.getLocationContext();
1249 
1250   if (IsStrnlen) {
1251     const Expr *maxlenExpr = CE->getArg(1);
1252     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1253 
1254     ProgramStateRef stateZeroSize, stateNonZeroSize;
1255     std::tie(stateZeroSize, stateNonZeroSize) =
1256       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1257 
1258     // If the size can be zero, the result will be 0 in that case, and we don't
1259     // have to check the string itself.
1260     if (stateZeroSize) {
1261       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1262       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1263       C.addTransition(stateZeroSize);
1264     }
1265 
1266     // If the size is GUARANTEED to be zero, we're done!
1267     if (!stateNonZeroSize)
1268       return;
1269 
1270     // Otherwise, record the assumption that the size is nonzero.
1271     state = stateNonZeroSize;
1272   }
1273 
1274   // Check that the string argument is non-null.
1275   const Expr *Arg = CE->getArg(0);
1276   SVal ArgVal = state->getSVal(Arg, LCtx);
1277 
1278   state = checkNonNull(C, state, Arg, ArgVal);
1279 
1280   if (!state)
1281     return;
1282 
1283   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1284 
1285   // If the argument isn't a valid C string, there's no valid state to
1286   // transition to.
1287   if (strLength.isUndef())
1288     return;
1289 
1290   DefinedOrUnknownSVal result = UnknownVal();
1291 
1292   // If the check is for strnlen() then bind the return value to no more than
1293   // the maxlen value.
1294   if (IsStrnlen) {
1295     QualType cmpTy = C.getSValBuilder().getConditionType();
1296 
1297     // It's a little unfortunate to be getting this again,
1298     // but it's not that expensive...
1299     const Expr *maxlenExpr = CE->getArg(1);
1300     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1301 
1302     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1303     Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1304 
1305     if (strLengthNL && maxlenValNL) {
1306       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1307 
1308       // Check if the strLength is greater than the maxlen.
1309       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1310           C.getSValBuilder()
1311               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1312               .castAs<DefinedOrUnknownSVal>());
1313 
1314       if (stateStringTooLong && !stateStringNotTooLong) {
1315         // If the string is longer than maxlen, return maxlen.
1316         result = *maxlenValNL;
1317       } else if (stateStringNotTooLong && !stateStringTooLong) {
1318         // If the string is shorter than maxlen, return its length.
1319         result = *strLengthNL;
1320       }
1321     }
1322 
1323     if (result.isUnknown()) {
1324       // If we don't have enough information for a comparison, there's
1325       // no guarantee the full string length will actually be returned.
1326       // All we know is the return value is the min of the string length
1327       // and the limit. This is better than nothing.
1328       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1329                                                    C.blockCount());
1330       NonLoc resultNL = result.castAs<NonLoc>();
1331 
1332       if (strLengthNL) {
1333         state = state->assume(C.getSValBuilder().evalBinOpNN(
1334                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1335                                   .castAs<DefinedOrUnknownSVal>(), true);
1336       }
1337 
1338       if (maxlenValNL) {
1339         state = state->assume(C.getSValBuilder().evalBinOpNN(
1340                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1341                                   .castAs<DefinedOrUnknownSVal>(), true);
1342       }
1343     }
1344 
1345   } else {
1346     // This is a plain strlen(), not strnlen().
1347     result = strLength.castAs<DefinedOrUnknownSVal>();
1348 
1349     // If we don't know the length of the string, conjure a return
1350     // value, so it can be used in constraints, at least.
1351     if (result.isUnknown()) {
1352       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1353                                                    C.blockCount());
1354     }
1355   }
1356 
1357   // Bind the return value.
1358   assert(!result.isUnknown() && "Should have conjured a value by now");
1359   state = state->BindExpr(CE, LCtx, result);
1360   C.addTransition(state);
1361 }
1362 
1363 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1364   if (CE->getNumArgs() < 2)
1365     return;
1366 
1367   // char *strcpy(char *restrict dst, const char *restrict src);
1368   evalStrcpyCommon(C, CE,
1369                    /* returnEnd = */ false,
1370                    /* isBounded = */ false,
1371                    /* isAppending = */ false);
1372 }
1373 
1374 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1375   if (CE->getNumArgs() < 3)
1376     return;
1377 
1378   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1379   evalStrcpyCommon(C, CE,
1380                    /* returnEnd = */ false,
1381                    /* isBounded = */ true,
1382                    /* isAppending = */ false);
1383 }
1384 
1385 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1386   if (CE->getNumArgs() < 2)
1387     return;
1388 
1389   // char *stpcpy(char *restrict dst, const char *restrict src);
1390   evalStrcpyCommon(C, CE,
1391                    /* returnEnd = */ true,
1392                    /* isBounded = */ false,
1393                    /* isAppending = */ false);
1394 }
1395 
1396 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1397   if (CE->getNumArgs() < 2)
1398     return;
1399 
1400   //char *strcat(char *restrict s1, const char *restrict s2);
1401   evalStrcpyCommon(C, CE,
1402                    /* returnEnd = */ false,
1403                    /* isBounded = */ false,
1404                    /* isAppending = */ true);
1405 }
1406 
1407 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1408   if (CE->getNumArgs() < 3)
1409     return;
1410 
1411   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1412   evalStrcpyCommon(C, CE,
1413                    /* returnEnd = */ false,
1414                    /* isBounded = */ true,
1415                    /* isAppending = */ true);
1416 }
1417 
1418 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1419                                       bool returnEnd, bool isBounded,
1420                                       bool isAppending) const {
1421   CurrentFunctionDescription = "string copy function";
1422   ProgramStateRef state = C.getState();
1423   const LocationContext *LCtx = C.getLocationContext();
1424 
1425   // Check that the destination is non-null.
1426   const Expr *Dst = CE->getArg(0);
1427   SVal DstVal = state->getSVal(Dst, LCtx);
1428 
1429   state = checkNonNull(C, state, Dst, DstVal);
1430   if (!state)
1431     return;
1432 
1433   // Check that the source is non-null.
1434   const Expr *srcExpr = CE->getArg(1);
1435   SVal srcVal = state->getSVal(srcExpr, LCtx);
1436   state = checkNonNull(C, state, srcExpr, srcVal);
1437   if (!state)
1438     return;
1439 
1440   // Get the string length of the source.
1441   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1442 
1443   // If the source isn't a valid C string, give up.
1444   if (strLength.isUndef())
1445     return;
1446 
1447   SValBuilder &svalBuilder = C.getSValBuilder();
1448   QualType cmpTy = svalBuilder.getConditionType();
1449   QualType sizeTy = svalBuilder.getContext().getSizeType();
1450 
1451   // These two values allow checking two kinds of errors:
1452   // - actual overflows caused by a source that doesn't fit in the destination
1453   // - potential overflows caused by a bound that could exceed the destination
1454   SVal amountCopied = UnknownVal();
1455   SVal maxLastElementIndex = UnknownVal();
1456   const char *boundWarning = nullptr;
1457 
1458   // If the function is strncpy, strncat, etc... it is bounded.
1459   if (isBounded) {
1460     // Get the max number of characters to copy.
1461     const Expr *lenExpr = CE->getArg(2);
1462     SVal lenVal = state->getSVal(lenExpr, LCtx);
1463 
1464     // Protect against misdeclared strncpy().
1465     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1466 
1467     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1468     Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1469 
1470     // If we know both values, we might be able to figure out how much
1471     // we're copying.
1472     if (strLengthNL && lenValNL) {
1473       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1474 
1475       // Check if the max number to copy is less than the length of the src.
1476       // If the bound is equal to the source length, strncpy won't null-
1477       // terminate the result!
1478       std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1479           svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1480               .castAs<DefinedOrUnknownSVal>());
1481 
1482       if (stateSourceTooLong && !stateSourceNotTooLong) {
1483         // Max number to copy is less than the length of the src, so the actual
1484         // strLength copied is the max number arg.
1485         state = stateSourceTooLong;
1486         amountCopied = lenVal;
1487 
1488       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1489         // The source buffer entirely fits in the bound.
1490         state = stateSourceNotTooLong;
1491         amountCopied = strLength;
1492       }
1493     }
1494 
1495     // We still want to know if the bound is known to be too large.
1496     if (lenValNL) {
1497       if (isAppending) {
1498         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1499 
1500         // Get the string length of the destination. If the destination is
1501         // memory that can't have a string length, we shouldn't be copying
1502         // into it anyway.
1503         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1504         if (dstStrLength.isUndef())
1505           return;
1506 
1507         if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1508           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1509                                                         *lenValNL,
1510                                                         *dstStrLengthNL,
1511                                                         sizeTy);
1512           boundWarning = "Size argument is greater than the free space in the "
1513                          "destination buffer";
1514         }
1515 
1516       } else {
1517         // For strncpy, this is just checking that lenVal <= sizeof(dst)
1518         // (Yes, strncpy and strncat differ in how they treat termination.
1519         // strncat ALWAYS terminates, but strncpy doesn't.)
1520 
1521         // We need a special case for when the copy size is zero, in which
1522         // case strncpy will do no work at all. Our bounds check uses n-1
1523         // as the last element accessed, so n == 0 is problematic.
1524         ProgramStateRef StateZeroSize, StateNonZeroSize;
1525         std::tie(StateZeroSize, StateNonZeroSize) =
1526           assumeZero(C, state, *lenValNL, sizeTy);
1527 
1528         // If the size is known to be zero, we're done.
1529         if (StateZeroSize && !StateNonZeroSize) {
1530           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1531           C.addTransition(StateZeroSize);
1532           return;
1533         }
1534 
1535         // Otherwise, go ahead and figure out the last element we'll touch.
1536         // We don't record the non-zero assumption here because we can't
1537         // be sure. We won't warn on a possible zero.
1538         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1539         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1540                                                       one, sizeTy);
1541         boundWarning = "Size argument is greater than the length of the "
1542                        "destination buffer";
1543       }
1544     }
1545 
1546     // If we couldn't pin down the copy length, at least bound it.
1547     // FIXME: We should actually run this code path for append as well, but
1548     // right now it creates problems with constraints (since we can end up
1549     // trying to pass constraints from symbol to symbol).
1550     if (amountCopied.isUnknown() && !isAppending) {
1551       // Try to get a "hypothetical" string length symbol, which we can later
1552       // set as a real value if that turns out to be the case.
1553       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1554       assert(!amountCopied.isUndef());
1555 
1556       if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1557         if (lenValNL) {
1558           // amountCopied <= lenVal
1559           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1560                                                              *amountCopiedNL,
1561                                                              *lenValNL,
1562                                                              cmpTy);
1563           state = state->assume(
1564               copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1565           if (!state)
1566             return;
1567         }
1568 
1569         if (strLengthNL) {
1570           // amountCopied <= strlen(source)
1571           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1572                                                            *amountCopiedNL,
1573                                                            *strLengthNL,
1574                                                            cmpTy);
1575           state = state->assume(
1576               copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1577           if (!state)
1578             return;
1579         }
1580       }
1581     }
1582 
1583   } else {
1584     // The function isn't bounded. The amount copied should match the length
1585     // of the source buffer.
1586     amountCopied = strLength;
1587   }
1588 
1589   assert(state);
1590 
1591   // This represents the number of characters copied into the destination
1592   // buffer. (It may not actually be the strlen if the destination buffer
1593   // is not terminated.)
1594   SVal finalStrLength = UnknownVal();
1595 
1596   // If this is an appending function (strcat, strncat...) then set the
1597   // string length to strlen(src) + strlen(dst) since the buffer will
1598   // ultimately contain both.
1599   if (isAppending) {
1600     // Get the string length of the destination. If the destination is memory
1601     // that can't have a string length, we shouldn't be copying into it anyway.
1602     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1603     if (dstStrLength.isUndef())
1604       return;
1605 
1606     Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1607     Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1608 
1609     // If we know both string lengths, we might know the final string length.
1610     if (srcStrLengthNL && dstStrLengthNL) {
1611       // Make sure the two lengths together don't overflow a size_t.
1612       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1613       if (!state)
1614         return;
1615 
1616       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1617                                                *dstStrLengthNL, sizeTy);
1618     }
1619 
1620     // If we couldn't get a single value for the final string length,
1621     // we can at least bound it by the individual lengths.
1622     if (finalStrLength.isUnknown()) {
1623       // Try to get a "hypothetical" string length symbol, which we can later
1624       // set as a real value if that turns out to be the case.
1625       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1626       assert(!finalStrLength.isUndef());
1627 
1628       if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1629         if (srcStrLengthNL) {
1630           // finalStrLength >= srcStrLength
1631           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1632                                                         *finalStrLengthNL,
1633                                                         *srcStrLengthNL,
1634                                                         cmpTy);
1635           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1636                                 true);
1637           if (!state)
1638             return;
1639         }
1640 
1641         if (dstStrLengthNL) {
1642           // finalStrLength >= dstStrLength
1643           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1644                                                       *finalStrLengthNL,
1645                                                       *dstStrLengthNL,
1646                                                       cmpTy);
1647           state =
1648               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1649           if (!state)
1650             return;
1651         }
1652       }
1653     }
1654 
1655   } else {
1656     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1657     // the final string length will match the input string length.
1658     finalStrLength = amountCopied;
1659   }
1660 
1661   // The final result of the function will either be a pointer past the last
1662   // copied element, or a pointer to the start of the destination buffer.
1663   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1664 
1665   assert(state);
1666 
1667   // If the destination is a MemRegion, try to check for a buffer overflow and
1668   // record the new string length.
1669   if (Optional<loc::MemRegionVal> dstRegVal =
1670           DstVal.getAs<loc::MemRegionVal>()) {
1671     QualType ptrTy = Dst->getType();
1672 
1673     // If we have an exact value on a bounded copy, use that to check for
1674     // overflows, rather than our estimate about how much is actually copied.
1675     if (boundWarning) {
1676       if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1677         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1678                                                       *maxLastNL, ptrTy);
1679         state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1680                               boundWarning);
1681         if (!state)
1682           return;
1683       }
1684     }
1685 
1686     // Then, if the final length is known...
1687     if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1688       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1689                                                  *knownStrLength, ptrTy);
1690 
1691       // ...and we haven't checked the bound, we'll check the actual copy.
1692       if (!boundWarning) {
1693         const char * const warningMsg =
1694           "String copy function overflows destination buffer";
1695         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1696         if (!state)
1697           return;
1698       }
1699 
1700       // If this is a stpcpy-style copy, the last element is the return value.
1701       if (returnEnd)
1702         Result = lastElement;
1703     }
1704 
1705     // Invalidate the destination (regular invalidation without pointer-escaping
1706     // the address of the top-level region). This must happen before we set the
1707     // C string length because invalidation will clear the length.
1708     // FIXME: Even if we can't perfectly model the copy, we should see if we
1709     // can use LazyCompoundVals to copy the source values into the destination.
1710     // This would probably remove any existing bindings past the end of the
1711     // string, but that's still an improvement over blank invalidation.
1712     state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1713                              /*IsSourceBuffer*/false, nullptr);
1714 
1715     // Invalidate the source (const-invalidation without const-pointer-escaping
1716     // the address of the top-level region).
1717     state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true,
1718                              nullptr);
1719 
1720     // Set the C string length of the destination, if we know it.
1721     if (isBounded && !isAppending) {
1722       // strncpy is annoying in that it doesn't guarantee to null-terminate
1723       // the result string. If the original string didn't fit entirely inside
1724       // the bound (including the null-terminator), we don't know how long the
1725       // result is.
1726       if (amountCopied != strLength)
1727         finalStrLength = UnknownVal();
1728     }
1729     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1730   }
1731 
1732   assert(state);
1733 
1734   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1735   // overflow, we still need a result. Conjure a return value.
1736   if (returnEnd && Result.isUnknown()) {
1737     Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1738   }
1739 
1740   // Set the return value.
1741   state = state->BindExpr(CE, LCtx, Result);
1742   C.addTransition(state);
1743 }
1744 
1745 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1746   if (CE->getNumArgs() < 2)
1747     return;
1748 
1749   //int strcmp(const char *s1, const char *s2);
1750   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1751 }
1752 
1753 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1754   if (CE->getNumArgs() < 3)
1755     return;
1756 
1757   //int strncmp(const char *s1, const char *s2, size_t n);
1758   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1759 }
1760 
1761 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1762                                     const CallExpr *CE) const {
1763   if (CE->getNumArgs() < 2)
1764     return;
1765 
1766   //int strcasecmp(const char *s1, const char *s2);
1767   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1768 }
1769 
1770 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1771                                      const CallExpr *CE) const {
1772   if (CE->getNumArgs() < 3)
1773     return;
1774 
1775   //int strncasecmp(const char *s1, const char *s2, size_t n);
1776   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1777 }
1778 
1779 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1780                                       bool isBounded, bool ignoreCase) const {
1781   CurrentFunctionDescription = "string comparison function";
1782   ProgramStateRef state = C.getState();
1783   const LocationContext *LCtx = C.getLocationContext();
1784 
1785   // Check that the first string is non-null
1786   const Expr *s1 = CE->getArg(0);
1787   SVal s1Val = state->getSVal(s1, LCtx);
1788   state = checkNonNull(C, state, s1, s1Val);
1789   if (!state)
1790     return;
1791 
1792   // Check that the second string is non-null.
1793   const Expr *s2 = CE->getArg(1);
1794   SVal s2Val = state->getSVal(s2, LCtx);
1795   state = checkNonNull(C, state, s2, s2Val);
1796   if (!state)
1797     return;
1798 
1799   // Get the string length of the first string or give up.
1800   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1801   if (s1Length.isUndef())
1802     return;
1803 
1804   // Get the string length of the second string or give up.
1805   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1806   if (s2Length.isUndef())
1807     return;
1808 
1809   // If we know the two buffers are the same, we know the result is 0.
1810   // First, get the two buffers' addresses. Another checker will have already
1811   // made sure they're not undefined.
1812   DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1813   DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1814 
1815   // See if they are the same.
1816   SValBuilder &svalBuilder = C.getSValBuilder();
1817   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1818   ProgramStateRef StSameBuf, StNotSameBuf;
1819   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1820 
1821   // If the two arguments might be the same buffer, we know the result is 0,
1822   // and we only need to check one size.
1823   if (StSameBuf) {
1824     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1825                                     svalBuilder.makeZeroVal(CE->getType()));
1826     C.addTransition(StSameBuf);
1827 
1828     // If the two arguments are GUARANTEED to be the same, we're done!
1829     if (!StNotSameBuf)
1830       return;
1831   }
1832 
1833   assert(StNotSameBuf);
1834   state = StNotSameBuf;
1835 
1836   // At this point we can go about comparing the two buffers.
1837   // For now, we only do this if they're both known string literals.
1838 
1839   // Attempt to extract string literals from both expressions.
1840   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1841   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1842   bool canComputeResult = false;
1843   SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1844                                                 C.blockCount());
1845 
1846   if (s1StrLiteral && s2StrLiteral) {
1847     StringRef s1StrRef = s1StrLiteral->getString();
1848     StringRef s2StrRef = s2StrLiteral->getString();
1849 
1850     if (isBounded) {
1851       // Get the max number of characters to compare.
1852       const Expr *lenExpr = CE->getArg(2);
1853       SVal lenVal = state->getSVal(lenExpr, LCtx);
1854 
1855       // If the length is known, we can get the right substrings.
1856       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1857         // Create substrings of each to compare the prefix.
1858         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1859         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1860         canComputeResult = true;
1861       }
1862     } else {
1863       // This is a normal, unbounded strcmp.
1864       canComputeResult = true;
1865     }
1866 
1867     if (canComputeResult) {
1868       // Real strcmp stops at null characters.
1869       size_t s1Term = s1StrRef.find('\0');
1870       if (s1Term != StringRef::npos)
1871         s1StrRef = s1StrRef.substr(0, s1Term);
1872 
1873       size_t s2Term = s2StrRef.find('\0');
1874       if (s2Term != StringRef::npos)
1875         s2StrRef = s2StrRef.substr(0, s2Term);
1876 
1877       // Use StringRef's comparison methods to compute the actual result.
1878       int compareRes = ignoreCase ? s1StrRef.compare_lower(s2StrRef)
1879                                   : s1StrRef.compare(s2StrRef);
1880 
1881       // The strcmp function returns an integer greater than, equal to, or less
1882       // than zero, [c11, p7.24.4.2].
1883       if (compareRes == 0) {
1884         resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
1885       }
1886       else {
1887         DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
1888         // Constrain strcmp's result range based on the result of StringRef's
1889         // comparison methods.
1890         BinaryOperatorKind op = (compareRes == 1) ? BO_GT : BO_LT;
1891         SVal compareWithZero =
1892           svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
1893                                 svalBuilder.getConditionType());
1894         DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
1895         state = state->assume(compareWithZeroVal, true);
1896       }
1897     }
1898   }
1899 
1900   state = state->BindExpr(CE, LCtx, resultVal);
1901 
1902   // Record this as a possible path.
1903   C.addTransition(state);
1904 }
1905 
1906 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1907   //char *strsep(char **stringp, const char *delim);
1908   if (CE->getNumArgs() < 2)
1909     return;
1910 
1911   // Sanity: does the search string parameter match the return type?
1912   const Expr *SearchStrPtr = CE->getArg(0);
1913   QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1914   if (CharPtrTy.isNull() ||
1915       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1916     return;
1917 
1918   CurrentFunctionDescription = "strsep()";
1919   ProgramStateRef State = C.getState();
1920   const LocationContext *LCtx = C.getLocationContext();
1921 
1922   // Check that the search string pointer is non-null (though it may point to
1923   // a null string).
1924   SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1925   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1926   if (!State)
1927     return;
1928 
1929   // Check that the delimiter string is non-null.
1930   const Expr *DelimStr = CE->getArg(1);
1931   SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1932   State = checkNonNull(C, State, DelimStr, DelimStrVal);
1933   if (!State)
1934     return;
1935 
1936   SValBuilder &SVB = C.getSValBuilder();
1937   SVal Result;
1938   if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1939     // Get the current value of the search string pointer, as a char*.
1940     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1941 
1942     // Invalidate the search string, representing the change of one delimiter
1943     // character to NUL.
1944     State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1945                              /*IsSourceBuffer*/false, nullptr);
1946 
1947     // Overwrite the search string pointer. The new value is either an address
1948     // further along in the same string, or NULL if there are no more tokens.
1949     State = State->bindLoc(*SearchStrLoc,
1950                            SVB.conjureSymbolVal(getTag(),
1951                                                 CE,
1952                                                 LCtx,
1953                                                 CharPtrTy,
1954                                                 C.blockCount()),
1955                            LCtx);
1956   } else {
1957     assert(SearchStrVal.isUnknown());
1958     // Conjure a symbolic value. It's the best we can do.
1959     Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1960   }
1961 
1962   // Set the return value, and finish.
1963   State = State->BindExpr(CE, LCtx, Result);
1964   C.addTransition(State);
1965 }
1966 
1967 // These should probably be moved into a C++ standard library checker.
1968 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const {
1969   evalStdCopyCommon(C, CE);
1970 }
1971 
1972 void CStringChecker::evalStdCopyBackward(CheckerContext &C,
1973                                          const CallExpr *CE) const {
1974   evalStdCopyCommon(C, CE);
1975 }
1976 
1977 void CStringChecker::evalStdCopyCommon(CheckerContext &C,
1978                                        const CallExpr *CE) const {
1979   if (CE->getNumArgs() < 3)
1980     return;
1981 
1982   ProgramStateRef State = C.getState();
1983 
1984   const LocationContext *LCtx = C.getLocationContext();
1985 
1986   // template <class _InputIterator, class _OutputIterator>
1987   // _OutputIterator
1988   // copy(_InputIterator __first, _InputIterator __last,
1989   //        _OutputIterator __result)
1990 
1991   // Invalidate the destination buffer
1992   const Expr *Dst = CE->getArg(2);
1993   SVal DstVal = State->getSVal(Dst, LCtx);
1994   State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false,
1995                            /*Size=*/nullptr);
1996 
1997   SValBuilder &SVB = C.getSValBuilder();
1998 
1999   SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2000   State = State->BindExpr(CE, LCtx, ResultVal);
2001 
2002   C.addTransition(State);
2003 }
2004 
2005 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const {
2006   if (CE->getNumArgs() != 3)
2007     return;
2008 
2009   CurrentFunctionDescription = "memory set function";
2010 
2011   const Expr *Mem = CE->getArg(0);
2012   const Expr *Size = CE->getArg(2);
2013   ProgramStateRef State = C.getState();
2014 
2015   // See if the size argument is zero.
2016   const LocationContext *LCtx = C.getLocationContext();
2017   SVal SizeVal = State->getSVal(Size, LCtx);
2018   QualType SizeTy = Size->getType();
2019 
2020   ProgramStateRef StateZeroSize, StateNonZeroSize;
2021   std::tie(StateZeroSize, StateNonZeroSize) =
2022     assumeZero(C, State, SizeVal, SizeTy);
2023 
2024   // Get the value of the memory area.
2025   SVal MemVal = State->getSVal(Mem, LCtx);
2026 
2027   // If the size is zero, there won't be any actual memory access, so
2028   // just bind the return value to the Mem buffer and return.
2029   if (StateZeroSize && !StateNonZeroSize) {
2030     StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, MemVal);
2031     C.addTransition(StateZeroSize);
2032     return;
2033   }
2034 
2035   // Ensure the memory area is not null.
2036   // If it is NULL there will be a NULL pointer dereference.
2037   State = checkNonNull(C, StateNonZeroSize, Mem, MemVal);
2038   if (!State)
2039     return;
2040 
2041   State = CheckBufferAccess(C, State, Size, Mem);
2042   if (!State)
2043     return;
2044   State = InvalidateBuffer(C, State, Mem, C.getSVal(Mem),
2045                            /*IsSourceBuffer*/false, Size);
2046   if (!State)
2047     return;
2048 
2049   State = State->BindExpr(CE, LCtx, MemVal);
2050   C.addTransition(State);
2051 }
2052 
2053 static bool isCPPStdLibraryFunction(const FunctionDecl *FD, StringRef Name) {
2054   IdentifierInfo *II = FD->getIdentifier();
2055   if (!II)
2056     return false;
2057 
2058   if (!AnalysisDeclContext::isInStdNamespace(FD))
2059     return false;
2060 
2061   if (II->getName().equals(Name))
2062     return true;
2063 
2064   return false;
2065 }
2066 //===----------------------------------------------------------------------===//
2067 // The driver method, and other Checker callbacks.
2068 //===----------------------------------------------------------------------===//
2069 
2070 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
2071   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
2072 
2073   if (!FDecl)
2074     return false;
2075 
2076   // FIXME: Poorly-factored string switches are slow.
2077   FnCheck evalFunction = nullptr;
2078   if (C.isCLibraryFunction(FDecl, "memcpy"))
2079     evalFunction =  &CStringChecker::evalMemcpy;
2080   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
2081     evalFunction =  &CStringChecker::evalMempcpy;
2082   else if (C.isCLibraryFunction(FDecl, "memcmp"))
2083     evalFunction =  &CStringChecker::evalMemcmp;
2084   else if (C.isCLibraryFunction(FDecl, "memmove"))
2085     evalFunction =  &CStringChecker::evalMemmove;
2086   else if (C.isCLibraryFunction(FDecl, "memset"))
2087     evalFunction =  &CStringChecker::evalMemset;
2088   else if (C.isCLibraryFunction(FDecl, "strcpy"))
2089     evalFunction =  &CStringChecker::evalStrcpy;
2090   else if (C.isCLibraryFunction(FDecl, "strncpy"))
2091     evalFunction =  &CStringChecker::evalStrncpy;
2092   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
2093     evalFunction =  &CStringChecker::evalStpcpy;
2094   else if (C.isCLibraryFunction(FDecl, "strcat"))
2095     evalFunction =  &CStringChecker::evalStrcat;
2096   else if (C.isCLibraryFunction(FDecl, "strncat"))
2097     evalFunction =  &CStringChecker::evalStrncat;
2098   else if (C.isCLibraryFunction(FDecl, "strlen"))
2099     evalFunction =  &CStringChecker::evalstrLength;
2100   else if (C.isCLibraryFunction(FDecl, "strnlen"))
2101     evalFunction =  &CStringChecker::evalstrnLength;
2102   else if (C.isCLibraryFunction(FDecl, "strcmp"))
2103     evalFunction =  &CStringChecker::evalStrcmp;
2104   else if (C.isCLibraryFunction(FDecl, "strncmp"))
2105     evalFunction =  &CStringChecker::evalStrncmp;
2106   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
2107     evalFunction =  &CStringChecker::evalStrcasecmp;
2108   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
2109     evalFunction =  &CStringChecker::evalStrncasecmp;
2110   else if (C.isCLibraryFunction(FDecl, "strsep"))
2111     evalFunction =  &CStringChecker::evalStrsep;
2112   else if (C.isCLibraryFunction(FDecl, "bcopy"))
2113     evalFunction =  &CStringChecker::evalBcopy;
2114   else if (C.isCLibraryFunction(FDecl, "bcmp"))
2115     evalFunction =  &CStringChecker::evalMemcmp;
2116   else if (isCPPStdLibraryFunction(FDecl, "copy"))
2117     evalFunction =  &CStringChecker::evalStdCopy;
2118   else if (isCPPStdLibraryFunction(FDecl, "copy_backward"))
2119     evalFunction =  &CStringChecker::evalStdCopyBackward;
2120 
2121   // If the callee isn't a string function, let another checker handle it.
2122   if (!evalFunction)
2123     return false;
2124 
2125   // Check and evaluate the call.
2126   (this->*evalFunction)(C, CE);
2127 
2128   // If the evaluate call resulted in no change, chain to the next eval call
2129   // handler.
2130   // Note, the custom CString evaluation calls assume that basic safety
2131   // properties are held. However, if the user chooses to turn off some of these
2132   // checks, we ignore the issues and leave the call evaluation to a generic
2133   // handler.
2134   return C.isDifferent();
2135 }
2136 
2137 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2138   // Record string length for char a[] = "abc";
2139   ProgramStateRef state = C.getState();
2140 
2141   for (const auto *I : DS->decls()) {
2142     const VarDecl *D = dyn_cast<VarDecl>(I);
2143     if (!D)
2144       continue;
2145 
2146     // FIXME: Handle array fields of structs.
2147     if (!D->getType()->isArrayType())
2148       continue;
2149 
2150     const Expr *Init = D->getInit();
2151     if (!Init)
2152       continue;
2153     if (!isa<StringLiteral>(Init))
2154       continue;
2155 
2156     Loc VarLoc = state->getLValue(D, C.getLocationContext());
2157     const MemRegion *MR = VarLoc.getAsRegion();
2158     if (!MR)
2159       continue;
2160 
2161     SVal StrVal = C.getSVal(Init);
2162     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2163     DefinedOrUnknownSVal strLength =
2164         getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2165 
2166     state = state->set<CStringLength>(MR, strLength);
2167   }
2168 
2169   C.addTransition(state);
2170 }
2171 
2172 ProgramStateRef
2173 CStringChecker::checkRegionChanges(ProgramStateRef state,
2174                                    const InvalidatedSymbols *,
2175                                    ArrayRef<const MemRegion *> ExplicitRegions,
2176                                    ArrayRef<const MemRegion *> Regions,
2177                                    const LocationContext *LCtx,
2178                                    const CallEvent *Call) const {
2179   CStringLengthTy Entries = state->get<CStringLength>();
2180   if (Entries.isEmpty())
2181     return state;
2182 
2183   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2184   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2185 
2186   // First build sets for the changed regions and their super-regions.
2187   for (ArrayRef<const MemRegion *>::iterator
2188        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
2189     const MemRegion *MR = *I;
2190     Invalidated.insert(MR);
2191 
2192     SuperRegions.insert(MR);
2193     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2194       MR = SR->getSuperRegion();
2195       SuperRegions.insert(MR);
2196     }
2197   }
2198 
2199   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2200 
2201   // Then loop over the entries in the current state.
2202   for (CStringLengthTy::iterator I = Entries.begin(),
2203        E = Entries.end(); I != E; ++I) {
2204     const MemRegion *MR = I.getKey();
2205 
2206     // Is this entry for a super-region of a changed region?
2207     if (SuperRegions.count(MR)) {
2208       Entries = F.remove(Entries, MR);
2209       continue;
2210     }
2211 
2212     // Is this entry for a sub-region of a changed region?
2213     const MemRegion *Super = MR;
2214     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2215       Super = SR->getSuperRegion();
2216       if (Invalidated.count(Super)) {
2217         Entries = F.remove(Entries, MR);
2218         break;
2219       }
2220     }
2221   }
2222 
2223   return state->set<CStringLength>(Entries);
2224 }
2225 
2226 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2227                                       SymbolReaper &SR) const {
2228   // Mark all symbols in our string length map as valid.
2229   CStringLengthTy Entries = state->get<CStringLength>();
2230 
2231   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2232        I != E; ++I) {
2233     SVal Len = I.getData();
2234 
2235     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2236                                   se = Len.symbol_end(); si != se; ++si)
2237       SR.markInUse(*si);
2238   }
2239 }
2240 
2241 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2242                                       CheckerContext &C) const {
2243   if (!SR.hasDeadSymbols())
2244     return;
2245 
2246   ProgramStateRef state = C.getState();
2247   CStringLengthTy Entries = state->get<CStringLength>();
2248   if (Entries.isEmpty())
2249     return;
2250 
2251   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2252   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2253        I != E; ++I) {
2254     SVal Len = I.getData();
2255     if (SymbolRef Sym = Len.getAsSymbol()) {
2256       if (SR.isDead(Sym))
2257         Entries = F.remove(Entries, I.getKey());
2258     }
2259   }
2260 
2261   state = state->set<CStringLength>(Entries);
2262   C.addTransition(state);
2263 }
2264 
2265 #define REGISTER_CHECKER(name)                                                 \
2266   void ento::register##name(CheckerManager &mgr) {                             \
2267     CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
2268     checker->Filter.Check##name = true;                                        \
2269     checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
2270   }
2271 
2272 REGISTER_CHECKER(CStringNullArg)
2273 REGISTER_CHECKER(CStringOutOfBounds)
2274 REGISTER_CHECKER(CStringBufferOverlap)
2275 REGISTER_CHECKER(CStringNotNullTerm)
2276 
2277 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2278   registerCStringNullArg(Mgr);
2279 }
2280