xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 535bbcccb12b3a98bd53b68fef678e38814dbb0c)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
32 class CStringChecker : public Checker< eval::Call,
33                                          check::PreStmt<DeclStmt>,
34                                          check::LiveSymbols,
35                                          check::DeadSymbols,
36                                          check::RegionChanges
37                                          > {
38   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
39       BT_NotCString, BT_AdditionOverflow;
40 
41   mutable const char *CurrentFunctionDescription;
42 
43 public:
44   /// The filter is used to filter out the diagnostics which are not enabled by
45   /// the user.
46   struct CStringChecksFilter {
47     DefaultBool CheckCStringNullArg;
48     DefaultBool CheckCStringOutOfBounds;
49     DefaultBool CheckCStringBufferOverlap;
50     DefaultBool CheckCStringNotNullTerm;
51 
52     CheckName CheckNameCStringNullArg;
53     CheckName CheckNameCStringOutOfBounds;
54     CheckName CheckNameCStringBufferOverlap;
55     CheckName CheckNameCStringNotNullTerm;
56   };
57 
58   CStringChecksFilter Filter;
59 
60   static void *getTag() { static int tag; return &tag; }
61 
62   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
63   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
64   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
65   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
66   bool wantsRegionChangeUpdate(ProgramStateRef state) const;
67 
68   ProgramStateRef
69     checkRegionChanges(ProgramStateRef state,
70                        const InvalidatedSymbols *,
71                        ArrayRef<const MemRegion *> ExplicitRegions,
72                        ArrayRef<const MemRegion *> Regions,
73                        const CallEvent *Call) const;
74 
75   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
76                                           const CallExpr *) const;
77 
78   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
79   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
80   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
81   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
82   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
83                       ProgramStateRef state,
84                       const Expr *Size,
85                       const Expr *Source,
86                       const Expr *Dest,
87                       bool Restricted = false,
88                       bool IsMempcpy = false) const;
89 
90   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
91 
92   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
93   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
94   void evalstrLengthCommon(CheckerContext &C,
95                            const CallExpr *CE,
96                            bool IsStrnlen = false) const;
97 
98   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
99   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
100   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
101   void evalStrcpyCommon(CheckerContext &C,
102                         const CallExpr *CE,
103                         bool returnEnd,
104                         bool isBounded,
105                         bool isAppending) const;
106 
107   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
108   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
109 
110   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
111   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
112   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
113   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
114   void evalStrcmpCommon(CheckerContext &C,
115                         const CallExpr *CE,
116                         bool isBounded = false,
117                         bool ignoreCase = false) const;
118 
119   void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
120 
121   // Utility methods
122   std::pair<ProgramStateRef , ProgramStateRef >
123   static assumeZero(CheckerContext &C,
124                     ProgramStateRef state, SVal V, QualType Ty);
125 
126   static ProgramStateRef setCStringLength(ProgramStateRef state,
127                                               const MemRegion *MR,
128                                               SVal strLength);
129   static SVal getCStringLengthForRegion(CheckerContext &C,
130                                         ProgramStateRef &state,
131                                         const Expr *Ex,
132                                         const MemRegion *MR,
133                                         bool hypothetical);
134   SVal getCStringLength(CheckerContext &C,
135                         ProgramStateRef &state,
136                         const Expr *Ex,
137                         SVal Buf,
138                         bool hypothetical = false) const;
139 
140   const StringLiteral *getCStringLiteral(CheckerContext &C,
141                                          ProgramStateRef &state,
142                                          const Expr *expr,
143                                          SVal val) const;
144 
145   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
146                                           ProgramStateRef state,
147                                           const Expr *Ex, SVal V,
148                                           bool IsSourceBuffer);
149 
150   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
151                               const MemRegion *MR);
152 
153   // Re-usable checks
154   ProgramStateRef checkNonNull(CheckerContext &C,
155                                    ProgramStateRef state,
156                                    const Expr *S,
157                                    SVal l) const;
158   ProgramStateRef CheckLocation(CheckerContext &C,
159                                     ProgramStateRef state,
160                                     const Expr *S,
161                                     SVal l,
162                                     const char *message = NULL) const;
163   ProgramStateRef CheckBufferAccess(CheckerContext &C,
164                                         ProgramStateRef state,
165                                         const Expr *Size,
166                                         const Expr *FirstBuf,
167                                         const Expr *SecondBuf,
168                                         const char *firstMessage = NULL,
169                                         const char *secondMessage = NULL,
170                                         bool WarnAboutSize = false) const;
171 
172   ProgramStateRef CheckBufferAccess(CheckerContext &C,
173                                         ProgramStateRef state,
174                                         const Expr *Size,
175                                         const Expr *Buf,
176                                         const char *message = NULL,
177                                         bool WarnAboutSize = false) const {
178     // This is a convenience override.
179     return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
180                              WarnAboutSize);
181   }
182   ProgramStateRef CheckOverlap(CheckerContext &C,
183                                    ProgramStateRef state,
184                                    const Expr *Size,
185                                    const Expr *First,
186                                    const Expr *Second) const;
187   void emitOverlapBug(CheckerContext &C,
188                       ProgramStateRef state,
189                       const Stmt *First,
190                       const Stmt *Second) const;
191 
192   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
193                                             ProgramStateRef state,
194                                             NonLoc left,
195                                             NonLoc right) const;
196 };
197 
198 } //end anonymous namespace
199 
// Program-state map named CStringLength, keyed by memory region with an SVal
// payload. In this file it is read with state->get<CStringLength>() and
// updated with set<CStringLength>() / remove<CStringLength>() to remember the
// C string length associated with a region.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
201 
202 //===----------------------------------------------------------------------===//
203 // Individual checks and utility methods.
204 //===----------------------------------------------------------------------===//
205 
206 std::pair<ProgramStateRef , ProgramStateRef >
207 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
208                            QualType Ty) {
209   Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
210   if (!val)
211     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
212 
213   SValBuilder &svalBuilder = C.getSValBuilder();
214   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
215   return state->assume(svalBuilder.evalEQ(state, *val, zero));
216 }
217 
218 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
219                                             ProgramStateRef state,
220                                             const Expr *S, SVal l) const {
221   // If a previous check has failed, propagate the failure.
222   if (!state)
223     return NULL;
224 
225   ProgramStateRef stateNull, stateNonNull;
226   std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
227 
228   if (stateNull && !stateNonNull) {
229     if (!Filter.CheckCStringNullArg)
230       return NULL;
231 
232     ExplodedNode *N = C.generateSink(stateNull);
233     if (!N)
234       return NULL;
235 
236     if (!BT_Null)
237       BT_Null.reset(new BuiltinBug(
238           Filter.CheckNameCStringNullArg, categories::UnixAPI,
239           "Null pointer argument in call to byte string function"));
240 
241     SmallString<80> buf;
242     llvm::raw_svector_ostream os(buf);
243     assert(CurrentFunctionDescription);
244     os << "Null pointer argument in call to " << CurrentFunctionDescription;
245 
246     // Generate a report for this bug.
247     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
248     BugReport *report = new BugReport(*BT, os.str(), N);
249 
250     report->addRange(S->getSourceRange());
251     bugreporter::trackNullOrUndefValue(N, S, *report);
252     C.emitReport(report);
253     return NULL;
254   }
255 
256   // From here on, assume that the value is non-null.
257   assert(stateNonNull);
258   return stateNonNull;
259 }
260 
261 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
262 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
263                                              ProgramStateRef state,
264                                              const Expr *S, SVal l,
265                                              const char *warningMsg) const {
266   // If a previous check has failed, propagate the failure.
267   if (!state)
268     return NULL;
269 
270   // Check for out of bound array element access.
271   const MemRegion *R = l.getAsRegion();
272   if (!R)
273     return state;
274 
275   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
276   if (!ER)
277     return state;
278 
279   assert(ER->getValueType() == C.getASTContext().CharTy &&
280     "CheckLocation should only be called with char* ElementRegions");
281 
282   // Get the size of the array.
283   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
284   SValBuilder &svalBuilder = C.getSValBuilder();
285   SVal Extent =
286     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
287   DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
288 
289   // Get the index of the accessed element.
290   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
291 
292   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
293   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
294   if (StOutBound && !StInBound) {
295     ExplodedNode *N = C.generateSink(StOutBound);
296     if (!N)
297       return NULL;
298 
299     if (!BT_Bounds) {
300       BT_Bounds.reset(new BuiltinBug(
301           Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
302           "Byte string function accesses out-of-bound array element"));
303     }
304     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
305 
306     // Generate a report for this bug.
307     BugReport *report;
308     if (warningMsg) {
309       report = new BugReport(*BT, warningMsg, N);
310     } else {
311       assert(CurrentFunctionDescription);
312       assert(CurrentFunctionDescription[0] != '\0');
313 
314       SmallString<80> buf;
315       llvm::raw_svector_ostream os(buf);
316       os << toUppercase(CurrentFunctionDescription[0])
317          << &CurrentFunctionDescription[1]
318          << " accesses out-of-bound array element";
319       report = new BugReport(*BT, os.str(), N);
320     }
321 
322     // FIXME: It would be nice to eventually make this diagnostic more clear,
323     // e.g., by referencing the original declaration or by saying *why* this
324     // reference is outside the range.
325 
326     report->addRange(S->getSourceRange());
327     C.emitReport(report);
328     return NULL;
329   }
330 
331   // Array bound check succeeded.  From this point forward the array bound
332   // should always succeed.
333   return StInBound;
334 }
335 
336 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
337                                                  ProgramStateRef state,
338                                                  const Expr *Size,
339                                                  const Expr *FirstBuf,
340                                                  const Expr *SecondBuf,
341                                                  const char *firstMessage,
342                                                  const char *secondMessage,
343                                                  bool WarnAboutSize) const {
344   // If a previous check has failed, propagate the failure.
345   if (!state)
346     return NULL;
347 
348   SValBuilder &svalBuilder = C.getSValBuilder();
349   ASTContext &Ctx = svalBuilder.getContext();
350   const LocationContext *LCtx = C.getLocationContext();
351 
352   QualType sizeTy = Size->getType();
353   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
354 
355   // Check that the first buffer is non-null.
356   SVal BufVal = state->getSVal(FirstBuf, LCtx);
357   state = checkNonNull(C, state, FirstBuf, BufVal);
358   if (!state)
359     return NULL;
360 
361   // If out-of-bounds checking is turned off, skip the rest.
362   if (!Filter.CheckCStringOutOfBounds)
363     return state;
364 
365   // Get the access length and make sure it is known.
366   // FIXME: This assumes the caller has already checked that the access length
367   // is positive. And that it's unsigned.
368   SVal LengthVal = state->getSVal(Size, LCtx);
369   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
370   if (!Length)
371     return state;
372 
373   // Compute the offset of the last element to be accessed: size-1.
374   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
375   NonLoc LastOffset = svalBuilder
376       .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
377 
378   // Check that the first buffer is sufficiently long.
379   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
380   if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
381     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
382 
383     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
384                                           LastOffset, PtrTy);
385     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
386 
387     // If the buffer isn't large enough, abort.
388     if (!state)
389       return NULL;
390   }
391 
392   // If there's a second buffer, check it as well.
393   if (SecondBuf) {
394     BufVal = state->getSVal(SecondBuf, LCtx);
395     state = checkNonNull(C, state, SecondBuf, BufVal);
396     if (!state)
397       return NULL;
398 
399     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
400     if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
401       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
402 
403       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
404                                             LastOffset, PtrTy);
405       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
406     }
407   }
408 
409   // Large enough or not, return this state!
410   return state;
411 }
412 
413 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
414                                             ProgramStateRef state,
415                                             const Expr *Size,
416                                             const Expr *First,
417                                             const Expr *Second) const {
418   if (!Filter.CheckCStringBufferOverlap)
419     return state;
420 
421   // Do a simple check for overlap: if the two arguments are from the same
422   // buffer, see if the end of the first is greater than the start of the second
423   // or vice versa.
424 
425   // If a previous check has failed, propagate the failure.
426   if (!state)
427     return NULL;
428 
429   ProgramStateRef stateTrue, stateFalse;
430 
431   // Get the buffer values and make sure they're known locations.
432   const LocationContext *LCtx = C.getLocationContext();
433   SVal firstVal = state->getSVal(First, LCtx);
434   SVal secondVal = state->getSVal(Second, LCtx);
435 
436   Optional<Loc> firstLoc = firstVal.getAs<Loc>();
437   if (!firstLoc)
438     return state;
439 
440   Optional<Loc> secondLoc = secondVal.getAs<Loc>();
441   if (!secondLoc)
442     return state;
443 
444   // Are the two values the same?
445   SValBuilder &svalBuilder = C.getSValBuilder();
446   std::tie(stateTrue, stateFalse) =
447     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
448 
449   if (stateTrue && !stateFalse) {
450     // If the values are known to be equal, that's automatically an overlap.
451     emitOverlapBug(C, stateTrue, First, Second);
452     return NULL;
453   }
454 
455   // assume the two expressions are not equal.
456   assert(stateFalse);
457   state = stateFalse;
458 
459   // Which value comes first?
460   QualType cmpTy = svalBuilder.getConditionType();
461   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
462                                          *firstLoc, *secondLoc, cmpTy);
463   Optional<DefinedOrUnknownSVal> reverseTest =
464       reverse.getAs<DefinedOrUnknownSVal>();
465   if (!reverseTest)
466     return state;
467 
468   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
469   if (stateTrue) {
470     if (stateFalse) {
471       // If we don't know which one comes first, we can't perform this test.
472       return state;
473     } else {
474       // Switch the values so that firstVal is before secondVal.
475       std::swap(firstLoc, secondLoc);
476 
477       // Switch the Exprs as well, so that they still correspond.
478       std::swap(First, Second);
479     }
480   }
481 
482   // Get the length, and make sure it too is known.
483   SVal LengthVal = state->getSVal(Size, LCtx);
484   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
485   if (!Length)
486     return state;
487 
488   // Convert the first buffer's start address to char*.
489   // Bail out if the cast fails.
490   ASTContext &Ctx = svalBuilder.getContext();
491   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
492   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
493                                          First->getType());
494   Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
495   if (!FirstStartLoc)
496     return state;
497 
498   // Compute the end of the first buffer. Bail out if THAT fails.
499   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
500                                  *FirstStartLoc, *Length, CharPtrTy);
501   Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
502   if (!FirstEndLoc)
503     return state;
504 
505   // Is the end of the first buffer past the start of the second buffer?
506   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
507                                 *FirstEndLoc, *secondLoc, cmpTy);
508   Optional<DefinedOrUnknownSVal> OverlapTest =
509       Overlap.getAs<DefinedOrUnknownSVal>();
510   if (!OverlapTest)
511     return state;
512 
513   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
514 
515   if (stateTrue && !stateFalse) {
516     // Overlap!
517     emitOverlapBug(C, stateTrue, First, Second);
518     return NULL;
519   }
520 
521   // assume the two expressions don't overlap.
522   assert(stateFalse);
523   return stateFalse;
524 }
525 
526 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
527                                   const Stmt *First, const Stmt *Second) const {
528   ExplodedNode *N = C.generateSink(state);
529   if (!N)
530     return;
531 
532   if (!BT_Overlap)
533     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
534                                  categories::UnixAPI, "Improper arguments"));
535 
536   // Generate a report for this bug.
537   BugReport *report =
538     new BugReport(*BT_Overlap,
539       "Arguments must not be overlapping buffers", N);
540   report->addRange(First->getSourceRange());
541   report->addRange(Second->getSourceRange());
542 
543   C.emitReport(report);
544 }
545 
546 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
547                                                      ProgramStateRef state,
548                                                      NonLoc left,
549                                                      NonLoc right) const {
550   // If out-of-bounds checking is turned off, skip the rest.
551   if (!Filter.CheckCStringOutOfBounds)
552     return state;
553 
554   // If a previous check has failed, propagate the failure.
555   if (!state)
556     return NULL;
557 
558   SValBuilder &svalBuilder = C.getSValBuilder();
559   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
560 
561   QualType sizeTy = svalBuilder.getContext().getSizeType();
562   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
563   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
564 
565   SVal maxMinusRight;
566   if (right.getAs<nonloc::ConcreteInt>()) {
567     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
568                                                  sizeTy);
569   } else {
570     // Try switching the operands. (The order of these two assignments is
571     // important!)
572     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
573                                             sizeTy);
574     left = right;
575   }
576 
577   if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
578     QualType cmpTy = svalBuilder.getConditionType();
579     // If left > max - right, we have an overflow.
580     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
581                                                 *maxMinusRightNL, cmpTy);
582 
583     ProgramStateRef stateOverflow, stateOkay;
584     std::tie(stateOverflow, stateOkay) =
585       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
586 
587     if (stateOverflow && !stateOkay) {
588       // We have an overflow. Emit a bug report.
589       ExplodedNode *N = C.generateSink(stateOverflow);
590       if (!N)
591         return NULL;
592 
593       if (!BT_AdditionOverflow)
594         BT_AdditionOverflow.reset(
595             new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
596                            "Sum of expressions causes overflow"));
597 
598       // This isn't a great error message, but this should never occur in real
599       // code anyway -- you'd have to create a buffer longer than a size_t can
600       // represent, which is sort of a contradiction.
601       const char *warning =
602         "This expression will create a string whose length is too big to "
603         "be represented as a size_t";
604 
605       // Generate a report for this bug.
606       BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
607       C.emitReport(report);
608 
609       return NULL;
610     }
611 
612     // From now on, assume an overflow didn't occur.
613     assert(stateOkay);
614     state = stateOkay;
615   }
616 
617   return state;
618 }
619 
620 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
621                                                 const MemRegion *MR,
622                                                 SVal strLength) {
623   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
624 
625   MR = MR->StripCasts();
626 
627   switch (MR->getKind()) {
628   case MemRegion::StringRegionKind:
629     // FIXME: This can happen if we strcpy() into a string region. This is
630     // undefined [C99 6.4.5p6], but we should still warn about it.
631     return state;
632 
633   case MemRegion::SymbolicRegionKind:
634   case MemRegion::AllocaRegionKind:
635   case MemRegion::VarRegionKind:
636   case MemRegion::FieldRegionKind:
637   case MemRegion::ObjCIvarRegionKind:
638     // These are the types we can currently track string lengths for.
639     break;
640 
641   case MemRegion::ElementRegionKind:
642     // FIXME: Handle element regions by upper-bounding the parent region's
643     // string length.
644     return state;
645 
646   default:
647     // Other regions (mostly non-data) can't have a reliable C string length.
648     // For now, just ignore the change.
649     // FIXME: These are rare but not impossible. We should output some kind of
650     // warning for things like strcpy((char[]){'a', 0}, "b");
651     return state;
652   }
653 
654   if (strLength.isUnknown())
655     return state->remove<CStringLength>(MR);
656 
657   return state->set<CStringLength>(MR, strLength);
658 }
659 
660 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
661                                                ProgramStateRef &state,
662                                                const Expr *Ex,
663                                                const MemRegion *MR,
664                                                bool hypothetical) {
665   if (!hypothetical) {
666     // If there's a recorded length, go ahead and return it.
667     const SVal *Recorded = state->get<CStringLength>(MR);
668     if (Recorded)
669       return *Recorded;
670   }
671 
672   // Otherwise, get a new symbol and update the state.
673   SValBuilder &svalBuilder = C.getSValBuilder();
674   QualType sizeTy = svalBuilder.getContext().getSizeType();
675   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
676                                                     MR, Ex, sizeTy,
677                                                     C.blockCount());
678 
679   if (!hypothetical) {
680     if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
681       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
682       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
683       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
684       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
685       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
686                                                         fourInt);
687       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
688       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
689                                                 maxLength, sizeTy);
690       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
691     }
692     state = state->set<CStringLength>(MR, strLength);
693   }
694 
695   return strLength;
696 }
697 
698 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
699                                       const Expr *Ex, SVal Buf,
700                                       bool hypothetical) const {
701   const MemRegion *MR = Buf.getAsRegion();
702   if (!MR) {
703     // If we can't get a region, see if it's something we /know/ isn't a
704     // C string. In the context of locations, the only time we can issue such
705     // a warning is for labels.
706     if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
707       if (!Filter.CheckCStringNotNullTerm)
708         return UndefinedVal();
709 
710       if (ExplodedNode *N = C.addTransition(state)) {
711         if (!BT_NotCString)
712           BT_NotCString.reset(new BuiltinBug(
713               Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
714               "Argument is not a null-terminated string."));
715 
716         SmallString<120> buf;
717         llvm::raw_svector_ostream os(buf);
718         assert(CurrentFunctionDescription);
719         os << "Argument to " << CurrentFunctionDescription
720            << " is the address of the label '" << Label->getLabel()->getName()
721            << "', which is not a null-terminated string";
722 
723         // Generate a report for this bug.
724         BugReport *report = new BugReport(*BT_NotCString, os.str(), N);
725 
726         report->addRange(Ex->getSourceRange());
727         C.emitReport(report);
728       }
729       return UndefinedVal();
730 
731     }
732 
733     // If it's not a region and not a label, give up.
734     return UnknownVal();
735   }
736 
737   // If we have a region, strip casts from it and see if we can figure out
738   // its length. For anything we can't figure out, just return UnknownVal.
739   MR = MR->StripCasts();
740 
741   switch (MR->getKind()) {
742   case MemRegion::StringRegionKind: {
743     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
744     // so we can assume that the byte length is the correct C string length.
745     SValBuilder &svalBuilder = C.getSValBuilder();
746     QualType sizeTy = svalBuilder.getContext().getSizeType();
747     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
748     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
749   }
750   case MemRegion::SymbolicRegionKind:
751   case MemRegion::AllocaRegionKind:
752   case MemRegion::VarRegionKind:
753   case MemRegion::FieldRegionKind:
754   case MemRegion::ObjCIvarRegionKind:
755     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
756   case MemRegion::CompoundLiteralRegionKind:
757     // FIXME: Can we track this? Is it necessary?
758     return UnknownVal();
759   case MemRegion::ElementRegionKind:
760     // FIXME: How can we handle this? It's not good enough to subtract the
761     // offset from the base string length; consider "123\x00567" and &a[5].
762     return UnknownVal();
763   default:
764     // Other regions (mostly non-data) can't have a reliable C string length.
765     // In this case, an error is emitted and UndefinedVal is returned.
766     // The caller should always be prepared to handle this case.
767     if (!Filter.CheckCStringNotNullTerm)
768       return UndefinedVal();
769 
770     if (ExplodedNode *N = C.addTransition(state)) {
771       if (!BT_NotCString)
772         BT_NotCString.reset(new BuiltinBug(
773             Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
774             "Argument is not a null-terminated string."));
775 
776       SmallString<120> buf;
777       llvm::raw_svector_ostream os(buf);
778 
779       assert(CurrentFunctionDescription);
780       os << "Argument to " << CurrentFunctionDescription << " is ";
781 
782       if (SummarizeRegion(os, C.getASTContext(), MR))
783         os << ", which is not a null-terminated string";
784       else
785         os << "not a null-terminated string";
786 
787       // Generate a report for this bug.
788       BugReport *report = new BugReport(*BT_NotCString,
789                                                         os.str(), N);
790 
791       report->addRange(Ex->getSourceRange());
792       C.emitReport(report);
793     }
794 
795     return UndefinedVal();
796   }
797 }
798 
799 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
800   ProgramStateRef &state, const Expr *expr, SVal val) const {
801 
802   // Get the memory region pointed to by the val.
803   const MemRegion *bufRegion = val.getAsRegion();
804   if (!bufRegion)
805     return NULL;
806 
807   // Strip casts off the memory region.
808   bufRegion = bufRegion->StripCasts();
809 
810   // Cast the memory region to a string region.
811   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
812   if (!strRegion)
813     return NULL;
814 
815   // Return the actual string in the string region.
816   return strRegion->getStringLiteral();
817 }
818 
819 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
820                                                  ProgramStateRef state,
821                                                  const Expr *E, SVal V,
822                                                  bool IsSourceBuffer) {
823   Optional<Loc> L = V.getAs<Loc>();
824   if (!L)
825     return state;
826 
827   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
828   // some assumptions about the value that CFRefCount can't. Even so, it should
829   // probably be refactored.
830   if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
831     const MemRegion *R = MR->getRegion()->StripCasts();
832 
833     // Are we dealing with an ElementRegion?  If so, we should be invalidating
834     // the super-region.
835     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
836       R = ER->getSuperRegion();
837       // FIXME: What about layers of ElementRegions?
838     }
839 
840     // Invalidate this region.
841     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
842 
843     bool CausesPointerEscape = false;
844     RegionAndSymbolInvalidationTraits ITraits;
845     // Invalidate and escape only indirect regions accessible through the source
846     // buffer.
847     if (IsSourceBuffer) {
848       ITraits.setTrait(R,
849                        RegionAndSymbolInvalidationTraits::TK_PreserveContents);
850       ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
851       CausesPointerEscape = true;
852     }
853 
854     return state->invalidateRegions(R, E, C.blockCount(), LCtx,
855                                     CausesPointerEscape, 0, 0, &ITraits);
856   }
857 
858   // If we have a non-region value by chance, just remove the binding.
859   // FIXME: is this necessary or correct? This handles the non-Region
860   //  cases.  Is it ever valid to store to these?
861   return state->killBinding(*L);
862 }
863 
864 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
865                                      const MemRegion *MR) {
866   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
867 
868   switch (MR->getKind()) {
869   case MemRegion::FunctionTextRegionKind: {
870     const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
871     if (FD)
872       os << "the address of the function '" << *FD << '\'';
873     else
874       os << "the address of a function";
875     return true;
876   }
877   case MemRegion::BlockTextRegionKind:
878     os << "block text";
879     return true;
880   case MemRegion::BlockDataRegionKind:
881     os << "a block";
882     return true;
883   case MemRegion::CXXThisRegionKind:
884   case MemRegion::CXXTempObjectRegionKind:
885     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
886     return true;
887   case MemRegion::VarRegionKind:
888     os << "a variable of type" << TVR->getValueType().getAsString();
889     return true;
890   case MemRegion::FieldRegionKind:
891     os << "a field of type " << TVR->getValueType().getAsString();
892     return true;
893   case MemRegion::ObjCIvarRegionKind:
894     os << "an instance variable of type " << TVR->getValueType().getAsString();
895     return true;
896   default:
897     return false;
898   }
899 }
900 
901 //===----------------------------------------------------------------------===//
902 // evaluation of individual function calls.
903 //===----------------------------------------------------------------------===//
904 
905 void CStringChecker::evalCopyCommon(CheckerContext &C,
906                                     const CallExpr *CE,
907                                     ProgramStateRef state,
908                                     const Expr *Size, const Expr *Dest,
909                                     const Expr *Source, bool Restricted,
910                                     bool IsMempcpy) const {
911   CurrentFunctionDescription = "memory copy function";
912 
913   // See if the size argument is zero.
914   const LocationContext *LCtx = C.getLocationContext();
915   SVal sizeVal = state->getSVal(Size, LCtx);
916   QualType sizeTy = Size->getType();
917 
918   ProgramStateRef stateZeroSize, stateNonZeroSize;
919   std::tie(stateZeroSize, stateNonZeroSize) =
920     assumeZero(C, state, sizeVal, sizeTy);
921 
922   // Get the value of the Dest.
923   SVal destVal = state->getSVal(Dest, LCtx);
924 
925   // If the size is zero, there won't be any actual memory access, so
926   // just bind the return value to the destination buffer and return.
927   if (stateZeroSize && !stateNonZeroSize) {
928     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
929     C.addTransition(stateZeroSize);
930     return;
931   }
932 
933   // If the size can be nonzero, we have to check the other arguments.
934   if (stateNonZeroSize) {
935     state = stateNonZeroSize;
936 
937     // Ensure the destination is not null. If it is NULL there will be a
938     // NULL pointer dereference.
939     state = checkNonNull(C, state, Dest, destVal);
940     if (!state)
941       return;
942 
943     // Get the value of the Src.
944     SVal srcVal = state->getSVal(Source, LCtx);
945 
946     // Ensure the source is not null. If it is NULL there will be a
947     // NULL pointer dereference.
948     state = checkNonNull(C, state, Source, srcVal);
949     if (!state)
950       return;
951 
952     // Ensure the accesses are valid and that the buffers do not overlap.
953     const char * const writeWarning =
954       "Memory copy function overflows destination buffer";
955     state = CheckBufferAccess(C, state, Size, Dest, Source,
956                               writeWarning, /* sourceWarning = */ NULL);
957     if (Restricted)
958       state = CheckOverlap(C, state, Size, Dest, Source);
959 
960     if (!state)
961       return;
962 
963     // If this is mempcpy, get the byte after the last byte copied and
964     // bind the expr.
965     if (IsMempcpy) {
966       loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
967 
968       // Get the length to copy.
969       if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
970         // Get the byte after the last byte copied.
971         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
972                                                           destRegVal,
973                                                           *lenValNonLoc,
974                                                           Dest->getType());
975 
976         // The byte after the last byte copied is the return value.
977         state = state->BindExpr(CE, LCtx, lastElement);
978       } else {
979         // If we don't know how much we copied, we can at least
980         // conjure a return value for later.
981         SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx,
982                                                           C.blockCount());
983         state = state->BindExpr(CE, LCtx, result);
984       }
985 
986     } else {
987       // All other copies return the destination buffer.
988       // (Well, bcopy() has a void return type, but this won't hurt.)
989       state = state->BindExpr(CE, LCtx, destVal);
990     }
991 
992     // Invalidate the destination (regular invalidation without pointer-escaping
993     // the address of the top-level region).
994     // FIXME: Even if we can't perfectly model the copy, we should see if we
995     // can use LazyCompoundVals to copy the source values into the destination.
996     // This would probably remove any existing bindings past the end of the
997     // copied region, but that's still an improvement over blank invalidation.
998     state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
999                              /*IsSourceBuffer*/false);
1000 
1001     // Invalidate the source (const-invalidation without const-pointer-escaping
1002     // the address of the top-level region).
1003     state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1004                              /*IsSourceBuffer*/true);
1005 
1006     C.addTransition(state);
1007   }
1008 }
1009 
1010 
1011 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1012   if (CE->getNumArgs() < 3)
1013     return;
1014 
1015   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1016   // The return value is the address of the destination buffer.
1017   const Expr *Dest = CE->getArg(0);
1018   ProgramStateRef state = C.getState();
1019 
1020   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1021 }
1022 
1023 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1024   if (CE->getNumArgs() < 3)
1025     return;
1026 
1027   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1028   // The return value is a pointer to the byte following the last written byte.
1029   const Expr *Dest = CE->getArg(0);
1030   ProgramStateRef state = C.getState();
1031 
1032   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1033 }
1034 
1035 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1036   if (CE->getNumArgs() < 3)
1037     return;
1038 
1039   // void *memmove(void *dst, const void *src, size_t n);
1040   // The return value is the address of the destination buffer.
1041   const Expr *Dest = CE->getArg(0);
1042   ProgramStateRef state = C.getState();
1043 
1044   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1045 }
1046 
1047 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1048   if (CE->getNumArgs() < 3)
1049     return;
1050 
1051   // void bcopy(const void *src, void *dst, size_t n);
1052   evalCopyCommon(C, CE, C.getState(),
1053                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1054 }
1055 
1056 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1057   if (CE->getNumArgs() < 3)
1058     return;
1059 
1060   // int memcmp(const void *s1, const void *s2, size_t n);
1061   CurrentFunctionDescription = "memory comparison function";
1062 
1063   const Expr *Left = CE->getArg(0);
1064   const Expr *Right = CE->getArg(1);
1065   const Expr *Size = CE->getArg(2);
1066 
1067   ProgramStateRef state = C.getState();
1068   SValBuilder &svalBuilder = C.getSValBuilder();
1069 
1070   // See if the size argument is zero.
1071   const LocationContext *LCtx = C.getLocationContext();
1072   SVal sizeVal = state->getSVal(Size, LCtx);
1073   QualType sizeTy = Size->getType();
1074 
1075   ProgramStateRef stateZeroSize, stateNonZeroSize;
1076   std::tie(stateZeroSize, stateNonZeroSize) =
1077     assumeZero(C, state, sizeVal, sizeTy);
1078 
1079   // If the size can be zero, the result will be 0 in that case, and we don't
1080   // have to check either of the buffers.
1081   if (stateZeroSize) {
1082     state = stateZeroSize;
1083     state = state->BindExpr(CE, LCtx,
1084                             svalBuilder.makeZeroVal(CE->getType()));
1085     C.addTransition(state);
1086   }
1087 
1088   // If the size can be nonzero, we have to check the other arguments.
1089   if (stateNonZeroSize) {
1090     state = stateNonZeroSize;
1091     // If we know the two buffers are the same, we know the result is 0.
1092     // First, get the two buffers' addresses. Another checker will have already
1093     // made sure they're not undefined.
1094     DefinedOrUnknownSVal LV =
1095         state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1096     DefinedOrUnknownSVal RV =
1097         state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1098 
1099     // See if they are the same.
1100     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1101     ProgramStateRef StSameBuf, StNotSameBuf;
1102     std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1103 
1104     // If the two arguments might be the same buffer, we know the result is 0,
1105     // and we only need to check one size.
1106     if (StSameBuf) {
1107       state = StSameBuf;
1108       state = CheckBufferAccess(C, state, Size, Left);
1109       if (state) {
1110         state = StSameBuf->BindExpr(CE, LCtx,
1111                                     svalBuilder.makeZeroVal(CE->getType()));
1112         C.addTransition(state);
1113       }
1114     }
1115 
1116     // If the two arguments might be different buffers, we have to check the
1117     // size of both of them.
1118     if (StNotSameBuf) {
1119       state = StNotSameBuf;
1120       state = CheckBufferAccess(C, state, Size, Left, Right);
1121       if (state) {
1122         // The return value is the comparison result, which we don't know.
1123         SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1124         state = state->BindExpr(CE, LCtx, CmpV);
1125         C.addTransition(state);
1126       }
1127     }
1128   }
1129 }
1130 
1131 void CStringChecker::evalstrLength(CheckerContext &C,
1132                                    const CallExpr *CE) const {
1133   if (CE->getNumArgs() < 1)
1134     return;
1135 
1136   // size_t strlen(const char *s);
1137   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1138 }
1139 
1140 void CStringChecker::evalstrnLength(CheckerContext &C,
1141                                     const CallExpr *CE) const {
1142   if (CE->getNumArgs() < 2)
1143     return;
1144 
1145   // size_t strnlen(const char *s, size_t maxlen);
1146   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1147 }
1148 
1149 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1150                                          bool IsStrnlen) const {
1151   CurrentFunctionDescription = "string length function";
1152   ProgramStateRef state = C.getState();
1153   const LocationContext *LCtx = C.getLocationContext();
1154 
1155   if (IsStrnlen) {
1156     const Expr *maxlenExpr = CE->getArg(1);
1157     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1158 
1159     ProgramStateRef stateZeroSize, stateNonZeroSize;
1160     std::tie(stateZeroSize, stateNonZeroSize) =
1161       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1162 
1163     // If the size can be zero, the result will be 0 in that case, and we don't
1164     // have to check the string itself.
1165     if (stateZeroSize) {
1166       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1167       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1168       C.addTransition(stateZeroSize);
1169     }
1170 
1171     // If the size is GUARANTEED to be zero, we're done!
1172     if (!stateNonZeroSize)
1173       return;
1174 
1175     // Otherwise, record the assumption that the size is nonzero.
1176     state = stateNonZeroSize;
1177   }
1178 
1179   // Check that the string argument is non-null.
1180   const Expr *Arg = CE->getArg(0);
1181   SVal ArgVal = state->getSVal(Arg, LCtx);
1182 
1183   state = checkNonNull(C, state, Arg, ArgVal);
1184 
1185   if (!state)
1186     return;
1187 
1188   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1189 
1190   // If the argument isn't a valid C string, there's no valid state to
1191   // transition to.
1192   if (strLength.isUndef())
1193     return;
1194 
1195   DefinedOrUnknownSVal result = UnknownVal();
1196 
1197   // If the check is for strnlen() then bind the return value to no more than
1198   // the maxlen value.
1199   if (IsStrnlen) {
1200     QualType cmpTy = C.getSValBuilder().getConditionType();
1201 
1202     // It's a little unfortunate to be getting this again,
1203     // but it's not that expensive...
1204     const Expr *maxlenExpr = CE->getArg(1);
1205     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1206 
1207     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1208     Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1209 
1210     if (strLengthNL && maxlenValNL) {
1211       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1212 
1213       // Check if the strLength is greater than the maxlen.
1214       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1215           C.getSValBuilder()
1216               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1217               .castAs<DefinedOrUnknownSVal>());
1218 
1219       if (stateStringTooLong && !stateStringNotTooLong) {
1220         // If the string is longer than maxlen, return maxlen.
1221         result = *maxlenValNL;
1222       } else if (stateStringNotTooLong && !stateStringTooLong) {
1223         // If the string is shorter than maxlen, return its length.
1224         result = *strLengthNL;
1225       }
1226     }
1227 
1228     if (result.isUnknown()) {
1229       // If we don't have enough information for a comparison, there's
1230       // no guarantee the full string length will actually be returned.
1231       // All we know is the return value is the min of the string length
1232       // and the limit. This is better than nothing.
1233       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1234       NonLoc resultNL = result.castAs<NonLoc>();
1235 
1236       if (strLengthNL) {
1237         state = state->assume(C.getSValBuilder().evalBinOpNN(
1238                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1239                                   .castAs<DefinedOrUnknownSVal>(), true);
1240       }
1241 
1242       if (maxlenValNL) {
1243         state = state->assume(C.getSValBuilder().evalBinOpNN(
1244                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1245                                   .castAs<DefinedOrUnknownSVal>(), true);
1246       }
1247     }
1248 
1249   } else {
1250     // This is a plain strlen(), not strnlen().
1251     result = strLength.castAs<DefinedOrUnknownSVal>();
1252 
1253     // If we don't know the length of the string, conjure a return
1254     // value, so it can be used in constraints, at least.
1255     if (result.isUnknown()) {
1256       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1257     }
1258   }
1259 
1260   // Bind the return value.
1261   assert(!result.isUnknown() && "Should have conjured a value by now");
1262   state = state->BindExpr(CE, LCtx, result);
1263   C.addTransition(state);
1264 }
1265 
1266 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1267   if (CE->getNumArgs() < 2)
1268     return;
1269 
1270   // char *strcpy(char *restrict dst, const char *restrict src);
1271   evalStrcpyCommon(C, CE,
1272                    /* returnEnd = */ false,
1273                    /* isBounded = */ false,
1274                    /* isAppending = */ false);
1275 }
1276 
1277 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1278   if (CE->getNumArgs() < 3)
1279     return;
1280 
1281   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1282   evalStrcpyCommon(C, CE,
1283                    /* returnEnd = */ false,
1284                    /* isBounded = */ true,
1285                    /* isAppending = */ false);
1286 }
1287 
1288 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1289   if (CE->getNumArgs() < 2)
1290     return;
1291 
1292   // char *stpcpy(char *restrict dst, const char *restrict src);
1293   evalStrcpyCommon(C, CE,
1294                    /* returnEnd = */ true,
1295                    /* isBounded = */ false,
1296                    /* isAppending = */ false);
1297 }
1298 
1299 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1300   if (CE->getNumArgs() < 2)
1301     return;
1302 
1303   //char *strcat(char *restrict s1, const char *restrict s2);
1304   evalStrcpyCommon(C, CE,
1305                    /* returnEnd = */ false,
1306                    /* isBounded = */ false,
1307                    /* isAppending = */ true);
1308 }
1309 
1310 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1311   if (CE->getNumArgs() < 3)
1312     return;
1313 
1314   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1315   evalStrcpyCommon(C, CE,
1316                    /* returnEnd = */ false,
1317                    /* isBounded = */ true,
1318                    /* isAppending = */ true);
1319 }
1320 
1321 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1322                                       bool returnEnd, bool isBounded,
1323                                       bool isAppending) const {
1324   CurrentFunctionDescription = "string copy function";
1325   ProgramStateRef state = C.getState();
1326   const LocationContext *LCtx = C.getLocationContext();
1327 
1328   // Check that the destination is non-null.
1329   const Expr *Dst = CE->getArg(0);
1330   SVal DstVal = state->getSVal(Dst, LCtx);
1331 
1332   state = checkNonNull(C, state, Dst, DstVal);
1333   if (!state)
1334     return;
1335 
1336   // Check that the source is non-null.
1337   const Expr *srcExpr = CE->getArg(1);
1338   SVal srcVal = state->getSVal(srcExpr, LCtx);
1339   state = checkNonNull(C, state, srcExpr, srcVal);
1340   if (!state)
1341     return;
1342 
1343   // Get the string length of the source.
1344   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1345 
1346   // If the source isn't a valid C string, give up.
1347   if (strLength.isUndef())
1348     return;
1349 
1350   SValBuilder &svalBuilder = C.getSValBuilder();
1351   QualType cmpTy = svalBuilder.getConditionType();
1352   QualType sizeTy = svalBuilder.getContext().getSizeType();
1353 
1354   // These two values allow checking two kinds of errors:
1355   // - actual overflows caused by a source that doesn't fit in the destination
1356   // - potential overflows caused by a bound that could exceed the destination
1357   SVal amountCopied = UnknownVal();
1358   SVal maxLastElementIndex = UnknownVal();
1359   const char *boundWarning = NULL;
1360 
1361   // If the function is strncpy, strncat, etc... it is bounded.
1362   if (isBounded) {
1363     // Get the max number of characters to copy.
1364     const Expr *lenExpr = CE->getArg(2);
1365     SVal lenVal = state->getSVal(lenExpr, LCtx);
1366 
1367     // Protect against misdeclared strncpy().
1368     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1369 
1370     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1371     Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1372 
1373     // If we know both values, we might be able to figure out how much
1374     // we're copying.
1375     if (strLengthNL && lenValNL) {
1376       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1377 
1378       // Check if the max number to copy is less than the length of the src.
1379       // If the bound is equal to the source length, strncpy won't null-
1380       // terminate the result!
1381       std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1382           svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1383               .castAs<DefinedOrUnknownSVal>());
1384 
1385       if (stateSourceTooLong && !stateSourceNotTooLong) {
1386         // Max number to copy is less than the length of the src, so the actual
1387         // strLength copied is the max number arg.
1388         state = stateSourceTooLong;
1389         amountCopied = lenVal;
1390 
1391       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1392         // The source buffer entirely fits in the bound.
1393         state = stateSourceNotTooLong;
1394         amountCopied = strLength;
1395       }
1396     }
1397 
1398     // We still want to know if the bound is known to be too large.
1399     if (lenValNL) {
1400       if (isAppending) {
1401         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1402 
1403         // Get the string length of the destination. If the destination is
1404         // memory that can't have a string length, we shouldn't be copying
1405         // into it anyway.
1406         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1407         if (dstStrLength.isUndef())
1408           return;
1409 
1410         if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1411           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1412                                                         *lenValNL,
1413                                                         *dstStrLengthNL,
1414                                                         sizeTy);
1415           boundWarning = "Size argument is greater than the free space in the "
1416                          "destination buffer";
1417         }
1418 
1419       } else {
1420         // For strncpy, this is just checking that lenVal <= sizeof(dst)
1421         // (Yes, strncpy and strncat differ in how they treat termination.
1422         // strncat ALWAYS terminates, but strncpy doesn't.)
1423 
1424         // We need a special case for when the copy size is zero, in which
1425         // case strncpy will do no work at all. Our bounds check uses n-1
1426         // as the last element accessed, so n == 0 is problematic.
1427         ProgramStateRef StateZeroSize, StateNonZeroSize;
1428         std::tie(StateZeroSize, StateNonZeroSize) =
1429           assumeZero(C, state, *lenValNL, sizeTy);
1430 
1431         // If the size is known to be zero, we're done.
1432         if (StateZeroSize && !StateNonZeroSize) {
1433           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1434           C.addTransition(StateZeroSize);
1435           return;
1436         }
1437 
1438         // Otherwise, go ahead and figure out the last element we'll touch.
1439         // We don't record the non-zero assumption here because we can't
1440         // be sure. We won't warn on a possible zero.
1441         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1442         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1443                                                       one, sizeTy);
1444         boundWarning = "Size argument is greater than the length of the "
1445                        "destination buffer";
1446       }
1447     }
1448 
1449     // If we couldn't pin down the copy length, at least bound it.
1450     // FIXME: We should actually run this code path for append as well, but
1451     // right now it creates problems with constraints (since we can end up
1452     // trying to pass constraints from symbol to symbol).
1453     if (amountCopied.isUnknown() && !isAppending) {
1454       // Try to get a "hypothetical" string length symbol, which we can later
1455       // set as a real value if that turns out to be the case.
1456       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1457       assert(!amountCopied.isUndef());
1458 
1459       if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1460         if (lenValNL) {
1461           // amountCopied <= lenVal
1462           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1463                                                              *amountCopiedNL,
1464                                                              *lenValNL,
1465                                                              cmpTy);
1466           state = state->assume(
1467               copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1468           if (!state)
1469             return;
1470         }
1471 
1472         if (strLengthNL) {
1473           // amountCopied <= strlen(source)
1474           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1475                                                            *amountCopiedNL,
1476                                                            *strLengthNL,
1477                                                            cmpTy);
1478           state = state->assume(
1479               copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1480           if (!state)
1481             return;
1482         }
1483       }
1484     }
1485 
1486   } else {
1487     // The function isn't bounded. The amount copied should match the length
1488     // of the source buffer.
1489     amountCopied = strLength;
1490   }
1491 
1492   assert(state);
1493 
1494   // This represents the number of characters copied into the destination
1495   // buffer. (It may not actually be the strlen if the destination buffer
1496   // is not terminated.)
1497   SVal finalStrLength = UnknownVal();
1498 
1499   // If this is an appending function (strcat, strncat...) then set the
1500   // string length to strlen(src) + strlen(dst) since the buffer will
1501   // ultimately contain both.
1502   if (isAppending) {
1503     // Get the string length of the destination. If the destination is memory
1504     // that can't have a string length, we shouldn't be copying into it anyway.
1505     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1506     if (dstStrLength.isUndef())
1507       return;
1508 
1509     Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1510     Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1511 
1512     // If we know both string lengths, we might know the final string length.
1513     if (srcStrLengthNL && dstStrLengthNL) {
1514       // Make sure the two lengths together don't overflow a size_t.
1515       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1516       if (!state)
1517         return;
1518 
1519       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1520                                                *dstStrLengthNL, sizeTy);
1521     }
1522 
1523     // If we couldn't get a single value for the final string length,
1524     // we can at least bound it by the individual lengths.
1525     if (finalStrLength.isUnknown()) {
1526       // Try to get a "hypothetical" string length symbol, which we can later
1527       // set as a real value if that turns out to be the case.
1528       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1529       assert(!finalStrLength.isUndef());
1530 
1531       if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1532         if (srcStrLengthNL) {
1533           // finalStrLength >= srcStrLength
1534           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1535                                                         *finalStrLengthNL,
1536                                                         *srcStrLengthNL,
1537                                                         cmpTy);
1538           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1539                                 true);
1540           if (!state)
1541             return;
1542         }
1543 
1544         if (dstStrLengthNL) {
1545           // finalStrLength >= dstStrLength
1546           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1547                                                       *finalStrLengthNL,
1548                                                       *dstStrLengthNL,
1549                                                       cmpTy);
1550           state =
1551               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1552           if (!state)
1553             return;
1554         }
1555       }
1556     }
1557 
1558   } else {
1559     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1560     // the final string length will match the input string length.
1561     finalStrLength = amountCopied;
1562   }
1563 
1564   // The final result of the function will either be a pointer past the last
1565   // copied element, or a pointer to the start of the destination buffer.
1566   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1567 
1568   assert(state);
1569 
1570   // If the destination is a MemRegion, try to check for a buffer overflow and
1571   // record the new string length.
1572   if (Optional<loc::MemRegionVal> dstRegVal =
1573           DstVal.getAs<loc::MemRegionVal>()) {
1574     QualType ptrTy = Dst->getType();
1575 
1576     // If we have an exact value on a bounded copy, use that to check for
1577     // overflows, rather than our estimate about how much is actually copied.
1578     if (boundWarning) {
1579       if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1580         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1581                                                       *maxLastNL, ptrTy);
1582         state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1583                               boundWarning);
1584         if (!state)
1585           return;
1586       }
1587     }
1588 
1589     // Then, if the final length is known...
1590     if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1591       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1592                                                  *knownStrLength, ptrTy);
1593 
1594       // ...and we haven't checked the bound, we'll check the actual copy.
1595       if (!boundWarning) {
1596         const char * const warningMsg =
1597           "String copy function overflows destination buffer";
1598         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1599         if (!state)
1600           return;
1601       }
1602 
1603       // If this is a stpcpy-style copy, the last element is the return value.
1604       if (returnEnd)
1605         Result = lastElement;
1606     }
1607 
1608     // Invalidate the destination (regular invalidation without pointer-escaping
1609     // the address of the top-level region). This must happen before we set the
1610     // C string length because invalidation will clear the length.
1611     // FIXME: Even if we can't perfectly model the copy, we should see if we
1612     // can use LazyCompoundVals to copy the source values into the destination.
1613     // This would probably remove any existing bindings past the end of the
1614     // string, but that's still an improvement over blank invalidation.
1615     state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1616                              /*IsSourceBuffer*/false);
1617 
1618     // Invalidate the source (const-invalidation without const-pointer-escaping
1619     // the address of the top-level region).
1620     state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);
1621 
1622     // Set the C string length of the destination, if we know it.
1623     if (isBounded && !isAppending) {
1624       // strncpy is annoying in that it doesn't guarantee to null-terminate
1625       // the result string. If the original string didn't fit entirely inside
1626       // the bound (including the null-terminator), we don't know how long the
1627       // result is.
1628       if (amountCopied != strLength)
1629         finalStrLength = UnknownVal();
1630     }
1631     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1632   }
1633 
1634   assert(state);
1635 
1636   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1637   // overflow, we still need a result. Conjure a return value.
1638   if (returnEnd && Result.isUnknown()) {
1639     Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1640   }
1641 
1642   // Set the return value.
1643   state = state->BindExpr(CE, LCtx, Result);
1644   C.addTransition(state);
1645 }
1646 
1647 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1648   if (CE->getNumArgs() < 2)
1649     return;
1650 
1651   //int strcmp(const char *s1, const char *s2);
1652   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1653 }
1654 
1655 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1656   if (CE->getNumArgs() < 3)
1657     return;
1658 
1659   //int strncmp(const char *s1, const char *s2, size_t n);
1660   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1661 }
1662 
1663 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1664                                     const CallExpr *CE) const {
1665   if (CE->getNumArgs() < 2)
1666     return;
1667 
1668   //int strcasecmp(const char *s1, const char *s2);
1669   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1670 }
1671 
1672 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1673                                      const CallExpr *CE) const {
1674   if (CE->getNumArgs() < 3)
1675     return;
1676 
1677   //int strncasecmp(const char *s1, const char *s2, size_t n);
1678   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1679 }
1680 
1681 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1682                                       bool isBounded, bool ignoreCase) const {
1683   CurrentFunctionDescription = "string comparison function";
1684   ProgramStateRef state = C.getState();
1685   const LocationContext *LCtx = C.getLocationContext();
1686 
1687   // Check that the first string is non-null
1688   const Expr *s1 = CE->getArg(0);
1689   SVal s1Val = state->getSVal(s1, LCtx);
1690   state = checkNonNull(C, state, s1, s1Val);
1691   if (!state)
1692     return;
1693 
1694   // Check that the second string is non-null.
1695   const Expr *s2 = CE->getArg(1);
1696   SVal s2Val = state->getSVal(s2, LCtx);
1697   state = checkNonNull(C, state, s2, s2Val);
1698   if (!state)
1699     return;
1700 
1701   // Get the string length of the first string or give up.
1702   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1703   if (s1Length.isUndef())
1704     return;
1705 
1706   // Get the string length of the second string or give up.
1707   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1708   if (s2Length.isUndef())
1709     return;
1710 
1711   // If we know the two buffers are the same, we know the result is 0.
1712   // First, get the two buffers' addresses. Another checker will have already
1713   // made sure they're not undefined.
1714   DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1715   DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1716 
1717   // See if they are the same.
1718   SValBuilder &svalBuilder = C.getSValBuilder();
1719   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1720   ProgramStateRef StSameBuf, StNotSameBuf;
1721   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1722 
1723   // If the two arguments might be the same buffer, we know the result is 0,
1724   // and we only need to check one size.
1725   if (StSameBuf) {
1726     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1727                                     svalBuilder.makeZeroVal(CE->getType()));
1728     C.addTransition(StSameBuf);
1729 
1730     // If the two arguments are GUARANTEED to be the same, we're done!
1731     if (!StNotSameBuf)
1732       return;
1733   }
1734 
1735   assert(StNotSameBuf);
1736   state = StNotSameBuf;
1737 
1738   // At this point we can go about comparing the two buffers.
1739   // For now, we only do this if they're both known string literals.
1740 
1741   // Attempt to extract string literals from both expressions.
1742   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1743   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1744   bool canComputeResult = false;
1745 
1746   if (s1StrLiteral && s2StrLiteral) {
1747     StringRef s1StrRef = s1StrLiteral->getString();
1748     StringRef s2StrRef = s2StrLiteral->getString();
1749 
1750     if (isBounded) {
1751       // Get the max number of characters to compare.
1752       const Expr *lenExpr = CE->getArg(2);
1753       SVal lenVal = state->getSVal(lenExpr, LCtx);
1754 
1755       // If the length is known, we can get the right substrings.
1756       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1757         // Create substrings of each to compare the prefix.
1758         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1759         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1760         canComputeResult = true;
1761       }
1762     } else {
1763       // This is a normal, unbounded strcmp.
1764       canComputeResult = true;
1765     }
1766 
1767     if (canComputeResult) {
1768       // Real strcmp stops at null characters.
1769       size_t s1Term = s1StrRef.find('\0');
1770       if (s1Term != StringRef::npos)
1771         s1StrRef = s1StrRef.substr(0, s1Term);
1772 
1773       size_t s2Term = s2StrRef.find('\0');
1774       if (s2Term != StringRef::npos)
1775         s2StrRef = s2StrRef.substr(0, s2Term);
1776 
1777       // Use StringRef's comparison methods to compute the actual result.
1778       int result;
1779 
1780       if (ignoreCase) {
1781         // Compare string 1 to string 2 the same way strcasecmp() does.
1782         result = s1StrRef.compare_lower(s2StrRef);
1783       } else {
1784         // Compare string 1 to string 2 the same way strcmp() does.
1785         result = s1StrRef.compare(s2StrRef);
1786       }
1787 
1788       // Build the SVal of the comparison and bind the return value.
1789       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1790       state = state->BindExpr(CE, LCtx, resultVal);
1791     }
1792   }
1793 
1794   if (!canComputeResult) {
1795     // Conjure a symbolic value. It's the best we can do.
1796     SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1797     state = state->BindExpr(CE, LCtx, resultVal);
1798   }
1799 
1800   // Record this as a possible path.
1801   C.addTransition(state);
1802 }
1803 
1804 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1805   //char *strsep(char **stringp, const char *delim);
1806   if (CE->getNumArgs() < 2)
1807     return;
1808 
1809   // Sanity: does the search string parameter match the return type?
1810   const Expr *SearchStrPtr = CE->getArg(0);
1811   QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1812   if (CharPtrTy.isNull() ||
1813       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1814     return;
1815 
1816   CurrentFunctionDescription = "strsep()";
1817   ProgramStateRef State = C.getState();
1818   const LocationContext *LCtx = C.getLocationContext();
1819 
1820   // Check that the search string pointer is non-null (though it may point to
1821   // a null string).
1822   SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1823   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1824   if (!State)
1825     return;
1826 
1827   // Check that the delimiter string is non-null.
1828   const Expr *DelimStr = CE->getArg(1);
1829   SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1830   State = checkNonNull(C, State, DelimStr, DelimStrVal);
1831   if (!State)
1832     return;
1833 
1834   SValBuilder &SVB = C.getSValBuilder();
1835   SVal Result;
1836   if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1837     // Get the current value of the search string pointer, as a char*.
1838     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1839 
1840     // Invalidate the search string, representing the change of one delimiter
1841     // character to NUL.
1842     State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1843                              /*IsSourceBuffer*/false);
1844 
1845     // Overwrite the search string pointer. The new value is either an address
1846     // further along in the same string, or NULL if there are no more tokens.
1847     State = State->bindLoc(*SearchStrLoc,
1848                            SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1849                                                 C.blockCount()));
1850   } else {
1851     assert(SearchStrVal.isUnknown());
1852     // Conjure a symbolic value. It's the best we can do.
1853     Result = SVB.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1854   }
1855 
1856   // Set the return value, and finish.
1857   State = State->BindExpr(CE, LCtx, Result);
1858   C.addTransition(State);
1859 }
1860 
1861 
1862 //===----------------------------------------------------------------------===//
1863 // The driver method, and other Checker callbacks.
1864 //===----------------------------------------------------------------------===//
1865 
1866 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1867   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1868 
1869   if (!FDecl)
1870     return false;
1871 
1872   // FIXME: Poorly-factored string switches are slow.
1873   FnCheck evalFunction = 0;
1874   if (C.isCLibraryFunction(FDecl, "memcpy"))
1875     evalFunction =  &CStringChecker::evalMemcpy;
1876   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1877     evalFunction =  &CStringChecker::evalMempcpy;
1878   else if (C.isCLibraryFunction(FDecl, "memcmp"))
1879     evalFunction =  &CStringChecker::evalMemcmp;
1880   else if (C.isCLibraryFunction(FDecl, "memmove"))
1881     evalFunction =  &CStringChecker::evalMemmove;
1882   else if (C.isCLibraryFunction(FDecl, "strcpy"))
1883     evalFunction =  &CStringChecker::evalStrcpy;
1884   else if (C.isCLibraryFunction(FDecl, "strncpy"))
1885     evalFunction =  &CStringChecker::evalStrncpy;
1886   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1887     evalFunction =  &CStringChecker::evalStpcpy;
1888   else if (C.isCLibraryFunction(FDecl, "strcat"))
1889     evalFunction =  &CStringChecker::evalStrcat;
1890   else if (C.isCLibraryFunction(FDecl, "strncat"))
1891     evalFunction =  &CStringChecker::evalStrncat;
1892   else if (C.isCLibraryFunction(FDecl, "strlen"))
1893     evalFunction =  &CStringChecker::evalstrLength;
1894   else if (C.isCLibraryFunction(FDecl, "strnlen"))
1895     evalFunction =  &CStringChecker::evalstrnLength;
1896   else if (C.isCLibraryFunction(FDecl, "strcmp"))
1897     evalFunction =  &CStringChecker::evalStrcmp;
1898   else if (C.isCLibraryFunction(FDecl, "strncmp"))
1899     evalFunction =  &CStringChecker::evalStrncmp;
1900   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1901     evalFunction =  &CStringChecker::evalStrcasecmp;
1902   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1903     evalFunction =  &CStringChecker::evalStrncasecmp;
1904   else if (C.isCLibraryFunction(FDecl, "strsep"))
1905     evalFunction =  &CStringChecker::evalStrsep;
1906   else if (C.isCLibraryFunction(FDecl, "bcopy"))
1907     evalFunction =  &CStringChecker::evalBcopy;
1908   else if (C.isCLibraryFunction(FDecl, "bcmp"))
1909     evalFunction =  &CStringChecker::evalMemcmp;
1910 
1911   // If the callee isn't a string function, let another checker handle it.
1912   if (!evalFunction)
1913     return false;
1914 
1915   // Make sure each function sets its own description.
1916   // (But don't bother in a release build.)
1917   assert(!(CurrentFunctionDescription = NULL));
1918 
1919   // Check and evaluate the call.
1920   (this->*evalFunction)(C, CE);
1921 
1922   // If the evaluate call resulted in no change, chain to the next eval call
1923   // handler.
1924   // Note, the custom CString evaluation calls assume that basic safety
1925   // properties are held. However, if the user chooses to turn off some of these
1926   // checks, we ignore the issues and leave the call evaluation to a generic
1927   // handler.
1928   if (!C.isDifferent())
1929     return false;
1930 
1931   return true;
1932 }
1933 
1934 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1935   // Record string length for char a[] = "abc";
1936   ProgramStateRef state = C.getState();
1937 
1938   for (const auto *I : DS->decls()) {
1939     const VarDecl *D = dyn_cast<VarDecl>(I);
1940     if (!D)
1941       continue;
1942 
1943     // FIXME: Handle array fields of structs.
1944     if (!D->getType()->isArrayType())
1945       continue;
1946 
1947     const Expr *Init = D->getInit();
1948     if (!Init)
1949       continue;
1950     if (!isa<StringLiteral>(Init))
1951       continue;
1952 
1953     Loc VarLoc = state->getLValue(D, C.getLocationContext());
1954     const MemRegion *MR = VarLoc.getAsRegion();
1955     if (!MR)
1956       continue;
1957 
1958     SVal StrVal = state->getSVal(Init, C.getLocationContext());
1959     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1960     DefinedOrUnknownSVal strLength =
1961         getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
1962 
1963     state = state->set<CStringLength>(MR, strLength);
1964   }
1965 
1966   C.addTransition(state);
1967 }
1968 
1969 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1970   CStringLengthTy Entries = state->get<CStringLength>();
1971   return !Entries.isEmpty();
1972 }
1973 
1974 ProgramStateRef
1975 CStringChecker::checkRegionChanges(ProgramStateRef state,
1976                                    const InvalidatedSymbols *,
1977                                    ArrayRef<const MemRegion *> ExplicitRegions,
1978                                    ArrayRef<const MemRegion *> Regions,
1979                                    const CallEvent *Call) const {
1980   CStringLengthTy Entries = state->get<CStringLength>();
1981   if (Entries.isEmpty())
1982     return state;
1983 
1984   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1985   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1986 
1987   // First build sets for the changed regions and their super-regions.
1988   for (ArrayRef<const MemRegion *>::iterator
1989        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1990     const MemRegion *MR = *I;
1991     Invalidated.insert(MR);
1992 
1993     SuperRegions.insert(MR);
1994     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1995       MR = SR->getSuperRegion();
1996       SuperRegions.insert(MR);
1997     }
1998   }
1999 
2000   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2001 
2002   // Then loop over the entries in the current state.
2003   for (CStringLengthTy::iterator I = Entries.begin(),
2004        E = Entries.end(); I != E; ++I) {
2005     const MemRegion *MR = I.getKey();
2006 
2007     // Is this entry for a super-region of a changed region?
2008     if (SuperRegions.count(MR)) {
2009       Entries = F.remove(Entries, MR);
2010       continue;
2011     }
2012 
2013     // Is this entry for a sub-region of a changed region?
2014     const MemRegion *Super = MR;
2015     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2016       Super = SR->getSuperRegion();
2017       if (Invalidated.count(Super)) {
2018         Entries = F.remove(Entries, MR);
2019         break;
2020       }
2021     }
2022   }
2023 
2024   return state->set<CStringLength>(Entries);
2025 }
2026 
2027 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2028                                       SymbolReaper &SR) const {
2029   // Mark all symbols in our string length map as valid.
2030   CStringLengthTy Entries = state->get<CStringLength>();
2031 
2032   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2033        I != E; ++I) {
2034     SVal Len = I.getData();
2035 
2036     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2037                                   se = Len.symbol_end(); si != se; ++si)
2038       SR.markInUse(*si);
2039   }
2040 }
2041 
2042 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2043                                       CheckerContext &C) const {
2044   if (!SR.hasDeadSymbols())
2045     return;
2046 
2047   ProgramStateRef state = C.getState();
2048   CStringLengthTy Entries = state->get<CStringLength>();
2049   if (Entries.isEmpty())
2050     return;
2051 
2052   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2053   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2054        I != E; ++I) {
2055     SVal Len = I.getData();
2056     if (SymbolRef Sym = Len.getAsSymbol()) {
2057       if (SR.isDead(Sym))
2058         Entries = F.remove(Entries, I.getKey());
2059     }
2060   }
2061 
2062   state = state->set<CStringLength>(Entries);
2063   C.addTransition(state);
2064 }
2065 
2066 #define REGISTER_CHECKER(name)                                                 \
2067   void ento::register##name(CheckerManager &mgr) {                             \
2068     CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
2069     checker->Filter.Check##name = true;                                        \
2070     checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
2071   }
2072 
2073 REGISTER_CHECKER(CStringNullArg)
2074 REGISTER_CHECKER(CStringOutOfBounds)
2075 REGISTER_CHECKER(CStringBufferOverlap)
2076 REGISTER_CHECKER(CStringNotNullTerm)
2077 
2078 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2079   registerCStringNullArg(Mgr);
2080 }
2081