xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 18470e32870a408960345419df788675edcd7b77)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "clang/StaticAnalyzer/Core/Checker.h"
17 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h"
21 #include "llvm/ADT/StringSwitch.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
26 namespace {
27 class CStringChecker : public Checker< eval::Call,
28                                          check::PreStmt<DeclStmt>,
29                                          check::LiveSymbols,
30                                          check::DeadSymbols,
31                                          check::RegionChanges
32                                          > {
33   mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite,
34                                    BT_Overlap, BT_NotCString;
35 public:
36   static void *getTag() { static int tag; return &tag; }
37 
38   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
39   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
40   void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const;
41   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
42   bool wantsRegionChangeUpdate(const GRState *state) const;
43 
44   const GRState *checkRegionChanges(const GRState *state,
45                                     const MemRegion * const *Begin,
46                                     const MemRegion * const *End) const;
47 
48   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
49                                           const CallExpr *) const;
50 
51   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
52   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
53   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
54   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
55   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
56                       const GRState *state,
57                       const Expr *Size, const Expr *Source, const Expr *Dest,
58                       bool Restricted = false,
59                       bool IsMempcpy = false) const;
60 
61   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
62 
63   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
64   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
65   void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
66                            bool IsStrnlen = false) const;
67 
68   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
69   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
70   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
71   void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd,
72                         bool isBounded, bool isAppending) const;
73 
74   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
75   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
76 
77   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
78   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
79   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
80   void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
81                         bool isBounded = false, bool ignoreCase = false) const;
82 
83   // Utility methods
84   std::pair<const GRState*, const GRState*>
85   static assumeZero(CheckerContext &C,
86                     const GRState *state, SVal V, QualType Ty);
87 
88   static const GRState *setCStringLength(const GRState *state,
89                                          const MemRegion *MR, SVal strLength);
90   static SVal getCStringLengthForRegion(CheckerContext &C,
91                                         const GRState *&state,
92                                         const Expr *Ex, const MemRegion *MR);
93   SVal getCStringLength(CheckerContext &C, const GRState *&state,
94                         const Expr *Ex, SVal Buf) const;
95 
96   const StringLiteral *getCStringLiteral(CheckerContext &C,
97                                          const GRState *&state,
98                                          const Expr *expr,
99                                          SVal val) const;
100 
101   static const GRState *InvalidateBuffer(CheckerContext &C,
102                                          const GRState *state,
103                                          const Expr *Ex, SVal V);
104 
105   static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
106                               const MemRegion *MR);
107 
108   // Re-usable checks
109   const GRState *checkNonNull(CheckerContext &C, const GRState *state,
110                                const Expr *S, SVal l) const;
111   const GRState *CheckLocation(CheckerContext &C, const GRState *state,
112                                const Expr *S, SVal l,
113                                bool IsDestination = false) const;
114   const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
115                                    const Expr *Size,
116                                    const Expr *FirstBuf,
117                                    const Expr *SecondBuf = NULL,
118                                    bool FirstIsDestination = false) const;
119   const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
120                               const Expr *Size, const Expr *First,
121                               const Expr *Second) const;
122   void emitOverlapBug(CheckerContext &C, const GRState *state,
123                       const Stmt *First, const Stmt *Second) const;
124 };
125 
126 class CStringLength {
127 public:
128   typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
129 };
130 } //end anonymous namespace
131 
132 namespace clang {
133 namespace ento {
134   template <>
135   struct GRStateTrait<CStringLength>
136     : public GRStatePartialTrait<CStringLength::EntryMap> {
137     static void *GDMIndex() { return CStringChecker::getTag(); }
138   };
139 }
140 }
141 
142 //===----------------------------------------------------------------------===//
143 // Individual checks and utility methods.
144 //===----------------------------------------------------------------------===//
145 
146 std::pair<const GRState*, const GRState*>
147 CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V,
148                            QualType Ty) {
149   DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
150   if (!val)
151     return std::pair<const GRState*, const GRState *>(state, state);
152 
153   SValBuilder &svalBuilder = C.getSValBuilder();
154   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
155   return state->assume(svalBuilder.evalEQ(state, *val, zero));
156 }
157 
158 const GRState *CStringChecker::checkNonNull(CheckerContext &C,
159                                             const GRState *state,
160                                             const Expr *S, SVal l) const {
161   // If a previous check has failed, propagate the failure.
162   if (!state)
163     return NULL;
164 
165   const GRState *stateNull, *stateNonNull;
166   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
167 
168   if (stateNull && !stateNonNull) {
169     ExplodedNode *N = C.generateSink(stateNull);
170     if (!N)
171       return NULL;
172 
173     if (!BT_Null)
174       BT_Null.reset(new BuiltinBug("API",
175         "Null pointer argument in call to byte string function"));
176 
177     // Generate a report for this bug.
178     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
179     EnhancedBugReport *report = new EnhancedBugReport(*BT,
180                                                       BT->getDescription(), N);
181 
182     report->addRange(S->getSourceRange());
183     report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S);
184     C.EmitReport(report);
185     return NULL;
186   }
187 
188   // From here on, assume that the value is non-null.
189   assert(stateNonNull);
190   return stateNonNull;
191 }
192 
193 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
194 const GRState *CStringChecker::CheckLocation(CheckerContext &C,
195                                              const GRState *state,
196                                              const Expr *S, SVal l,
197                                              bool IsDestination) const {
198   // If a previous check has failed, propagate the failure.
199   if (!state)
200     return NULL;
201 
202   // Check for out of bound array element access.
203   const MemRegion *R = l.getAsRegion();
204   if (!R)
205     return state;
206 
207   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
208   if (!ER)
209     return state;
210 
211   assert(ER->getValueType() == C.getASTContext().CharTy &&
212     "CheckLocation should only be called with char* ElementRegions");
213 
214   // Get the size of the array.
215   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
216   SValBuilder &svalBuilder = C.getSValBuilder();
217   SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
218   DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
219 
220   // Get the index of the accessed element.
221   DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
222 
223   const GRState *StInBound = state->assumeInBound(Idx, Size, true);
224   const GRState *StOutBound = state->assumeInBound(Idx, Size, false);
225   if (StOutBound && !StInBound) {
226     ExplodedNode *N = C.generateSink(StOutBound);
227     if (!N)
228       return NULL;
229 
230     BuiltinBug *BT;
231     if (IsDestination) {
232       if (!BT_BoundsWrite) {
233         BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access",
234           "Byte string function overflows destination buffer"));
235       }
236       BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get());
237     } else {
238       if (!BT_Bounds) {
239         BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
240           "Byte string function accesses out-of-bound array element"));
241       }
242       BT = static_cast<BuiltinBug*>(BT_Bounds.get());
243     }
244 
245     // FIXME: It would be nice to eventually make this diagnostic more clear,
246     // e.g., by referencing the original declaration or by saying *why* this
247     // reference is outside the range.
248 
249     // Generate a report for this bug.
250     RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
251 
252     report->addRange(S->getSourceRange());
253     C.EmitReport(report);
254     return NULL;
255   }
256 
257   // Array bound check succeeded.  From this point forward the array bound
258   // should always succeed.
259   return StInBound;
260 }
261 
262 const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
263                                                  const GRState *state,
264                                                  const Expr *Size,
265                                                  const Expr *FirstBuf,
266                                                  const Expr *SecondBuf,
267                                                 bool FirstIsDestination) const {
268   // If a previous check has failed, propagate the failure.
269   if (!state)
270     return NULL;
271 
272   SValBuilder &svalBuilder = C.getSValBuilder();
273   ASTContext &Ctx = C.getASTContext();
274 
275   QualType sizeTy = Size->getType();
276   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
277 
278   // Check that the first buffer is non-null.
279   SVal BufVal = state->getSVal(FirstBuf);
280   state = checkNonNull(C, state, FirstBuf, BufVal);
281   if (!state)
282     return NULL;
283 
284   // Get the access length and make sure it is known.
285   SVal LengthVal = state->getSVal(Size);
286   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
287   if (!Length)
288     return state;
289 
290   // Compute the offset of the last element to be accessed: size-1.
291   NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
292   NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
293                                                     *Length, One, sizeTy));
294 
295   // Check that the first buffer is sufficiently long.
296   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
297   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
298     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
299                                           LastOffset, PtrTy);
300     state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
301 
302     // If the buffer isn't large enough, abort.
303     if (!state)
304       return NULL;
305   }
306 
307   // If there's a second buffer, check it as well.
308   if (SecondBuf) {
309     BufVal = state->getSVal(SecondBuf);
310     state = checkNonNull(C, state, SecondBuf, BufVal);
311     if (!state)
312       return NULL;
313 
314     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
315     if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
316       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
317                                             LastOffset, PtrTy);
318       state = CheckLocation(C, state, SecondBuf, BufEnd);
319     }
320   }
321 
322   // Large enough or not, return this state!
323   return state;
324 }
325 
326 const GRState *CStringChecker::CheckOverlap(CheckerContext &C,
327                                             const GRState *state,
328                                             const Expr *Size,
329                                             const Expr *First,
330                                             const Expr *Second) const {
331   // Do a simple check for overlap: if the two arguments are from the same
332   // buffer, see if the end of the first is greater than the start of the second
333   // or vice versa.
334 
335   // If a previous check has failed, propagate the failure.
336   if (!state)
337     return NULL;
338 
339   const GRState *stateTrue, *stateFalse;
340 
341   // Get the buffer values and make sure they're known locations.
342   SVal firstVal = state->getSVal(First);
343   SVal secondVal = state->getSVal(Second);
344 
345   Loc *firstLoc = dyn_cast<Loc>(&firstVal);
346   if (!firstLoc)
347     return state;
348 
349   Loc *secondLoc = dyn_cast<Loc>(&secondVal);
350   if (!secondLoc)
351     return state;
352 
353   // Are the two values the same?
354   SValBuilder &svalBuilder = C.getSValBuilder();
355   llvm::tie(stateTrue, stateFalse) =
356     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
357 
358   if (stateTrue && !stateFalse) {
359     // If the values are known to be equal, that's automatically an overlap.
360     emitOverlapBug(C, stateTrue, First, Second);
361     return NULL;
362   }
363 
364   // assume the two expressions are not equal.
365   assert(stateFalse);
366   state = stateFalse;
367 
368   // Which value comes first?
369   ASTContext &Ctx = svalBuilder.getContext();
370   QualType cmpTy = Ctx.IntTy;
371   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
372                                          *firstLoc, *secondLoc, cmpTy);
373   DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
374   if (!reverseTest)
375     return state;
376 
377   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
378   if (stateTrue) {
379     if (stateFalse) {
380       // If we don't know which one comes first, we can't perform this test.
381       return state;
382     } else {
383       // Switch the values so that firstVal is before secondVal.
384       Loc *tmpLoc = firstLoc;
385       firstLoc = secondLoc;
386       secondLoc = tmpLoc;
387 
388       // Switch the Exprs as well, so that they still correspond.
389       const Expr *tmpExpr = First;
390       First = Second;
391       Second = tmpExpr;
392     }
393   }
394 
395   // Get the length, and make sure it too is known.
396   SVal LengthVal = state->getSVal(Size);
397   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
398   if (!Length)
399     return state;
400 
401   // Convert the first buffer's start address to char*.
402   // Bail out if the cast fails.
403   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
404   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType());
405   Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
406   if (!FirstStartLoc)
407     return state;
408 
409   // Compute the end of the first buffer. Bail out if THAT fails.
410   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
411                                  *FirstStartLoc, *Length, CharPtrTy);
412   Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
413   if (!FirstEndLoc)
414     return state;
415 
416   // Is the end of the first buffer past the start of the second buffer?
417   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
418                                 *FirstEndLoc, *secondLoc, cmpTy);
419   DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
420   if (!OverlapTest)
421     return state;
422 
423   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
424 
425   if (stateTrue && !stateFalse) {
426     // Overlap!
427     emitOverlapBug(C, stateTrue, First, Second);
428     return NULL;
429   }
430 
431   // assume the two expressions don't overlap.
432   assert(stateFalse);
433   return stateFalse;
434 }
435 
436 void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state,
437                                   const Stmt *First, const Stmt *Second) const {
438   ExplodedNode *N = C.generateSink(state);
439   if (!N)
440     return;
441 
442   if (!BT_Overlap)
443     BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
444 
445   // Generate a report for this bug.
446   RangedBugReport *report =
447     new RangedBugReport(*BT_Overlap,
448       "Arguments must not be overlapping buffers", N);
449   report->addRange(First->getSourceRange());
450   report->addRange(Second->getSourceRange());
451 
452   C.EmitReport(report);
453 }
454 
455 const GRState *CStringChecker::setCStringLength(const GRState *state,
456                                                 const MemRegion *MR,
457                                                 SVal strLength) {
458   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
459   if (strLength.isUnknown())
460     return state;
461 
462   MR = MR->StripCasts();
463 
464   switch (MR->getKind()) {
465   case MemRegion::StringRegionKind:
466     // FIXME: This can happen if we strcpy() into a string region. This is
467     // undefined [C99 6.4.5p6], but we should still warn about it.
468     return state;
469 
470   case MemRegion::SymbolicRegionKind:
471   case MemRegion::AllocaRegionKind:
472   case MemRegion::VarRegionKind:
473   case MemRegion::FieldRegionKind:
474   case MemRegion::ObjCIvarRegionKind:
475     return state->set<CStringLength>(MR, strLength);
476 
477   case MemRegion::ElementRegionKind:
478     // FIXME: Handle element regions by upper-bounding the parent region's
479     // string length.
480     return state;
481 
482   default:
483     // Other regions (mostly non-data) can't have a reliable C string length.
484     // For now, just ignore the change.
485     // FIXME: These are rare but not impossible. We should output some kind of
486     // warning for things like strcpy((char[]){'a', 0}, "b");
487     return state;
488   }
489 }
490 
491 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
492                                                const GRState *&state,
493                                                const Expr *Ex,
494                                                const MemRegion *MR) {
495   // If there's a recorded length, go ahead and return it.
496   const SVal *Recorded = state->get<CStringLength>(MR);
497   if (Recorded)
498     return *Recorded;
499 
500   // Otherwise, get a new symbol and update the state.
501   unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
502   SValBuilder &svalBuilder = C.getSValBuilder();
503   QualType sizeTy = svalBuilder.getContext().getSizeType();
504   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
505                                                     MR, Ex, sizeTy, Count);
506   state = state->set<CStringLength>(MR, strLength);
507   return strLength;
508 }
509 
510 SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state,
511                                       const Expr *Ex, SVal Buf) const {
512   const MemRegion *MR = Buf.getAsRegion();
513   if (!MR) {
514     // If we can't get a region, see if it's something we /know/ isn't a
515     // C string. In the context of locations, the only time we can issue such
516     // a warning is for labels.
517     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
518       if (ExplodedNode *N = C.generateNode(state)) {
519         if (!BT_NotCString)
520           BT_NotCString.reset(new BuiltinBug("API",
521             "Argument is not a null-terminated string."));
522 
523         llvm::SmallString<120> buf;
524         llvm::raw_svector_ostream os(buf);
525         os << "Argument to byte string function is the address of the label '"
526            << Label->getLabel()->getName()
527            << "', which is not a null-terminated string";
528 
529         // Generate a report for this bug.
530         EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
531                                                           os.str(), N);
532 
533         report->addRange(Ex->getSourceRange());
534         C.EmitReport(report);
535       }
536 
537       return UndefinedVal();
538     }
539 
540     // If it's not a region and not a label, give up.
541     return UnknownVal();
542   }
543 
544   // If we have a region, strip casts from it and see if we can figure out
545   // its length. For anything we can't figure out, just return UnknownVal.
546   MR = MR->StripCasts();
547 
548   switch (MR->getKind()) {
549   case MemRegion::StringRegionKind: {
550     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
551     // so we can assume that the byte length is the correct C string length.
552     SValBuilder &svalBuilder = C.getSValBuilder();
553     QualType sizeTy = svalBuilder.getContext().getSizeType();
554     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
555     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
556   }
557   case MemRegion::SymbolicRegionKind:
558   case MemRegion::AllocaRegionKind:
559   case MemRegion::VarRegionKind:
560   case MemRegion::FieldRegionKind:
561   case MemRegion::ObjCIvarRegionKind:
562     return getCStringLengthForRegion(C, state, Ex, MR);
563   case MemRegion::CompoundLiteralRegionKind:
564     // FIXME: Can we track this? Is it necessary?
565     return UnknownVal();
566   case MemRegion::ElementRegionKind:
567     // FIXME: How can we handle this? It's not good enough to subtract the
568     // offset from the base string length; consider "123\x00567" and &a[5].
569     return UnknownVal();
570   default:
571     // Other regions (mostly non-data) can't have a reliable C string length.
572     // In this case, an error is emitted and UndefinedVal is returned.
573     // The caller should always be prepared to handle this case.
574     if (ExplodedNode *N = C.generateNode(state)) {
575       if (!BT_NotCString)
576         BT_NotCString.reset(new BuiltinBug("API",
577           "Argument is not a null-terminated string."));
578 
579       llvm::SmallString<120> buf;
580       llvm::raw_svector_ostream os(buf);
581 
582       os << "Argument to byte string function is ";
583 
584       if (SummarizeRegion(os, C.getASTContext(), MR))
585         os << ", which is not a null-terminated string";
586       else
587         os << "not a null-terminated string";
588 
589       // Generate a report for this bug.
590       EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
591                                                         os.str(), N);
592 
593       report->addRange(Ex->getSourceRange());
594       C.EmitReport(report);
595     }
596 
597     return UndefinedVal();
598   }
599 }
600 
601 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
602   const GRState *&state, const Expr *expr, SVal val) const {
603 
604   // Get the memory region pointed to by the val.
605   const MemRegion *bufRegion = val.getAsRegion();
606   if (!bufRegion)
607     return NULL;
608 
609   // Strip casts off the memory region.
610   bufRegion = bufRegion->StripCasts();
611 
612   // Cast the memory region to a string region.
613   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
614   if (!strRegion)
615     return NULL;
616 
617   // Return the actual string in the string region.
618   return strRegion->getStringLiteral();
619 }
620 
621 const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
622                                                 const GRState *state,
623                                                 const Expr *E, SVal V) {
624   Loc *L = dyn_cast<Loc>(&V);
625   if (!L)
626     return state;
627 
628   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
629   // some assumptions about the value that CFRefCount can't. Even so, it should
630   // probably be refactored.
631   if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
632     const MemRegion *R = MR->getRegion()->StripCasts();
633 
634     // Are we dealing with an ElementRegion?  If so, we should be invalidating
635     // the super-region.
636     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
637       R = ER->getSuperRegion();
638       // FIXME: What about layers of ElementRegions?
639     }
640 
641     // Invalidate this region.
642     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
643     return state->invalidateRegion(R, E, Count, NULL);
644   }
645 
646   // If we have a non-region value by chance, just remove the binding.
647   // FIXME: is this necessary or correct? This handles the non-Region
648   //  cases.  Is it ever valid to store to these?
649   return state->unbindLoc(*L);
650 }
651 
652 bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
653                                      const MemRegion *MR) {
654   const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
655   if (!TR)
656     return false;
657 
658   switch (TR->getKind()) {
659   case MemRegion::FunctionTextRegionKind: {
660     const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
661     if (FD)
662       os << "the address of the function '" << FD << "'";
663     else
664       os << "the address of a function";
665     return true;
666   }
667   case MemRegion::BlockTextRegionKind:
668     os << "block text";
669     return true;
670   case MemRegion::BlockDataRegionKind:
671     os << "a block";
672     return true;
673   case MemRegion::CXXThisRegionKind:
674   case MemRegion::CXXTempObjectRegionKind:
675     os << "a C++ temp object of type " << TR->getValueType().getAsString();
676     return true;
677   case MemRegion::VarRegionKind:
678     os << "a variable of type" << TR->getValueType().getAsString();
679     return true;
680   case MemRegion::FieldRegionKind:
681     os << "a field of type " << TR->getValueType().getAsString();
682     return true;
683   case MemRegion::ObjCIvarRegionKind:
684     os << "an instance variable of type " << TR->getValueType().getAsString();
685     return true;
686   default:
687     return false;
688   }
689 }
690 
691 //===----------------------------------------------------------------------===//
692 // evaluation of individual function calls.
693 //===----------------------------------------------------------------------===//
694 
695 void CStringChecker::evalCopyCommon(CheckerContext &C,
696                                     const CallExpr *CE,
697                                     const GRState *state,
698                                     const Expr *Size, const Expr *Dest,
699                                     const Expr *Source, bool Restricted,
700                                     bool IsMempcpy) const {
701   // See if the size argument is zero.
702   SVal sizeVal = state->getSVal(Size);
703   QualType sizeTy = Size->getType();
704 
705   const GRState *stateZeroSize, *stateNonZeroSize;
706   llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy);
707 
708   // Get the value of the Dest.
709   SVal destVal = state->getSVal(Dest);
710 
711   // If the size is zero, there won't be any actual memory access, so
712   // just bind the return value to the destination buffer and return.
713   if (stateZeroSize) {
714     C.addTransition(stateZeroSize);
715     if (IsMempcpy)
716       state->BindExpr(CE, destVal);
717     else
718       state->BindExpr(CE, sizeVal);
719     return;
720   }
721 
722   // If the size can be nonzero, we have to check the other arguments.
723   if (stateNonZeroSize) {
724 
725     // Ensure the destination is not null. If it is NULL there will be a
726     // NULL pointer dereference.
727     state = checkNonNull(C, state, Dest, destVal);
728     if (!state)
729       return;
730 
731     // Get the value of the Src.
732     SVal srcVal = state->getSVal(Source);
733 
734     // Ensure the source is not null. If it is NULL there will be a
735     // NULL pointer dereference.
736     state = checkNonNull(C, state, Source, srcVal);
737     if (!state)
738       return;
739 
740     // Ensure the buffers do not overlap.
741     state = stateNonZeroSize;
742     state = CheckBufferAccess(C, state, Size, Dest, Source,
743                               /* FirstIsDst = */ true);
744     if (Restricted)
745       state = CheckOverlap(C, state, Size, Dest, Source);
746 
747     if (state) {
748 
749       // If this is mempcpy, get the byte after the last byte copied and
750       // bind the expr.
751       if (IsMempcpy) {
752         loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
753 
754         // Get the length to copy.
755         SVal lenVal = state->getSVal(Size);
756         NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&lenVal);
757 
758         // Get the byte after the last byte copied.
759         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
760                                                           *destRegVal,
761                                                           *lenValNonLoc,
762                                                           Dest->getType());
763 
764         // The byte after the last byte copied is the return value.
765         state = state->BindExpr(CE, lastElement);
766       }
767 
768       // Invalidate the destination.
769       // FIXME: Even if we can't perfectly model the copy, we should see if we
770       // can use LazyCompoundVals to copy the source values into the destination.
771       // This would probably remove any existing bindings past the end of the
772       // copied region, but that's still an improvement over blank invalidation.
773       state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
774       C.addTransition(state);
775     }
776   }
777 }
778 
779 
780 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
781   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
782   // The return value is the address of the destination buffer.
783   const Expr *Dest = CE->getArg(0);
784   const GRState *state = C.getState();
785   state = state->BindExpr(CE, state->getSVal(Dest));
786   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
787 }
788 
789 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
790   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
791   // The return value is a pointer to the byte following the last written byte.
792   const Expr *Dest = CE->getArg(0);
793   const GRState *state = C.getState();
794 
795   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
796 }
797 
798 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
799   // void *memmove(void *dst, const void *src, size_t n);
800   // The return value is the address of the destination buffer.
801   const Expr *Dest = CE->getArg(0);
802   const GRState *state = C.getState();
803   state = state->BindExpr(CE, state->getSVal(Dest));
804   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
805 }
806 
807 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
808   // void bcopy(const void *src, void *dst, size_t n);
809   evalCopyCommon(C, CE, C.getState(),
810                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
811 }
812 
813 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
814   // int memcmp(const void *s1, const void *s2, size_t n);
815   const Expr *Left = CE->getArg(0);
816   const Expr *Right = CE->getArg(1);
817   const Expr *Size = CE->getArg(2);
818 
819   const GRState *state = C.getState();
820   SValBuilder &svalBuilder = C.getSValBuilder();
821 
822   // See if the size argument is zero.
823   SVal sizeVal = state->getSVal(Size);
824   QualType sizeTy = Size->getType();
825 
826   const GRState *stateZeroSize, *stateNonZeroSize;
827   llvm::tie(stateZeroSize, stateNonZeroSize) =
828     assumeZero(C, state, sizeVal, sizeTy);
829 
830   // If the size can be zero, the result will be 0 in that case, and we don't
831   // have to check either of the buffers.
832   if (stateZeroSize) {
833     state = stateZeroSize;
834     state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
835     C.addTransition(state);
836   }
837 
838   // If the size can be nonzero, we have to check the other arguments.
839   if (stateNonZeroSize) {
840     state = stateNonZeroSize;
841     // If we know the two buffers are the same, we know the result is 0.
842     // First, get the two buffers' addresses. Another checker will have already
843     // made sure they're not undefined.
844     DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left));
845     DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right));
846 
847     // See if they are the same.
848     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
849     const GRState *StSameBuf, *StNotSameBuf;
850     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
851 
852     // If the two arguments might be the same buffer, we know the result is zero,
853     // and we only need to check one size.
854     if (StSameBuf) {
855       state = StSameBuf;
856       state = CheckBufferAccess(C, state, Size, Left);
857       if (state) {
858         state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
859         C.addTransition(state);
860       }
861     }
862 
863     // If the two arguments might be different buffers, we have to check the
864     // size of both of them.
865     if (StNotSameBuf) {
866       state = StNotSameBuf;
867       state = CheckBufferAccess(C, state, Size, Left, Right);
868       if (state) {
869         // The return value is the comparison result, which we don't know.
870         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
871         SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
872         state = state->BindExpr(CE, CmpV);
873         C.addTransition(state);
874       }
875     }
876   }
877 }
878 
879 void CStringChecker::evalstrLength(CheckerContext &C,
880                                    const CallExpr *CE) const {
881   // size_t strlen(const char *s);
882   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
883 }
884 
885 void CStringChecker::evalstrnLength(CheckerContext &C,
886                                     const CallExpr *CE) const {
887   // size_t strnlen(const char *s, size_t maxlen);
888   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
889 }
890 
891 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
892                                          bool IsStrnlen) const {
893   const GRState *state = C.getState();
894   const Expr *Arg = CE->getArg(0);
895   SVal ArgVal = state->getSVal(Arg);
896 
897   // Check that the argument is non-null.
898   state = checkNonNull(C, state, Arg, ArgVal);
899 
900   if (state) {
901     SVal strLength = getCStringLength(C, state, Arg, ArgVal);
902 
903     // If the argument isn't a valid C string, there's no valid state to
904     // transition to.
905     if (strLength.isUndef())
906       return;
907 
908     // If the check is for strnlen() then bind the return value to no more than
909     // the maxlen value.
910     if (IsStrnlen) {
911       const Expr *maxlenExpr = CE->getArg(1);
912       SVal maxlenVal = state->getSVal(maxlenExpr);
913 
914       NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
915       NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
916 
917       QualType cmpTy = C.getSValBuilder().getContext().IntTy;
918       const GRState *stateTrue, *stateFalse;
919 
920       // Check if the strLength is greater than or equal to the maxlen
921       llvm::tie(stateTrue, stateFalse) =
922         state->assume(cast<DefinedOrUnknownSVal>
923                       (C.getSValBuilder().evalBinOpNN(state, BO_GE,
924                                                       *strLengthNL, *maxlenValNL,
925                                                       cmpTy)));
926 
927       // If the strLength is greater than or equal to the maxlen, set strLength
928       // to maxlen
929       if (stateTrue && !stateFalse) {
930         strLength = maxlenVal;
931       }
932     }
933 
934     // If getCStringLength couldn't figure out the length, conjure a return
935     // value, so it can be used in constraints, at least.
936     if (strLength.isUnknown()) {
937       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
938       strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
939     }
940 
941     // Bind the return value.
942     state = state->BindExpr(CE, strLength);
943     C.addTransition(state);
944   }
945 }
946 
947 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
948   // char *strcpy(char *restrict dst, const char *restrict src);
949   evalStrcpyCommon(C, CE,
950                    /* returnEnd = */ false,
951                    /* isBounded = */ false,
952                    /* isAppending = */ false);
953 }
954 
955 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
956   // char *strcpy(char *restrict dst, const char *restrict src);
957   evalStrcpyCommon(C, CE,
958                    /* returnEnd = */ false,
959                    /* isBounded = */ true,
960                    /* isAppending = */ false);
961 }
962 
963 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
964   // char *stpcpy(char *restrict dst, const char *restrict src);
965   evalStrcpyCommon(C, CE,
966                    /* returnEnd = */ true,
967                    /* isBounded = */ false,
968                    /* isAppending = */ false);
969 }
970 
971 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
972   //char *strcat(char *restrict s1, const char *restrict s2);
973   evalStrcpyCommon(C, CE,
974                    /* returnEnd = */ false,
975                    /* isBounded = */ false,
976                    /* isAppending = */ true);
977 }
978 
979 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
980   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
981   evalStrcpyCommon(C, CE,
982                    /* returnEnd = */ false,
983                    /* isBounded = */ true,
984                    /* isAppending = */ true);
985 }
986 
987 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
988                                       bool returnEnd, bool isBounded,
989                                       bool isAppending) const {
990   const GRState *state = C.getState();
991 
992   // Check that the destination is non-null.
993   const Expr *Dst = CE->getArg(0);
994   SVal DstVal = state->getSVal(Dst);
995 
996   state = checkNonNull(C, state, Dst, DstVal);
997   if (!state)
998     return;
999 
1000   // Check that the source is non-null.
1001   const Expr *srcExpr = CE->getArg(1);
1002   SVal srcVal = state->getSVal(srcExpr);
1003   state = checkNonNull(C, state, srcExpr, srcVal);
1004   if (!state)
1005     return;
1006 
1007   // Get the string length of the source.
1008   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1009 
1010   // If the source isn't a valid C string, give up.
1011   if (strLength.isUndef())
1012     return;
1013 
1014   // If the function is strncpy, strncat, etc... it is bounded.
1015   if (isBounded) {
1016     // Get the max number of characters to copy.
1017     const Expr *lenExpr = CE->getArg(2);
1018     SVal lenVal = state->getSVal(lenExpr);
1019 
1020     // Cast the length to a NonLoc SVal. If it is not a NonLoc then give up.
1021     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1022     if (!strLengthNL)
1023       return;
1024 
1025     // Cast the max length to a NonLoc SVal. If it is not a NonLoc then give up.
1026     NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1027     if (!lenValNL)
1028       return;
1029 
1030     QualType cmpTy = C.getSValBuilder().getContext().IntTy;
1031     const GRState *stateTrue, *stateFalse;
1032 
1033     // Check if the max number to copy is less than the length of the src.
1034     llvm::tie(stateTrue, stateFalse) =
1035       state->assume(cast<DefinedOrUnknownSVal>
1036                     (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1037                                                     *strLengthNL, *lenValNL,
1038                                                     cmpTy)));
1039 
1040     if (stateTrue) {
1041       // Max number to copy is less than the length of the src, so the actual
1042       // strLength copied is the max number arg.
1043       strLength = lenVal;
1044     }
1045   }
1046 
1047   // If this is an appending function (strcat, strncat...) then set the
1048   // string length to strlen(src) + strlen(dst) since the buffer will
1049   // ultimately contain both.
1050   if (isAppending) {
1051     // Get the string length of the destination, or give up.
1052     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1053     if (dstStrLength.isUndef())
1054       return;
1055 
1056     NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&strLength);
1057     NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1058 
1059     // If src or dst cast to NonLoc is NULL, give up.
1060     if ((!srcStrLengthNL) || (!dstStrLengthNL))
1061       return;
1062 
1063     QualType addTy = C.getSValBuilder().getContext().getSizeType();
1064 
1065     strLength = C.getSValBuilder().evalBinOpNN(state, BO_Add,
1066                                                *srcStrLengthNL, *dstStrLengthNL,
1067                                                addTy);
1068   }
1069 
1070   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1071 
1072   // If the destination is a MemRegion, try to check for a buffer overflow and
1073   // record the new string length.
1074   if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1075     // If the length is known, we can check for an overflow.
1076     if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) {
1077       SVal lastElement =
1078         C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal,
1079                                        *knownStrLength, Dst->getType());
1080 
1081       state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true);
1082       if (!state)
1083         return;
1084 
1085       // If this is a stpcpy-style copy, the last element is the return value.
1086       if (returnEnd)
1087         Result = lastElement;
1088     }
1089 
1090     // Invalidate the destination. This must happen before we set the C string
1091     // length because invalidation will clear the length.
1092     // FIXME: Even if we can't perfectly model the copy, we should see if we
1093     // can use LazyCompoundVals to copy the source values into the destination.
1094     // This would probably remove any existing bindings past the end of the
1095     // string, but that's still an improvement over blank invalidation.
1096     state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1097 
1098     // Set the C string length of the destination.
1099     state = setCStringLength(state, dstRegVal->getRegion(), strLength);
1100   }
1101 
1102   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1103   // overflow, we still need a result. Conjure a return value.
1104   if (returnEnd && Result.isUnknown()) {
1105     SValBuilder &svalBuilder = C.getSValBuilder();
1106     unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1107     strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
1108   }
1109 
1110   // Set the return value.
1111   state = state->BindExpr(CE, Result);
1112   C.addTransition(state);
1113 }
1114 
1115 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1116   //int strcmp(const char *restrict s1, const char *restrict s2);
1117   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1118 }
1119 
1120 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1121   //int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
1122   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1123 }
1124 
1125 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1126                                     const CallExpr *CE) const {
1127   //int strcasecmp(const char *restrict s1, const char *restrict s2);
1128   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1129 }
1130 
1131 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1132                                       bool isBounded, bool ignoreCase) const {
1133   const GRState *state = C.getState();
1134 
1135   // Check that the first string is non-null
1136   const Expr *s1 = CE->getArg(0);
1137   SVal s1Val = state->getSVal(s1);
1138   state = checkNonNull(C, state, s1, s1Val);
1139   if (!state)
1140     return;
1141 
1142   // Check that the second string is non-null.
1143   const Expr *s2 = CE->getArg(1);
1144   SVal s2Val = state->getSVal(s2);
1145   state = checkNonNull(C, state, s2, s2Val);
1146   if (!state)
1147     return;
1148 
1149   // Get the string length of the first string or give up.
1150   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1151   if (s1Length.isUndef())
1152     return;
1153 
1154   // Get the string length of the second string or give up.
1155   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1156   if (s2Length.isUndef())
1157     return;
1158 
1159   // Get the string literal of the first string.
1160   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1161   if (!s1StrLiteral)
1162     return;
1163   llvm::StringRef s1StrRef = s1StrLiteral->getString();
1164 
1165   // Get the string literal of the second string.
1166   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1167   if (!s2StrLiteral)
1168     return;
1169   llvm::StringRef s2StrRef = s2StrLiteral->getString();
1170 
1171   int result;
1172   if (isBounded) {
1173     // Get the max number of characters to compare.
1174     const Expr *lenExpr = CE->getArg(2);
1175     SVal lenVal = state->getSVal(lenExpr);
1176 
1177     // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt
1178     // then give up, otherwise get the value and use it as the bounds.
1179     nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal);
1180     if (!CI)
1181       return;
1182     llvm::APSInt lenInt(CI->getValue());
1183 
1184     // Compare using the bounds provided like strncmp() does.
1185     if (ignoreCase) {
1186       // TODO Implement compare_lower(RHS, n) in LLVM StringRef.
1187       // result = s1StrRef.compare_lower(s2StrRef,
1188       //                                 (size_t)lenInt.getLimitedValue());
1189 
1190       // For now, give up.
1191       return;
1192     } else {
1193       // Create substrings of each to compare the prefix.
1194       llvm::StringRef s1SubStr =
1195         s1StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1196       llvm::StringRef s2SubStr =
1197         s2StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1198 
1199       // Compare the substrings.
1200       result = s1SubStr.compare(s2SubStr);
1201     }
1202   } else {
1203     // Compare string 1 to string 2 the same way strcmp() does.
1204     if (ignoreCase) {
1205       result = s1StrRef.compare_lower(s2StrRef);
1206     } else {
1207       result = s1StrRef.compare(s2StrRef);
1208     }
1209   }
1210 
1211   // Build the SVal of the comparison to bind the return value.
1212   SValBuilder &svalBuilder = C.getSValBuilder();
1213   QualType intTy = svalBuilder.getContext().IntTy;
1214   SVal resultVal = svalBuilder.makeIntVal(result, intTy);
1215 
1216   // Bind the return value of the expression.
1217   // Set the return value.
1218   state = state->BindExpr(CE, resultVal);
1219   C.addTransition(state);
1220 }
1221 
1222 //===----------------------------------------------------------------------===//
1223 // The driver method, and other Checker callbacks.
1224 //===----------------------------------------------------------------------===//
1225 
1226 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1227   // Get the callee.  All the functions we care about are C functions
1228   // with simple identifiers.
1229   const GRState *state = C.getState();
1230   const Expr *Callee = CE->getCallee();
1231   const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl();
1232 
1233   if (!FD)
1234     return false;
1235 
1236   // Get the name of the callee. If it's a builtin, strip off the prefix.
1237   IdentifierInfo *II = FD->getIdentifier();
1238   if (!II)   // if no identifier, not a simple C function
1239     return false;
1240   llvm::StringRef Name = II->getName();
1241   if (Name.startswith("__builtin_"))
1242     Name = Name.substr(10);
1243 
1244   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
1245     .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy)
1246     .Case("mempcpy", &CStringChecker::evalMempcpy)
1247     .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp)
1248     .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove)
1249     .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy)
1250     .Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy)
1251     .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy)
1252     .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat)
1253     .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat)
1254     .Case("strlen", &CStringChecker::evalstrLength)
1255     .Case("strnlen", &CStringChecker::evalstrnLength)
1256     .Case("strcmp", &CStringChecker::evalStrcmp)
1257     .Case("strncmp", &CStringChecker::evalStrncmp)
1258     .Case("strcasecmp", &CStringChecker::evalStrcasecmp)
1259     .Case("bcopy", &CStringChecker::evalBcopy)
1260     .Default(NULL);
1261 
1262   // If the callee isn't a string function, let another checker handle it.
1263   if (!evalFunction)
1264     return false;
1265 
1266   // Check and evaluate the call.
1267   (this->*evalFunction)(C, CE);
1268   return true;
1269 }
1270 
1271 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1272   // Record string length for char a[] = "abc";
1273   const GRState *state = C.getState();
1274 
1275   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1276        I != E; ++I) {
1277     const VarDecl *D = dyn_cast<VarDecl>(*I);
1278     if (!D)
1279       continue;
1280 
1281     // FIXME: Handle array fields of structs.
1282     if (!D->getType()->isArrayType())
1283       continue;
1284 
1285     const Expr *Init = D->getInit();
1286     if (!Init)
1287       continue;
1288     if (!isa<StringLiteral>(Init))
1289       continue;
1290 
1291     Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext());
1292     const MemRegion *MR = VarLoc.getAsRegion();
1293     if (!MR)
1294       continue;
1295 
1296     SVal StrVal = state->getSVal(Init);
1297     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1298     DefinedOrUnknownSVal strLength
1299       = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1300 
1301     state = state->set<CStringLength>(MR, strLength);
1302   }
1303 
1304   C.addTransition(state);
1305 }
1306 
1307 bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const {
1308   CStringLength::EntryMap Entries = state->get<CStringLength>();
1309   return !Entries.isEmpty();
1310 }
1311 
1312 const GRState *
1313 CStringChecker::checkRegionChanges(const GRState *state,
1314                                    const MemRegion * const *Begin,
1315                                    const MemRegion * const *End) const {
1316   CStringLength::EntryMap Entries = state->get<CStringLength>();
1317   if (Entries.isEmpty())
1318     return state;
1319 
1320   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1321   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1322 
1323   // First build sets for the changed regions and their super-regions.
1324   for ( ; Begin != End; ++Begin) {
1325     const MemRegion *MR = *Begin;
1326     Invalidated.insert(MR);
1327 
1328     SuperRegions.insert(MR);
1329     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1330       MR = SR->getSuperRegion();
1331       SuperRegions.insert(MR);
1332     }
1333   }
1334 
1335   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1336 
1337   // Then loop over the entries in the current state.
1338   for (CStringLength::EntryMap::iterator I = Entries.begin(),
1339        E = Entries.end(); I != E; ++I) {
1340     const MemRegion *MR = I.getKey();
1341 
1342     // Is this entry for a super-region of a changed region?
1343     if (SuperRegions.count(MR)) {
1344       Entries = F.remove(Entries, MR);
1345       continue;
1346     }
1347 
1348     // Is this entry for a sub-region of a changed region?
1349     const MemRegion *Super = MR;
1350     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1351       Super = SR->getSuperRegion();
1352       if (Invalidated.count(Super)) {
1353         Entries = F.remove(Entries, MR);
1354         break;
1355       }
1356     }
1357   }
1358 
1359   return state->set<CStringLength>(Entries);
1360 }
1361 
1362 void CStringChecker::checkLiveSymbols(const GRState *state,
1363                                       SymbolReaper &SR) const {
1364   // Mark all symbols in our string length map as valid.
1365   CStringLength::EntryMap Entries = state->get<CStringLength>();
1366 
1367   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1368        I != E; ++I) {
1369     SVal Len = I.getData();
1370     if (SymbolRef Sym = Len.getAsSymbol())
1371       SR.markInUse(Sym);
1372   }
1373 }
1374 
1375 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1376                                       CheckerContext &C) const {
1377   if (!SR.hasDeadSymbols())
1378     return;
1379 
1380   const GRState *state = C.getState();
1381   CStringLength::EntryMap Entries = state->get<CStringLength>();
1382   if (Entries.isEmpty())
1383     return;
1384 
1385   CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1386   for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1387        I != E; ++I) {
1388     SVal Len = I.getData();
1389     if (SymbolRef Sym = Len.getAsSymbol()) {
1390       if (SR.isDead(Sym))
1391         Entries = F.remove(Entries, I.getKey());
1392     }
1393   }
1394 
1395   state = state->set<CStringLength>(Entries);
1396   C.generateNode(state);
1397 }
1398 
1399 void ento::registerCStringChecker(CheckerManager &mgr) {
1400   mgr.registerChecker<CStringChecker>();
1401 }
1402