xref: /freebsd-src/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp (revision c9ccf3a32da427475985b85d7df023ccfb138c27)
1 // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
14 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
18 
19 using namespace clang;
20 using namespace ento;
21 
22 namespace {
23 class SimpleSValBuilder : public SValBuilder {
24 
25   // With one `simplifySValOnce` call, a compound symbols might collapse to
26   // simpler symbol tree that is still possible to further simplify. Thus, we
27   // do the simplification on a new symbol tree until we reach the simplest
28   // form, i.e. the fixpoint.
29   // Consider the following symbol `(b * b) * b * b` which has this tree:
30   //       *
31   //      / \
32   //     *   b
33   //    /  \
34   //   /    b
35   // (b * b)
36   // Now, if the `b * b == 1` new constraint is added then during the first
37   // iteration we have the following transformations:
38   //       *                  *
39   //      / \                / \
40   //     *   b     -->      b   b
41   //    /  \
42   //   /    b
43   //  1
44   // We need another iteration to reach the final result `1`.
45   SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
46 
47   // Recursively descends into symbolic expressions and replaces symbols
48   // with their known values (in the sense of the getKnownValue() method).
49   // We traverse the symbol tree and query the constraint values for the
50   // sub-trees and if a value is a constant we do the constant folding.
51   SVal simplifySValOnce(ProgramStateRef State, SVal V);
52 
53 public:
54   SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
55                     ProgramStateManager &stateMgr)
56       : SValBuilder(alloc, context, stateMgr) {}
57   ~SimpleSValBuilder() override {}
58 
59   SVal evalMinus(NonLoc val) override;
60   SVal evalComplement(NonLoc val) override;
61   SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
62                    NonLoc lhs, NonLoc rhs, QualType resultTy) override;
63   SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
64                    Loc lhs, Loc rhs, QualType resultTy) override;
65   SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
66                    Loc lhs, NonLoc rhs, QualType resultTy) override;
67 
68   /// getKnownValue - evaluates a given SVal. If the SVal has only one possible
69   ///  (integer) value, that value is returned. Otherwise, returns NULL.
70   const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
71 
72   SVal simplifySVal(ProgramStateRef State, SVal V) override;
73 
74   SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
75                      const llvm::APSInt &RHS, QualType resultTy);
76 };
77 } // end anonymous namespace
78 
79 SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
80                                            ASTContext &context,
81                                            ProgramStateManager &stateMgr) {
82   return new SimpleSValBuilder(alloc, context, stateMgr);
83 }
84 
85 //===----------------------------------------------------------------------===//
86 // Transfer function for unary operators.
87 //===----------------------------------------------------------------------===//
88 
89 SVal SimpleSValBuilder::evalMinus(NonLoc val) {
90   switch (val.getSubKind()) {
91   case nonloc::ConcreteIntKind:
92     return val.castAs<nonloc::ConcreteInt>().evalMinus(*this);
93   default:
94     return UnknownVal();
95   }
96 }
97 
98 SVal SimpleSValBuilder::evalComplement(NonLoc X) {
99   switch (X.getSubKind()) {
100   case nonloc::ConcreteIntKind:
101     return X.castAs<nonloc::ConcreteInt>().evalComplement(*this);
102   default:
103     return UnknownVal();
104   }
105 }
106 
107 //===----------------------------------------------------------------------===//
108 // Transfer function for binary operators.
109 //===----------------------------------------------------------------------===//
110 
111 SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
112                                     BinaryOperator::Opcode op,
113                                     const llvm::APSInt &RHS,
114                                     QualType resultTy) {
115   bool isIdempotent = false;
116 
117   // Check for a few special cases with known reductions first.
118   switch (op) {
119   default:
120     // We can't reduce this case; just treat it normally.
121     break;
122   case BO_Mul:
123     // a*0 and a*1
124     if (RHS == 0)
125       return makeIntVal(0, resultTy);
126     else if (RHS == 1)
127       isIdempotent = true;
128     break;
129   case BO_Div:
130     // a/0 and a/1
131     if (RHS == 0)
132       // This is also handled elsewhere.
133       return UndefinedVal();
134     else if (RHS == 1)
135       isIdempotent = true;
136     break;
137   case BO_Rem:
138     // a%0 and a%1
139     if (RHS == 0)
140       // This is also handled elsewhere.
141       return UndefinedVal();
142     else if (RHS == 1)
143       return makeIntVal(0, resultTy);
144     break;
145   case BO_Add:
146   case BO_Sub:
147   case BO_Shl:
148   case BO_Shr:
149   case BO_Xor:
150     // a+0, a-0, a<<0, a>>0, a^0
151     if (RHS == 0)
152       isIdempotent = true;
153     break;
154   case BO_And:
155     // a&0 and a&(~0)
156     if (RHS == 0)
157       return makeIntVal(0, resultTy);
158     else if (RHS.isAllOnes())
159       isIdempotent = true;
160     break;
161   case BO_Or:
162     // a|0 and a|(~0)
163     if (RHS == 0)
164       isIdempotent = true;
165     else if (RHS.isAllOnes()) {
166       const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
167       return nonloc::ConcreteInt(Result);
168     }
169     break;
170   }
171 
172   // Idempotent ops (like a*1) can still change the type of an expression.
173   // Wrap the LHS up in a NonLoc again and let evalCast do the
174   // dirty work.
175   if (isIdempotent)
176     return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
177 
178   // If we reach this point, the expression cannot be simplified.
179   // Make a SymbolVal for the entire expression, after converting the RHS.
180   const llvm::APSInt *ConvertedRHS = &RHS;
181   if (BinaryOperator::isComparisonOp(op)) {
182     // We're looking for a type big enough to compare the symbolic value
183     // with the given constant.
184     // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
185     ASTContext &Ctx = getContext();
186     QualType SymbolType = LHS->getType();
187     uint64_t ValWidth = RHS.getBitWidth();
188     uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
189 
190     if (ValWidth < TypeWidth) {
191       // If the value is too small, extend it.
192       ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
193     } else if (ValWidth == TypeWidth) {
194       // If the value is signed but the symbol is unsigned, do the comparison
195       // in unsigned space. [C99 6.3.1.8]
196       // (For the opposite case, the value is already unsigned.)
197       if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
198         ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
199     }
200   } else
201     ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
202 
203   return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
204 }
205 
206 // See if Sym is known to be a relation Rel with Bound.
207 static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym,
208                          llvm::APSInt Bound, ProgramStateRef State) {
209   SValBuilder &SVB = State->getStateManager().getSValBuilder();
210   SVal Result =
211       SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
212                       nonloc::ConcreteInt(Bound), SVB.getConditionType());
213   if (auto DV = Result.getAs<DefinedSVal>()) {
214     return !State->assume(*DV, false);
215   }
216   return false;
217 }
218 
219 // See if Sym is known to be within [min/4, max/4], where min and max
220 // are the bounds of the symbol's integral type. With such symbols,
221 // some manipulations can be performed without the risk of overflow.
222 // assume() doesn't cause infinite recursion because we should be dealing
223 // with simpler symbols on every recursive call.
224 static bool isWithinConstantOverflowBounds(SymbolRef Sym,
225                                            ProgramStateRef State) {
226   SValBuilder &SVB = State->getStateManager().getSValBuilder();
227   BasicValueFactory &BV = SVB.getBasicValueFactory();
228 
229   QualType T = Sym->getType();
230   assert(T->isSignedIntegerOrEnumerationType() &&
231          "This only works with signed integers!");
232   APSIntType AT = BV.getAPSIntType(T);
233 
234   llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
235   return isInRelation(BO_LE, Sym, Max, State) &&
236          isInRelation(BO_GE, Sym, Min, State);
237 }
238 
239 // Same for the concrete integers: see if I is within [min/4, max/4].
240 static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
241   APSIntType AT(I);
242   assert(!AT.isUnsigned() &&
243          "This only works with signed integers!");
244 
245   llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
246   return (I <= Max) && (I >= -Max);
247 }
248 
249 static std::pair<SymbolRef, llvm::APSInt>
250 decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) {
251   if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
252     if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
253       return std::make_pair(SymInt->getLHS(),
254                             (SymInt->getOpcode() == BO_Add) ?
255                             (SymInt->getRHS()) :
256                             (-SymInt->getRHS()));
257 
258   // Fail to decompose: "reduce" the problem to the "$x + 0" case.
259   return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
260 }
261 
262 // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
263 // same signed integral type and no overflows occur (which should be checked
264 // by the caller).
265 static NonLoc doRearrangeUnchecked(ProgramStateRef State,
266                                    BinaryOperator::Opcode Op,
267                                    SymbolRef LSym, llvm::APSInt LInt,
268                                    SymbolRef RSym, llvm::APSInt RInt) {
269   SValBuilder &SVB = State->getStateManager().getSValBuilder();
270   BasicValueFactory &BV = SVB.getBasicValueFactory();
271   SymbolManager &SymMgr = SVB.getSymbolManager();
272 
273   QualType SymTy = LSym->getType();
274   assert(SymTy == RSym->getType() &&
275          "Symbols are not of the same type!");
276   assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
277          "Integers are not of the same type as symbols!");
278   assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
279          "Integers are not of the same type as symbols!");
280 
281   QualType ResultTy;
282   if (BinaryOperator::isComparisonOp(Op))
283     ResultTy = SVB.getConditionType();
284   else if (BinaryOperator::isAdditiveOp(Op))
285     ResultTy = SymTy;
286   else
287     llvm_unreachable("Operation not suitable for unchecked rearrangement!");
288 
289   // FIXME: Can we use assume() without getting into an infinite recursion?
290   if (LSym == RSym)
291     return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
292                            nonloc::ConcreteInt(RInt), ResultTy)
293         .castAs<NonLoc>();
294 
295   SymbolRef ResultSym = nullptr;
296   BinaryOperator::Opcode ResultOp;
297   llvm::APSInt ResultInt;
298   if (BinaryOperator::isComparisonOp(Op)) {
299     // Prefer comparing to a non-negative number.
300     // FIXME: Maybe it'd be better to have consistency in
301     // "$x - $y" vs. "$y - $x" because those are solver's keys.
302     if (LInt > RInt) {
303       ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
304       ResultOp = BinaryOperator::reverseComparisonOp(Op);
305       ResultInt = LInt - RInt; // Opposite order!
306     } else {
307       ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
308       ResultOp = Op;
309       ResultInt = RInt - LInt; // Opposite order!
310     }
311   } else {
312     ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
313     ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
314     ResultOp = BO_Add;
315     // Bring back the cosmetic difference.
316     if (ResultInt < 0) {
317       ResultInt = -ResultInt;
318       ResultOp = BO_Sub;
319     } else if (ResultInt == 0) {
320       // Shortcut: Simplify "$x + 0" to "$x".
321       return nonloc::SymbolVal(ResultSym);
322     }
323   }
324   const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
325   return nonloc::SymbolVal(
326       SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
327 }
328 
329 // Rearrange if symbol type matches the result type and if the operator is a
330 // comparison operator, both symbol and constant must be within constant
331 // overflow bounds.
332 static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op,
333                             SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
334   return Sym->getType() == Ty &&
335     (!BinaryOperator::isComparisonOp(Op) ||
336      (isWithinConstantOverflowBounds(Sym, State) &&
337       isWithinConstantOverflowBounds(Int)));
338 }
339 
340 static Optional<NonLoc> tryRearrange(ProgramStateRef State,
341                                      BinaryOperator::Opcode Op, NonLoc Lhs,
342                                      NonLoc Rhs, QualType ResultTy) {
343   ProgramStateManager &StateMgr = State->getStateManager();
344   SValBuilder &SVB = StateMgr.getSValBuilder();
345 
346   // We expect everything to be of the same type - this type.
347   QualType SingleTy;
348 
349   // FIXME: After putting complexity threshold to the symbols we can always
350   //        rearrange additive operations but rearrange comparisons only if
351   //        option is set.
352   if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
353     return None;
354 
355   SymbolRef LSym = Lhs.getAsSymbol();
356   if (!LSym)
357     return None;
358 
359   if (BinaryOperator::isComparisonOp(Op)) {
360     SingleTy = LSym->getType();
361     if (ResultTy != SVB.getConditionType())
362       return None;
363     // Initialize SingleTy later with a symbol's type.
364   } else if (BinaryOperator::isAdditiveOp(Op)) {
365     SingleTy = ResultTy;
366     if (LSym->getType() != SingleTy)
367       return None;
368   } else {
369     // Don't rearrange other operations.
370     return None;
371   }
372 
373   assert(!SingleTy.isNull() && "We should have figured out the type by now!");
374 
375   // Rearrange signed symbolic expressions only
376   if (!SingleTy->isSignedIntegerOrEnumerationType())
377     return None;
378 
379   SymbolRef RSym = Rhs.getAsSymbol();
380   if (!RSym || RSym->getType() != SingleTy)
381     return None;
382 
383   BasicValueFactory &BV = State->getBasicVals();
384   llvm::APSInt LInt, RInt;
385   std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
386   std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
387   if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
388       !shouldRearrange(State, Op, RSym, RInt, SingleTy))
389     return None;
390 
391   // We know that no overflows can occur anymore.
392   return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
393 }
394 
395 SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
396                                   BinaryOperator::Opcode op,
397                                   NonLoc lhs, NonLoc rhs,
398                                   QualType resultTy)  {
399   NonLoc InputLHS = lhs;
400   NonLoc InputRHS = rhs;
401 
402   // Constraints may have changed since the creation of a bound SVal. Check if
403   // the values can be simplified based on those new constraints.
404   SVal simplifiedLhs = simplifySVal(state, lhs);
405   SVal simplifiedRhs = simplifySVal(state, rhs);
406   if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
407     lhs = *simplifiedLhsAsNonLoc;
408   if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
409     rhs = *simplifiedRhsAsNonLoc;
410 
411   // Handle trivial case where left-side and right-side are the same.
412   if (lhs == rhs)
413     switch (op) {
414       default:
415         break;
416       case BO_EQ:
417       case BO_LE:
418       case BO_GE:
419         return makeTruthVal(true, resultTy);
420       case BO_LT:
421       case BO_GT:
422       case BO_NE:
423         return makeTruthVal(false, resultTy);
424       case BO_Xor:
425       case BO_Sub:
426         if (resultTy->isIntegralOrEnumerationType())
427           return makeIntVal(0, resultTy);
428         return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
429                         QualType{});
430       case BO_Or:
431       case BO_And:
432         return evalCast(lhs, resultTy, QualType{});
433     }
434 
435   while (true) {
436     switch (lhs.getSubKind()) {
437     default:
438       return makeSymExprValNN(op, lhs, rhs, resultTy);
439     case nonloc::PointerToMemberKind: {
440       assert(rhs.getSubKind() == nonloc::PointerToMemberKind &&
441              "Both SVals should have pointer-to-member-type");
442       auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
443            RPTM = rhs.castAs<nonloc::PointerToMember>();
444       auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
445       switch (op) {
446         case BO_EQ:
447           return makeTruthVal(LPTMD == RPTMD, resultTy);
448         case BO_NE:
449           return makeTruthVal(LPTMD != RPTMD, resultTy);
450         default:
451           return UnknownVal();
452       }
453     }
454     case nonloc::LocAsIntegerKind: {
455       Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
456       switch (rhs.getSubKind()) {
457         case nonloc::LocAsIntegerKind:
458           // FIXME: at the moment the implementation
459           // of modeling "pointers as integers" is not complete.
460           if (!BinaryOperator::isComparisonOp(op))
461             return UnknownVal();
462           return evalBinOpLL(state, op, lhsL,
463                              rhs.castAs<nonloc::LocAsInteger>().getLoc(),
464                              resultTy);
465         case nonloc::ConcreteIntKind: {
466           // FIXME: at the moment the implementation
467           // of modeling "pointers as integers" is not complete.
468           if (!BinaryOperator::isComparisonOp(op))
469             return UnknownVal();
470           // Transform the integer into a location and compare.
471           // FIXME: This only makes sense for comparisons. If we want to, say,
472           // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
473           // then pack it back into a LocAsInteger.
474           llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
475           // If the region has a symbolic base, pay attention to the type; it
476           // might be coming from a non-default address space. For non-symbolic
477           // regions it doesn't matter that much because such comparisons would
478           // most likely evaluate to concrete false anyway. FIXME: We might
479           // still need to handle the non-comparison case.
480           if (SymbolRef lSym = lhs.getAsLocSymbol(true))
481             BasicVals.getAPSIntType(lSym->getType()).apply(i);
482           else
483             BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
484           return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
485         }
486         default:
487           switch (op) {
488             case BO_EQ:
489               return makeTruthVal(false, resultTy);
490             case BO_NE:
491               return makeTruthVal(true, resultTy);
492             default:
493               // This case also handles pointer arithmetic.
494               return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
495           }
496       }
497     }
498     case nonloc::ConcreteIntKind: {
499       llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
500 
501       // If we're dealing with two known constants, just perform the operation.
502       if (const llvm::APSInt *KnownRHSValue = getKnownValue(state, rhs)) {
503         llvm::APSInt RHSValue = *KnownRHSValue;
504         if (BinaryOperator::isComparisonOp(op)) {
505           // We're looking for a type big enough to compare the two values.
506           // FIXME: This is not correct. char + short will result in a promotion
507           // to int. Unfortunately we have lost types by this point.
508           APSIntType CompareType = std::max(APSIntType(LHSValue),
509                                             APSIntType(RHSValue));
510           CompareType.apply(LHSValue);
511           CompareType.apply(RHSValue);
512         } else if (!BinaryOperator::isShiftOp(op)) {
513           APSIntType IntType = BasicVals.getAPSIntType(resultTy);
514           IntType.apply(LHSValue);
515           IntType.apply(RHSValue);
516         }
517 
518         const llvm::APSInt *Result =
519           BasicVals.evalAPSInt(op, LHSValue, RHSValue);
520         if (!Result)
521           return UndefinedVal();
522 
523         return nonloc::ConcreteInt(*Result);
524       }
525 
526       // Swap the left and right sides and flip the operator if doing so
527       // allows us to better reason about the expression (this is a form
528       // of expression canonicalization).
529       // While we're at it, catch some special cases for non-commutative ops.
530       switch (op) {
531       case BO_LT:
532       case BO_GT:
533       case BO_LE:
534       case BO_GE:
535         op = BinaryOperator::reverseComparisonOp(op);
536         LLVM_FALLTHROUGH;
537       case BO_EQ:
538       case BO_NE:
539       case BO_Add:
540       case BO_Mul:
541       case BO_And:
542       case BO_Xor:
543       case BO_Or:
544         std::swap(lhs, rhs);
545         continue;
546       case BO_Shr:
547         // (~0)>>a
548         if (LHSValue.isAllOnes() && LHSValue.isSigned())
549           return evalCast(lhs, resultTy, QualType{});
550         LLVM_FALLTHROUGH;
551       case BO_Shl:
552         // 0<<a and 0>>a
553         if (LHSValue == 0)
554           return evalCast(lhs, resultTy, QualType{});
555         return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
556       case BO_Div:
557         // 0 / x == 0
558       case BO_Rem:
559         // 0 % x == 0
560         if (LHSValue == 0)
561           return makeZeroVal(resultTy);
562         LLVM_FALLTHROUGH;
563       default:
564         return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
565       }
566     }
567     case nonloc::SymbolValKind: {
568       // We only handle LHS as simple symbols or SymIntExprs.
569       SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
570 
571       // LHS is a symbolic expression.
572       if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
573 
574         // Is this a logical not? (!x is represented as x == 0.)
575         if (op == BO_EQ && rhs.isZeroConstant()) {
576           // We know how to negate certain expressions. Simplify them here.
577 
578           BinaryOperator::Opcode opc = symIntExpr->getOpcode();
579           switch (opc) {
580           default:
581             // We don't know how to negate this operation.
582             // Just handle it as if it were a normal comparison to 0.
583             break;
584           case BO_LAnd:
585           case BO_LOr:
586             llvm_unreachable("Logical operators handled by branching logic.");
587           case BO_Assign:
588           case BO_MulAssign:
589           case BO_DivAssign:
590           case BO_RemAssign:
591           case BO_AddAssign:
592           case BO_SubAssign:
593           case BO_ShlAssign:
594           case BO_ShrAssign:
595           case BO_AndAssign:
596           case BO_XorAssign:
597           case BO_OrAssign:
598           case BO_Comma:
599             llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
600           case BO_PtrMemD:
601           case BO_PtrMemI:
602             llvm_unreachable("Pointer arithmetic not handled here.");
603           case BO_LT:
604           case BO_GT:
605           case BO_LE:
606           case BO_GE:
607           case BO_EQ:
608           case BO_NE:
609             assert(resultTy->isBooleanType() ||
610                    resultTy == getConditionType());
611             assert(symIntExpr->getType()->isBooleanType() ||
612                    getContext().hasSameUnqualifiedType(symIntExpr->getType(),
613                                                        getConditionType()));
614             // Negate the comparison and make a value.
615             opc = BinaryOperator::negateComparisonOp(opc);
616             return makeNonLoc(symIntExpr->getLHS(), opc,
617                 symIntExpr->getRHS(), resultTy);
618           }
619         }
620 
621         // For now, only handle expressions whose RHS is a constant.
622         if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs)) {
623           // If both the LHS and the current expression are additive,
624           // fold their constants and try again.
625           if (BinaryOperator::isAdditiveOp(op)) {
626             BinaryOperator::Opcode lop = symIntExpr->getOpcode();
627             if (BinaryOperator::isAdditiveOp(lop)) {
628               // Convert the two constants to a common type, then combine them.
629 
630               // resultTy may not be the best type to convert to, but it's
631               // probably the best choice in expressions with mixed type
632               // (such as x+1U+2LL). The rules for implicit conversions should
633               // choose a reasonable type to preserve the expression, and will
634               // at least match how the value is going to be used.
635               APSIntType IntType = BasicVals.getAPSIntType(resultTy);
636               const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
637               const llvm::APSInt &second = IntType.convert(*RHSValue);
638 
639               const llvm::APSInt *newRHS;
640               if (lop == op)
641                 newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
642               else
643                 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
644 
645               assert(newRHS && "Invalid operation despite common type!");
646               rhs = nonloc::ConcreteInt(*newRHS);
647               lhs = nonloc::SymbolVal(symIntExpr->getLHS());
648               op = lop;
649               continue;
650             }
651           }
652 
653           // Otherwise, make a SymIntExpr out of the expression.
654           return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
655         }
656       }
657 
658       // Is the RHS a constant?
659       if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs))
660         return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
661 
662       if (Optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
663         return *V;
664 
665       // Give up -- this is not a symbolic expression we can handle.
666       return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
667     }
668     }
669   }
670 }
671 
672 static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR,
673                                             const FieldRegion *RightFR,
674                                             BinaryOperator::Opcode op,
675                                             QualType resultTy,
676                                             SimpleSValBuilder &SVB) {
677   // Only comparisons are meaningful here!
678   if (!BinaryOperator::isComparisonOp(op))
679     return UnknownVal();
680 
681   // Next, see if the two FRs have the same super-region.
682   // FIXME: This doesn't handle casts yet, and simply stripping the casts
683   // doesn't help.
684   if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
685     return UnknownVal();
686 
687   const FieldDecl *LeftFD = LeftFR->getDecl();
688   const FieldDecl *RightFD = RightFR->getDecl();
689   const RecordDecl *RD = LeftFD->getParent();
690 
691   // Make sure the two FRs are from the same kind of record. Just in case!
692   // FIXME: This is probably where inheritance would be a problem.
693   if (RD != RightFD->getParent())
694     return UnknownVal();
695 
696   // We know for sure that the two fields are not the same, since that
697   // would have given us the same SVal.
698   if (op == BO_EQ)
699     return SVB.makeTruthVal(false, resultTy);
700   if (op == BO_NE)
701     return SVB.makeTruthVal(true, resultTy);
702 
703   // Iterate through the fields and see which one comes first.
704   // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
705   // members and the units in which bit-fields reside have addresses that
706   // increase in the order in which they are declared."
707   bool leftFirst = (op == BO_LT || op == BO_LE);
708   for (const auto *I : RD->fields()) {
709     if (I == LeftFD)
710       return SVB.makeTruthVal(leftFirst, resultTy);
711     if (I == RightFD)
712       return SVB.makeTruthVal(!leftFirst, resultTy);
713   }
714 
715   llvm_unreachable("Fields not found in parent record's definition");
716 }
717 
718 // FIXME: all this logic will change if/when we have MemRegion::getLocation().
719 SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
720                                   BinaryOperator::Opcode op,
721                                   Loc lhs, Loc rhs,
722                                   QualType resultTy) {
723   // Only comparisons and subtractions are valid operations on two pointers.
724   // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
725   // However, if a pointer is casted to an integer, evalBinOpNN may end up
726   // calling this function with another operation (PR7527). We don't attempt to
727   // model this for now, but it could be useful, particularly when the
728   // "location" is actually an integer value that's been passed through a void*.
729   if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
730     return UnknownVal();
731 
732   // Special cases for when both sides are identical.
733   if (lhs == rhs) {
734     switch (op) {
735     default:
736       llvm_unreachable("Unimplemented operation for two identical values");
737     case BO_Sub:
738       return makeZeroVal(resultTy);
739     case BO_EQ:
740     case BO_LE:
741     case BO_GE:
742       return makeTruthVal(true, resultTy);
743     case BO_NE:
744     case BO_LT:
745     case BO_GT:
746       return makeTruthVal(false, resultTy);
747     }
748   }
749 
750   switch (lhs.getSubKind()) {
751   default:
752     llvm_unreachable("Ordering not implemented for this Loc.");
753 
754   case loc::GotoLabelKind:
755     // The only thing we know about labels is that they're non-null.
756     if (rhs.isZeroConstant()) {
757       switch (op) {
758       default:
759         break;
760       case BO_Sub:
761         return evalCast(lhs, resultTy, QualType{});
762       case BO_EQ:
763       case BO_LE:
764       case BO_LT:
765         return makeTruthVal(false, resultTy);
766       case BO_NE:
767       case BO_GT:
768       case BO_GE:
769         return makeTruthVal(true, resultTy);
770       }
771     }
772     // There may be two labels for the same location, and a function region may
773     // have the same address as a label at the start of the function (depending
774     // on the ABI).
775     // FIXME: we can probably do a comparison against other MemRegions, though.
776     // FIXME: is there a way to tell if two labels refer to the same location?
777     return UnknownVal();
778 
779   case loc::ConcreteIntKind: {
780     // If one of the operands is a symbol and the other is a constant,
781     // build an expression for use by the constraint manager.
782     if (SymbolRef rSym = rhs.getAsLocSymbol()) {
783       // We can only build expressions with symbols on the left,
784       // so we need a reversible operator.
785       if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
786         return UnknownVal();
787 
788       const llvm::APSInt &lVal = lhs.castAs<loc::ConcreteInt>().getValue();
789       op = BinaryOperator::reverseComparisonOp(op);
790       return makeNonLoc(rSym, op, lVal, resultTy);
791     }
792 
793     // If both operands are constants, just perform the operation.
794     if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
795       SVal ResultVal =
796           lhs.castAs<loc::ConcreteInt>().evalBinOp(BasicVals, op, *rInt);
797       if (Optional<NonLoc> Result = ResultVal.getAs<NonLoc>())
798         return evalCast(*Result, resultTy, QualType{});
799 
800       assert(!ResultVal.getAs<Loc>() && "Loc-Loc ops should not produce Locs");
801       return UnknownVal();
802     }
803 
804     // Special case comparisons against NULL.
805     // This must come after the test if the RHS is a symbol, which is used to
806     // build constraints. The address of any non-symbolic region is guaranteed
807     // to be non-NULL, as is any label.
808     assert(rhs.getAs<loc::MemRegionVal>() || rhs.getAs<loc::GotoLabel>());
809     if (lhs.isZeroConstant()) {
810       switch (op) {
811       default:
812         break;
813       case BO_EQ:
814       case BO_GT:
815       case BO_GE:
816         return makeTruthVal(false, resultTy);
817       case BO_NE:
818       case BO_LT:
819       case BO_LE:
820         return makeTruthVal(true, resultTy);
821       }
822     }
823 
824     // Comparing an arbitrary integer to a region or label address is
825     // completely unknowable.
826     return UnknownVal();
827   }
828   case loc::MemRegionValKind: {
829     if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
830       // If one of the operands is a symbol and the other is a constant,
831       // build an expression for use by the constraint manager.
832       if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
833         if (BinaryOperator::isComparisonOp(op))
834           return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
835         return UnknownVal();
836       }
837       // Special case comparisons to NULL.
838       // This must come after the test if the LHS is a symbol, which is used to
839       // build constraints. The address of any non-symbolic region is guaranteed
840       // to be non-NULL.
841       if (rInt->isZeroConstant()) {
842         if (op == BO_Sub)
843           return evalCast(lhs, resultTy, QualType{});
844 
845         if (BinaryOperator::isComparisonOp(op)) {
846           QualType boolType = getContext().BoolTy;
847           NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
848           NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
849           return evalBinOpNN(state, op, l, r, resultTy);
850         }
851       }
852 
853       // Comparing a region to an arbitrary integer is completely unknowable.
854       return UnknownVal();
855     }
856 
857     // Get both values as regions, if possible.
858     const MemRegion *LeftMR = lhs.getAsRegion();
859     assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
860 
861     const MemRegion *RightMR = rhs.getAsRegion();
862     if (!RightMR)
863       // The RHS is probably a label, which in theory could address a region.
864       // FIXME: we can probably make a more useful statement about non-code
865       // regions, though.
866       return UnknownVal();
867 
868     const MemRegion *LeftBase = LeftMR->getBaseRegion();
869     const MemRegion *RightBase = RightMR->getBaseRegion();
870     const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
871     const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
872     const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
873 
874     // If the two regions are from different known memory spaces they cannot be
875     // equal. Also, assume that no symbolic region (whose memory space is
876     // unknown) is on the stack.
877     if (LeftMS != RightMS &&
878         ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
879          (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
880       switch (op) {
881       default:
882         return UnknownVal();
883       case BO_EQ:
884         return makeTruthVal(false, resultTy);
885       case BO_NE:
886         return makeTruthVal(true, resultTy);
887       }
888     }
889 
890     // If both values wrap regions, see if they're from different base regions.
891     // Note, heap base symbolic regions are assumed to not alias with
892     // each other; for example, we assume that malloc returns different address
893     // on each invocation.
894     // FIXME: ObjC object pointers always reside on the heap, but currently
895     // we treat their memory space as unknown, because symbolic pointers
896     // to ObjC objects may alias. There should be a way to construct
897     // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
898     // guesses memory space for ObjC object pointers manually instead of
899     // relying on us.
900     if (LeftBase != RightBase &&
901         ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
902          (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
903       switch (op) {
904       default:
905         return UnknownVal();
906       case BO_EQ:
907         return makeTruthVal(false, resultTy);
908       case BO_NE:
909         return makeTruthVal(true, resultTy);
910       }
911     }
912 
913     // Handle special cases for when both regions are element regions.
914     const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
915     const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
916     if (RightER && LeftER) {
917       // Next, see if the two ERs have the same super-region and matching types.
918       // FIXME: This should do something useful even if the types don't match,
919       // though if both indexes are constant the RegionRawOffset path will
920       // give the correct answer.
921       if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
922           LeftER->getElementType() == RightER->getElementType()) {
923         // Get the left index and cast it to the correct type.
924         // If the index is unknown or undefined, bail out here.
925         SVal LeftIndexVal = LeftER->getIndex();
926         Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
927         if (!LeftIndex)
928           return UnknownVal();
929         LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
930         LeftIndex = LeftIndexVal.getAs<NonLoc>();
931         if (!LeftIndex)
932           return UnknownVal();
933 
934         // Do the same for the right index.
935         SVal RightIndexVal = RightER->getIndex();
936         Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
937         if (!RightIndex)
938           return UnknownVal();
939         RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
940         RightIndex = RightIndexVal.getAs<NonLoc>();
941         if (!RightIndex)
942           return UnknownVal();
943 
944         // Actually perform the operation.
945         // evalBinOpNN expects the two indexes to already be the right type.
946         return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
947       }
948     }
949 
950     // Special handling of the FieldRegions, even with symbolic offsets.
951     const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
952     const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
953     if (RightFR && LeftFR) {
954       SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
955                                                *this);
956       if (!R.isUnknown())
957         return R;
958     }
959 
960     // Compare the regions using the raw offsets.
961     RegionOffset LeftOffset = LeftMR->getAsOffset();
962     RegionOffset RightOffset = RightMR->getAsOffset();
963 
964     if (LeftOffset.getRegion() != nullptr &&
965         LeftOffset.getRegion() == RightOffset.getRegion() &&
966         !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
967       int64_t left = LeftOffset.getOffset();
968       int64_t right = RightOffset.getOffset();
969 
970       switch (op) {
971         default:
972           return UnknownVal();
973         case BO_LT:
974           return makeTruthVal(left < right, resultTy);
975         case BO_GT:
976           return makeTruthVal(left > right, resultTy);
977         case BO_LE:
978           return makeTruthVal(left <= right, resultTy);
979         case BO_GE:
980           return makeTruthVal(left >= right, resultTy);
981         case BO_EQ:
982           return makeTruthVal(left == right, resultTy);
983         case BO_NE:
984           return makeTruthVal(left != right, resultTy);
985       }
986     }
987 
988     // At this point we're not going to get a good answer, but we can try
989     // conjuring an expression instead.
990     SymbolRef LHSSym = lhs.getAsLocSymbol();
991     SymbolRef RHSSym = rhs.getAsLocSymbol();
992     if (LHSSym && RHSSym)
993       return makeNonLoc(LHSSym, op, RHSSym, resultTy);
994 
995     // If we get here, we have no way of comparing the regions.
996     return UnknownVal();
997   }
998   }
999 }
1000 
1001 SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1002                                     BinaryOperator::Opcode op, Loc lhs,
1003                                     NonLoc rhs, QualType resultTy) {
1004   if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1005     if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1006       if (PTMSV->isNullMemberPointer())
1007         return UndefinedVal();
1008 
1009       auto getFieldLValue = [&](const auto *FD) -> SVal {
1010         SVal Result = lhs;
1011 
1012         for (const auto &I : *PTMSV)
1013           Result = StateMgr.getStoreManager().evalDerivedToBase(
1014               Result, I->getType(), I->isVirtual());
1015 
1016         return state->getLValue(FD, Result);
1017       };
1018 
1019       if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1020         return getFieldLValue(FD);
1021       }
1022       if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1023         return getFieldLValue(FD);
1024       }
1025     }
1026 
1027     return rhs;
1028   }
1029 
1030   assert(!BinaryOperator::isComparisonOp(op) &&
1031          "arguments to comparison ops must be of the same type");
1032 
1033   // Special case: rhs is a zero constant.
1034   if (rhs.isZeroConstant())
1035     return lhs;
1036 
1037   // Perserve the null pointer so that it can be found by the DerefChecker.
1038   if (lhs.isZeroConstant())
1039     return lhs;
1040 
1041   // We are dealing with pointer arithmetic.
1042 
1043   // Handle pointer arithmetic on constant values.
1044   if (Optional<nonloc::ConcreteInt> rhsInt = rhs.getAs<nonloc::ConcreteInt>()) {
1045     if (Optional<loc::ConcreteInt> lhsInt = lhs.getAs<loc::ConcreteInt>()) {
1046       const llvm::APSInt &leftI = lhsInt->getValue();
1047       assert(leftI.isUnsigned());
1048       llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1049 
1050       // Convert the bitwidth of rightI.  This should deal with overflow
1051       // since we are dealing with concrete values.
1052       rightI = rightI.extOrTrunc(leftI.getBitWidth());
1053 
1054       // Offset the increment by the pointer size.
1055       llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1056       QualType pointeeType = resultTy->getPointeeType();
1057       Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1058       rightI *= Multiplicand;
1059 
1060       // Compute the adjusted pointer.
1061       switch (op) {
1062         case BO_Add:
1063           rightI = leftI + rightI;
1064           break;
1065         case BO_Sub:
1066           rightI = leftI - rightI;
1067           break;
1068         default:
1069           llvm_unreachable("Invalid pointer arithmetic operation");
1070       }
1071       return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1072     }
1073   }
1074 
1075   // Handle cases where 'lhs' is a region.
1076   if (const MemRegion *region = lhs.getAsRegion()) {
1077     rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1078     SVal index = UnknownVal();
1079     const SubRegion *superR = nullptr;
1080     // We need to know the type of the pointer in order to add an integer to it.
1081     // Depending on the type, different amount of bytes is added.
1082     QualType elementType;
1083 
1084     if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1085       assert(op == BO_Add || op == BO_Sub);
1086       index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1087                           getArrayIndexType());
1088       superR = cast<SubRegion>(elemReg->getSuperRegion());
1089       elementType = elemReg->getElementType();
1090     }
1091     else if (isa<SubRegion>(region)) {
1092       assert(op == BO_Add || op == BO_Sub);
1093       index = (op == BO_Add) ? rhs : evalMinus(rhs);
1094       superR = cast<SubRegion>(region);
1095       // TODO: Is this actually reliable? Maybe improving our MemRegion
1096       // hierarchy to provide typed regions for all non-void pointers would be
1097       // better. For instance, we cannot extend this towards LocAsInteger
1098       // operations, where result type of the expression is integer.
1099       if (resultTy->isAnyPointerType())
1100         elementType = resultTy->getPointeeType();
1101     }
1102 
1103     // Represent arithmetic on void pointers as arithmetic on char pointers.
1104     // It is fine when a TypedValueRegion of char value type represents
1105     // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1106     if (elementType->isVoidType())
1107       elementType = getContext().CharTy;
1108 
1109     if (Optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1110       return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1111                                                        superR, getContext()));
1112     }
1113   }
1114   return UnknownVal();
1115 }
1116 
1117 const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1118                                                    SVal V) {
1119   V = simplifySVal(state, V);
1120   if (V.isUnknownOrUndef())
1121     return nullptr;
1122 
1123   if (Optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1124     return &X->getValue();
1125 
1126   if (Optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1127     return &X->getValue();
1128 
1129   if (SymbolRef Sym = V.getAsSymbol())
1130     return state->getConstraintManager().getSymVal(state, Sym);
1131 
1132   return nullptr;
1133 }
1134 
1135 SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1136   SVal SimplifiedVal = simplifySValOnce(State, Val);
1137   while (SimplifiedVal != Val) {
1138     Val = SimplifiedVal;
1139     SimplifiedVal = simplifySValOnce(State, Val);
1140   }
1141   return SimplifiedVal;
1142 }
1143 
1144 SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1145   return simplifyUntilFixpoint(State, V);
1146 }
1147 
1148 SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1149   // For now, this function tries to constant-fold symbols inside a
1150   // nonloc::SymbolVal, and does nothing else. More simplifications should
1151   // be possible, such as constant-folding an index in an ElementRegion.
1152 
1153   class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1154     ProgramStateRef State;
1155     SValBuilder &SVB;
1156 
1157     // Cache results for the lifetime of the Simplifier. Results change every
1158     // time new constraints are added to the program state, which is the whole
1159     // point of simplifying, and for that very reason it's pointless to maintain
1160     // the same cache for the duration of the whole analysis.
1161     llvm::DenseMap<SymbolRef, SVal> Cached;
1162 
1163     static bool isUnchanged(SymbolRef Sym, SVal Val) {
1164       return Sym == Val.getAsSymbol();
1165     }
1166 
1167     SVal cache(SymbolRef Sym, SVal V) {
1168       Cached[Sym] = V;
1169       return V;
1170     }
1171 
1172     SVal skip(SymbolRef Sym) {
1173       return cache(Sym, SVB.makeSymbolVal(Sym));
1174     }
1175 
1176     // Return the known const value for the Sym if available, or return Undef
1177     // otherwise.
1178     SVal getConst(SymbolRef Sym) {
1179       const llvm::APSInt *Const =
1180           State->getConstraintManager().getSymVal(State, Sym);
1181       if (Const)
1182         return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1183                                               : (SVal)SVB.makeIntVal(*Const);
1184       return UndefinedVal();
1185     }
1186 
1187     SVal getConstOrVisit(SymbolRef Sym) {
1188       const SVal Ret = getConst(Sym);
1189       if (Ret.isUndef())
1190         return Visit(Sym);
1191       return Ret;
1192     }
1193 
1194   public:
1195     Simplifier(ProgramStateRef State)
1196         : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1197 
1198     SVal VisitSymbolData(const SymbolData *S) {
1199       // No cache here.
1200       if (const llvm::APSInt *I =
1201               SVB.getKnownValue(State, SVB.makeSymbolVal(S)))
1202         return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1203                                             : (SVal)SVB.makeIntVal(*I);
1204       return SVB.makeSymbolVal(S);
1205     }
1206 
1207     // TODO: Support SymbolCast.
1208 
1209     SVal VisitSymIntExpr(const SymIntExpr *S) {
1210       auto I = Cached.find(S);
1211       if (I != Cached.end())
1212         return I->second;
1213 
1214       SVal LHS = getConstOrVisit(S->getLHS());
1215       if (isUnchanged(S->getLHS(), LHS))
1216         return skip(S);
1217 
1218       SVal RHS;
1219       // By looking at the APSInt in the right-hand side of S, we cannot
1220       // figure out if it should be treated as a Loc or as a NonLoc.
1221       // So make our guess by recalling that we cannot multiply pointers
1222       // or compare a pointer to an integer.
1223       if (Loc::isLocType(S->getLHS()->getType()) &&
1224           BinaryOperator::isComparisonOp(S->getOpcode())) {
1225         // The usual conversion of $sym to &SymRegion{$sym}, as they have
1226         // the same meaning for Loc-type symbols, but the latter form
1227         // is preferred in SVal computations for being Loc itself.
1228         if (SymbolRef Sym = LHS.getAsSymbol()) {
1229           assert(Loc::isLocType(Sym->getType()));
1230           LHS = SVB.makeLoc(Sym);
1231         }
1232         RHS = SVB.makeIntLocVal(S->getRHS());
1233       } else {
1234         RHS = SVB.makeIntVal(S->getRHS());
1235       }
1236 
1237       return cache(
1238           S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1239     }
1240 
1241     SVal VisitIntSymExpr(const IntSymExpr *S) {
1242       auto I = Cached.find(S);
1243       if (I != Cached.end())
1244         return I->second;
1245 
1246       SVal RHS = getConstOrVisit(S->getRHS());
1247       if (isUnchanged(S->getRHS(), RHS))
1248         return skip(S);
1249 
1250       SVal LHS = SVB.makeIntVal(S->getLHS());
1251       return cache(
1252           S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1253     }
1254 
1255     SVal VisitSymSymExpr(const SymSymExpr *S) {
1256       auto I = Cached.find(S);
1257       if (I != Cached.end())
1258         return I->second;
1259 
1260       // For now don't try to simplify mixed Loc/NonLoc expressions
1261       // because they often appear from LocAsInteger operations
1262       // and we don't know how to combine a LocAsInteger
1263       // with a concrete value.
1264       if (Loc::isLocType(S->getLHS()->getType()) !=
1265           Loc::isLocType(S->getRHS()->getType()))
1266         return skip(S);
1267 
1268       SVal LHS = getConstOrVisit(S->getLHS());
1269       SVal RHS = getConstOrVisit(S->getRHS());
1270 
1271       if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1272         return skip(S);
1273 
1274       return cache(
1275           S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1276     }
1277 
1278     SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1279 
1280     SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1281 
1282     SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) {
1283       // Simplification is much more costly than computing complexity.
1284       // For high complexity, it may be not worth it.
1285       return Visit(V.getSymbol());
1286     }
1287 
1288     SVal VisitSVal(SVal V) { return V; }
1289   };
1290 
1291   // A crude way of preventing this function from calling itself from evalBinOp.
1292   static bool isReentering = false;
1293   if (isReentering)
1294     return V;
1295 
1296   isReentering = true;
1297   SVal SimplifiedV = Simplifier(State).Visit(V);
1298   isReentering = false;
1299 
1300   return SimplifiedV;
1301 }
1302