1 // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*- 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines SimpleSValBuilder, a basic implementation of SValBuilder. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" 14 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 15 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 17 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" 18 19 using namespace clang; 20 using namespace ento; 21 22 namespace { 23 class SimpleSValBuilder : public SValBuilder { 24 25 // With one `simplifySValOnce` call, a compound symbols might collapse to 26 // simpler symbol tree that is still possible to further simplify. Thus, we 27 // do the simplification on a new symbol tree until we reach the simplest 28 // form, i.e. the fixpoint. 29 // Consider the following symbol `(b * b) * b * b` which has this tree: 30 // * 31 // / \ 32 // * b 33 // / \ 34 // / b 35 // (b * b) 36 // Now, if the `b * b == 1` new constraint is added then during the first 37 // iteration we have the following transformations: 38 // * * 39 // / \ / \ 40 // * b --> b b 41 // / \ 42 // / b 43 // 1 44 // We need another iteration to reach the final result `1`. 45 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val); 46 47 // Recursively descends into symbolic expressions and replaces symbols 48 // with their known values (in the sense of the getKnownValue() method). 49 // We traverse the symbol tree and query the constraint values for the 50 // sub-trees and if a value is a constant we do the constant folding. 51 SVal simplifySValOnce(ProgramStateRef State, SVal V); 52 53 public: 54 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, 55 ProgramStateManager &stateMgr) 56 : SValBuilder(alloc, context, stateMgr) {} 57 ~SimpleSValBuilder() override {} 58 59 SVal evalMinus(NonLoc val) override; 60 SVal evalComplement(NonLoc val) override; 61 SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, 62 NonLoc lhs, NonLoc rhs, QualType resultTy) override; 63 SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, 64 Loc lhs, Loc rhs, QualType resultTy) override; 65 SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, 66 Loc lhs, NonLoc rhs, QualType resultTy) override; 67 68 /// getKnownValue - evaluates a given SVal. If the SVal has only one possible 69 /// (integer) value, that value is returned. Otherwise, returns NULL. 70 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override; 71 72 SVal simplifySVal(ProgramStateRef State, SVal V) override; 73 74 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op, 75 const llvm::APSInt &RHS, QualType resultTy); 76 }; 77 } // end anonymous namespace 78 79 SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, 80 ASTContext &context, 81 ProgramStateManager &stateMgr) { 82 return new SimpleSValBuilder(alloc, context, stateMgr); 83 } 84 85 //===----------------------------------------------------------------------===// 86 // Transfer function for unary operators. 87 //===----------------------------------------------------------------------===// 88 89 SVal SimpleSValBuilder::evalMinus(NonLoc val) { 90 switch (val.getSubKind()) { 91 case nonloc::ConcreteIntKind: 92 return val.castAs<nonloc::ConcreteInt>().evalMinus(*this); 93 default: 94 return UnknownVal(); 95 } 96 } 97 98 SVal SimpleSValBuilder::evalComplement(NonLoc X) { 99 switch (X.getSubKind()) { 100 case nonloc::ConcreteIntKind: 101 return X.castAs<nonloc::ConcreteInt>().evalComplement(*this); 102 default: 103 return UnknownVal(); 104 } 105 } 106 107 // Checks if the negation the value and flipping sign preserve 108 // the semantics on the operation in the resultType 109 static bool isNegationValuePreserving(const llvm::APSInt &Value, 110 APSIntType ResultType) { 111 const unsigned ValueBits = Value.getSignificantBits(); 112 if (ValueBits == ResultType.getBitWidth()) { 113 // The value is the lowest negative value that is representable 114 // in signed integer with bitWith of result type. The 115 // negation is representable if resultType is unsigned. 116 return ResultType.isUnsigned(); 117 } 118 119 // If resultType bitWith is higher that number of bits required 120 // to represent RHS, the sign flip produce same value. 121 return ValueBits < ResultType.getBitWidth(); 122 } 123 124 //===----------------------------------------------------------------------===// 125 // Transfer function for binary operators. 126 //===----------------------------------------------------------------------===// 127 128 SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS, 129 BinaryOperator::Opcode op, 130 const llvm::APSInt &RHS, 131 QualType resultTy) { 132 bool isIdempotent = false; 133 134 // Check for a few special cases with known reductions first. 135 switch (op) { 136 default: 137 // We can't reduce this case; just treat it normally. 138 break; 139 case BO_Mul: 140 // a*0 and a*1 141 if (RHS == 0) 142 return makeIntVal(0, resultTy); 143 else if (RHS == 1) 144 isIdempotent = true; 145 break; 146 case BO_Div: 147 // a/0 and a/1 148 if (RHS == 0) 149 // This is also handled elsewhere. 150 return UndefinedVal(); 151 else if (RHS == 1) 152 isIdempotent = true; 153 break; 154 case BO_Rem: 155 // a%0 and a%1 156 if (RHS == 0) 157 // This is also handled elsewhere. 158 return UndefinedVal(); 159 else if (RHS == 1) 160 return makeIntVal(0, resultTy); 161 break; 162 case BO_Add: 163 case BO_Sub: 164 case BO_Shl: 165 case BO_Shr: 166 case BO_Xor: 167 // a+0, a-0, a<<0, a>>0, a^0 168 if (RHS == 0) 169 isIdempotent = true; 170 break; 171 case BO_And: 172 // a&0 and a&(~0) 173 if (RHS == 0) 174 return makeIntVal(0, resultTy); 175 else if (RHS.isAllOnes()) 176 isIdempotent = true; 177 break; 178 case BO_Or: 179 // a|0 and a|(~0) 180 if (RHS == 0) 181 isIdempotent = true; 182 else if (RHS.isAllOnes()) { 183 const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS); 184 return nonloc::ConcreteInt(Result); 185 } 186 break; 187 } 188 189 // Idempotent ops (like a*1) can still change the type of an expression. 190 // Wrap the LHS up in a NonLoc again and let evalCast do the 191 // dirty work. 192 if (isIdempotent) 193 return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{}); 194 195 // If we reach this point, the expression cannot be simplified. 196 // Make a SymbolVal for the entire expression, after converting the RHS. 197 const llvm::APSInt *ConvertedRHS = &RHS; 198 if (BinaryOperator::isComparisonOp(op)) { 199 // We're looking for a type big enough to compare the symbolic value 200 // with the given constant. 201 // FIXME: This is an approximation of Sema::UsualArithmeticConversions. 202 ASTContext &Ctx = getContext(); 203 QualType SymbolType = LHS->getType(); 204 uint64_t ValWidth = RHS.getBitWidth(); 205 uint64_t TypeWidth = Ctx.getTypeSize(SymbolType); 206 207 if (ValWidth < TypeWidth) { 208 // If the value is too small, extend it. 209 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS); 210 } else if (ValWidth == TypeWidth) { 211 // If the value is signed but the symbol is unsigned, do the comparison 212 // in unsigned space. [C99 6.3.1.8] 213 // (For the opposite case, the value is already unsigned.) 214 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType()) 215 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS); 216 } 217 } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) { 218 // Change a+(-N) into a-N, and a-(-N) into a+N 219 // Adjust addition/subtraction of negative value, to 220 // subtraction/addition of the negated value. 221 APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy); 222 if (isNegationValuePreserving(RHS, resultIntTy)) { 223 ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS)); 224 op = (op == BO_Add) ? BO_Sub : BO_Add; 225 } else { 226 ConvertedRHS = &BasicVals.Convert(resultTy, RHS); 227 } 228 } else 229 ConvertedRHS = &BasicVals.Convert(resultTy, RHS); 230 231 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy); 232 } 233 234 // See if Sym is known to be a relation Rel with Bound. 235 static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, 236 llvm::APSInt Bound, ProgramStateRef State) { 237 SValBuilder &SVB = State->getStateManager().getSValBuilder(); 238 SVal Result = 239 SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym), 240 nonloc::ConcreteInt(Bound), SVB.getConditionType()); 241 if (auto DV = Result.getAs<DefinedSVal>()) { 242 return !State->assume(*DV, false); 243 } 244 return false; 245 } 246 247 // See if Sym is known to be within [min/4, max/4], where min and max 248 // are the bounds of the symbol's integral type. With such symbols, 249 // some manipulations can be performed without the risk of overflow. 250 // assume() doesn't cause infinite recursion because we should be dealing 251 // with simpler symbols on every recursive call. 252 static bool isWithinConstantOverflowBounds(SymbolRef Sym, 253 ProgramStateRef State) { 254 SValBuilder &SVB = State->getStateManager().getSValBuilder(); 255 BasicValueFactory &BV = SVB.getBasicValueFactory(); 256 257 QualType T = Sym->getType(); 258 assert(T->isSignedIntegerOrEnumerationType() && 259 "This only works with signed integers!"); 260 APSIntType AT = BV.getAPSIntType(T); 261 262 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max; 263 return isInRelation(BO_LE, Sym, Max, State) && 264 isInRelation(BO_GE, Sym, Min, State); 265 } 266 267 // Same for the concrete integers: see if I is within [min/4, max/4]. 268 static bool isWithinConstantOverflowBounds(llvm::APSInt I) { 269 APSIntType AT(I); 270 assert(!AT.isUnsigned() && 271 "This only works with signed integers!"); 272 273 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max; 274 return (I <= Max) && (I >= -Max); 275 } 276 277 static std::pair<SymbolRef, llvm::APSInt> 278 decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) { 279 if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym)) 280 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode())) 281 return std::make_pair(SymInt->getLHS(), 282 (SymInt->getOpcode() == BO_Add) ? 283 (SymInt->getRHS()) : 284 (-SymInt->getRHS())); 285 286 // Fail to decompose: "reduce" the problem to the "$x + 0" case. 287 return std::make_pair(Sym, BV.getValue(0, Sym->getType())); 288 } 289 290 // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the 291 // same signed integral type and no overflows occur (which should be checked 292 // by the caller). 293 static NonLoc doRearrangeUnchecked(ProgramStateRef State, 294 BinaryOperator::Opcode Op, 295 SymbolRef LSym, llvm::APSInt LInt, 296 SymbolRef RSym, llvm::APSInt RInt) { 297 SValBuilder &SVB = State->getStateManager().getSValBuilder(); 298 BasicValueFactory &BV = SVB.getBasicValueFactory(); 299 SymbolManager &SymMgr = SVB.getSymbolManager(); 300 301 QualType SymTy = LSym->getType(); 302 assert(SymTy == RSym->getType() && 303 "Symbols are not of the same type!"); 304 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) && 305 "Integers are not of the same type as symbols!"); 306 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) && 307 "Integers are not of the same type as symbols!"); 308 309 QualType ResultTy; 310 if (BinaryOperator::isComparisonOp(Op)) 311 ResultTy = SVB.getConditionType(); 312 else if (BinaryOperator::isAdditiveOp(Op)) 313 ResultTy = SymTy; 314 else 315 llvm_unreachable("Operation not suitable for unchecked rearrangement!"); 316 317 // FIXME: Can we use assume() without getting into an infinite recursion? 318 if (LSym == RSym) 319 return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt), 320 nonloc::ConcreteInt(RInt), ResultTy) 321 .castAs<NonLoc>(); 322 323 SymbolRef ResultSym = nullptr; 324 BinaryOperator::Opcode ResultOp; 325 llvm::APSInt ResultInt; 326 if (BinaryOperator::isComparisonOp(Op)) { 327 // Prefer comparing to a non-negative number. 328 // FIXME: Maybe it'd be better to have consistency in 329 // "$x - $y" vs. "$y - $x" because those are solver's keys. 330 if (LInt > RInt) { 331 ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy); 332 ResultOp = BinaryOperator::reverseComparisonOp(Op); 333 ResultInt = LInt - RInt; // Opposite order! 334 } else { 335 ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy); 336 ResultOp = Op; 337 ResultInt = RInt - LInt; // Opposite order! 338 } 339 } else { 340 ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy); 341 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt); 342 ResultOp = BO_Add; 343 // Bring back the cosmetic difference. 344 if (ResultInt < 0) { 345 ResultInt = -ResultInt; 346 ResultOp = BO_Sub; 347 } else if (ResultInt == 0) { 348 // Shortcut: Simplify "$x + 0" to "$x". 349 return nonloc::SymbolVal(ResultSym); 350 } 351 } 352 const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt); 353 return nonloc::SymbolVal( 354 SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy)); 355 } 356 357 // Rearrange if symbol type matches the result type and if the operator is a 358 // comparison operator, both symbol and constant must be within constant 359 // overflow bounds. 360 static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, 361 SymbolRef Sym, llvm::APSInt Int, QualType Ty) { 362 return Sym->getType() == Ty && 363 (!BinaryOperator::isComparisonOp(Op) || 364 (isWithinConstantOverflowBounds(Sym, State) && 365 isWithinConstantOverflowBounds(Int))); 366 } 367 368 static Optional<NonLoc> tryRearrange(ProgramStateRef State, 369 BinaryOperator::Opcode Op, NonLoc Lhs, 370 NonLoc Rhs, QualType ResultTy) { 371 ProgramStateManager &StateMgr = State->getStateManager(); 372 SValBuilder &SVB = StateMgr.getSValBuilder(); 373 374 // We expect everything to be of the same type - this type. 375 QualType SingleTy; 376 377 // FIXME: After putting complexity threshold to the symbols we can always 378 // rearrange additive operations but rearrange comparisons only if 379 // option is set. 380 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation) 381 return None; 382 383 SymbolRef LSym = Lhs.getAsSymbol(); 384 if (!LSym) 385 return None; 386 387 if (BinaryOperator::isComparisonOp(Op)) { 388 SingleTy = LSym->getType(); 389 if (ResultTy != SVB.getConditionType()) 390 return None; 391 // Initialize SingleTy later with a symbol's type. 392 } else if (BinaryOperator::isAdditiveOp(Op)) { 393 SingleTy = ResultTy; 394 if (LSym->getType() != SingleTy) 395 return None; 396 } else { 397 // Don't rearrange other operations. 398 return None; 399 } 400 401 assert(!SingleTy.isNull() && "We should have figured out the type by now!"); 402 403 // Rearrange signed symbolic expressions only 404 if (!SingleTy->isSignedIntegerOrEnumerationType()) 405 return None; 406 407 SymbolRef RSym = Rhs.getAsSymbol(); 408 if (!RSym || RSym->getType() != SingleTy) 409 return None; 410 411 BasicValueFactory &BV = State->getBasicVals(); 412 llvm::APSInt LInt, RInt; 413 std::tie(LSym, LInt) = decomposeSymbol(LSym, BV); 414 std::tie(RSym, RInt) = decomposeSymbol(RSym, BV); 415 if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) || 416 !shouldRearrange(State, Op, RSym, RInt, SingleTy)) 417 return None; 418 419 // We know that no overflows can occur anymore. 420 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt); 421 } 422 423 SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, 424 BinaryOperator::Opcode op, 425 NonLoc lhs, NonLoc rhs, 426 QualType resultTy) { 427 NonLoc InputLHS = lhs; 428 NonLoc InputRHS = rhs; 429 430 // Constraints may have changed since the creation of a bound SVal. Check if 431 // the values can be simplified based on those new constraints. 432 SVal simplifiedLhs = simplifySVal(state, lhs); 433 SVal simplifiedRhs = simplifySVal(state, rhs); 434 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) 435 lhs = *simplifiedLhsAsNonLoc; 436 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) 437 rhs = *simplifiedRhsAsNonLoc; 438 439 // Handle trivial case where left-side and right-side are the same. 440 if (lhs == rhs) 441 switch (op) { 442 default: 443 break; 444 case BO_EQ: 445 case BO_LE: 446 case BO_GE: 447 return makeTruthVal(true, resultTy); 448 case BO_LT: 449 case BO_GT: 450 case BO_NE: 451 return makeTruthVal(false, resultTy); 452 case BO_Xor: 453 case BO_Sub: 454 if (resultTy->isIntegralOrEnumerationType()) 455 return makeIntVal(0, resultTy); 456 return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy, 457 QualType{}); 458 case BO_Or: 459 case BO_And: 460 return evalCast(lhs, resultTy, QualType{}); 461 } 462 463 while (true) { 464 switch (lhs.getSubKind()) { 465 default: 466 return makeSymExprValNN(op, lhs, rhs, resultTy); 467 case nonloc::PointerToMemberKind: { 468 assert(rhs.getSubKind() == nonloc::PointerToMemberKind && 469 "Both SVals should have pointer-to-member-type"); 470 auto LPTM = lhs.castAs<nonloc::PointerToMember>(), 471 RPTM = rhs.castAs<nonloc::PointerToMember>(); 472 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData(); 473 switch (op) { 474 case BO_EQ: 475 return makeTruthVal(LPTMD == RPTMD, resultTy); 476 case BO_NE: 477 return makeTruthVal(LPTMD != RPTMD, resultTy); 478 default: 479 return UnknownVal(); 480 } 481 } 482 case nonloc::LocAsIntegerKind: { 483 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc(); 484 switch (rhs.getSubKind()) { 485 case nonloc::LocAsIntegerKind: 486 // FIXME: at the moment the implementation 487 // of modeling "pointers as integers" is not complete. 488 if (!BinaryOperator::isComparisonOp(op)) 489 return UnknownVal(); 490 return evalBinOpLL(state, op, lhsL, 491 rhs.castAs<nonloc::LocAsInteger>().getLoc(), 492 resultTy); 493 case nonloc::ConcreteIntKind: { 494 // FIXME: at the moment the implementation 495 // of modeling "pointers as integers" is not complete. 496 if (!BinaryOperator::isComparisonOp(op)) 497 return UnknownVal(); 498 // Transform the integer into a location and compare. 499 // FIXME: This only makes sense for comparisons. If we want to, say, 500 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it, 501 // then pack it back into a LocAsInteger. 502 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue(); 503 // If the region has a symbolic base, pay attention to the type; it 504 // might be coming from a non-default address space. For non-symbolic 505 // regions it doesn't matter that much because such comparisons would 506 // most likely evaluate to concrete false anyway. FIXME: We might 507 // still need to handle the non-comparison case. 508 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) 509 BasicVals.getAPSIntType(lSym->getType()).apply(i); 510 else 511 BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i); 512 return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy); 513 } 514 default: 515 switch (op) { 516 case BO_EQ: 517 return makeTruthVal(false, resultTy); 518 case BO_NE: 519 return makeTruthVal(true, resultTy); 520 default: 521 // This case also handles pointer arithmetic. 522 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 523 } 524 } 525 } 526 case nonloc::ConcreteIntKind: { 527 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue(); 528 529 // If we're dealing with two known constants, just perform the operation. 530 if (const llvm::APSInt *KnownRHSValue = getKnownValue(state, rhs)) { 531 llvm::APSInt RHSValue = *KnownRHSValue; 532 if (BinaryOperator::isComparisonOp(op)) { 533 // We're looking for a type big enough to compare the two values. 534 // FIXME: This is not correct. char + short will result in a promotion 535 // to int. Unfortunately we have lost types by this point. 536 APSIntType CompareType = std::max(APSIntType(LHSValue), 537 APSIntType(RHSValue)); 538 CompareType.apply(LHSValue); 539 CompareType.apply(RHSValue); 540 } else if (!BinaryOperator::isShiftOp(op)) { 541 APSIntType IntType = BasicVals.getAPSIntType(resultTy); 542 IntType.apply(LHSValue); 543 IntType.apply(RHSValue); 544 } 545 546 const llvm::APSInt *Result = 547 BasicVals.evalAPSInt(op, LHSValue, RHSValue); 548 if (!Result) 549 return UndefinedVal(); 550 551 return nonloc::ConcreteInt(*Result); 552 } 553 554 // Swap the left and right sides and flip the operator if doing so 555 // allows us to better reason about the expression (this is a form 556 // of expression canonicalization). 557 // While we're at it, catch some special cases for non-commutative ops. 558 switch (op) { 559 case BO_LT: 560 case BO_GT: 561 case BO_LE: 562 case BO_GE: 563 op = BinaryOperator::reverseComparisonOp(op); 564 LLVM_FALLTHROUGH; 565 case BO_EQ: 566 case BO_NE: 567 case BO_Add: 568 case BO_Mul: 569 case BO_And: 570 case BO_Xor: 571 case BO_Or: 572 std::swap(lhs, rhs); 573 continue; 574 case BO_Shr: 575 // (~0)>>a 576 if (LHSValue.isAllOnes() && LHSValue.isSigned()) 577 return evalCast(lhs, resultTy, QualType{}); 578 LLVM_FALLTHROUGH; 579 case BO_Shl: 580 // 0<<a and 0>>a 581 if (LHSValue == 0) 582 return evalCast(lhs, resultTy, QualType{}); 583 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 584 case BO_Div: 585 // 0 / x == 0 586 case BO_Rem: 587 // 0 % x == 0 588 if (LHSValue == 0) 589 return makeZeroVal(resultTy); 590 LLVM_FALLTHROUGH; 591 default: 592 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 593 } 594 } 595 case nonloc::SymbolValKind: { 596 // We only handle LHS as simple symbols or SymIntExprs. 597 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol(); 598 599 // LHS is a symbolic expression. 600 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) { 601 602 // Is this a logical not? (!x is represented as x == 0.) 603 if (op == BO_EQ && rhs.isZeroConstant()) { 604 // We know how to negate certain expressions. Simplify them here. 605 606 BinaryOperator::Opcode opc = symIntExpr->getOpcode(); 607 switch (opc) { 608 default: 609 // We don't know how to negate this operation. 610 // Just handle it as if it were a normal comparison to 0. 611 break; 612 case BO_LAnd: 613 case BO_LOr: 614 llvm_unreachable("Logical operators handled by branching logic."); 615 case BO_Assign: 616 case BO_MulAssign: 617 case BO_DivAssign: 618 case BO_RemAssign: 619 case BO_AddAssign: 620 case BO_SubAssign: 621 case BO_ShlAssign: 622 case BO_ShrAssign: 623 case BO_AndAssign: 624 case BO_XorAssign: 625 case BO_OrAssign: 626 case BO_Comma: 627 llvm_unreachable("'=' and ',' operators handled by ExprEngine."); 628 case BO_PtrMemD: 629 case BO_PtrMemI: 630 llvm_unreachable("Pointer arithmetic not handled here."); 631 case BO_LT: 632 case BO_GT: 633 case BO_LE: 634 case BO_GE: 635 case BO_EQ: 636 case BO_NE: 637 assert(resultTy->isBooleanType() || 638 resultTy == getConditionType()); 639 assert(symIntExpr->getType()->isBooleanType() || 640 getContext().hasSameUnqualifiedType(symIntExpr->getType(), 641 getConditionType())); 642 // Negate the comparison and make a value. 643 opc = BinaryOperator::negateComparisonOp(opc); 644 return makeNonLoc(symIntExpr->getLHS(), opc, 645 symIntExpr->getRHS(), resultTy); 646 } 647 } 648 649 // For now, only handle expressions whose RHS is a constant. 650 if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs)) { 651 // If both the LHS and the current expression are additive, 652 // fold their constants and try again. 653 if (BinaryOperator::isAdditiveOp(op)) { 654 BinaryOperator::Opcode lop = symIntExpr->getOpcode(); 655 if (BinaryOperator::isAdditiveOp(lop)) { 656 // Convert the two constants to a common type, then combine them. 657 658 // resultTy may not be the best type to convert to, but it's 659 // probably the best choice in expressions with mixed type 660 // (such as x+1U+2LL). The rules for implicit conversions should 661 // choose a reasonable type to preserve the expression, and will 662 // at least match how the value is going to be used. 663 APSIntType IntType = BasicVals.getAPSIntType(resultTy); 664 const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS()); 665 const llvm::APSInt &second = IntType.convert(*RHSValue); 666 667 // If the op and lop agrees, then we just need to 668 // sum the constants. Otherwise, we change to operation 669 // type if substraction would produce negative value 670 // (and cause overflow for unsigned integers), 671 // as consequence x+1U-10 produces x-9U, instead 672 // of x+4294967287U, that would be produced without this 673 // additional check. 674 const llvm::APSInt *newRHS; 675 if (lop == op) { 676 newRHS = BasicVals.evalAPSInt(BO_Add, first, second); 677 } else if (first >= second) { 678 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second); 679 op = lop; 680 } else { 681 newRHS = BasicVals.evalAPSInt(BO_Sub, second, first); 682 } 683 684 assert(newRHS && "Invalid operation despite common type!"); 685 rhs = nonloc::ConcreteInt(*newRHS); 686 lhs = nonloc::SymbolVal(symIntExpr->getLHS()); 687 continue; 688 } 689 } 690 691 // Otherwise, make a SymIntExpr out of the expression. 692 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy); 693 } 694 } 695 696 // Is the RHS a constant? 697 if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs)) 698 return MakeSymIntVal(Sym, op, *RHSValue, resultTy); 699 700 if (Optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy)) 701 return *V; 702 703 // Give up -- this is not a symbolic expression we can handle. 704 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 705 } 706 } 707 } 708 } 709 710 static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, 711 const FieldRegion *RightFR, 712 BinaryOperator::Opcode op, 713 QualType resultTy, 714 SimpleSValBuilder &SVB) { 715 // Only comparisons are meaningful here! 716 if (!BinaryOperator::isComparisonOp(op)) 717 return UnknownVal(); 718 719 // Next, see if the two FRs have the same super-region. 720 // FIXME: This doesn't handle casts yet, and simply stripping the casts 721 // doesn't help. 722 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion()) 723 return UnknownVal(); 724 725 const FieldDecl *LeftFD = LeftFR->getDecl(); 726 const FieldDecl *RightFD = RightFR->getDecl(); 727 const RecordDecl *RD = LeftFD->getParent(); 728 729 // Make sure the two FRs are from the same kind of record. Just in case! 730 // FIXME: This is probably where inheritance would be a problem. 731 if (RD != RightFD->getParent()) 732 return UnknownVal(); 733 734 // We know for sure that the two fields are not the same, since that 735 // would have given us the same SVal. 736 if (op == BO_EQ) 737 return SVB.makeTruthVal(false, resultTy); 738 if (op == BO_NE) 739 return SVB.makeTruthVal(true, resultTy); 740 741 // Iterate through the fields and see which one comes first. 742 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field 743 // members and the units in which bit-fields reside have addresses that 744 // increase in the order in which they are declared." 745 bool leftFirst = (op == BO_LT || op == BO_LE); 746 for (const auto *I : RD->fields()) { 747 if (I == LeftFD) 748 return SVB.makeTruthVal(leftFirst, resultTy); 749 if (I == RightFD) 750 return SVB.makeTruthVal(!leftFirst, resultTy); 751 } 752 753 llvm_unreachable("Fields not found in parent record's definition"); 754 } 755 756 // This is used in debug builds only for now because some downstream users 757 // may hit this assert in their subsequent merges. 758 // There are still places in the analyzer where equal bitwidth Locs 759 // are compared, and need to be found and corrected. Recent previous fixes have 760 // addressed the known problems of making NULLs with specific bitwidths 761 // for Loc comparisons along with deprecation of APIs for the same purpose. 762 // 763 static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, 764 Loc LhsLoc) { 765 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths 766 ASTContext &Ctx = State->getStateManager().getContext(); 767 uint64_t RhsBitwidth = 768 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx)); 769 uint64_t LhsBitwidth = 770 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx)); 771 if (RhsBitwidth && LhsBitwidth && 772 (LhsLoc.getSubKind() == RhsLoc.getSubKind())) { 773 assert(RhsBitwidth == LhsBitwidth && 774 "RhsLoc and LhsLoc bitwidth must be same!"); 775 } 776 } 777 778 // FIXME: all this logic will change if/when we have MemRegion::getLocation(). 779 SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, 780 BinaryOperator::Opcode op, 781 Loc lhs, Loc rhs, 782 QualType resultTy) { 783 784 // Assert that bitwidth of lhs and rhs are the same. 785 // This can happen if two different address spaces are used, 786 // and the bitwidths of the address spaces are different. 787 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c 788 // FIXME: See comment above in the function assertEqualBitWidths 789 assertEqualBitWidths(state, rhs, lhs); 790 791 // Only comparisons and subtractions are valid operations on two pointers. 792 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15]. 793 // However, if a pointer is casted to an integer, evalBinOpNN may end up 794 // calling this function with another operation (PR7527). We don't attempt to 795 // model this for now, but it could be useful, particularly when the 796 // "location" is actually an integer value that's been passed through a void*. 797 if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub)) 798 return UnknownVal(); 799 800 // Special cases for when both sides are identical. 801 if (lhs == rhs) { 802 switch (op) { 803 default: 804 llvm_unreachable("Unimplemented operation for two identical values"); 805 case BO_Sub: 806 return makeZeroVal(resultTy); 807 case BO_EQ: 808 case BO_LE: 809 case BO_GE: 810 return makeTruthVal(true, resultTy); 811 case BO_NE: 812 case BO_LT: 813 case BO_GT: 814 return makeTruthVal(false, resultTy); 815 } 816 } 817 818 switch (lhs.getSubKind()) { 819 default: 820 llvm_unreachable("Ordering not implemented for this Loc."); 821 822 case loc::GotoLabelKind: 823 // The only thing we know about labels is that they're non-null. 824 if (rhs.isZeroConstant()) { 825 switch (op) { 826 default: 827 break; 828 case BO_Sub: 829 return evalCast(lhs, resultTy, QualType{}); 830 case BO_EQ: 831 case BO_LE: 832 case BO_LT: 833 return makeTruthVal(false, resultTy); 834 case BO_NE: 835 case BO_GT: 836 case BO_GE: 837 return makeTruthVal(true, resultTy); 838 } 839 } 840 // There may be two labels for the same location, and a function region may 841 // have the same address as a label at the start of the function (depending 842 // on the ABI). 843 // FIXME: we can probably do a comparison against other MemRegions, though. 844 // FIXME: is there a way to tell if two labels refer to the same location? 845 return UnknownVal(); 846 847 case loc::ConcreteIntKind: { 848 // If one of the operands is a symbol and the other is a constant, 849 // build an expression for use by the constraint manager. 850 if (SymbolRef rSym = rhs.getAsLocSymbol()) { 851 // We can only build expressions with symbols on the left, 852 // so we need a reversible operator. 853 if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp) 854 return UnknownVal(); 855 856 const llvm::APSInt &lVal = lhs.castAs<loc::ConcreteInt>().getValue(); 857 op = BinaryOperator::reverseComparisonOp(op); 858 return makeNonLoc(rSym, op, lVal, resultTy); 859 } 860 861 // If both operands are constants, just perform the operation. 862 if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { 863 SVal ResultVal = 864 lhs.castAs<loc::ConcreteInt>().evalBinOp(BasicVals, op, *rInt); 865 if (Optional<NonLoc> Result = ResultVal.getAs<NonLoc>()) 866 return evalCast(*Result, resultTy, QualType{}); 867 868 assert(!ResultVal.getAs<Loc>() && "Loc-Loc ops should not produce Locs"); 869 return UnknownVal(); 870 } 871 872 // Special case comparisons against NULL. 873 // This must come after the test if the RHS is a symbol, which is used to 874 // build constraints. The address of any non-symbolic region is guaranteed 875 // to be non-NULL, as is any label. 876 assert(rhs.getAs<loc::MemRegionVal>() || rhs.getAs<loc::GotoLabel>()); 877 if (lhs.isZeroConstant()) { 878 switch (op) { 879 default: 880 break; 881 case BO_EQ: 882 case BO_GT: 883 case BO_GE: 884 return makeTruthVal(false, resultTy); 885 case BO_NE: 886 case BO_LT: 887 case BO_LE: 888 return makeTruthVal(true, resultTy); 889 } 890 } 891 892 // Comparing an arbitrary integer to a region or label address is 893 // completely unknowable. 894 return UnknownVal(); 895 } 896 case loc::MemRegionValKind: { 897 if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { 898 // If one of the operands is a symbol and the other is a constant, 899 // build an expression for use by the constraint manager. 900 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) { 901 if (BinaryOperator::isComparisonOp(op)) 902 return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy); 903 return UnknownVal(); 904 } 905 // Special case comparisons to NULL. 906 // This must come after the test if the LHS is a symbol, which is used to 907 // build constraints. The address of any non-symbolic region is guaranteed 908 // to be non-NULL. 909 if (rInt->isZeroConstant()) { 910 if (op == BO_Sub) 911 return evalCast(lhs, resultTy, QualType{}); 912 913 if (BinaryOperator::isComparisonOp(op)) { 914 QualType boolType = getContext().BoolTy; 915 NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>(); 916 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>(); 917 return evalBinOpNN(state, op, l, r, resultTy); 918 } 919 } 920 921 // Comparing a region to an arbitrary integer is completely unknowable. 922 return UnknownVal(); 923 } 924 925 // Get both values as regions, if possible. 926 const MemRegion *LeftMR = lhs.getAsRegion(); 927 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!"); 928 929 const MemRegion *RightMR = rhs.getAsRegion(); 930 if (!RightMR) 931 // The RHS is probably a label, which in theory could address a region. 932 // FIXME: we can probably make a more useful statement about non-code 933 // regions, though. 934 return UnknownVal(); 935 936 const MemRegion *LeftBase = LeftMR->getBaseRegion(); 937 const MemRegion *RightBase = RightMR->getBaseRegion(); 938 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace(); 939 const MemSpaceRegion *RightMS = RightBase->getMemorySpace(); 940 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion(); 941 942 // If the two regions are from different known memory spaces they cannot be 943 // equal. Also, assume that no symbolic region (whose memory space is 944 // unknown) is on the stack. 945 if (LeftMS != RightMS && 946 ((LeftMS != UnknownMS && RightMS != UnknownMS) || 947 (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) { 948 switch (op) { 949 default: 950 return UnknownVal(); 951 case BO_EQ: 952 return makeTruthVal(false, resultTy); 953 case BO_NE: 954 return makeTruthVal(true, resultTy); 955 } 956 } 957 958 // If both values wrap regions, see if they're from different base regions. 959 // Note, heap base symbolic regions are assumed to not alias with 960 // each other; for example, we assume that malloc returns different address 961 // on each invocation. 962 // FIXME: ObjC object pointers always reside on the heap, but currently 963 // we treat their memory space as unknown, because symbolic pointers 964 // to ObjC objects may alias. There should be a way to construct 965 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker 966 // guesses memory space for ObjC object pointers manually instead of 967 // relying on us. 968 if (LeftBase != RightBase && 969 ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) || 970 (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){ 971 switch (op) { 972 default: 973 return UnknownVal(); 974 case BO_EQ: 975 return makeTruthVal(false, resultTy); 976 case BO_NE: 977 return makeTruthVal(true, resultTy); 978 } 979 } 980 981 // Handle special cases for when both regions are element regions. 982 const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR); 983 const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR); 984 if (RightER && LeftER) { 985 // Next, see if the two ERs have the same super-region and matching types. 986 // FIXME: This should do something useful even if the types don't match, 987 // though if both indexes are constant the RegionRawOffset path will 988 // give the correct answer. 989 if (LeftER->getSuperRegion() == RightER->getSuperRegion() && 990 LeftER->getElementType() == RightER->getElementType()) { 991 // Get the left index and cast it to the correct type. 992 // If the index is unknown or undefined, bail out here. 993 SVal LeftIndexVal = LeftER->getIndex(); 994 Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>(); 995 if (!LeftIndex) 996 return UnknownVal(); 997 LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{}); 998 LeftIndex = LeftIndexVal.getAs<NonLoc>(); 999 if (!LeftIndex) 1000 return UnknownVal(); 1001 1002 // Do the same for the right index. 1003 SVal RightIndexVal = RightER->getIndex(); 1004 Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>(); 1005 if (!RightIndex) 1006 return UnknownVal(); 1007 RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{}); 1008 RightIndex = RightIndexVal.getAs<NonLoc>(); 1009 if (!RightIndex) 1010 return UnknownVal(); 1011 1012 // Actually perform the operation. 1013 // evalBinOpNN expects the two indexes to already be the right type. 1014 return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy); 1015 } 1016 } 1017 1018 // Special handling of the FieldRegions, even with symbolic offsets. 1019 const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR); 1020 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR); 1021 if (RightFR && LeftFR) { 1022 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy, 1023 *this); 1024 if (!R.isUnknown()) 1025 return R; 1026 } 1027 1028 // Compare the regions using the raw offsets. 1029 RegionOffset LeftOffset = LeftMR->getAsOffset(); 1030 RegionOffset RightOffset = RightMR->getAsOffset(); 1031 1032 if (LeftOffset.getRegion() != nullptr && 1033 LeftOffset.getRegion() == RightOffset.getRegion() && 1034 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) { 1035 int64_t left = LeftOffset.getOffset(); 1036 int64_t right = RightOffset.getOffset(); 1037 1038 switch (op) { 1039 default: 1040 return UnknownVal(); 1041 case BO_LT: 1042 return makeTruthVal(left < right, resultTy); 1043 case BO_GT: 1044 return makeTruthVal(left > right, resultTy); 1045 case BO_LE: 1046 return makeTruthVal(left <= right, resultTy); 1047 case BO_GE: 1048 return makeTruthVal(left >= right, resultTy); 1049 case BO_EQ: 1050 return makeTruthVal(left == right, resultTy); 1051 case BO_NE: 1052 return makeTruthVal(left != right, resultTy); 1053 } 1054 } 1055 1056 // At this point we're not going to get a good answer, but we can try 1057 // conjuring an expression instead. 1058 SymbolRef LHSSym = lhs.getAsLocSymbol(); 1059 SymbolRef RHSSym = rhs.getAsLocSymbol(); 1060 if (LHSSym && RHSSym) 1061 return makeNonLoc(LHSSym, op, RHSSym, resultTy); 1062 1063 // If we get here, we have no way of comparing the regions. 1064 return UnknownVal(); 1065 } 1066 } 1067 } 1068 1069 SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, 1070 BinaryOperator::Opcode op, Loc lhs, 1071 NonLoc rhs, QualType resultTy) { 1072 if (op >= BO_PtrMemD && op <= BO_PtrMemI) { 1073 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) { 1074 if (PTMSV->isNullMemberPointer()) 1075 return UndefinedVal(); 1076 1077 auto getFieldLValue = [&](const auto *FD) -> SVal { 1078 SVal Result = lhs; 1079 1080 for (const auto &I : *PTMSV) 1081 Result = StateMgr.getStoreManager().evalDerivedToBase( 1082 Result, I->getType(), I->isVirtual()); 1083 1084 return state->getLValue(FD, Result); 1085 }; 1086 1087 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) { 1088 return getFieldLValue(FD); 1089 } 1090 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) { 1091 return getFieldLValue(FD); 1092 } 1093 } 1094 1095 return rhs; 1096 } 1097 1098 assert(!BinaryOperator::isComparisonOp(op) && 1099 "arguments to comparison ops must be of the same type"); 1100 1101 // Special case: rhs is a zero constant. 1102 if (rhs.isZeroConstant()) 1103 return lhs; 1104 1105 // Perserve the null pointer so that it can be found by the DerefChecker. 1106 if (lhs.isZeroConstant()) 1107 return lhs; 1108 1109 // We are dealing with pointer arithmetic. 1110 1111 // Handle pointer arithmetic on constant values. 1112 if (Optional<nonloc::ConcreteInt> rhsInt = rhs.getAs<nonloc::ConcreteInt>()) { 1113 if (Optional<loc::ConcreteInt> lhsInt = lhs.getAs<loc::ConcreteInt>()) { 1114 const llvm::APSInt &leftI = lhsInt->getValue(); 1115 assert(leftI.isUnsigned()); 1116 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true); 1117 1118 // Convert the bitwidth of rightI. This should deal with overflow 1119 // since we are dealing with concrete values. 1120 rightI = rightI.extOrTrunc(leftI.getBitWidth()); 1121 1122 // Offset the increment by the pointer size. 1123 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true); 1124 QualType pointeeType = resultTy->getPointeeType(); 1125 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity(); 1126 rightI *= Multiplicand; 1127 1128 // Compute the adjusted pointer. 1129 switch (op) { 1130 case BO_Add: 1131 rightI = leftI + rightI; 1132 break; 1133 case BO_Sub: 1134 rightI = leftI - rightI; 1135 break; 1136 default: 1137 llvm_unreachable("Invalid pointer arithmetic operation"); 1138 } 1139 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI)); 1140 } 1141 } 1142 1143 // Handle cases where 'lhs' is a region. 1144 if (const MemRegion *region = lhs.getAsRegion()) { 1145 rhs = convertToArrayIndex(rhs).castAs<NonLoc>(); 1146 SVal index = UnknownVal(); 1147 const SubRegion *superR = nullptr; 1148 // We need to know the type of the pointer in order to add an integer to it. 1149 // Depending on the type, different amount of bytes is added. 1150 QualType elementType; 1151 1152 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) { 1153 assert(op == BO_Add || op == BO_Sub); 1154 index = evalBinOpNN(state, op, elemReg->getIndex(), rhs, 1155 getArrayIndexType()); 1156 superR = cast<SubRegion>(elemReg->getSuperRegion()); 1157 elementType = elemReg->getElementType(); 1158 } 1159 else if (isa<SubRegion>(region)) { 1160 assert(op == BO_Add || op == BO_Sub); 1161 index = (op == BO_Add) ? rhs : evalMinus(rhs); 1162 superR = cast<SubRegion>(region); 1163 // TODO: Is this actually reliable? Maybe improving our MemRegion 1164 // hierarchy to provide typed regions for all non-void pointers would be 1165 // better. For instance, we cannot extend this towards LocAsInteger 1166 // operations, where result type of the expression is integer. 1167 if (resultTy->isAnyPointerType()) 1168 elementType = resultTy->getPointeeType(); 1169 } 1170 1171 // Represent arithmetic on void pointers as arithmetic on char pointers. 1172 // It is fine when a TypedValueRegion of char value type represents 1173 // a void pointer. Note that arithmetic on void pointers is a GCC extension. 1174 if (elementType->isVoidType()) 1175 elementType = getContext().CharTy; 1176 1177 if (Optional<NonLoc> indexV = index.getAs<NonLoc>()) { 1178 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV, 1179 superR, getContext())); 1180 } 1181 } 1182 return UnknownVal(); 1183 } 1184 1185 const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, 1186 SVal V) { 1187 V = simplifySVal(state, V); 1188 if (V.isUnknownOrUndef()) 1189 return nullptr; 1190 1191 if (Optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>()) 1192 return &X->getValue(); 1193 1194 if (Optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>()) 1195 return &X->getValue(); 1196 1197 if (SymbolRef Sym = V.getAsSymbol()) 1198 return state->getConstraintManager().getSymVal(state, Sym); 1199 1200 return nullptr; 1201 } 1202 1203 SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) { 1204 SVal SimplifiedVal = simplifySValOnce(State, Val); 1205 while (SimplifiedVal != Val) { 1206 Val = SimplifiedVal; 1207 SimplifiedVal = simplifySValOnce(State, Val); 1208 } 1209 return SimplifiedVal; 1210 } 1211 1212 SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { 1213 return simplifyUntilFixpoint(State, V); 1214 } 1215 1216 SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) { 1217 // For now, this function tries to constant-fold symbols inside a 1218 // nonloc::SymbolVal, and does nothing else. More simplifications should 1219 // be possible, such as constant-folding an index in an ElementRegion. 1220 1221 class Simplifier : public FullSValVisitor<Simplifier, SVal> { 1222 ProgramStateRef State; 1223 SValBuilder &SVB; 1224 1225 // Cache results for the lifetime of the Simplifier. Results change every 1226 // time new constraints are added to the program state, which is the whole 1227 // point of simplifying, and for that very reason it's pointless to maintain 1228 // the same cache for the duration of the whole analysis. 1229 llvm::DenseMap<SymbolRef, SVal> Cached; 1230 1231 static bool isUnchanged(SymbolRef Sym, SVal Val) { 1232 return Sym == Val.getAsSymbol(); 1233 } 1234 1235 SVal cache(SymbolRef Sym, SVal V) { 1236 Cached[Sym] = V; 1237 return V; 1238 } 1239 1240 SVal skip(SymbolRef Sym) { 1241 return cache(Sym, SVB.makeSymbolVal(Sym)); 1242 } 1243 1244 // Return the known const value for the Sym if available, or return Undef 1245 // otherwise. 1246 SVal getConst(SymbolRef Sym) { 1247 const llvm::APSInt *Const = 1248 State->getConstraintManager().getSymVal(State, Sym); 1249 if (Const) 1250 return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const) 1251 : (SVal)SVB.makeIntVal(*Const); 1252 return UndefinedVal(); 1253 } 1254 1255 SVal getConstOrVisit(SymbolRef Sym) { 1256 const SVal Ret = getConst(Sym); 1257 if (Ret.isUndef()) 1258 return Visit(Sym); 1259 return Ret; 1260 } 1261 1262 public: 1263 Simplifier(ProgramStateRef State) 1264 : State(State), SVB(State->getStateManager().getSValBuilder()) {} 1265 1266 SVal VisitSymbolData(const SymbolData *S) { 1267 // No cache here. 1268 if (const llvm::APSInt *I = 1269 SVB.getKnownValue(State, SVB.makeSymbolVal(S))) 1270 return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I) 1271 : (SVal)SVB.makeIntVal(*I); 1272 return SVB.makeSymbolVal(S); 1273 } 1274 1275 // TODO: Support SymbolCast. 1276 1277 SVal VisitSymIntExpr(const SymIntExpr *S) { 1278 auto I = Cached.find(S); 1279 if (I != Cached.end()) 1280 return I->second; 1281 1282 SVal LHS = getConstOrVisit(S->getLHS()); 1283 if (isUnchanged(S->getLHS(), LHS)) 1284 return skip(S); 1285 1286 SVal RHS; 1287 // By looking at the APSInt in the right-hand side of S, we cannot 1288 // figure out if it should be treated as a Loc or as a NonLoc. 1289 // So make our guess by recalling that we cannot multiply pointers 1290 // or compare a pointer to an integer. 1291 if (Loc::isLocType(S->getLHS()->getType()) && 1292 BinaryOperator::isComparisonOp(S->getOpcode())) { 1293 // The usual conversion of $sym to &SymRegion{$sym}, as they have 1294 // the same meaning for Loc-type symbols, but the latter form 1295 // is preferred in SVal computations for being Loc itself. 1296 if (SymbolRef Sym = LHS.getAsSymbol()) { 1297 assert(Loc::isLocType(Sym->getType())); 1298 LHS = SVB.makeLoc(Sym); 1299 } 1300 RHS = SVB.makeIntLocVal(S->getRHS()); 1301 } else { 1302 RHS = SVB.makeIntVal(S->getRHS()); 1303 } 1304 1305 return cache( 1306 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); 1307 } 1308 1309 SVal VisitIntSymExpr(const IntSymExpr *S) { 1310 auto I = Cached.find(S); 1311 if (I != Cached.end()) 1312 return I->second; 1313 1314 SVal RHS = getConstOrVisit(S->getRHS()); 1315 if (isUnchanged(S->getRHS(), RHS)) 1316 return skip(S); 1317 1318 SVal LHS = SVB.makeIntVal(S->getLHS()); 1319 return cache( 1320 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); 1321 } 1322 1323 SVal VisitSymSymExpr(const SymSymExpr *S) { 1324 auto I = Cached.find(S); 1325 if (I != Cached.end()) 1326 return I->second; 1327 1328 // For now don't try to simplify mixed Loc/NonLoc expressions 1329 // because they often appear from LocAsInteger operations 1330 // and we don't know how to combine a LocAsInteger 1331 // with a concrete value. 1332 if (Loc::isLocType(S->getLHS()->getType()) != 1333 Loc::isLocType(S->getRHS()->getType())) 1334 return skip(S); 1335 1336 SVal LHS = getConstOrVisit(S->getLHS()); 1337 SVal RHS = getConstOrVisit(S->getRHS()); 1338 1339 if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS)) 1340 return skip(S); 1341 1342 return cache( 1343 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); 1344 } 1345 1346 SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); } 1347 1348 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); } 1349 1350 SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) { 1351 // Simplification is much more costly than computing complexity. 1352 // For high complexity, it may be not worth it. 1353 return Visit(V.getSymbol()); 1354 } 1355 1356 SVal VisitSVal(SVal V) { return V; } 1357 }; 1358 1359 // A crude way of preventing this function from calling itself from evalBinOp. 1360 static bool isReentering = false; 1361 if (isReentering) 1362 return V; 1363 1364 isReentering = true; 1365 SVal SimplifiedV = Simplifier(State).Visit(V); 1366 isReentering = false; 1367 1368 return SimplifiedV; 1369 } 1370