//===-- Lint.cpp - Check for common errors in LLVM IR --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass statically checks for common and easily-identified constructs
// which produce undefined or likely unintended behavior in LLVM IR.
//
// It is not a guarantee of correctness, for two reasons. First, it isn't
// comprehensive. There are checks which could be done statically which are
// not yet implemented. Some of these are indicated by TODO comments, but
// those aren't comprehensive either. Second, many conditions cannot be
// checked statically. This pass does no dynamic instrumentation, so it
// can't check for all possible problems.
//
// Another limitation is that it assumes all code will be executed. A store
// through a null pointer in a basic block which is never reached is harmless,
// but this pass will warn about it anyway. This is the main reason why most
// of these checks live here instead of in the Verifier pass.
//
// Optimization passes may make conditions that this pass checks for more or
// less obvious. If an optimization pass appears to be introducing a warning,
// it may be that the optimization pass is merely exposing an existing
// condition in the code.
//
// This code may be run before instcombine. In many cases, instcombine checks
// for the same kinds of things and turns instructions with undefined behavior
// into unreachable (or equivalent). Because of this, this pass makes some
// effort to look through bitcasts and so on.
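//
// As an example of the "assumes all code will be executed" point above, IR
// like the following (illustrative only, not taken from a test) is flagged
// with "Undefined behavior: Null pointer dereference" even if the enclosing
// block can never run:
//
//   store i32 0, ptr null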
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <string>

using namespace llvm;

static const char LintAbortOnErrorArgName[] = "lint-abort-on-error";
static cl::opt<bool>
    LintAbortOnError(LintAbortOnErrorArgName, cl::init(false),
                     cl::desc("In the Lint pass, abort on errors."));

namespace {
namespace MemRef {
static const unsigned Read = 1;
static const unsigned Write = 2;
static const unsigned Callee = 4;
static const unsigned Branchee = 8;
} // end namespace MemRef

class Lint : public InstVisitor<Lint> {
  friend class InstVisitor<Lint>;

  void visitFunction(Function &F);

  void visitCallBase(CallBase &CB);
  void visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
                            MaybeAlign Alignment, Type *Ty, unsigned Flags);

  void visitReturnInst(ReturnInst &I);
  void visitLoadInst(LoadInst &I);
  void visitStoreInst(StoreInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitXor(BinaryOperator &I);
  void visitSub(BinaryOperator &I);
  void visitLShr(BinaryOperator &I);
  void visitAShr(BinaryOperator &I);
  void visitShl(BinaryOperator &I);
  void visitSDiv(BinaryOperator &I);
  void visitUDiv(BinaryOperator &I);
  void visitSRem(BinaryOperator &I);
  void visitURem(BinaryOperator &I);
  void visitAllocaInst(AllocaInst &I);
  void visitVAArgInst(VAArgInst &I);
  void visitIndirectBrInst(IndirectBrInst &I);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitUnreachableInst(UnreachableInst &I);

  Value *findValue(Value *V, bool OffsetOk) const;
  Value *findValueImpl(Value *V, bool OffsetOk,
                       SmallPtrSetImpl<Value *> &Visited) const;

public:
  Module *Mod;
  Triple TT;
  const DataLayout *DL;
  AliasAnalysis *AA;
  AssumptionCache *AC;
  DominatorTree *DT;
  TargetLibraryInfo *TLI;

  std::string Messages;
  raw_string_ostream MessagesStr;

  Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA,
       AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI)
      : Mod(Mod), TT(Triple::normalize(Mod->getTargetTriple())), DL(DL), AA(AA),
        AC(AC), DT(DT), TLI(TLI), MessagesStr(Messages) {}

  void WriteValues(ArrayRef<const Value *> Vs) {
    for (const Value *V : Vs) {
      if (!V)
        continue;
      if (isa<Instruction>(V)) {
        MessagesStr << *V << '\n';
      } else {
        V->printAsOperand(MessagesStr, true, Mod);
        MessagesStr << '\n';
      }
    }
  }

  /// A check failed, so print out the condition and the message.
  ///
  /// This provides a nice place to put a breakpoint if you want to see why
  /// something is not correct.
  void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }

  /// A check failed (with values to print).
  ///
  /// This calls the Message-only version so that the above is easier to set
  /// a breakpoint on.
  template <typename T1, typename... Ts>
  void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
    CheckFailed(Message);
    WriteValues({V1, Vs...});
  }
};
} // end anonymous namespace

// Check - We know that cond should be true; if not, print an error message.
#define Check(C, ...)                                                          \
  do {                                                                         \
    if (!(C)) {                                                                \
      CheckFailed(__VA_ARGS__);                                                \
      return;                                                                  \
    }                                                                          \
  } while (false)

void Lint::visitFunction(Function &F) {
  // This isn't undefined behavior, it's just a little unusual, and it's a
  // fairly common mistake to neglect to name a function.
  Check(F.hasName() || F.hasLocalLinkage(),
        "Unusual: Unnamed function with non-local linkage", &F);

  // TODO: Check for irreducible control flow.
}

void Lint::visitCallBase(CallBase &I) {
  Value *Callee = I.getCalledOperand();

  visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt,
                       nullptr, MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Check(I.getCallingConv() == F->getCallingConv(),
          "Undefined behavior: Caller and callee calling convention differ",
          &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = I.arg_size();

    Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                         : FT->getNumParams() == NumActualArgs,
          "Undefined behavior: Call argument count mismatches callee "
          "argument count",
          &I);

    Check(FT->getReturnType() == I.getType(),
          "Undefined behavior: Call return type mismatches "
          "callee return type",
          &I);

    // Check argument types (in case the callee was cast) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    auto AI = I.arg_begin(), AE = I.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Check(Formal->getType() == Actual->getType(),
              "Undefined behavior: Call argument type mismatches "
              "callee parameter type",
              &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the
        // dereferenced memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
          AttributeList PAL = I.getAttributes();
          unsigned ArgNo = 0;
          for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
            // Skip ByVal arguments since they will be memcpy'd to the callee's
            // stack so we're not really passing the pointer anyway.
            if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
              continue;
            // If both arguments are readonly, they have no dependence.
            if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo))
              continue;
            // Skip readnone arguments since those are guaranteed not to be
            // dereferenced anyway.
            if (I.doesNotAccessMemory(ArgNo))
              continue;
            if (AI != BI && (*BI)->getType()->isPointerTy() &&
                !isa<ConstantPointerNull>(*BI)) {
              AliasResult Result = AA->alias(*AI, *BI);
              Check(Result != AliasResult::MustAlias &&
                        Result != AliasResult::PartialAlias,
                    "Unusual: noalias argument aliases another argument", &I);
            }
          }
        }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty = Formal->getParamStructRetType();
          MemoryLocation Loc(
              Actual, LocationSize::precise(DL->getTypeStoreSize(Ty)));
          visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }

        // Check that ABI attributes for the function and call-site match.
        unsigned ArgNo = AI->getOperandNo();
        Attribute::AttrKind ABIAttributes[] = {
            Attribute::ZExt,         Attribute::SExt,     Attribute::InReg,
            Attribute::ByVal,        Attribute::ByRef,    Attribute::InAlloca,
            Attribute::Preallocated, Attribute::StructRet};
        AttributeList CallAttrs = I.getAttributes();
        for (Attribute::AttrKind Attr : ABIAttributes) {
          Attribute CallAttr = CallAttrs.getParamAttr(ArgNo, Attr);
          Attribute FnAttr = F->getParamAttribute(ArgNo, Attr);
          Check(CallAttr.isValid() == FnAttr.isValid(),
                Twine("Undefined behavior: ABI attribute ") +
                    Attribute::getNameFromAttrKind(Attr) +
                    " not present on both function and call-site",
                &I);
          if (CallAttr.isValid() && FnAttr.isValid()) {
            Check(CallAttr == FnAttr,
                  Twine("Undefined behavior: ABI attribute ") +
                      Attribute::getNameFromAttrKind(Attr) +
                      " does not have same argument for function and call-site",
                  &I);
          }
        }
      }
    }
  }

  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (CI->isTailCall()) {
      const AttributeList &PAL = CI->getAttributes();
      unsigned ArgNo = 0;
      for (Value *Arg : I.args()) {
        // Skip ByVal arguments since they will be memcpy'd to the callee's
        // stack anyway.
        if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
          continue;
        Value *Obj = findValue(Arg, /*OffsetOk=*/true);
        Check(!isa<AllocaInst>(Obj),
              "Undefined behavior: Call with \"tail\" keyword references "
              "alloca",
              &I);
      }
    }
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default:
      break;

      // TODO: Check more intrinsics

    case Intrinsic::memcpy:
    case Intrinsic::memcpy_inline: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      visitMemoryReference(I, MemoryLocation::getForDest(MCI),
                           MCI->getDestAlign(), nullptr, MemRef::Write);
      visitMemoryReference(I, MemoryLocation::getForSource(MCI),
                           MCI->getSourceAlign(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      auto Size = LocationSize::afterPointer();
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = LocationSize::precise(Len->getValue().getZExtValue());
      Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                AliasResult::MustAlias,
            "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      visitMemoryReference(I, MemoryLocation::getForDest(MMI),
                           MMI->getDestAlign(), nullptr, MemRef::Write);
      visitMemoryReference(I, MemoryLocation::getForSource(MMI),
                           MMI->getSourceAlign(), nullptr, MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      visitMemoryReference(I, MemoryLocation::getForDest(MSI),
                           MSI->getDestAlign(), nullptr, MemRef::Write);
      break;
    }
    case Intrinsic::memset_inline: {
      MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
      visitMemoryReference(I, MemoryLocation::getForDest(MSII),
                           MSII->getDestAlign(), nullptr, MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      // vastart in a non-varargs function is rejected by the verifier.
      visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
                           std::nullopt, nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
                           std::nullopt, nullptr, MemRef::Write);
      visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI),
                           std::nullopt, nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
                           std::nullopt, nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
                           std::nullopt, nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::get_active_lane_mask:
      if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
        Check(!TripCount->isZero(),
              "get_active_lane_mask: operand #2 "
              "must be greater than 0",
              &I);
      break;
    }
}

void Lint::visitReturnInst(ReturnInst &I) {
  Function *F = I.getParent()->getParent();
  Check(!F->doesNotReturn(),
        "Unusual: Return statement in function with noreturn attribute", &I);

  if (Value *V = I.getReturnValue()) {
    Value *Obj = findValue(V, /*OffsetOk=*/true);
    Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
  }
}

// TODO: Check that the reference is in bounds.
// TODO: Check readnone/readonly function attributes.
void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
                                MaybeAlign Align, Type *Ty, unsigned Flags) {
  // If no memory is being referenced, it doesn't matter if the pointer
  // is valid.
  if (Loc.Size.isZero())
    return;

  Value *Ptr = const_cast<Value *>(Loc.Ptr);
  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
  Check(!isa<ConstantPointerNull>(UnderlyingObject),
        "Undefined behavior: Null pointer dereference", &I);
  Check(!isa<UndefValue>(UnderlyingObject),
        "Undefined behavior: Undef pointer dereference", &I);
  Check(!isa<ConstantInt>(UnderlyingObject) ||
            !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
        "Unusual: All-ones pointer dereference", &I);
  Check(!isa<ConstantInt>(UnderlyingObject) ||
            !cast<ConstantInt>(UnderlyingObject)->isOne(),
        "Unusual: Address one pointer dereference", &I);

  if (Flags & MemRef::Write) {
    if (TT.isAMDGPU())
      Check(!AMDGPU::isConstantAddressSpace(
                UnderlyingObject->getType()->getPointerAddressSpace()),
            "Undefined behavior: Write to memory in const addrspace", &I);

    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
      Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
            &I);
    Check(!isa<Function>(UnderlyingObject) &&
              !isa<BlockAddress>(UnderlyingObject),
          "Undefined behavior: Write to text section", &I);
  }
  if (Flags & MemRef::Read) {
    Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
          &I);
    Check(!isa<BlockAddress>(UnderlyingObject),
          "Undefined behavior: Load from block address", &I);
  }
  if (Flags & MemRef::Callee) {
    Check(!isa<BlockAddress>(UnderlyingObject),
          "Undefined behavior: Call to block address", &I);
  }
  if (Flags & MemRef::Branchee) {
    Check(!isa<Constant>(UnderlyingObject) ||
              isa<BlockAddress>(UnderlyingObject),
          "Undefined behavior: Branch to non-blockaddress", &I);
  }

  // Check for buffer overflows and misalignment.
  // Only handles memory references that read/write something simple like an
  // alloca instruction or a global variable.
  int64_t Offset = 0;
  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {
    // OK, so the access is to a constant offset from Ptr. Check that Ptr is
    // something we can handle and if so extract the size of this base object
    // along with its alignment.
    uint64_t BaseSize = MemoryLocation::UnknownSize;
    MaybeAlign BaseAlign;

    if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
      Type *ATy = AI->getAllocatedType();
      if (!AI->isArrayAllocation() && ATy->isSized() && !ATy->isScalableTy())
        BaseSize = DL->getTypeAllocSize(ATy).getFixedValue();
      BaseAlign = AI->getAlign();
    } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
      // If the global may be defined differently in another compilation unit
      // then don't warn about funky memory accesses.
      if (GV->hasDefinitiveInitializer()) {
        Type *GTy = GV->getValueType();
        if (GTy->isSized())
          BaseSize = DL->getTypeAllocSize(GTy);
        BaseAlign = GV->getAlign();
        if (!BaseAlign && GTy->isSized())
          BaseAlign = DL->getABITypeAlign(GTy);
      }
    }

    // Accesses from before the start or after the end of the object are not
    // defined.
    Check(!Loc.Size.hasValue() || Loc.Size.isScalable() ||
              BaseSize == MemoryLocation::UnknownSize ||
              (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
          "Undefined behavior: Buffer overflow", &I);

    // Accesses that say that the memory is more aligned than it is are not
    // defined.
    if (!Align && Ty && Ty->isSized())
      Align = DL->getABITypeAlign(Ty);
    if (BaseAlign && Align)
      Check(*Align <= commonAlignment(*BaseAlign, Offset),
            "Undefined behavior: Memory reference address is misaligned", &I);
  }
}

void Lint::visitLoadInst(LoadInst &I) {
  visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(),
                       MemRef::Read);
}

void Lint::visitStoreInst(StoreInst &I) {
  visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(),
                       I.getOperand(0)->getType(), MemRef::Write);
}

void Lint::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(),
                       I.getOperand(0)->getType(), MemRef::Write);
}

void Lint::visitAtomicRMWInst(AtomicRMWInst &I) {
  visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(),
                       I.getOperand(0)->getType(), MemRef::Write);
}

void Lint::visitXor(BinaryOperator &I) {
  Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
        "Undefined result: xor(undef, undef)", &I);
}

void Lint::visitSub(BinaryOperator &I) {
  Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
        "Undefined result: sub(undef, undef)", &I);
}

void Lint::visitLShr(BinaryOperator &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
                                                        /*OffsetOk=*/false)))
    Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
          "Undefined result: Shift count out of range", &I);
}

void Lint::visitAShr(BinaryOperator &I) {
  if (ConstantInt *CI =
          dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
    Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
          "Undefined result: Shift count out of range", &I);
}

void Lint::visitShl(BinaryOperator &I) {
  if (ConstantInt *CI =
          dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
    Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
          "Undefined result: Shift count out of range", &I);
}

static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                   AssumptionCache *AC) {
  // Assume undef could be zero.
  if (isa<UndefValue>(V))
    return true;

  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
  if (!VecTy) {
    KnownBits Known =
        computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
    return Known.isZero();
  }

  // Per-component check doesn't work with zeroinitializer.
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (C->isZeroValue())
    return true;

  // For a vector, KnownZero will only be true if all values are zero, so check
  // this per component.
  for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements();
       I != N; ++I) {
    Constant *Elem = C->getAggregateElement(I);
    if (isa<UndefValue>(Elem))
      return true;

    KnownBits Known = computeKnownBits(Elem, DL);
    if (Known.isZero())
      return true;
  }

  return false;
}

void Lint::visitSDiv(BinaryOperator &I) {
  Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
        "Undefined behavior: Division by zero", &I);
}

void Lint::visitUDiv(BinaryOperator &I) {
  Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
        "Undefined behavior: Division by zero", &I);
}

void Lint::visitSRem(BinaryOperator &I) {
  Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
        "Undefined behavior: Division by zero", &I);
}

void Lint::visitURem(BinaryOperator &I) {
  Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
        "Undefined behavior: Division by zero", &I);
}

void Lint::visitAllocaInst(AllocaInst &I) {
  if (isa<ConstantInt>(I.getArraySize()))
    // This isn't undefined behavior, it's just an obvious pessimization.
    Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
          "Pessimization: Static alloca outside of entry block", &I);

  // TODO: Check for an unusual size (MSB set?)
}

void Lint::visitVAArgInst(VAArgInst &I) {
  visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr,
                       MemRef::Read | MemRef::Write);
}

void Lint::visitIndirectBrInst(IndirectBrInst &I) {
  visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()),
                       std::nullopt, nullptr, MemRef::Branchee);

  Check(I.getNumDestinations() != 0,
        "Undefined behavior: indirectbr with no destinations", &I);
}

void Lint::visitExtractElementInst(ExtractElementInst &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
                                                        /*OffsetOk=*/false))) {
    ElementCount EC = I.getVectorOperandType()->getElementCount();
    Check(EC.isScalable() || CI->getValue().ult(EC.getFixedValue()),
          "Undefined result: extractelement index out of range", &I);
  }
}

void Lint::visitInsertElementInst(InsertElementInst &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
                                                        /*OffsetOk=*/false))) {
    ElementCount EC = I.getType()->getElementCount();
    Check(EC.isScalable() || CI->getValue().ult(EC.getFixedValue()),
          "Undefined result: insertelement index out of range", &I);
  }
}

void Lint::visitUnreachableInst(UnreachableInst &I) {
  // This isn't undefined behavior, it's merely suspicious.
  Check(&I == &I.getParent()->front() ||
            std::prev(I.getIterator())->mayHaveSideEffects(),
        "Unusual: unreachable immediately preceded by instruction without "
        "side effects",
        &I);
}

/// findValue - Look through bitcasts and simple memory reference patterns
/// to identify an equivalent, but more informative, value. If OffsetOk
/// is true, look through getelementptrs with non-zero offsets too.
///
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
Value *Lint::findValue(Value *V, bool OffsetOk) const {
  SmallPtrSet<Value *, 4> Visited;
  return findValueImpl(V, OffsetOk, Visited);
}

/// findValueImpl - Implementation helper for findValue.
Value *Lint::findValueImpl(Value *V, bool OffsetOk,
                           SmallPtrSetImpl<Value *> &Visited) const {
  // Detect self-referential values.
  if (!Visited.insert(V).second)
    return PoisonValue::get(V->getType());

  // TODO: Look through sext or zext cast, when the result is known to
  // be interpreted as signed or unsigned, respectively.
  // TODO: Look through eliminable cast pairs.
  // TODO: Look through calls with unique return values.
  // TODO: Look through vector insert/extract/shuffle.
  V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts();
  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
    BasicBlock::iterator BBI = L->getIterator();
    BasicBlock *BB = L->getParent();
    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
    BatchAAResults BatchAA(*AA);
    for (;;) {
      if (!VisitedBlocks.insert(BB).second)
        break;
      if (Value *U =
              FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
        return findValueImpl(U, OffsetOk, Visited);
      if (BBI != BB->begin())
        break;
      BB = BB->getUniquePredecessor();
      if (!BB)
        break;
      BBI = BB->end();
    }
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    if (Value *W = PN->hasConstantValue())
      return findValueImpl(W, OffsetOk, Visited);
  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
    if (CI->isNoopCast(*DL))
      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
    if (Value *W =
            FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices()))
      if (W != V)
        return findValueImpl(W, OffsetOk, Visited);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // Same as above, but for ConstantExpr instead of Instruction.
    if (Instruction::isCast(CE->getOpcode())) {
      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                               CE->getOperand(0)->getType(), CE->getType(),
                               *DL))
        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
    }
  }

  // As a last resort, try SimplifyInstruction or constant folding.
  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
    if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC}))
      return findValueImpl(W, OffsetOk, Visited);
  } else if (auto *C = dyn_cast<Constant>(V)) {
    Value *W = ConstantFoldConstant(C, *DL, TLI);
    if (W != V)
      return findValueImpl(W, OffsetOk, Visited);
  }

  return V;
}

PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto *Mod = F.getParent();
  auto *DL = &F.getDataLayout();
  auto *AA = &AM.getResult<AAManager>(F);
  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
  Lint L(Mod, DL, AA, AC, DT, TLI);
  L.visit(F);
  dbgs() << L.MessagesStr.str();
  if (LintAbortOnError && !L.MessagesStr.str().empty())
    report_fatal_error(Twine("Linter found errors, aborting. (enabled by --") +
                           LintAbortOnErrorArgName + ")",
                       false);
  return PreservedAnalyses::all();
}

//===----------------------------------------------------------------------===//
//  Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//

/// lintFunction - Check a function for errors, printing messages on stderr.
///
void llvm::lintFunction(const Function &f) {
  Function &F = const_cast<Function &>(f);
  assert(!F.isDeclaration() && "Cannot lint external functions");

  FunctionAnalysisManager FAM;
  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
  FAM.registerPass([&] { return DominatorTreeAnalysis(); });
  FAM.registerPass([&] { return AssumptionAnalysis(); });
  FAM.registerPass([&] {
    AAManager AA;
    AA.registerFunctionAnalysis<BasicAA>();
    AA.registerFunctionAnalysis<ScopedNoAliasAA>();
    AA.registerFunctionAnalysis<TypeBasedAA>();
    return AA;
  });
  LintPass().run(F, FAM);
}

/// lintModule - Check a module for errors, printing messages on stderr.
///
void llvm::lintModule(const Module &M) {
  for (const Function &F : M) {
    if (!F.isDeclaration())
      lintFunction(F);
  }
}
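
// Illustrative note (not part of the pass itself): in a typical LLVM build,
// these checks can be exercised from the command line with something like
//
//   opt -passes=lint -disable-output input.ll
//
// and the --lint-abort-on-error option defined above turns any reported
// message into a fatal error.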