1 //===-- RandomIRBuilder.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/FuzzMutate/RandomIRBuilder.h" 10 #include "llvm/ADT/STLExtras.h" 11 #include "llvm/FuzzMutate/OpDescriptor.h" 12 #include "llvm/FuzzMutate/Random.h" 13 #include "llvm/IR/BasicBlock.h" 14 #include "llvm/IR/Constants.h" 15 #include "llvm/IR/DataLayout.h" 16 #include "llvm/IR/Dominators.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/Instructions.h" 19 #include "llvm/IR/IntrinsicInst.h" 20 #include "llvm/IR/Module.h" 21 22 using namespace llvm; 23 using namespace fuzzerop; 24 25 /// Return a vector of Blocks that dominates this block, excluding current 26 /// block. 27 static std::vector<BasicBlock *> getDominators(BasicBlock *BB) { 28 std::vector<BasicBlock *> ret; 29 DominatorTree DT(*BB->getParent()); 30 DomTreeNode *Node = DT.getNode(BB); 31 // It's possible that an orphan block is not in the dom tree. In that case we 32 // just return nothing. 33 if (!Node) 34 return ret; 35 Node = Node->getIDom(); 36 while (Node && Node->getBlock()) { 37 ret.push_back(Node->getBlock()); 38 // Get parent block. 39 Node = Node->getIDom(); 40 } 41 return ret; 42 } 43 44 /// Return a vector of Blocks that is dominated by this block, excluding current 45 /// block 46 static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) { 47 DominatorTree DT(*BB->getParent()); 48 std::vector<BasicBlock *> ret; 49 DomTreeNode *Parent = DT.getNode(BB); 50 // It's possible that an orphan block is not in the dom tree. In that case we 51 // just return nothing. 52 if (!Parent) 53 return ret; 54 for (DomTreeNode *Child : Parent->children()) 55 ret.push_back(Child->getBlock()); 56 uint64_t Idx = 0; 57 while (Idx < ret.size()) { 58 DomTreeNode *Node = DT[ret[Idx]]; 59 Idx++; 60 for (DomTreeNode *Child : Node->children()) 61 ret.push_back(Child->getBlock()); 62 } 63 return ret; 64 } 65 66 AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty, 67 Value *Init) { 68 /// TODO: For all Allocas, maybe allocate an array. 69 BasicBlock *EntryBB = &F->getEntryBlock(); 70 const DataLayout &DL = F->getDataLayout(); 71 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A", 72 EntryBB->getFirstInsertionPt()); 73 if (Init) 74 new StoreInst(Init, Alloca, std::next(Alloca->getIterator())); 75 return Alloca; 76 } 77 78 std::pair<GlobalVariable *, bool> 79 RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs, 80 fuzzerop::SourcePred Pred) { 81 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) { 82 // Can't directly compare GV's type, as it would be a pointer to the actual 83 // type. 84 return Pred.matches(Srcs, PoisonValue::get(GV->getValueType())); 85 }; 86 bool DidCreate = false; 87 SmallVector<GlobalVariable *, 4> GlobalVars; 88 for (GlobalVariable &GV : M->globals()) { 89 GlobalVars.push_back(&GV); 90 } 91 auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred)); 92 RS.sample(nullptr, 1); 93 GlobalVariable *GV = RS.getSelection(); 94 if (!GV) { 95 DidCreate = true; 96 using LinkageTypes = GlobalVariable::LinkageTypes; 97 auto TRS = makeSampler<Constant *>(Rand); 98 TRS.sample(Pred.generate(Srcs, KnownTypes)); 99 Constant *Init = TRS.getSelection(); 100 Type *Ty = Init->getType(); 101 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init, 102 "G", nullptr, 103 GlobalValue::ThreadLocalMode::NotThreadLocal, 104 M->getDataLayout().getDefaultGlobalsAddressSpace()); 105 } 106 return {GV, DidCreate}; 107 } 108 109 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, 110 ArrayRef<Instruction *> Insts) { 111 return findOrCreateSource(BB, Insts, {}, anyType()); 112 } 113 114 Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB, 115 ArrayRef<Instruction *> Insts, 116 ArrayRef<Value *> Srcs, 117 SourcePred Pred, 118 bool allowConstant) { 119 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); }; 120 SmallVector<uint64_t, 8> SrcTys; 121 for (uint64_t i = 0; i < EndOfValueSource; i++) 122 SrcTys.push_back(i); 123 std::shuffle(SrcTys.begin(), SrcTys.end(), Rand); 124 for (uint64_t SrcTy : SrcTys) { 125 switch (SrcTy) { 126 case SrcFromInstInCurBlock: { 127 auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred)); 128 if (!RS.isEmpty()) { 129 return RS.getSelection(); 130 } 131 break; 132 } 133 case FunctionArgument: { 134 Function *F = BB.getParent(); 135 SmallVector<Argument *, 8> Args; 136 for (uint64_t i = 0; i < F->arg_size(); i++) { 137 Args.push_back(F->getArg(i)); 138 } 139 auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred)); 140 if (!RS.isEmpty()) { 141 return RS.getSelection(); 142 } 143 break; 144 } 145 case InstInDominator: { 146 auto Dominators = getDominators(&BB); 147 std::shuffle(Dominators.begin(), Dominators.end(), Rand); 148 for (BasicBlock *Dom : Dominators) { 149 SmallVector<Instruction *, 16> Instructions; 150 for (Instruction &I : *Dom) { 151 Instructions.push_back(&I); 152 } 153 auto RS = 154 makeSampler(Rand, make_filter_range(Instructions, MatchesPred)); 155 // Also consider choosing no source, meaning we want a new one. 156 if (!RS.isEmpty()) { 157 return RS.getSelection(); 158 } 159 } 160 break; 161 } 162 case SrcFromGlobalVariable: { 163 Module *M = BB.getParent()->getParent(); 164 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred); 165 Type *Ty = GV->getValueType(); 166 LoadInst *LoadGV = nullptr; 167 if (BB.getTerminator()) { 168 LoadGV = new LoadInst(Ty, GV, "LGV", BB.getFirstInsertionPt()); 169 } else { 170 LoadGV = new LoadInst(Ty, GV, "LGV", &BB); 171 } 172 // Because we might be generating new values, we have to check if it 173 // matches again. 174 if (DidCreate) { 175 if (Pred.matches(Srcs, LoadGV)) { 176 return LoadGV; 177 } 178 LoadGV->eraseFromParent(); 179 // If no one is using this GlobalVariable, delete it too. 180 if (GV->use_empty()) { 181 GV->eraseFromParent(); 182 } 183 } 184 break; 185 } 186 case NewConstOrStack: { 187 return newSource(BB, Insts, Srcs, Pred, allowConstant); 188 } 189 default: 190 case EndOfValueSource: { 191 llvm_unreachable("EndOfValueSource executed"); 192 } 193 } 194 } 195 llvm_unreachable("Can't find a source"); 196 } 197 198 Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts, 199 ArrayRef<Value *> Srcs, SourcePred Pred, 200 bool allowConstant) { 201 // Generate some constants to choose from. 202 auto RS = makeSampler<Value *>(Rand); 203 RS.sample(Pred.generate(Srcs, KnownTypes)); 204 205 // If we can find a pointer to load from, use it half the time. 206 Value *Ptr = findPointer(BB, Insts); 207 if (Ptr) { 208 // Create load from the chosen pointer 209 auto IP = BB.getFirstInsertionPt(); 210 if (auto *I = dyn_cast<Instruction>(Ptr)) { 211 IP = ++I->getIterator(); 212 assert(IP != BB.end() && "guaranteed by the findPointer"); 213 } 214 // Pick the type independently. 215 Type *AccessTy = RS.getSelection()->getType(); 216 auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", IP); 217 218 // Only sample this load if it really matches the descriptor 219 if (Pred.matches(Srcs, NewLoad)) 220 RS.sample(NewLoad, RS.totalWeight()); 221 else 222 NewLoad->eraseFromParent(); 223 } 224 225 Value *newSrc = RS.getSelection(); 226 // Generate a stack alloca and store the constant to it if constant is not 227 // allowed, our hope is that later mutations can generate some values and 228 // store to this placeholder. 229 if (!allowConstant && isa<Constant>(newSrc)) { 230 Type *Ty = newSrc->getType(); 231 Function *F = BB.getParent(); 232 AllocaInst *Alloca = createStackMemory(F, Ty, newSrc); 233 if (BB.getTerminator()) { 234 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", 235 BB.getTerminator()->getIterator()); 236 } else { 237 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB); 238 } 239 } 240 return newSrc; 241 } 242 243 static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, 244 const Value *Replacement) { 245 unsigned int OperandNo = Operand.getOperandNo(); 246 if (Operand->getType() != Replacement->getType()) 247 return false; 248 switch (I->getOpcode()) { 249 case Instruction::GetElementPtr: 250 case Instruction::ExtractElement: 251 case Instruction::ExtractValue: 252 // TODO: We could potentially validate these, but for now just leave indices 253 // alone. 254 if (OperandNo >= 1) 255 return false; 256 break; 257 case Instruction::InsertValue: 258 case Instruction::InsertElement: 259 case Instruction::ShuffleVector: 260 if (OperandNo >= 2) 261 return false; 262 break; 263 // For Br/Switch, we only try to modify the 1st Operand (condition). 264 // Modify other operands, like switch case may accidently change case from 265 // ConstantInt to a register, which is illegal. 266 case Instruction::Switch: 267 case Instruction::Br: 268 if (OperandNo >= 1) 269 return false; 270 break; 271 case Instruction::Call: 272 case Instruction::Invoke: 273 case Instruction::CallBr: { 274 const Function *Callee = cast<CallBase>(I)->getCalledFunction(); 275 // If it's an indirect call, give up. 276 if (!Callee) 277 return false; 278 // If callee is not an intrinsic, operand 0 is the function to be called. 279 // Since we cannot assume that the replacement is a function pointer, 280 // we give up. 281 if (!Callee->getIntrinsicID() && OperandNo == 0) 282 return false; 283 return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg); 284 } 285 default: 286 break; 287 } 288 return true; 289 } 290 291 Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB, 292 ArrayRef<Instruction *> Insts, 293 Value *V) { 294 SmallVector<uint64_t, 8> SinkTys; 295 for (uint64_t i = 0; i < EndOfValueSink; i++) 296 SinkTys.push_back(i); 297 std::shuffle(SinkTys.begin(), SinkTys.end(), Rand); 298 auto findSinkAndConnect = 299 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * { 300 auto RS = makeSampler<Use *>(Rand); 301 for (auto &I : Instructions) { 302 for (Use &U : I->operands()) 303 if (isCompatibleReplacement(I, U, V)) 304 RS.sample(&U, 1); 305 } 306 if (!RS.isEmpty()) { 307 Use *Sink = RS.getSelection(); 308 User *U = Sink->getUser(); 309 unsigned OpNo = Sink->getOperandNo(); 310 U->setOperand(OpNo, V); 311 return cast<Instruction>(U); 312 } 313 return nullptr; 314 }; 315 Instruction *Sink = nullptr; 316 for (uint64_t SinkTy : SinkTys) { 317 switch (SinkTy) { 318 case SinkToInstInCurBlock: 319 Sink = findSinkAndConnect(Insts); 320 if (Sink) 321 return Sink; 322 break; 323 case PointersInDominator: { 324 auto Dominators = getDominators(&BB); 325 std::shuffle(Dominators.begin(), Dominators.end(), Rand); 326 for (BasicBlock *Dom : Dominators) { 327 for (Instruction &I : *Dom) { 328 if (isa<PointerType>(I.getType())) 329 return new StoreInst(V, &I, Insts.back()->getIterator()); 330 } 331 } 332 break; 333 } 334 case InstInDominatee: { 335 auto Dominatees = getDominatees(&BB); 336 std::shuffle(Dominatees.begin(), Dominatees.end(), Rand); 337 for (BasicBlock *Dominee : Dominatees) { 338 std::vector<Instruction *> Instructions; 339 for (Instruction &I : *Dominee) 340 Instructions.push_back(&I); 341 Sink = findSinkAndConnect(Instructions); 342 if (Sink) { 343 return Sink; 344 } 345 } 346 break; 347 } 348 case NewStore: 349 /// TODO: allocate a new stack memory. 350 return newSink(BB, Insts, V); 351 case SinkToGlobalVariable: { 352 Module *M = BB.getParent()->getParent(); 353 auto [GV, DidCreate] = 354 findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType())); 355 return new StoreInst(V, GV, Insts.back()->getIterator()); 356 } 357 case EndOfValueSink: 358 default: 359 llvm_unreachable("EndOfValueSink executed"); 360 } 361 } 362 llvm_unreachable("Can't find a sink"); 363 } 364 365 Instruction *RandomIRBuilder::newSink(BasicBlock &BB, 366 ArrayRef<Instruction *> Insts, Value *V) { 367 Value *Ptr = findPointer(BB, Insts); 368 if (!Ptr) { 369 if (uniform(Rand, 0, 1)) { 370 Type *Ty = V->getType(); 371 Ptr = createStackMemory(BB.getParent(), Ty, PoisonValue::get(Ty)); 372 } else { 373 Ptr = PoisonValue::get(PointerType::get(V->getContext(), 0)); 374 } 375 } 376 377 return new StoreInst(V, Ptr, Insts.back()->getIterator()); 378 } 379 380 Value *RandomIRBuilder::findPointer(BasicBlock &BB, 381 ArrayRef<Instruction *> Insts) { 382 auto IsMatchingPtr = [](Instruction *Inst) { 383 // Invoke instructions sometimes produce valid pointers but currently 384 // we can't insert loads or stores from them 385 if (Inst->isTerminator()) 386 return false; 387 388 return Inst->getType()->isPointerTy(); 389 }; 390 if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr))) 391 return RS.getSelection(); 392 return nullptr; 393 } 394 395 Type *RandomIRBuilder::randomType() { 396 uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1); 397 return KnownTypes[TyIdx]; 398 } 399 400 Function *RandomIRBuilder::createFunctionDeclaration(Module &M, 401 uint64_t ArgNum) { 402 Type *RetType = randomType(); 403 404 SmallVector<Type *, 2> Args; 405 for (uint64_t i = 0; i < ArgNum; i++) { 406 Args.push_back(randomType()); 407 } 408 409 Function *F = Function::Create(FunctionType::get(RetType, Args, 410 /*isVarArg=*/false), 411 GlobalValue::ExternalLinkage, "f", &M); 412 return F; 413 } 414 Function *RandomIRBuilder::createFunctionDeclaration(Module &M) { 415 return createFunctionDeclaration( 416 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); 417 } 418 419 Function *RandomIRBuilder::createFunctionDefinition(Module &M, 420 uint64_t ArgNum) { 421 Function *F = this->createFunctionDeclaration(M, ArgNum); 422 423 // TODO: Some arguments and a return value would probably be more 424 // interesting. 425 LLVMContext &Context = M.getContext(); 426 const DataLayout &DL = M.getDataLayout(); 427 BasicBlock *BB = BasicBlock::Create(Context, "BB", F); 428 Type *RetTy = F->getReturnType(); 429 if (RetTy != Type::getVoidTy(Context)) { 430 Instruction *RetAlloca = 431 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB); 432 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB); 433 ReturnInst::Create(Context, RetLoad, BB); 434 } else { 435 ReturnInst::Create(Context, BB); 436 } 437 438 return F; 439 } 440 Function *RandomIRBuilder::createFunctionDefinition(Module &M) { 441 return createFunctionDefinition( 442 M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum)); 443 } 444