1 //===-- IRMutator.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/FuzzMutate/IRMutator.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/Analysis/TargetLibraryInfo.h"
13 #include "llvm/Bitcode/BitcodeReader.h"
14 #include "llvm/Bitcode/BitcodeWriter.h"
15 #include "llvm/FuzzMutate/Operations.h"
16 #include "llvm/FuzzMutate/Random.h"
17 #include "llvm/FuzzMutate/RandomIRBuilder.h"
18 #include "llvm/IR/BasicBlock.h"
19 #include "llvm/IR/FMF.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/InstIterator.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/Operator.h"
25 #include "llvm/IR/Verifier.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Transforms/Scalar/DCE.h"
29 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
30 #include <optional>
31
32 using namespace llvm;
33
createEmptyFunction(Module & M)34 static void createEmptyFunction(Module &M) {
35 // TODO: Some arguments and a return value would probably be more interesting.
36 LLVMContext &Context = M.getContext();
37 Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Context), {},
38 /*isVarArg=*/false),
39 GlobalValue::ExternalLinkage, "f", &M);
40 BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
41 ReturnInst::Create(Context, BB);
42 }
43
mutate(Module & M,RandomIRBuilder & IB)44 void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
45 auto RS = makeSampler<Function *>(IB.Rand);
46 for (Function &F : M)
47 if (!F.isDeclaration())
48 RS.sample(&F, /*Weight=*/1);
49
50 if (RS.isEmpty())
51 createEmptyFunction(M);
52 else
53 mutate(*RS.getSelection(), IB);
54 }
55
mutate(Function & F,RandomIRBuilder & IB)56 void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
57 mutate(*makeSampler(IB.Rand, make_pointer_range(F)).getSelection(), IB);
58 }
59
mutate(BasicBlock & BB,RandomIRBuilder & IB)60 void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
61 mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
62 }
63
mutateModule(Module & M,int Seed,size_t CurSize,size_t MaxSize)64 void IRMutator::mutateModule(Module &M, int Seed, size_t CurSize,
65 size_t MaxSize) {
66 std::vector<Type *> Types;
67 for (const auto &Getter : AllowedTypes)
68 Types.push_back(Getter(M.getContext()));
69 RandomIRBuilder IB(Seed, Types);
70
71 auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
72 for (const auto &Strategy : Strategies)
73 RS.sample(Strategy.get(),
74 Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
75 auto Strategy = RS.getSelection();
76
77 Strategy->mutate(M, IB);
78 }
79
eliminateDeadCode(Function & F)80 static void eliminateDeadCode(Function &F) {
81 FunctionPassManager FPM;
82 FPM.addPass(DCEPass());
83 FunctionAnalysisManager FAM;
84 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
85 FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
86 FPM.run(F, FAM);
87 }
88
mutate(Function & F,RandomIRBuilder & IB)89 void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
90 IRMutationStrategy::mutate(F, IB);
91 eliminateDeadCode(F);
92 }
93
getDefaultOps()94 std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
95 std::vector<fuzzerop::OpDescriptor> Ops;
96 describeFuzzerIntOps(Ops);
97 describeFuzzerFloatOps(Ops);
98 describeFuzzerControlFlowOps(Ops);
99 describeFuzzerPointerOps(Ops);
100 describeFuzzerAggregateOps(Ops);
101 describeFuzzerVectorOps(Ops);
102 return Ops;
103 }
104
105 std::optional<fuzzerop::OpDescriptor>
chooseOperation(Value * Src,RandomIRBuilder & IB)106 InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
107 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
108 return Op.SourcePreds[0].matches({}, Src);
109 };
110 auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
111 if (RS.isEmpty())
112 return std::nullopt;
113 return *RS;
114 }
115
mutate(BasicBlock & BB,RandomIRBuilder & IB)116 void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
117 SmallVector<Instruction *, 32> Insts;
118 for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
119 Insts.push_back(&*I);
120 if (Insts.size() < 1)
121 return;
122
123 // Choose an insertion point for our new instruction.
124 size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
125
126 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
127 auto InstsAfter = ArrayRef(Insts).slice(IP);
128
129 // Choose a source, which will be used to constrain the operation selection.
130 SmallVector<Value *, 2> Srcs;
131 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
132
133 // Choose an operation that's constrained to be valid for the type of the
134 // source, collect any other sources it needs, and then build it.
135 auto OpDesc = chooseOperation(Srcs[0], IB);
136 // Bail if no operation was found
137 if (!OpDesc)
138 return;
139
140 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
141 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
142
143 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
144 // Find a sink and wire up the results of the operation.
145 IB.connectToSink(BB, InstsAfter, Op);
146 }
147 }
148
getWeight(size_t CurrentSize,size_t MaxSize,uint64_t CurrentWeight)149 uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
150 uint64_t CurrentWeight) {
151 // If we have less than 200 bytes, panic and try to always delete.
152 if (CurrentSize > MaxSize - 200)
153 return CurrentWeight ? CurrentWeight * 100 : 1;
154 // Draw a line starting from when we only have 1k left and increasing linearly
155 // to double the current weight.
156 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
157 (static_cast<int64_t>(MaxSize) -
158 static_cast<int64_t>(CurrentSize) - 1000) /
159 1000;
160 // Clamp negative weights to zero.
161 if (Line < 0)
162 return 0;
163 return Line;
164 }
165
mutate(Function & F,RandomIRBuilder & IB)166 void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
167 auto RS = makeSampler<Instruction *>(IB.Rand);
168 for (Instruction &Inst : instructions(F)) {
169 // TODO: We can't handle these instructions.
170 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
171 isa<PHINode>(Inst))
172 continue;
173
174 RS.sample(&Inst, /*Weight=*/1);
175 }
176 if (RS.isEmpty())
177 return;
178
179 // Delete the instruction.
180 mutate(*RS.getSelection(), IB);
181 // Clean up any dead code that's left over after removing the instruction.
182 eliminateDeadCode(F);
183 }
184
mutate(Instruction & Inst,RandomIRBuilder & IB)185 void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
186 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
187
188 if (Inst.getType()->isVoidTy()) {
189 // Instructions with void type (ie, store) have no uses to worry about. Just
190 // erase it and move on.
191 Inst.eraseFromParent();
192 return;
193 }
194
195 // Otherwise we need to find some other value with the right type to keep the
196 // users happy.
197 auto Pred = fuzzerop::onlyType(Inst.getType());
198 auto RS = makeSampler<Value *>(IB.Rand);
199 SmallVector<Instruction *, 32> InstsBefore;
200 BasicBlock *BB = Inst.getParent();
201 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
202 ++I) {
203 if (Pred.matches({}, &*I))
204 RS.sample(&*I, /*Weight=*/1);
205 InstsBefore.push_back(&*I);
206 }
207 if (!RS)
208 RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
209
210 Inst.replaceAllUsesWith(RS.getSelection());
211 Inst.eraseFromParent();
212 }
213
mutate(Instruction & Inst,RandomIRBuilder & IB)214 void InstModificationIRStrategy::mutate(Instruction &Inst,
215 RandomIRBuilder &IB) {
216 SmallVector<std::function<void()>, 8> Modifications;
217 CmpInst *CI = nullptr;
218 GetElementPtrInst *GEP = nullptr;
219 switch (Inst.getOpcode()) {
220 default:
221 break;
222 // Add nsw, nuw flag
223 case Instruction::Add:
224 case Instruction::Mul:
225 case Instruction::Sub:
226 case Instruction::Shl:
227 Modifications.push_back(
228 [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
229 Modifications.push_back(
230 [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
231 break;
232 case Instruction::ICmp:
233 CI = cast<ICmpInst>(&Inst);
234 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
235 p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
236 Modifications.push_back(
237 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
238 }
239 break;
240 // Add inbound flag.
241 case Instruction::GetElementPtr:
242 GEP = cast<GetElementPtrInst>(&Inst);
243 Modifications.push_back(
244 [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
245 break;
246 // Add exact flag.
247 case Instruction::UDiv:
248 case Instruction::SDiv:
249 case Instruction::LShr:
250 case Instruction::AShr:
251 Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
252 break;
253
254 case Instruction::FCmp:
255 CI = cast<ICmpInst>(&Inst);
256 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
257 p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
258 Modifications.push_back(
259 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
260 }
261 break;
262 }
263
264 // Add fast math flag if possible.
265 if (isa<FPMathOperator>(&Inst)) {
266 // Try setting everything unless they are already on.
267 Modifications.push_back(
268 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
269 // Try unsetting everything unless they are already off.
270 Modifications.push_back(
271 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
272 // Individual setting by flipping the bit
273 Modifications.push_back(
274 [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
275 Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
276 Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
277 Modifications.push_back(
278 [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
279 Modifications.push_back(
280 [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
281 Modifications.push_back(
282 [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
283 Modifications.push_back(
284 [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
285 }
286
287 // Randomly switch operands of instructions
288 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
289 switch (Inst.getOpcode()) {
290 case Instruction::SDiv:
291 case Instruction::UDiv:
292 case Instruction::SRem:
293 case Instruction::URem:
294 case Instruction::FDiv:
295 case Instruction::FRem: {
296 // Verify that the after shuffle the second operand is not
297 // constant 0.
298 Value *Operand = Inst.getOperand(0);
299 if (Constant *C = dyn_cast<Constant>(Operand)) {
300 if (!C->isZeroValue()) {
301 ShuffleItems = {0, 1};
302 }
303 }
304 break;
305 }
306 case Instruction::Select:
307 ShuffleItems = {1, 2};
308 break;
309 case Instruction::Add:
310 case Instruction::Sub:
311 case Instruction::Mul:
312 case Instruction::Shl:
313 case Instruction::LShr:
314 case Instruction::AShr:
315 case Instruction::And:
316 case Instruction::Or:
317 case Instruction::Xor:
318 case Instruction::FAdd:
319 case Instruction::FSub:
320 case Instruction::FMul:
321 case Instruction::ICmp:
322 case Instruction::FCmp:
323 case Instruction::ShuffleVector:
324 ShuffleItems = {0, 1};
325 break;
326 }
327 if (ShuffleItems != NoneItem) {
328 Modifications.push_back([&Inst, &ShuffleItems]() {
329 Value *Op0 = Inst.getOperand(ShuffleItems.first);
330 Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
331 Inst.setOperand(ShuffleItems.second, Op0);
332 });
333 }
334
335 auto RS = makeSampler(IB.Rand, Modifications);
336 if (RS)
337 RS.getSelection()();
338 }
339
340 /// Return a case value that is not already taken to make sure we don't have two
341 /// cases with same value.
getUniqueCaseValue(SmallSet<uint64_t,4> & CasesTaken,uint64_t MaxValue,RandomIRBuilder & IB)342 static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
343 uint64_t MaxValue, RandomIRBuilder &IB) {
344 uint64_t tmp;
345 do {
346 tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
347 } while (CasesTaken.count(tmp) != 0);
348 CasesTaken.insert(tmp);
349 return tmp;
350 }
351
mutate(BasicBlock & BB,RandomIRBuilder & IB)352 void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
353 SmallVector<Instruction *, 32> Insts;
354 for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
355 Insts.push_back(&*I);
356 if (Insts.size() < 1)
357 return;
358
359 // Choose a point where we split the block.
360 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
361 auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
362
363 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
364 // directly jumps to `Sink`. Here, we have to create a new terminator for
365 // `Source`.
366 BasicBlock *Block = Insts[IP]->getParent();
367 BasicBlock *Source = Block;
368 BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
369
370 Function *F = BB.getParent();
371 LLVMContext &C = F->getParent()->getContext();
372 // A coin decides if it is branch or switch
373 if (uniform<uint64_t>(IB.Rand, 0, 1)) {
374 // Branch
375 BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
376 BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
377 Value *Cond =
378 IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
379 fuzzerop::onlyType(Type::getInt1Ty(C)), false);
380 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
381 // Remove the old terminator.
382 ReplaceInstWithInst(Source->getTerminator(), Branch);
383 // Connect these blocks to `Sink`
384 connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
385 } else {
386 // Switch
387 // Determine Integer type, it IS possible we use a boolean to switch.
388 auto RS =
389 makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
390 return Ty->isIntegerTy();
391 }));
392 assert(RS && "There is no integer type in all allowed types, is the "
393 "setting correct?");
394 Type *Ty = RS.getSelection();
395 IntegerType *IntTy = cast<IntegerType>(Ty);
396
397 uint64_t BitSize = IntTy->getBitWidth();
398 uint64_t MaxCaseVal =
399 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
400 // Create Switch inst in Block
401 Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
402 fuzzerop::onlyType(IntTy), false);
403 BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
404 uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
405 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
406 SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
407 // Remove the old terminator.
408 ReplaceInstWithInst(Source->getTerminator(), Switch);
409
410 // Create blocks, for each block assign a case value.
411 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
412 SmallSet<uint64_t, 4> CasesTaken;
413 for (uint64_t i = 0; i < NumCases; i++) {
414 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
415 BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
416 ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
417 Switch->addCase(OnValue, CaseBlock);
418 Blocks.push_back(CaseBlock);
419 }
420
421 // Connect these blocks to `Sink`
422 connectBlocksToSink(Blocks, Sink, IB);
423 }
424 }
425
426 /// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
427 /// even have terminator.
connectBlocksToSink(ArrayRef<BasicBlock * > Blocks,BasicBlock * Sink,RandomIRBuilder & IB)428 void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
429 BasicBlock *Sink,
430 RandomIRBuilder &IB) {
431 uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
432 for (uint64_t i = 0; i < Blocks.size(); i++) {
433 // We have at least one block that directly goes to sink.
434 CFGToSink ToSink = (i == DirectSinkIdx)
435 ? CFGToSink::DirectSink
436 : static_cast<CFGToSink>(uniform<uint64_t>(
437 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
438 BasicBlock *BB = Blocks[i];
439 Function *F = BB->getParent();
440 LLVMContext &C = F->getParent()->getContext();
441 switch (ToSink) {
442 case CFGToSink::Return: {
443 Type *RetTy = F->getReturnType();
444 Value *RetValue = nullptr;
445 if (!RetTy->isVoidTy())
446 RetValue =
447 IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
448 ReturnInst::Create(C, RetValue, BB);
449 break;
450 }
451 case CFGToSink::DirectSink: {
452 BranchInst::Create(Sink, BB);
453 break;
454 }
455 case CFGToSink::SinkOrSelfLoop: {
456 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
457 // A coin decides which block is true branch.
458 uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
459 Value *Cond = IB.findOrCreateSource(
460 *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
461 BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
462 break;
463 }
464 case CFGToSink::EndOfCFGToLink:
465 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
466 }
467 }
468 }
469
mutate(BasicBlock & BB,RandomIRBuilder & IB)470 void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
471 // Can't insert PHI node to entry node.
472 if (&BB == &BB.getParent()->getEntryBlock())
473 return;
474 Type *Ty = IB.randomType();
475 PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", &BB.front());
476
477 // Use a map to make sure the same incoming basic block has the same value.
478 DenseMap<BasicBlock *, Value *> IncomingValues;
479 for (BasicBlock *Pred : predecessors(&BB)) {
480 Value *Src = IncomingValues[Pred];
481 // If `Pred` is not in the map yet, we'll get a nullptr.
482 if (!Src) {
483 SmallVector<Instruction *, 32> Insts;
484 for (auto I = Pred->begin(); I != Pred->end(); ++I)
485 Insts.push_back(&*I);
486 // There is no need to inform IB what previously used values are if we are
487 // using `onlyType`
488 Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
489 IncomingValues[Pred] = Src;
490 }
491 PHI->addIncoming(Src, Pred);
492 }
493 SmallVector<Instruction *, 32> InstsAfter;
494 for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
495 InstsAfter.push_back(&*I);
496 IB.connectToSink(BB, InstsAfter, PHI);
497 }
498
mutate(Function & F,RandomIRBuilder & IB)499 void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
500 for (BasicBlock &BB : F) {
501 this->mutate(BB, IB);
502 }
503 }
mutate(BasicBlock & BB,RandomIRBuilder & IB)504 void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
505 SmallVector<Instruction *, 32> Insts;
506 for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
507 Insts.push_back(&*I);
508 if (Insts.size() < 1)
509 return;
510 // Choose an Instruction to mutate.
511 uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
512 Instruction *Inst = Insts[Idx];
513 // `Idx + 1` so we don't sink to ourselves.
514 auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
515 LLVMContext &C = BB.getParent()->getParent()->getContext();
516 // Don't sink terminators, void function calls, etc.
517 if (Inst->getType() != Type::getVoidTy(C))
518 // Find a new sink and wire up the results of the operation.
519 IB.connectToSink(BB, InstsAfter, Inst);
520 }
521
mutate(BasicBlock & BB,RandomIRBuilder & IB)522 void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
523
524 SmallPtrSet<Instruction *, 8> AliveInsts;
525 for (auto &I : make_early_inc_range(make_range(
526 BB.getFirstInsertionPt(), BB.getTerminator()->getIterator()))) {
527 // First gather all instructions that can be shuffled. Don't take
528 // terminator.
529 AliveInsts.insert(&I);
530 // Then remove these instructions from the block
531 I.removeFromParent();
532 }
533
534 // Shuffle these instructions using topological sort.
535 // Returns true if all current instruction's dependencies in this block have
536 // been shuffled. If so, this instruction can be shuffled too.
537 auto hasAliveParent = [&AliveInsts](Instruction *I) {
538 for (Value *O : I->operands()) {
539 Instruction *P = dyn_cast<Instruction>(O);
540 if (P && AliveInsts.count(P))
541 return true;
542 }
543 return false;
544 };
545 // Get all alive instructions that depend on the current instruction.
546 auto getAliveChildren = [&AliveInsts](Instruction *I) {
547 SmallPtrSet<Instruction *, 4> Children;
548 for (Value *U : I->users()) {
549 Instruction *P = dyn_cast<Instruction>(U);
550 if (P && AliveInsts.count(P))
551 Children.insert(P);
552 }
553 return Children;
554 };
555 SmallPtrSet<Instruction *, 8> Roots;
556 SmallVector<Instruction *, 8> Insts;
557 for (Instruction *I : AliveInsts) {
558 if (!hasAliveParent(I))
559 Roots.insert(I);
560 }
561 // Topological sort by randomly selecting a node without a parent, or root.
562 while (!Roots.empty()) {
563 auto RS = makeSampler<Instruction *>(IB.Rand);
564 for (Instruction *Root : Roots)
565 RS.sample(Root, 1);
566 Instruction *Root = RS.getSelection();
567 Roots.erase(Root);
568 AliveInsts.erase(Root);
569 Insts.push_back(Root);
570 for (Instruction *Child : getAliveChildren(Root)) {
571 if (!hasAliveParent(Child)) {
572 Roots.insert(Child);
573 }
574 }
575 }
576
577 Instruction *Terminator = BB.getTerminator();
578 // Then put instructions back.
579 for (Instruction *I : Insts) {
580 I->insertBefore(Terminator);
581 }
582 }
583
parseModule(const uint8_t * Data,size_t Size,LLVMContext & Context)584 std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
585 LLVMContext &Context) {
586
587 if (Size <= 1)
588 // We get bogus data given an empty corpus - just create a new module.
589 return std::make_unique<Module>("M", Context);
590
591 auto Buffer = MemoryBuffer::getMemBuffer(
592 StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
593 /*RequiresNullTerminator=*/false);
594
595 SMDiagnostic Err;
596 auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
597 if (Error E = M.takeError()) {
598 errs() << toString(std::move(E)) << "\n";
599 return nullptr;
600 }
601 return std::move(M.get());
602 }
603
writeModule(const Module & M,uint8_t * Dest,size_t MaxSize)604 size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
605 std::string Buf;
606 {
607 raw_string_ostream OS(Buf);
608 WriteBitcodeToFile(M, OS);
609 }
610 if (Buf.size() > MaxSize)
611 return 0;
612 memcpy(Dest, Buf.data(), Buf.size());
613 return Buf.size();
614 }
615
parseAndVerify(const uint8_t * Data,size_t Size,LLVMContext & Context)616 std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
617 LLVMContext &Context) {
618 auto M = parseModule(Data, Size, Context);
619 if (!M || verifyModule(*M, &errs()))
620 return nullptr;
621
622 return M;
623 }
624