//===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation for the Delta Debugging Algorithm:
// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
// into chunks and tries to reduce the number of chunks that are interesting.
//
//===----------------------------------------------------------------------===//

#include "Delta.h"
#include "ReducerWorkItem.h"
#include "TestRunner.h"
#include "Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/ThreadPool.h"
#include <fstream>
#include <set>

using namespace llvm;

extern cl::OptionCategory LLVMReduceOptions;

static cl::opt<bool> AbortOnInvalidReduction(
    "abort-on-invalid-reduction",
    cl::desc("Abort if any reduction results in invalid IR"),
    cl::cat(LLVMReduceOptions));

static cl::opt<unsigned int> StartingGranularityLevel(
    "starting-granularity-level",
    cl::desc("Number of times to divide chunks prior to first test"),
    cl::cat(LLVMReduceOptions));

#ifdef LLVM_ENABLE_THREADS
static cl::opt<unsigned> NumJobs(
    "j",
    cl::desc("Maximum number of threads to use to process chunks. Set to 1 to "
             "disable parallelism."),
    cl::init(1), cl::cat(LLVMReduceOptions));
#else
unsigned NumJobs = 1;
#endif

/// Splits Chunks in half and, if verbose, prints the new chunks.
/// Returns false if nothing could be split (i.e. every chunk has size 1).
static bool increaseGranularity(std::vector<Chunk> &Chunks) {
  if (Verbose)
    errs() << "Increasing granularity...";
  std::vector<Chunk> NewChunks;
  bool SplitAny = false;

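  // Chunks are inclusive [Begin, End] ranges: a chunk covering a single
  // target cannot be split further and is carried over unchanged; larger
  // chunks are split at their midpoint.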
  for (Chunk C : Chunks) {
    if (C.End - C.Begin == 0)
      NewChunks.push_back(C);
    else {
      int Half = (C.Begin + C.End) / 2;
      NewChunks.push_back({C.Begin, Half});
      NewChunks.push_back({Half + 1, C.End});
      SplitAny = true;
    }
  }
  if (SplitAny) {
    Chunks = NewChunks;
    if (Verbose) {
      errs() << "Success! " << NewChunks.size() << " New Chunks:\n";
      for (auto C : Chunks) {
        errs() << '\t';
        C.print();
        errs() << '\n';
      }
    }
  }
  return SplitAny;
}

// Check whether removing \p ChunkToCheckForUninterestingness still leaves an
// interesting program. Returns the modified module if removing the chunk
// results in a valid reduction, and nullptr otherwise.
static std::unique_ptr<ReducerWorkItem>
CheckChunk(const Chunk ChunkToCheckForUninterestingness,
           std::unique_ptr<ReducerWorkItem> Clone, const TestRunner &Test,
           ReductionFunc ExtractChunksFromModule,
           const DenseSet<Chunk> &UninterestingChunks,
           const std::vector<Chunk> &ChunksStillConsideredInteresting) {
  // Take all of ChunksStillConsideredInteresting chunks, except those we've
  // already deemed uninteresting (UninterestingChunks) but didn't remove
  // from ChunksStillConsideredInteresting yet, and additionally ignore
  // ChunkToCheckForUninterestingness chunk.
  std::vector<Chunk> CurrentChunks;
  CurrentChunks.reserve(ChunksStillConsideredInteresting.size() -
                        UninterestingChunks.size() - 1);
  copy_if(ChunksStillConsideredInteresting, std::back_inserter(CurrentChunks),
          [&](const Chunk &C) {
            return C != ChunkToCheckForUninterestingness &&
                   !UninterestingChunks.count(C);
          });

  // Generate Module with only Targets inside Current Chunks
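  // The Oracle answers the reduction callback's shouldKeep() queries: only
  // queries that fall inside CurrentChunks are kept, so everything in the
  // chunk under test (and in already-uninteresting chunks) is dropped from
  // Clone.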
  Oracle O(CurrentChunks);
  ExtractChunksFromModule(O, *Clone);

  // Some reductions may result in invalid IR. Skip such reductions.
  if (Clone->verify(&errs())) {
    if (AbortOnInvalidReduction) {
      errs() << "Invalid reduction, aborting.\n";
      Clone->print(errs());
      exit(1);
    }
    if (Verbose) {
      errs() << " **** WARNING | reduction resulted in invalid module, "
                "skipping\n";
    }
    return nullptr;
  }

  if (Verbose) {
    errs() << "Ignoring: ";
    ChunkToCheckForUninterestingness.print();
    for (const Chunk &C : UninterestingChunks)
      C.print();
    errs() << "\n";
  }

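  // Run the user-provided interestingness test on the reduced clone.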
  if (!Clone->isReduced(Test)) {
    // Program became non-reduced, so this chunk appears to be interesting.
    if (Verbose)
      errs() << "\n";
    return nullptr;
  }
  return Clone;
}

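// Worker body for the parallel path: deserialize a private copy of the
// original module from \p OriginalBC into a fresh LLVMContext, run CheckChunk
// on it, and return the reduced module re-serialized as bitcode. An empty
// result means the chunk could not be removed.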
static SmallString<0> ProcessChunkFromSerializedBitcode(
    const Chunk ChunkToCheckForUninterestingness, const TestRunner &Test,
    ReductionFunc ExtractChunksFromModule,
    const DenseSet<Chunk> &UninterestingChunks,
    ArrayRef<Chunk> ChunksStillConsideredInteresting, StringRef OriginalBC,
    std::atomic<bool> &AnyReduced) {
  LLVMContext Ctx;
  auto CloneMMM = std::make_unique<ReducerWorkItem>();
  MemoryBufferRef Data(OriginalBC, "<bc file>");
  CloneMMM->readBitcode(Data, Ctx, Test.getToolName());

  SmallString<0> Result;
  if (std::unique_ptr<ReducerWorkItem> ChunkResult =
          CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM),
                     Test, ExtractChunksFromModule, UninterestingChunks,
                     ChunksStillConsideredInteresting)) {
    raw_svector_ostream BCOS(Result);
    ChunkResult->writeBitcode(BCOS);
    // Communicate that the task reduced a chunk.
    AnyReduced = true;
  }
  return Result;
}

using SharedTaskQueue = std::deque<std::shared_future<SmallString<0>>>;

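// Wait for every pending task and discard its result. Used to drain the task
// queue before the chunk bookkeeping is mutated.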
static void waitAndDiscardResultsBarrier(SharedTaskQueue &TaskQueue) {
  while (!TaskQueue.empty()) {
    auto &Future = TaskQueue.front();
    Future.wait();
    TaskQueue.pop_front();
  }
}

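// Illustrative sketch of how a reduction pass typically drives runDeltaPass
// (the real passes live in the individual Reduce*.cpp files; the names below
// are hypothetical):
//
//   static void extractFunctionBodies(Oracle &O, ReducerWorkItem &WorkItem) {
//     // Drop the body of every function the Oracle says we need not keep.
//     for (Function &F : WorkItem.getModule())
//       if (!F.isDeclaration() && !O.shouldKeep())
//         F.deleteBody();
//   }
//
//   void reduceFunctionBodiesDeltaPass(TestRunner &Test) {
//     runDeltaPass(Test, extractFunctionBodies, "Reducing function bodies");
//   }
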
/// Runs the Delta Debugging algorithm, splits the code into chunks and
/// reduces the number of chunks that are considered interesting by the
/// given test. The number of chunks is determined by a preliminary run of the
/// reduction pass where no change must be made to the module.
void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule,
                        StringRef Message) {
  assert(!Test.getProgram().verify(&errs()) &&
         "input module is broken before making changes");
  errs() << "*** " << Message << "...\n";

  int Targets;
  {
    // Count the number of chunks by counting the number of calls to
    // Oracle::shouldKeep() but always returning true so no changes are
    // made.
    std::vector<Chunk> AllChunks = {{0, INT_MAX}};
    Oracle Counter(AllChunks);
    ExtractChunksFromModule(Counter, Test.getProgram());
    Targets = Counter.count();

    assert(!Test.getProgram().verify(&errs()) &&
           "input module is broken after counting chunks");
    assert(Test.getProgram().isReduced(Test) &&
           "input module no longer interesting after counting chunks");

#ifndef NDEBUG
    // Make sure that the number of chunks does not change as we reduce.
    std::vector<Chunk> NoChunks = {{0, INT_MAX}};
    Oracle NoChunksCounter(NoChunks);
    std::unique_ptr<ReducerWorkItem> Clone =
        Test.getProgram().clone(Test.getTargetMachine());
    ExtractChunksFromModule(NoChunksCounter, *Clone);
    assert(Targets == NoChunksCounter.count() &&
           "number of chunks changes when reducing");
#endif
  }
  if (!Targets) {
    if (Verbose)
      errs() << "\nNothing to reduce\n";
    errs() << "----------------------------\n";
    return;
  }

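  // Start from a single chunk covering every target; the loop below
  // repeatedly splits the surviving chunks until individual targets can be
  // kept or removed on their own.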
  std::vector<Chunk> ChunksStillConsideredInteresting = {{0, Targets - 1}};
  std::unique_ptr<ReducerWorkItem> ReducedProgram;

  for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) {
    increaseGranularity(ChunksStillConsideredInteresting);
  }

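  // AnyReduced lets an in-flight parallel task signal that some chunk in the
  // current batch already produced a reduction, so no further tasks need to
  // be scheduled for this round.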
  std::atomic<bool> AnyReduced;
  std::unique_ptr<ThreadPool> ChunkThreadPoolPtr;
  if (NumJobs > 1)
    ChunkThreadPoolPtr =
        std::make_unique<ThreadPool>(hardware_concurrency(NumJobs));

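  // Outer loop: sweep over the remaining chunks and try to drop each one.
  // When a whole sweep fails to remove anything, split every chunk in half
  // and try again; stop once nothing can be removed even at single-target
  // granularity.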
  bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity;
  do {
    FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false;

    DenseSet<Chunk> UninterestingChunks;

    // When running with more than one thread, serialize the original bitcode
    // to OriginalBC.
    SmallString<0> OriginalBC;
    if (NumJobs > 1) {
      raw_svector_ostream BCOS(OriginalBC);
      Test.getProgram().writeBitcode(BCOS);
    }

    SharedTaskQueue TaskQueue;
    for (auto I = ChunksStillConsideredInteresting.rbegin(),
              E = ChunksStillConsideredInteresting.rend();
         I != E; ++I) {
      std::unique_ptr<ReducerWorkItem> Result = nullptr;
      unsigned WorkLeft = std::distance(I, E);

      // Run in parallel mode, if the user requested more than one thread and
      // there are at least a few chunks to process.
      if (NumJobs > 1 && WorkLeft > 1) {
        unsigned NumInitialTasks = std::min(WorkLeft, unsigned(NumJobs));
        unsigned NumChunksProcessed = 0;

        ThreadPool &ChunkThreadPool = *ChunkThreadPoolPtr;
        TaskQueue.clear();

        AnyReduced = false;
        // Queue jobs to process NumInitialTasks chunks in parallel using
        // ChunkThreadPool. When the tasks are added to the pool, parse the
        // original module from OriginalBC with a fresh LLVMContext object.
        // This ensures that the cloned module of each task uses an
        // independent LLVMContext object. If a task reduces the input,
        // serialize the result back in the corresponding Result element.
        for (unsigned J = 0; J < NumInitialTasks; ++J) {
          Chunk ChunkToCheck = *(I + J);
          TaskQueue.emplace_back(ChunkThreadPool.async(
              ProcessChunkFromSerializedBitcode, ChunkToCheck, std::ref(Test),
              ExtractChunksFromModule, UninterestingChunks,
              ChunksStillConsideredInteresting, OriginalBC,
              std::ref(AnyReduced)));
        }

        // Start processing results of the queued tasks. We wait for the first
        // task in the queue to finish. If it reduced a chunk, we parse the
        // result and exit the loop.
        // Otherwise we will try to schedule a new task, if
        // * no other pending job reduced a chunk and
        // * we have not yet run out of chunks to schedule.
        while (!TaskQueue.empty()) {
          auto &Future = TaskQueue.front();
          Future.wait();

          NumChunksProcessed++;
          SmallString<0> Res = Future.get();
          TaskQueue.pop_front();
          if (Res.empty()) {
            unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size();
            if (!AnyReduced && I + NumScheduledTasks != E) {
              Chunk ChunkToCheck = *(I + NumScheduledTasks);
              TaskQueue.emplace_back(ChunkThreadPool.async(
                  ProcessChunkFromSerializedBitcode, ChunkToCheck,
                  std::ref(Test), ExtractChunksFromModule, UninterestingChunks,
                  ChunksStillConsideredInteresting, OriginalBC,
                  std::ref(AnyReduced)));
            }
            continue;
          }

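          // A task produced a smaller interesting module: deserialize it back
          // into the main LLVMContext and stop scheduling further work.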
          Result = std::make_unique<ReducerWorkItem>();
          MemoryBufferRef Data(StringRef(Res), "<bc file>");
          Result->readBitcode(Data, Test.getProgram().M->getContext(),
                              Test.getToolName());
          break;
        }

        // If we broke out of the loop, we still need to wait for everything to
        // avoid race access to the chunk set.
        //
        // TODO: Create a way to kill remaining items we're ignoring; they could
        // take a long time.
        waitAndDiscardResultsBarrier(TaskQueue);

        // Forward I to the last chunk processed in parallel.
        I += NumChunksProcessed - 1;
      } else {
        Result =
            CheckChunk(*I, Test.getProgram().clone(Test.getTargetMachine()),
                       Test, ExtractChunksFromModule, UninterestingChunks,
                       ChunksStillConsideredInteresting);
      }

      if (!Result)
        continue;

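      // Removing this chunk kept the program interesting: record the chunk as
      // uninteresting and adopt the reduced program as the new best result.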
      const Chunk ChunkToCheckForUninterestingness = *I;
      FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true;
      UninterestingChunks.insert(ChunkToCheckForUninterestingness);
      ReducedProgram = std::move(Result);

      // FIXME: Report meaningful progress info
      Test.writeOutput(" **** SUCCESS | Saved new best reduction to ");
    }
    // Delete uninteresting chunks
    erase_if(ChunksStillConsideredInteresting,
             [&UninterestingChunks](const Chunk &C) {
               return UninterestingChunks.count(C);
             });
  } while (!ChunksStillConsideredInteresting.empty() &&
           (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity ||
            increaseGranularity(ChunksStillConsideredInteresting)));

  // If we reduced the testcase, replace it.
  if (ReducedProgram)
    Test.setProgram(std::move(ReducedProgram));
  if (Verbose)
    errs() << "Couldn't increase anymore.\n";
  errs() << "----------------------------\n";
}