//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file adds DWARF discriminators to the IR. Path discriminators are
// used to decide what CFG path was taken inside sub-graphs whose instructions
// share the same line and column number information.
//
// The main user of this is the sample profiler. Instruction samples are
// mapped to line number information. Since a single line may be spread
// out over several basic blocks, discriminators add more precise location
// for the samples.
//
// For example,
//
//   1  #define ASSERT(P)
//   2     if (!(P))
//   3       abort()
//   ...
//   100   while (true) {
//   101     ASSERT (sum < 0);
//   102     ...
//   130   }
//
// when converted to IR, this snippet looks something like:
//
// while.body:                                       ; preds = %entry, %if.end
//   %0 = load i32* %sum, align 4, !dbg !15
//   %cmp = icmp slt i32 %0, 0, !dbg !15
//   br i1 %cmp, label %if.end, label %if.then, !dbg !15
//
// if.then:                                          ; preds = %while.body
//   call void @abort(), !dbg !15
//   br label %if.end, !dbg !15
//
// Notice that all the instructions in blocks 'while.body' and 'if.then'
// have exactly the same debug information. When this program is sampled
// at runtime, the profiler will assume that all these instructions are
// equally frequent. As a result, it will consider the edge
// while.body->if.then to be frequently taken (which is incorrect).
//
// By adding a discriminator value to the instructions in block 'if.then',
// we can distinguish instructions at line 101 with discriminator 0 from
// the instructions at line 101 with discriminator 1.
//
// For more details about DWARF discriminators, please visit
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//
//===----------------------------------------------------------------------===//
540b57cec5SDimitry Andric
550b57cec5SDimitry Andric #include "llvm/Transforms/Utils/AddDiscriminators.h"
560b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
570b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
580b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
590b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
600b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
610b57cec5SDimitry Andric #include "llvm/IR/Function.h"
620b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
630b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
640b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
650b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
660b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
670b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
680b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
690b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
70fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
710b57cec5SDimitry Andric #include <utility>
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric using namespace llvm;
74fe6060f1SDimitry Andric using namespace sampleprofutil;
750b57cec5SDimitry Andric
760b57cec5SDimitry Andric #define DEBUG_TYPE "add-discriminators"
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric // Command line option to disable discriminator generation even in the
790b57cec5SDimitry Andric // presence of debug information. This is only needed when debugging
800b57cec5SDimitry Andric // debug info generation issues.
810b57cec5SDimitry Andric static cl::opt<bool> NoDiscriminators(
820b57cec5SDimitry Andric "no-discriminators", cl::init(false),
830b57cec5SDimitry Andric cl::desc("Disable generation of discriminator information."));
840b57cec5SDimitry Andric
shouldHaveDiscriminator(const Instruction * I)850b57cec5SDimitry Andric static bool shouldHaveDiscriminator(const Instruction *I) {
860b57cec5SDimitry Andric return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric
890b57cec5SDimitry Andric /// Assign DWARF discriminators.
900b57cec5SDimitry Andric ///
910b57cec5SDimitry Andric /// To assign discriminators, we examine the boundaries of every
920b57cec5SDimitry Andric /// basic block and its successors. Suppose there is a basic block B1
930b57cec5SDimitry Andric /// with successor B2. The last instruction I1 in B1 and the first
940b57cec5SDimitry Andric /// instruction I2 in B2 are located at the same file and line number.
950b57cec5SDimitry Andric /// This situation is illustrated in the following code snippet:
960b57cec5SDimitry Andric ///
970b57cec5SDimitry Andric /// if (i < 10) x = i;
980b57cec5SDimitry Andric ///
990b57cec5SDimitry Andric /// entry:
1000b57cec5SDimitry Andric /// br i1 %cmp, label %if.then, label %if.end, !dbg !10
1010b57cec5SDimitry Andric /// if.then:
1020b57cec5SDimitry Andric /// %1 = load i32* %i.addr, align 4, !dbg !10
1030b57cec5SDimitry Andric /// store i32 %1, i32* %x, align 4, !dbg !10
1040b57cec5SDimitry Andric /// br label %if.end, !dbg !10
1050b57cec5SDimitry Andric /// if.end:
1060b57cec5SDimitry Andric /// ret void, !dbg !12
1070b57cec5SDimitry Andric ///
1080b57cec5SDimitry Andric /// Notice how the branch instruction in block 'entry' and all the
1090b57cec5SDimitry Andric /// instructions in block 'if.then' have the exact same debug location
1100b57cec5SDimitry Andric /// information (!dbg !10).
1110b57cec5SDimitry Andric ///
1120b57cec5SDimitry Andric /// To distinguish instructions in block 'entry' from instructions in
1130b57cec5SDimitry Andric /// block 'if.then', we generate a new lexical block for all the
1140b57cec5SDimitry Andric /// instruction in block 'if.then' that share the same file and line
1150b57cec5SDimitry Andric /// location with the last instruction of block 'entry'.
1160b57cec5SDimitry Andric ///
1170b57cec5SDimitry Andric /// This new lexical block will have the same location information as
1180b57cec5SDimitry Andric /// the previous one, but with a new DWARF discriminator value.
1190b57cec5SDimitry Andric ///
1200b57cec5SDimitry Andric /// One of the main uses of this discriminator value is in runtime
1210b57cec5SDimitry Andric /// sample profilers. It allows the profiler to distinguish instructions
1220b57cec5SDimitry Andric /// at location !dbg !10 that execute on different basic blocks. This is
1230b57cec5SDimitry Andric /// important because while the predicate 'if (x < 10)' may have been
1240b57cec5SDimitry Andric /// executed millions of times, the assignment 'x = i' may have only
1250b57cec5SDimitry Andric /// executed a handful of times (meaning that the entry->if.then edge is
1260b57cec5SDimitry Andric /// seldom taken).
1270b57cec5SDimitry Andric ///
1280b57cec5SDimitry Andric /// If we did not have discriminator information, the profiler would
1290b57cec5SDimitry Andric /// assign the same weight to both blocks 'entry' and 'if.then', which
1300b57cec5SDimitry Andric /// in turn will make it conclude that the entry->if.then edge is very
1310b57cec5SDimitry Andric /// hot.
1320b57cec5SDimitry Andric ///
1330b57cec5SDimitry Andric /// To decide where to create new discriminator values, this function
1340b57cec5SDimitry Andric /// traverses the CFG and examines instruction at basic block boundaries.
1350b57cec5SDimitry Andric /// If the last instruction I1 of a block B1 is at the same file and line
1360b57cec5SDimitry Andric /// location as instruction I2 of successor B2, then it creates a new
1370b57cec5SDimitry Andric /// lexical block for I2 and all the instruction in B2 that share the same
1380b57cec5SDimitry Andric /// file and line location as I2. This new lexical block will have a
1390b57cec5SDimitry Andric /// different discriminator number than I1.
addDiscriminators(Function & F)1400b57cec5SDimitry Andric static bool addDiscriminators(Function &F) {
1410b57cec5SDimitry Andric // If the function has debug information, but the user has disabled
1420b57cec5SDimitry Andric // discriminators, do nothing.
1430b57cec5SDimitry Andric // Simlarly, if the function has no debug info, do nothing.
1440b57cec5SDimitry Andric if (NoDiscriminators || !F.getSubprogram())
1450b57cec5SDimitry Andric return false;
1460b57cec5SDimitry Andric
147fe6060f1SDimitry Andric // Create FSDiscriminatorVariable if flow sensitive discriminators are used.
148fe6060f1SDimitry Andric if (EnableFSDiscriminator)
149fe6060f1SDimitry Andric createFSDiscriminatorVariable(F.getParent());
150fe6060f1SDimitry Andric
1510b57cec5SDimitry Andric bool Changed = false;
1520b57cec5SDimitry Andric
1530b57cec5SDimitry Andric using Location = std::pair<StringRef, unsigned>;
1540b57cec5SDimitry Andric using BBSet = DenseSet<const BasicBlock *>;
1550b57cec5SDimitry Andric using LocationBBMap = DenseMap<Location, BBSet>;
1560b57cec5SDimitry Andric using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
1570b57cec5SDimitry Andric using LocationSet = DenseSet<Location>;
1580b57cec5SDimitry Andric
1590b57cec5SDimitry Andric LocationBBMap LBM;
1600b57cec5SDimitry Andric LocationDiscriminatorMap LDM;
1610b57cec5SDimitry Andric
1620b57cec5SDimitry Andric // Traverse all instructions in the function. If the source line location
1630b57cec5SDimitry Andric // of the instruction appears in other basic block, assign a new
1640b57cec5SDimitry Andric // discriminator for this instruction.
1650b57cec5SDimitry Andric for (BasicBlock &B : F) {
166*bdd1243dSDimitry Andric for (auto &I : B) {
1670b57cec5SDimitry Andric // Not all intrinsic calls should have a discriminator.
1680b57cec5SDimitry Andric // We want to avoid a non-deterministic assignment of discriminators at
1690b57cec5SDimitry Andric // different debug levels. We still allow discriminators on memory
1700b57cec5SDimitry Andric // intrinsic calls because those can be early expanded by SROA into
1710b57cec5SDimitry Andric // pairs of loads and stores, and the expanded load/store instructions
1720b57cec5SDimitry Andric // should have a valid discriminator.
1730b57cec5SDimitry Andric if (!shouldHaveDiscriminator(&I))
1740b57cec5SDimitry Andric continue;
1750b57cec5SDimitry Andric const DILocation *DIL = I.getDebugLoc();
1760b57cec5SDimitry Andric if (!DIL)
1770b57cec5SDimitry Andric continue;
1780b57cec5SDimitry Andric Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
1790b57cec5SDimitry Andric auto &BBMap = LBM[L];
1800b57cec5SDimitry Andric auto R = BBMap.insert(&B);
1810b57cec5SDimitry Andric if (BBMap.size() == 1)
1820b57cec5SDimitry Andric continue;
1830b57cec5SDimitry Andric // If we could insert more than one block with the same line+file, a
1840b57cec5SDimitry Andric // discriminator is needed to distinguish both instructions.
1850b57cec5SDimitry Andric // Only the lowest 7 bits are used to represent a discriminator to fit
1860b57cec5SDimitry Andric // it in 1 byte ULEB128 representation.
1870b57cec5SDimitry Andric unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
1880b57cec5SDimitry Andric auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
1890b57cec5SDimitry Andric if (!NewDIL) {
1900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
1910b57cec5SDimitry Andric << DIL->getFilename() << ":" << DIL->getLine() << ":"
1920b57cec5SDimitry Andric << DIL->getColumn() << ":" << Discriminator << " "
1930b57cec5SDimitry Andric << I << "\n");
1940b57cec5SDimitry Andric } else {
19581ad6265SDimitry Andric I.setDebugLoc(*NewDIL);
1960b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
1970b57cec5SDimitry Andric << DIL->getColumn() << ":" << Discriminator << " " << I
1980b57cec5SDimitry Andric << "\n");
1990b57cec5SDimitry Andric }
2000b57cec5SDimitry Andric Changed = true;
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric }
2030b57cec5SDimitry Andric
2040b57cec5SDimitry Andric // Traverse all instructions and assign new discriminators to call
2050b57cec5SDimitry Andric // instructions with the same lineno that are in the same basic block.
2060b57cec5SDimitry Andric // Sample base profile needs to distinguish different function calls within
2070b57cec5SDimitry Andric // a same source line for correct profile annotation.
2080b57cec5SDimitry Andric for (BasicBlock &B : F) {
2090b57cec5SDimitry Andric LocationSet CallLocations;
210*bdd1243dSDimitry Andric for (auto &I : B) {
2110b57cec5SDimitry Andric // We bypass intrinsic calls for the following two reasons:
212480093f4SDimitry Andric // 1) We want to avoid a non-deterministic assignment of
2130b57cec5SDimitry Andric // discriminators.
2140b57cec5SDimitry Andric // 2) We want to minimize the number of base discriminators used.
2150b57cec5SDimitry Andric if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
2160b57cec5SDimitry Andric continue;
2170b57cec5SDimitry Andric
2180b57cec5SDimitry Andric DILocation *CurrentDIL = I.getDebugLoc();
2190b57cec5SDimitry Andric if (!CurrentDIL)
2200b57cec5SDimitry Andric continue;
2210b57cec5SDimitry Andric Location L =
2220b57cec5SDimitry Andric std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
2230b57cec5SDimitry Andric if (!CallLocations.insert(L).second) {
2240b57cec5SDimitry Andric unsigned Discriminator = ++LDM[L];
2250b57cec5SDimitry Andric auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
2260b57cec5SDimitry Andric if (!NewDIL) {
2270b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
2280b57cec5SDimitry Andric << "Could not encode discriminator: "
2290b57cec5SDimitry Andric << CurrentDIL->getFilename() << ":"
2300b57cec5SDimitry Andric << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
2310b57cec5SDimitry Andric << ":" << Discriminator << " " << I << "\n");
2320b57cec5SDimitry Andric } else {
23381ad6265SDimitry Andric I.setDebugLoc(*NewDIL);
2340b57cec5SDimitry Andric Changed = true;
2350b57cec5SDimitry Andric }
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric }
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric return Changed;
2400b57cec5SDimitry Andric }
2410b57cec5SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)2420b57cec5SDimitry Andric PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
2430b57cec5SDimitry Andric FunctionAnalysisManager &AM) {
2440b57cec5SDimitry Andric if (!addDiscriminators(F))
2450b57cec5SDimitry Andric return PreservedAnalyses::all();
2460b57cec5SDimitry Andric
2470b57cec5SDimitry Andric // FIXME: should be all()
2480b57cec5SDimitry Andric return PreservedAnalyses::none();
2490b57cec5SDimitry Andric }
250