15d7950a4SHongtao Yu //===-- MissingFrameInferrer.cpp - Missing frame inferrer --------- C++ -*-===//
25d7950a4SHongtao Yu //
35d7950a4SHongtao Yu // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45d7950a4SHongtao Yu // See https://llvm.org/LICENSE.txt for license information.
55d7950a4SHongtao Yu // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65d7950a4SHongtao Yu //
75d7950a4SHongtao Yu //===----------------------------------------------------------------------===//
85d7950a4SHongtao Yu
95d7950a4SHongtao Yu #include "MissingFrameInferrer.h"
105d7950a4SHongtao Yu #include "PerfReader.h"
115d7950a4SHongtao Yu #include "ProfiledBinary.h"
125d7950a4SHongtao Yu #include "llvm/ADT/SCCIterator.h"
135d7950a4SHongtao Yu #include "llvm/ADT/Statistic.h"
145d7950a4SHongtao Yu #include <algorithm>
155d7950a4SHongtao Yu #include <cstdint>
165d7950a4SHongtao Yu #include <iterator>
175d7950a4SHongtao Yu #include <queue>
185d7950a4SHongtao Yu #include <sys/types.h>
195d7950a4SHongtao Yu
205d7950a4SHongtao Yu #define DEBUG_TYPE "missing-frame-inferrer"
215d7950a4SHongtao Yu
225d7950a4SHongtao Yu using namespace llvm;
235d7950a4SHongtao Yu using namespace sampleprof;
245d7950a4SHongtao Yu
255d7950a4SHongtao Yu STATISTIC(TailCallUniReachable,
265d7950a4SHongtao Yu "Number of frame pairs reachable via a unique tail call path");
275d7950a4SHongtao Yu STATISTIC(TailCallMultiReachable,
285d7950a4SHongtao Yu "Number of frame pairs reachable via a multiple tail call paths");
295d7950a4SHongtao Yu STATISTIC(TailCallUnreachable,
305d7950a4SHongtao Yu "Number of frame pairs unreachable via any tail call path");
315d7950a4SHongtao Yu STATISTIC(TailCallFuncSingleTailCalls,
325d7950a4SHongtao Yu "Number of functions with single tail call site");
335d7950a4SHongtao Yu STATISTIC(TailCallFuncMultipleTailCalls,
345d7950a4SHongtao Yu "Number of functions with multiple tail call sites");
355d7950a4SHongtao Yu STATISTIC(TailCallMaxTailCallPath, "Length of the longest tail call path");
365d7950a4SHongtao Yu
375d7950a4SHongtao Yu static cl::opt<uint32_t>
385d7950a4SHongtao Yu MaximumSearchDepth("max-search-depth", cl::init(UINT32_MAX - 1),
395d7950a4SHongtao Yu cl::desc("The maximum levels the DFS-based missing "
405d7950a4SHongtao Yu "frame search should go with"));
415d7950a4SHongtao Yu
initialize(const ContextSampleCounterMap * SampleCounters)425d7950a4SHongtao Yu void MissingFrameInferrer::initialize(
435d7950a4SHongtao Yu const ContextSampleCounterMap *SampleCounters) {
445d7950a4SHongtao Yu // Refine call edges based on LBR samples.
455d7950a4SHongtao Yu if (SampleCounters) {
465d7950a4SHongtao Yu std::unordered_map<uint64_t, std::unordered_set<uint64_t>> SampledCalls;
475d7950a4SHongtao Yu std::unordered_map<uint64_t, std::unordered_set<uint64_t>> SampledTailCalls;
485d7950a4SHongtao Yu
495d7950a4SHongtao Yu // Populate SampledCalls based on static call sites. Similarly to
505d7950a4SHongtao Yu // SampledTailCalls.
515d7950a4SHongtao Yu for (const auto &CI : *SampleCounters) {
525d7950a4SHongtao Yu for (auto Item : CI.second.BranchCounter) {
535d7950a4SHongtao Yu auto From = Item.first.first;
545d7950a4SHongtao Yu auto To = Item.first.second;
555d7950a4SHongtao Yu if (CallEdges.count(From)) {
565d7950a4SHongtao Yu assert(CallEdges[From].size() == 1 &&
575d7950a4SHongtao Yu "A callsite should only appear once with either a known or a "
585d7950a4SHongtao Yu "zero (unknown) target value at this point");
595d7950a4SHongtao Yu SampledCalls[From].insert(To);
605d7950a4SHongtao Yu }
615d7950a4SHongtao Yu if (TailCallEdges.count(From)) {
625d7950a4SHongtao Yu assert(TailCallEdges[From].size() == 1 &&
635d7950a4SHongtao Yu "A callsite should only appear once with either a known or a "
645d7950a4SHongtao Yu "zero (unknown) target value at this point");
655d7950a4SHongtao Yu FuncRange *FromFRange = Binary->findFuncRange(From);
665d7950a4SHongtao Yu FuncRange *ToFRange = Binary->findFuncRange(To);
675d7950a4SHongtao Yu if (FromFRange != ToFRange)
685d7950a4SHongtao Yu SampledTailCalls[From].insert(To);
695d7950a4SHongtao Yu }
705d7950a4SHongtao Yu }
715d7950a4SHongtao Yu }
725d7950a4SHongtao Yu
735d7950a4SHongtao Yu // Replace static edges with dynamic edges.
745d7950a4SHongtao Yu CallEdges = SampledCalls;
755d7950a4SHongtao Yu TailCallEdges = SampledTailCalls;
765d7950a4SHongtao Yu }
775d7950a4SHongtao Yu
785d7950a4SHongtao Yu // Populate function-based edges. This is to speed up address to function
795d7950a4SHongtao Yu // translation.
805d7950a4SHongtao Yu for (auto Call : CallEdges)
815d7950a4SHongtao Yu for (auto Target : Call.second)
825d7950a4SHongtao Yu if (FuncRange *ToFRange = Binary->findFuncRange(Target))
835d7950a4SHongtao Yu CallEdgesF[Call.first].insert(ToFRange->Func);
845d7950a4SHongtao Yu
855d7950a4SHongtao Yu for (auto Call : TailCallEdges) {
865d7950a4SHongtao Yu for (auto Target : Call.second) {
875d7950a4SHongtao Yu if (FuncRange *ToFRange = Binary->findFuncRange(Target)) {
885d7950a4SHongtao Yu TailCallEdgesF[Call.first].insert(ToFRange->Func);
895d7950a4SHongtao Yu TailCallTargetFuncs.insert(ToFRange->Func);
905d7950a4SHongtao Yu }
915d7950a4SHongtao Yu }
925d7950a4SHongtao Yu if (FuncRange *FromFRange = Binary->findFuncRange(Call.first))
935d7950a4SHongtao Yu FuncToTailCallMap[FromFRange->Func].push_back(Call.first);
945d7950a4SHongtao Yu }
955d7950a4SHongtao Yu
965d7950a4SHongtao Yu #if LLVM_ENABLE_STATS
975d7950a4SHongtao Yu for (auto F : FuncToTailCallMap) {
985d7950a4SHongtao Yu assert(F.second.size() > 0 && "");
995d7950a4SHongtao Yu if (F.second.size() > 1)
1005d7950a4SHongtao Yu TailCallFuncMultipleTailCalls++;
1015d7950a4SHongtao Yu else
1025d7950a4SHongtao Yu TailCallFuncSingleTailCalls++;
1035d7950a4SHongtao Yu }
1045d7950a4SHongtao Yu #endif
1055d7950a4SHongtao Yu
1065d7950a4SHongtao Yu #ifndef NDEBUG
1075d7950a4SHongtao Yu auto PrintCallTargets =
1085d7950a4SHongtao Yu [&](const std::unordered_map<uint64_t, std::unordered_set<uint64_t>>
1095d7950a4SHongtao Yu &CallTargets,
1105d7950a4SHongtao Yu bool IsTailCall) {
1115d7950a4SHongtao Yu for (const auto &Targets : CallTargets) {
1125d7950a4SHongtao Yu for (const auto &Target : Targets.second) {
1135d7950a4SHongtao Yu dbgs() << (IsTailCall ? "TailCall" : "Call");
1145d7950a4SHongtao Yu dbgs() << " From " << format("%8" PRIx64, Targets.first) << " to "
1155d7950a4SHongtao Yu << format("%8" PRIx64, Target) << "\n";
1165d7950a4SHongtao Yu }
1175d7950a4SHongtao Yu }
1185d7950a4SHongtao Yu };
1195d7950a4SHongtao Yu
1205d7950a4SHongtao Yu LLVM_DEBUG(dbgs() << "============================\n ";
1215d7950a4SHongtao Yu dbgs() << "Call targets:\n";
1225d7950a4SHongtao Yu PrintCallTargets(CallEdges, false);
1235d7950a4SHongtao Yu dbgs() << "\nTail call targets:\n";
1245d7950a4SHongtao Yu PrintCallTargets(CallEdges, true);
1255d7950a4SHongtao Yu dbgs() << "============================\n";);
1265d7950a4SHongtao Yu #endif
1275d7950a4SHongtao Yu }
1285d7950a4SHongtao Yu
computeUniqueTailCallPath(BinaryFunction * From,BinaryFunction * To,SmallVectorImpl<uint64_t> & Path)1295d7950a4SHongtao Yu uint64_t MissingFrameInferrer::computeUniqueTailCallPath(
1305d7950a4SHongtao Yu BinaryFunction *From, BinaryFunction *To, SmallVectorImpl<uint64_t> &Path) {
1315d7950a4SHongtao Yu // Search for a unique path comprised of only tail call edges for a given
1325d7950a4SHongtao Yu // source and target frame address on the a tail call graph that consists of
1335d7950a4SHongtao Yu // only tail call edges. Note that only a unique path counts. Multiple paths
1345d7950a4SHongtao Yu // are treated unreachable.
1355d7950a4SHongtao Yu if (From == To)
1365d7950a4SHongtao Yu return 1;
1375d7950a4SHongtao Yu
1385d7950a4SHongtao Yu // Ignore cyclic paths. Since we are doing a recursive DFS walk, if the source
1395d7950a4SHongtao Yu // frame being visited is already in the stack, it means we are seeing a
1405d7950a4SHongtao Yu // cycle. This is done before querying the cached result because the cached
1415d7950a4SHongtao Yu // result may be computed based on the same path. Consider the following case:
1425d7950a4SHongtao Yu // A -> B, B -> A, A -> D
1435d7950a4SHongtao Yu // When computing unique reachablity from A to D, the cached result for (B,D)
1445d7950a4SHongtao Yu // should not be counted since the unique path B->A->D is basically the same
1455d7950a4SHongtao Yu // path as A->D. Counting that with invalidate the uniqueness from A to D.
1465d7950a4SHongtao Yu if (Visiting.contains(From))
1475d7950a4SHongtao Yu return 0;
1485d7950a4SHongtao Yu
1495d7950a4SHongtao Yu // If already computed, return the cached result.
1505d7950a4SHongtao Yu auto I = UniquePaths.find({From, To});
1515d7950a4SHongtao Yu if (I != UniquePaths.end()) {
1525d7950a4SHongtao Yu Path.append(I->second.begin(), I->second.end());
1535d7950a4SHongtao Yu return 1;
1545d7950a4SHongtao Yu }
1555d7950a4SHongtao Yu
1565d7950a4SHongtao Yu auto J = NonUniquePaths.find({From, To});
1575d7950a4SHongtao Yu if (J != NonUniquePaths.end()) {
1585d7950a4SHongtao Yu return J->second;
1595d7950a4SHongtao Yu }
1605d7950a4SHongtao Yu
1615d7950a4SHongtao Yu uint64_t Pos = Path.size();
1625d7950a4SHongtao Yu
1635d7950a4SHongtao Yu // DFS walk each outgoing tail call edges.
1645d7950a4SHongtao Yu // Bail out if we are already at the the maximum searching depth.
1655d7950a4SHongtao Yu if (CurSearchingDepth == MaximumSearchDepth)
1665d7950a4SHongtao Yu return 0;
1675d7950a4SHongtao Yu
1685d7950a4SHongtao Yu
1695d7950a4SHongtao Yu if (!FuncToTailCallMap.count(From))
1705d7950a4SHongtao Yu return 0;
1715d7950a4SHongtao Yu
1725d7950a4SHongtao Yu CurSearchingDepth++;
1735d7950a4SHongtao Yu Visiting.insert(From);
1745d7950a4SHongtao Yu uint64_t NumPaths = 0;
1755d7950a4SHongtao Yu for (auto TailCall : FuncToTailCallMap[From]) {
1765d7950a4SHongtao Yu NumPaths += computeUniqueTailCallPath(TailCall, To, Path);
1775d7950a4SHongtao Yu // Stop analyzing the remaining if we are already seeing more than one
1785d7950a4SHongtao Yu // reachable paths.
1795d7950a4SHongtao Yu if (NumPaths > 1)
1805d7950a4SHongtao Yu break;
1815d7950a4SHongtao Yu }
1825d7950a4SHongtao Yu CurSearchingDepth--;
1835d7950a4SHongtao Yu Visiting.erase(From);
1845d7950a4SHongtao Yu
1855d7950a4SHongtao Yu // Undo already-computed path if it is not unique.
1865d7950a4SHongtao Yu if (NumPaths != 1) {
1875d7950a4SHongtao Yu Path.pop_back_n(Path.size() - Pos);
1885d7950a4SHongtao Yu }
1895d7950a4SHongtao Yu
1905d7950a4SHongtao Yu // Cache the result.
1915d7950a4SHongtao Yu if (NumPaths == 1) {
1925d7950a4SHongtao Yu UniquePaths[{From, To}].assign(Path.begin() + Pos, Path.end());
1935d7950a4SHongtao Yu #if LLVM_ENABLE_STATS
1945d7950a4SHongtao Yu auto &LocalPath = UniquePaths[{From, To}];
1955d7950a4SHongtao Yu assert((LocalPath.size() <= MaximumSearchDepth + 1) &&
1965d7950a4SHongtao Yu "Path should not be longer than the maximum searching depth");
197e2b9cd79SFlorian Hahn TailCallMaxTailCallPath = std::max(uint64_t(LocalPath.size()),
198e2b9cd79SFlorian Hahn TailCallMaxTailCallPath.getValue());
1995d7950a4SHongtao Yu #endif
2005d7950a4SHongtao Yu } else {
2015d7950a4SHongtao Yu NonUniquePaths[{From, To}] = NumPaths;
2025d7950a4SHongtao Yu }
2035d7950a4SHongtao Yu
2045d7950a4SHongtao Yu return NumPaths;
2055d7950a4SHongtao Yu }
2065d7950a4SHongtao Yu
computeUniqueTailCallPath(uint64_t From,BinaryFunction * To,SmallVectorImpl<uint64_t> & Path)2075d7950a4SHongtao Yu uint64_t MissingFrameInferrer::computeUniqueTailCallPath(
2085d7950a4SHongtao Yu uint64_t From, BinaryFunction *To, SmallVectorImpl<uint64_t> &Path) {
2095d7950a4SHongtao Yu if (!TailCallEdgesF.count(From))
2105d7950a4SHongtao Yu return 0;
2115d7950a4SHongtao Yu Path.push_back(From);
2125d7950a4SHongtao Yu uint64_t NumPaths = 0;
2135d7950a4SHongtao Yu for (auto Target : TailCallEdgesF[From]) {
2145d7950a4SHongtao Yu NumPaths += computeUniqueTailCallPath(Target, To, Path);
2155d7950a4SHongtao Yu // Stop analyzing the remaining if we are already seeing more than one
2165d7950a4SHongtao Yu // reachable paths.
2175d7950a4SHongtao Yu if (NumPaths > 1)
2185d7950a4SHongtao Yu break;
2195d7950a4SHongtao Yu }
2205d7950a4SHongtao Yu
2215d7950a4SHongtao Yu // Undo already-computed path if it is not unique.
2225d7950a4SHongtao Yu if (NumPaths != 1)
2235d7950a4SHongtao Yu Path.pop_back();
2245d7950a4SHongtao Yu return NumPaths;
2255d7950a4SHongtao Yu }
2265d7950a4SHongtao Yu
inferMissingFrames(uint64_t From,uint64_t To,SmallVectorImpl<uint64_t> & UniquePath)2275d7950a4SHongtao Yu bool MissingFrameInferrer::inferMissingFrames(
2285d7950a4SHongtao Yu uint64_t From, uint64_t To, SmallVectorImpl<uint64_t> &UniquePath) {
2295d7950a4SHongtao Yu assert(!TailCallEdgesF.count(From) &&
2305d7950a4SHongtao Yu "transition between From and To cannot be via a tailcall otherwise "
2315d7950a4SHongtao Yu "they would not show up at the same time");
2325d7950a4SHongtao Yu UniquePath.push_back(From);
2335d7950a4SHongtao Yu uint64_t Pos = UniquePath.size();
2345d7950a4SHongtao Yu
2355d7950a4SHongtao Yu FuncRange *ToFRange = Binary->findFuncRange(To);
2365d7950a4SHongtao Yu if (!ToFRange)
2375d7950a4SHongtao Yu return false;
2385d7950a4SHongtao Yu
2395d7950a4SHongtao Yu // Bail out if caller has no known outgoing call edges.
2405d7950a4SHongtao Yu if (!CallEdgesF.count(From))
2415d7950a4SHongtao Yu return false;
2425d7950a4SHongtao Yu
2435d7950a4SHongtao Yu // Done with the inference if the calle is reachable via a single callsite.
2445d7950a4SHongtao Yu // This may not be accurate but it improves the search throughput.
245*a3b9c153SKazu Hirata if (llvm::is_contained(CallEdgesF[From], ToFRange->Func))
2465d7950a4SHongtao Yu return true;
2475d7950a4SHongtao Yu
2485d7950a4SHongtao Yu // Bail out if callee is not tailcall reachable at all.
2495d7950a4SHongtao Yu if (!TailCallTargetFuncs.contains(ToFRange->Func))
2505d7950a4SHongtao Yu return false;
2515d7950a4SHongtao Yu
2525d7950a4SHongtao Yu Visiting.clear();
2535d7950a4SHongtao Yu CurSearchingDepth = 0;
2545d7950a4SHongtao Yu uint64_t NumPaths = 0;
2555d7950a4SHongtao Yu for (auto Target : CallEdgesF[From]) {
2565d7950a4SHongtao Yu NumPaths +=
2575d7950a4SHongtao Yu computeUniqueTailCallPath(Target, ToFRange->Func, UniquePath);
2585d7950a4SHongtao Yu // Stop analyzing the remaining if we are already seeing more than one
2595d7950a4SHongtao Yu // reachable paths.
2605d7950a4SHongtao Yu if (NumPaths > 1)
2615d7950a4SHongtao Yu break;
2625d7950a4SHongtao Yu }
2635d7950a4SHongtao Yu
2645d7950a4SHongtao Yu // Undo already-computed path if it is not unique.
2655d7950a4SHongtao Yu if (NumPaths != 1) {
2665d7950a4SHongtao Yu UniquePath.pop_back_n(UniquePath.size() - Pos);
2675d7950a4SHongtao Yu assert(UniquePath.back() == From && "broken path");
2685d7950a4SHongtao Yu }
2695d7950a4SHongtao Yu
27009e79659SDavid Blaikie #if LLVM_ENABLE_STATS
2715d7950a4SHongtao Yu if (NumPaths == 1) {
2725d7950a4SHongtao Yu if (ReachableViaUniquePaths.insert({From, ToFRange->StartAddress}).second)
2735d7950a4SHongtao Yu TailCallUniReachable++;
2745d7950a4SHongtao Yu } else if (NumPaths == 0) {
2755d7950a4SHongtao Yu if (Unreachables.insert({From, ToFRange->StartAddress}).second) {
2765d7950a4SHongtao Yu TailCallUnreachable++;
2775d7950a4SHongtao Yu LLVM_DEBUG(dbgs() << "No path found from "
2785d7950a4SHongtao Yu << format("%8" PRIx64 ":", From) << " to "
2795d7950a4SHongtao Yu << format("%8" PRIx64 ":", ToFRange->StartAddress)
2805d7950a4SHongtao Yu << "\n");
2815d7950a4SHongtao Yu }
2825d7950a4SHongtao Yu } else if (NumPaths > 1) {
2835d7950a4SHongtao Yu if (ReachableViaMultiPaths.insert({From, ToFRange->StartAddress})
2845d7950a4SHongtao Yu .second) {
2855d7950a4SHongtao Yu TailCallMultiReachable++;
2865d7950a4SHongtao Yu LLVM_DEBUG(dbgs() << "Multiple paths found from "
2875d7950a4SHongtao Yu << format("%8" PRIx64 ":", From) << " to "
2885d7950a4SHongtao Yu << format("%8" PRIx64 ":", ToFRange->StartAddress)
2895d7950a4SHongtao Yu << "\n");
2905d7950a4SHongtao Yu }
2915d7950a4SHongtao Yu }
2925d7950a4SHongtao Yu #endif
2935d7950a4SHongtao Yu
2945d7950a4SHongtao Yu return NumPaths == 1;
2955d7950a4SHongtao Yu }
2965d7950a4SHongtao Yu
inferMissingFrames(const SmallVectorImpl<uint64_t> & Context,SmallVectorImpl<uint64_t> & NewContext)2975d7950a4SHongtao Yu void MissingFrameInferrer::inferMissingFrames(
2985d7950a4SHongtao Yu const SmallVectorImpl<uint64_t> &Context,
2995d7950a4SHongtao Yu SmallVectorImpl<uint64_t> &NewContext) {
3005d7950a4SHongtao Yu if (Context.size() == 1) {
3015d7950a4SHongtao Yu NewContext = Context;
3025d7950a4SHongtao Yu return;
3035d7950a4SHongtao Yu }
3045d7950a4SHongtao Yu
3055d7950a4SHongtao Yu NewContext.clear();
3065d7950a4SHongtao Yu for (uint64_t I = 1; I < Context.size(); I++) {
3075d7950a4SHongtao Yu inferMissingFrames(Context[I - 1], Context[I], NewContext);
3085d7950a4SHongtao Yu }
3095d7950a4SHongtao Yu NewContext.push_back(Context.back());
3105d7950a4SHongtao Yu
3115d7950a4SHongtao Yu assert((NewContext.size() >= Context.size()) &&
3125d7950a4SHongtao Yu "Inferred context should include all frames in the original context");
3135d7950a4SHongtao Yu assert((NewContext.size() > Context.size() || NewContext == Context) &&
3145d7950a4SHongtao Yu "Inferred context should be exactly the same "
3155d7950a4SHongtao Yu "with the original context");
3165d7950a4SHongtao Yu }
317