xref: /llvm-project/llvm/lib/ProfileData/MemProf.cpp (revision 6fb967ec9e13216ee1b4fc15764e0b3df9e5683f)
10a418490SSnehasish Kumar #include "llvm/ProfileData/MemProf.h"
227a4f254SSnehasish Kumar #include "llvm/ADT/SmallVector.h"
327a4f254SSnehasish Kumar #include "llvm/IR/Function.h"
4fc97efa4Sserge-sans-paille #include "llvm/ProfileData/InstrProf.h"
50edc32fdSSnehasish Kumar #include "llvm/ProfileData/SampleProf.h"
674799f42SKazu Hirata #include "llvm/Support/BLAKE3.h"
70a418490SSnehasish Kumar #include "llvm/Support/Endian.h"
80a418490SSnehasish Kumar #include "llvm/Support/EndianStream.h"
974799f42SKazu Hirata #include "llvm/Support/HashBuilder.h"
100a418490SSnehasish Kumar 
110a418490SSnehasish Kumar namespace llvm {
120a418490SSnehasish Kumar namespace memprof {
13cb9589b2SKazu Hirata MemProfSchema getFullSchema() {
14cb9589b2SKazu Hirata   MemProfSchema List;
15cb9589b2SKazu Hirata #define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
16cb9589b2SKazu Hirata #include "llvm/ProfileData/MIBEntryDef.inc"
17cb9589b2SKazu Hirata #undef MIBEntryDef
18cb9589b2SKazu Hirata   return List;
19cb9589b2SKazu Hirata }
20cb9589b2SKazu Hirata 
21cb9589b2SKazu Hirata MemProfSchema getHotColdSchema() {
22cb9589b2SKazu Hirata   return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
23cb9589b2SKazu Hirata           Meta::TotalLifetimeAccessDensity};
24cb9589b2SKazu Hirata }
25cb9589b2SKazu Hirata 
26edf733bcSKazu Hirata static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
27edf733bcSKazu Hirata                                const MemProfSchema &Schema) {
28d89914f3SKazu Hirata   size_t Size = 0;
29d89914f3SKazu Hirata   // The CallStackId
30d89914f3SKazu Hirata   Size += sizeof(CallStackId);
31d89914f3SKazu Hirata   // The size of the payload.
32edf733bcSKazu Hirata   Size += PortableMemInfoBlock::serializedSize(Schema);
33d89914f3SKazu Hirata   return Size;
34d89914f3SKazu Hirata }
35d89914f3SKazu Hirata 
3637f30234SKazu Hirata static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
3737f30234SKazu Hirata                                const MemProfSchema &Schema) {
3837f30234SKazu Hirata   size_t Size = 0;
3937f30234SKazu Hirata   // The linear call stack ID.
4037f30234SKazu Hirata   Size += sizeof(LinearCallStackId);
4137f30234SKazu Hirata   // The size of the payload.
4237f30234SKazu Hirata   Size += PortableMemInfoBlock::serializedSize(Schema);
4337f30234SKazu Hirata   return Size;
4437f30234SKazu Hirata }
4537f30234SKazu Hirata 
46edf733bcSKazu Hirata size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
47edf733bcSKazu Hirata                                              IndexedVersion Version) const {
48d89914f3SKazu Hirata   switch (Version) {
49d89914f3SKazu Hirata   case Version2:
50edf733bcSKazu Hirata     return serializedSizeV2(*this, Schema);
5137f30234SKazu Hirata   case Version3:
5237f30234SKazu Hirata     return serializedSizeV3(*this, Schema);
53d89914f3SKazu Hirata   }
54d89914f3SKazu Hirata   llvm_unreachable("unsupported MemProf version");
55d89914f3SKazu Hirata }
56d89914f3SKazu Hirata 
57edf733bcSKazu Hirata static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
58edf733bcSKazu Hirata                                const MemProfSchema &Schema) {
59e09245b3SKazu Hirata   // The number of alloc sites to serialize.
60e09245b3SKazu Hirata   size_t Result = sizeof(uint64_t);
61d89914f3SKazu Hirata   for (const IndexedAllocationInfo &N : Record.AllocSites)
62edf733bcSKazu Hirata     Result += N.serializedSize(Schema, Version2);
63d89914f3SKazu Hirata 
64d89914f3SKazu Hirata   // The number of callsites we have information for.
65d89914f3SKazu Hirata   Result += sizeof(uint64_t);
66d89914f3SKazu Hirata   // The CallStackId
67d89914f3SKazu Hirata   Result += Record.CallSiteIds.size() * sizeof(CallStackId);
68d89914f3SKazu Hirata   return Result;
69d89914f3SKazu Hirata }
70d89914f3SKazu Hirata 
7137f30234SKazu Hirata static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
7237f30234SKazu Hirata                                const MemProfSchema &Schema) {
7337f30234SKazu Hirata   // The number of alloc sites to serialize.
7437f30234SKazu Hirata   size_t Result = sizeof(uint64_t);
7537f30234SKazu Hirata   for (const IndexedAllocationInfo &N : Record.AllocSites)
7637f30234SKazu Hirata     Result += N.serializedSize(Schema, Version3);
7737f30234SKazu Hirata 
7837f30234SKazu Hirata   // The number of callsites we have information for.
7937f30234SKazu Hirata   Result += sizeof(uint64_t);
8037f30234SKazu Hirata   // The linear call stack ID.
8137f30234SKazu Hirata   Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
8237f30234SKazu Hirata   return Result;
8337f30234SKazu Hirata }
8437f30234SKazu Hirata 
85edf733bcSKazu Hirata size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
86edf733bcSKazu Hirata                                             IndexedVersion Version) const {
87d89914f3SKazu Hirata   switch (Version) {
88d89914f3SKazu Hirata   case Version2:
89edf733bcSKazu Hirata     return serializedSizeV2(*this, Schema);
9037f30234SKazu Hirata   case Version3:
9137f30234SKazu Hirata     return serializedSizeV3(*this, Schema);
92d89914f3SKazu Hirata   }
93d89914f3SKazu Hirata   llvm_unreachable("unsupported MemProf version");
94d89914f3SKazu Hirata }
95d89914f3SKazu Hirata 
963f16ff4eSKazu Hirata static void serializeV2(const IndexedMemProfRecord &Record,
97d89914f3SKazu Hirata                         const MemProfSchema &Schema, raw_ostream &OS) {
98d89914f3SKazu Hirata   using namespace support;
99d89914f3SKazu Hirata 
100d89914f3SKazu Hirata   endian::Writer LE(OS, llvm::endianness::little);
101d89914f3SKazu Hirata 
102d89914f3SKazu Hirata   LE.write<uint64_t>(Record.AllocSites.size());
103d89914f3SKazu Hirata   for (const IndexedAllocationInfo &N : Record.AllocSites) {
104d89914f3SKazu Hirata     LE.write<CallStackId>(N.CSId);
105d89914f3SKazu Hirata     N.Info.serialize(Schema, OS);
106d89914f3SKazu Hirata   }
107d89914f3SKazu Hirata 
108d89914f3SKazu Hirata   // Related contexts.
109d89914f3SKazu Hirata   LE.write<uint64_t>(Record.CallSiteIds.size());
110d89914f3SKazu Hirata   for (const auto &CSId : Record.CallSiteIds)
111d89914f3SKazu Hirata     LE.write<CallStackId>(CSId);
112d89914f3SKazu Hirata }
113d89914f3SKazu Hirata 
1149a8b73c7SKazu Hirata static void serializeV3(
1159a8b73c7SKazu Hirata     const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
11690acfbf9SKazu Hirata     raw_ostream &OS,
1179a8b73c7SKazu Hirata     llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
11890acfbf9SKazu Hirata   using namespace support;
11990acfbf9SKazu Hirata 
12090acfbf9SKazu Hirata   endian::Writer LE(OS, llvm::endianness::little);
12190acfbf9SKazu Hirata 
12290acfbf9SKazu Hirata   LE.write<uint64_t>(Record.AllocSites.size());
12390acfbf9SKazu Hirata   for (const IndexedAllocationInfo &N : Record.AllocSites) {
12490acfbf9SKazu Hirata     assert(MemProfCallStackIndexes.contains(N.CSId));
1259a8b73c7SKazu Hirata     LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
12690acfbf9SKazu Hirata     N.Info.serialize(Schema, OS);
12790acfbf9SKazu Hirata   }
12890acfbf9SKazu Hirata 
12990acfbf9SKazu Hirata   // Related contexts.
13090acfbf9SKazu Hirata   LE.write<uint64_t>(Record.CallSiteIds.size());
13190acfbf9SKazu Hirata   for (const auto &CSId : Record.CallSiteIds) {
13290acfbf9SKazu Hirata     assert(MemProfCallStackIndexes.contains(CSId));
1339a8b73c7SKazu Hirata     LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
13490acfbf9SKazu Hirata   }
13590acfbf9SKazu Hirata }
13690acfbf9SKazu Hirata 
13790acfbf9SKazu Hirata void IndexedMemProfRecord::serialize(
13890acfbf9SKazu Hirata     const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
139bfa937a4SKazu Hirata     llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
140bfa937a4SKazu Hirata     const {
141d89914f3SKazu Hirata   switch (Version) {
142d89914f3SKazu Hirata   case Version2:
143d89914f3SKazu Hirata     serializeV2(*this, Schema, OS);
144d89914f3SKazu Hirata     return;
14590acfbf9SKazu Hirata   case Version3:
14690acfbf9SKazu Hirata     serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
14790acfbf9SKazu Hirata     return;
148d89914f3SKazu Hirata   }
149d89914f3SKazu Hirata   llvm_unreachable("unsupported MemProf version");
150d89914f3SKazu Hirata }
151d89914f3SKazu Hirata 
1523f16ff4eSKazu Hirata static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
153d89914f3SKazu Hirata                                           const unsigned char *Ptr) {
154d89914f3SKazu Hirata   using namespace support;
155d89914f3SKazu Hirata 
156d89914f3SKazu Hirata   IndexedMemProfRecord Record;
157d89914f3SKazu Hirata 
158d89914f3SKazu Hirata   // Read the meminfo nodes.
159d89914f3SKazu Hirata   const uint64_t NumNodes =
160f430e374SKazu Hirata       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1618d2258fdSKazu Hirata   Record.AllocSites.reserve(NumNodes);
162d89914f3SKazu Hirata   for (uint64_t I = 0; I < NumNodes; I++) {
163d89914f3SKazu Hirata     IndexedAllocationInfo Node;
164f430e374SKazu Hirata     Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
165d89914f3SKazu Hirata     Node.Info.deserialize(Schema, Ptr);
166edf733bcSKazu Hirata     Ptr += PortableMemInfoBlock::serializedSize(Schema);
167d89914f3SKazu Hirata     Record.AllocSites.push_back(Node);
168d89914f3SKazu Hirata   }
169d89914f3SKazu Hirata 
170d89914f3SKazu Hirata   // Read the callsite information.
171d89914f3SKazu Hirata   const uint64_t NumCtxs =
172f430e374SKazu Hirata       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1738d2258fdSKazu Hirata   Record.CallSiteIds.reserve(NumCtxs);
174d89914f3SKazu Hirata   for (uint64_t J = 0; J < NumCtxs; J++) {
175d89914f3SKazu Hirata     CallStackId CSId =
176f430e374SKazu Hirata         endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
177d89914f3SKazu Hirata     Record.CallSiteIds.push_back(CSId);
178d89914f3SKazu Hirata   }
179d89914f3SKazu Hirata 
180d89914f3SKazu Hirata   return Record;
181d89914f3SKazu Hirata }
182d89914f3SKazu Hirata 
18337f30234SKazu Hirata static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
18437f30234SKazu Hirata                                           const unsigned char *Ptr) {
18537f30234SKazu Hirata   using namespace support;
18637f30234SKazu Hirata 
18737f30234SKazu Hirata   IndexedMemProfRecord Record;
18837f30234SKazu Hirata 
18937f30234SKazu Hirata   // Read the meminfo nodes.
19037f30234SKazu Hirata   const uint64_t NumNodes =
19137f30234SKazu Hirata       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
19237f30234SKazu Hirata   Record.AllocSites.reserve(NumNodes);
19337f30234SKazu Hirata   for (uint64_t I = 0; I < NumNodes; I++) {
19437f30234SKazu Hirata     IndexedAllocationInfo Node;
1959a8b73c7SKazu Hirata     Node.CSId =
1969a8b73c7SKazu Hirata         endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
19737f30234SKazu Hirata     Node.Info.deserialize(Schema, Ptr);
19837f30234SKazu Hirata     Ptr += PortableMemInfoBlock::serializedSize(Schema);
19937f30234SKazu Hirata     Record.AllocSites.push_back(Node);
20037f30234SKazu Hirata   }
20137f30234SKazu Hirata 
20237f30234SKazu Hirata   // Read the callsite information.
20337f30234SKazu Hirata   const uint64_t NumCtxs =
20437f30234SKazu Hirata       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
20537f30234SKazu Hirata   Record.CallSiteIds.reserve(NumCtxs);
20637f30234SKazu Hirata   for (uint64_t J = 0; J < NumCtxs; J++) {
20737f30234SKazu Hirata     // We are storing LinearCallStackId in CallSiteIds, which is a vector of
20837f30234SKazu Hirata     // CallStackId.  Assert that CallStackId is no smaller than
20937f30234SKazu Hirata     // LinearCallStackId.
21037f30234SKazu Hirata     static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
21137f30234SKazu Hirata     LinearCallStackId CSId =
21237f30234SKazu Hirata         endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
21337f30234SKazu Hirata     Record.CallSiteIds.push_back(CSId);
21437f30234SKazu Hirata   }
21537f30234SKazu Hirata 
21637f30234SKazu Hirata   return Record;
21737f30234SKazu Hirata }
21837f30234SKazu Hirata 
219d89914f3SKazu Hirata IndexedMemProfRecord
220d89914f3SKazu Hirata IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
221d89914f3SKazu Hirata                                   const unsigned char *Ptr,
222d89914f3SKazu Hirata                                   IndexedVersion Version) {
223d89914f3SKazu Hirata   switch (Version) {
224d89914f3SKazu Hirata   case Version2:
225d89914f3SKazu Hirata     return deserializeV2(Schema, Ptr);
22637f30234SKazu Hirata   case Version3:
22737f30234SKazu Hirata     return deserializeV3(Schema, Ptr);
228d89914f3SKazu Hirata   }
229d89914f3SKazu Hirata   llvm_unreachable("unsupported MemProf version");
230d89914f3SKazu Hirata }
231d89914f3SKazu Hirata 
2328137bd9eSKazu Hirata MemProfRecord IndexedMemProfRecord::toMemProfRecord(
2334a918f07SKazu Hirata     llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
2348137bd9eSKazu Hirata   MemProfRecord Record;
2358137bd9eSKazu Hirata 
236300663a1SKazu Hirata   Record.AllocSites.reserve(AllocSites.size());
2374e0ff054SKazu Hirata   for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
2384e0ff054SKazu Hirata     AllocationInfo AI;
2398137bd9eSKazu Hirata     AI.Info = IndexedAI.Info;
2408137bd9eSKazu Hirata     AI.CallStack = Callback(IndexedAI.CSId);
241300663a1SKazu Hirata     Record.AllocSites.push_back(std::move(AI));
2428137bd9eSKazu Hirata   }
2438137bd9eSKazu Hirata 
244300663a1SKazu Hirata   Record.CallSites.reserve(CallSiteIds.size());
2454e0ff054SKazu Hirata   for (CallStackId CSId : CallSiteIds)
2468137bd9eSKazu Hirata     Record.CallSites.push_back(Callback(CSId));
2478137bd9eSKazu Hirata 
2488137bd9eSKazu Hirata   return Record;
2498137bd9eSKazu Hirata }
2508137bd9eSKazu Hirata 
2516dd6a616SSnehasish Kumar GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
252749d595dSTeresa Johnson   // Canonicalize the function name to drop suffixes such as ".llvm.". Note
253749d595dSTeresa Johnson   // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
254749d595dSTeresa Johnson   // those by default. This is by design to differentiate internal linkage
255749d595dSTeresa Johnson   // functions during matching. By dropping the other suffixes we can then match
256749d595dSTeresa Johnson   // functions in the profile use phase prior to their addition. Note that this
257749d595dSTeresa Johnson   // applies to both instrumented and sampled function names.
2580edc32fdSSnehasish Kumar   StringRef CanonicalName =
2590edc32fdSSnehasish Kumar       sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);
26027a4f254SSnehasish Kumar 
26127a4f254SSnehasish Kumar   // We use the function guid which we expect to be a uint64_t. At
2620edc32fdSSnehasish Kumar   // this time, it is the lower 64 bits of the md5 of the canonical
2630edc32fdSSnehasish Kumar   // function name.
2640edc32fdSSnehasish Kumar   return Function::getGUID(CanonicalName);
2650a418490SSnehasish Kumar }
2660a418490SSnehasish Kumar 
2670a418490SSnehasish Kumar Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
2680a418490SSnehasish Kumar   using namespace support;
2690a418490SSnehasish Kumar 
2700a418490SSnehasish Kumar   const unsigned char *Ptr = Buffer;
2710a418490SSnehasish Kumar   const uint64_t NumSchemaIds =
272f430e374SKazu Hirata       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
2730a418490SSnehasish Kumar   if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
2740a418490SSnehasish Kumar     return make_error<InstrProfError>(instrprof_error::malformed,
2750a418490SSnehasish Kumar                                       "memprof schema invalid");
2760a418490SSnehasish Kumar   }
2770a418490SSnehasish Kumar 
2780a418490SSnehasish Kumar   MemProfSchema Result;
2790a418490SSnehasish Kumar   for (size_t I = 0; I < NumSchemaIds; I++) {
28002f67c09SKazu Hirata     const uint64_t Tag =
281f430e374SKazu Hirata         endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
2820a418490SSnehasish Kumar     if (Tag >= static_cast<uint64_t>(Meta::Size)) {
2830a418490SSnehasish Kumar       return make_error<InstrProfError>(instrprof_error::malformed,
2840a418490SSnehasish Kumar                                         "memprof schema invalid");
2850a418490SSnehasish Kumar     }
2860a418490SSnehasish Kumar     Result.push_back(static_cast<Meta>(Tag));
2870a418490SSnehasish Kumar   }
288e2d539bbSKazu Hirata   // Advance the buffer to one past the schema if we succeeded.
2890a418490SSnehasish Kumar   Buffer = Ptr;
2900a418490SSnehasish Kumar   return Result;
2910a418490SSnehasish Kumar }
2920a418490SSnehasish Kumar 
293*6fb967ecSKazu Hirata CallStackId IndexedMemProfData::hashCallStack(ArrayRef<FrameId> CS) const {
29474799f42SKazu Hirata   llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
29574799f42SKazu Hirata       HashBuilder;
29674799f42SKazu Hirata   for (FrameId F : CS)
29774799f42SKazu Hirata     HashBuilder.add(F);
29874799f42SKazu Hirata   llvm::BLAKE3Result<8> Hash = HashBuilder.final();
29974799f42SKazu Hirata   CallStackId CSId;
30074799f42SKazu Hirata   std::memcpy(&CSId, Hash.data(), sizeof(Hash));
30174799f42SKazu Hirata   return CSId;
30274799f42SKazu Hirata }
30374799f42SKazu Hirata 
3045c0df5feSKazu Hirata // Encode a call stack into RadixArray.  Return the starting index within
3055c0df5feSKazu Hirata // RadixArray.  For each call stack we encode, we emit two or three components
3065c0df5feSKazu Hirata // into RadixArray.  If a given call stack doesn't have a common prefix relative
3075c0df5feSKazu Hirata // to the previous one, we emit:
3085c0df5feSKazu Hirata //
3095c0df5feSKazu Hirata // - the frames in the given call stack in the root-to-leaf order
3105c0df5feSKazu Hirata //
3115c0df5feSKazu Hirata // - the length of the given call stack
3125c0df5feSKazu Hirata //
3135c0df5feSKazu Hirata // If a given call stack has a non-empty common prefix relative to the previous
3145c0df5feSKazu Hirata // one, we emit:
3155c0df5feSKazu Hirata //
3165c0df5feSKazu Hirata // - the relative location of the common prefix, encoded as a negative number.
3175c0df5feSKazu Hirata //
3185c0df5feSKazu Hirata // - a portion of the given call stack that's beyond the common prefix
3195c0df5feSKazu Hirata //
3205c0df5feSKazu Hirata // - the length of the given call stack, including the length of the common
3215c0df5feSKazu Hirata //   prefix.
3225c0df5feSKazu Hirata //
3235c0df5feSKazu Hirata // The resulting RadixArray requires a somewhat unintuitive backward traversal
3245c0df5feSKazu Hirata // to reconstruct a call stack -- read the call stack length and scan backward
3255c0df5feSKazu Hirata // while collecting frames in the leaf to root order.  build, the caller of this
3265c0df5feSKazu Hirata // function, reverses RadixArray in place so that we can reconstruct a call
3275c0df5feSKazu Hirata // stack as if we were deserializing an array in a typical way -- the call stack
3285c0df5feSKazu Hirata // length followed by the frames in the leaf-to-root order except that we need
3295c0df5feSKazu Hirata // to handle pointers to parents along the way.
3305c0df5feSKazu Hirata //
3315c0df5feSKazu Hirata // To quickly determine the location of the common prefix within RadixArray,
3325c0df5feSKazu Hirata // Indexes caches the indexes of the previous call stack's frames within
3335c0df5feSKazu Hirata // RadixArray.
334e14827f0STeresa Johnson template <typename FrameIdTy>
335e14827f0STeresa Johnson LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
336e14827f0STeresa Johnson     const llvm::SmallVector<FrameIdTy> *CallStack,
337e14827f0STeresa Johnson     const llvm::SmallVector<FrameIdTy> *Prev,
338ff7b42c1SKazu Hirata     const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes) {
3395c0df5feSKazu Hirata   // Compute the length of the common root prefix between Prev and CallStack.
3405c0df5feSKazu Hirata   uint32_t CommonLen = 0;
3415c0df5feSKazu Hirata   if (Prev) {
3425c0df5feSKazu Hirata     auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(),
3435c0df5feSKazu Hirata                              CallStack->rend());
3445c0df5feSKazu Hirata     CommonLen = std::distance(CallStack->rbegin(), Pos.second);
3455c0df5feSKazu Hirata   }
3465c0df5feSKazu Hirata 
3475c0df5feSKazu Hirata   // Drop the portion beyond CommonLen.
3485c0df5feSKazu Hirata   assert(CommonLen <= Indexes.size());
3495c0df5feSKazu Hirata   Indexes.resize(CommonLen);
3505c0df5feSKazu Hirata 
3515c0df5feSKazu Hirata   // Append a pointer to the parent.
3525c0df5feSKazu Hirata   if (CommonLen) {
3535c0df5feSKazu Hirata     uint32_t CurrentIndex = RadixArray.size();
3545c0df5feSKazu Hirata     uint32_t ParentIndex = Indexes.back();
3555c0df5feSKazu Hirata     // The offset to the parent must be negative because we are pointing to an
3565c0df5feSKazu Hirata     // element we've already added to RadixArray.
3575c0df5feSKazu Hirata     assert(ParentIndex < CurrentIndex);
3585c0df5feSKazu Hirata     RadixArray.push_back(ParentIndex - CurrentIndex);
3595c0df5feSKazu Hirata   }
3605c0df5feSKazu Hirata 
3615c0df5feSKazu Hirata   // Copy the part of the call stack beyond the common prefix to RadixArray.
3625c0df5feSKazu Hirata   assert(CommonLen <= CallStack->size());
363e14827f0STeresa Johnson   for (FrameIdTy F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
3645c0df5feSKazu Hirata     // Remember the index of F in RadixArray.
3655c0df5feSKazu Hirata     Indexes.push_back(RadixArray.size());
366e14827f0STeresa Johnson     RadixArray.push_back(
367e14827f0STeresa Johnson         MemProfFrameIndexes ? MemProfFrameIndexes->find(F)->second : F);
3685c0df5feSKazu Hirata   }
3695c0df5feSKazu Hirata   assert(CallStack->size() == Indexes.size());
3705c0df5feSKazu Hirata 
3715c0df5feSKazu Hirata   // End with the call stack length.
3725c0df5feSKazu Hirata   RadixArray.push_back(CallStack->size());
3735c0df5feSKazu Hirata 
3745c0df5feSKazu Hirata   // Return the index within RadixArray where we can start reconstructing a
3755c0df5feSKazu Hirata   // given call stack from.
3765c0df5feSKazu Hirata   return RadixArray.size() - 1;
3775c0df5feSKazu Hirata }
3785c0df5feSKazu Hirata 
379e14827f0STeresa Johnson template <typename FrameIdTy>
380e14827f0STeresa Johnson void CallStackRadixTreeBuilder<FrameIdTy>::build(
381e14827f0STeresa Johnson     llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
3825c0df5feSKazu Hirata         &&MemProfCallStackData,
383ff7b42c1SKazu Hirata     const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
384e14827f0STeresa Johnson     llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
3855c0df5feSKazu Hirata   // Take the vector portion of MemProfCallStackData.  The vector is exactly
3865c0df5feSKazu Hirata   // what we need to sort.  Also, we no longer need its lookup capability.
3875c0df5feSKazu Hirata   llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();
3885c0df5feSKazu Hirata 
3895c0df5feSKazu Hirata   // Return early if we have no work to do.
3905c0df5feSKazu Hirata   if (CallStacks.empty()) {
3915c0df5feSKazu Hirata     RadixArray.clear();
3925c0df5feSKazu Hirata     CallStackPos.clear();
3935c0df5feSKazu Hirata     return;
3945c0df5feSKazu Hirata   }
3955c0df5feSKazu Hirata 
396dc3f8c2fSKazu Hirata   // Sorting the list of call stacks in the dictionary order is sufficient to
397dc3f8c2fSKazu Hirata   // maximize the length of the common prefix between two adjacent call stacks
398dc3f8c2fSKazu Hirata   // and thus minimize the length of RadixArray.  However, we go one step
399dc3f8c2fSKazu Hirata   // further and try to reduce the number of times we follow pointers to parents
400dc3f8c2fSKazu Hirata   // during deserilization.  Consider a poorly encoded radix tree:
401dc3f8c2fSKazu Hirata   //
402dc3f8c2fSKazu Hirata   // CallStackId 1:  f1 -> f2 -> f3
403dc3f8c2fSKazu Hirata   //                  |
404dc3f8c2fSKazu Hirata   // CallStackId 2:   +--- f4 -> f5
405dc3f8c2fSKazu Hirata   //                        |
406dc3f8c2fSKazu Hirata   // CallStackId 3:         +--> f6
407dc3f8c2fSKazu Hirata   //
408dc3f8c2fSKazu Hirata   // Here, f2 and f4 appear once and twice, respectively, in the call stacks.
409dc3f8c2fSKazu Hirata   // Once we encode CallStackId 1 into RadixArray, every other call stack with
410dc3f8c2fSKazu Hirata   // common prefix f1 ends up pointing to CallStackId 1.  Since CallStackId 3
411dc3f8c2fSKazu Hirata   // share "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers to
412dc3f8c2fSKazu Hirata   // parents twice.
413dc3f8c2fSKazu Hirata   //
414dc3f8c2fSKazu Hirata   // We try to alleviate the situation by sorting the list of call stacks by
415dc3f8c2fSKazu Hirata   // comparing the popularity of frames rather than the integer values of
416dc3f8c2fSKazu Hirata   // FrameIds.  In the example above, f4 is more popular than f2, so we sort the
417dc3f8c2fSKazu Hirata   // call stacks and encode them as:
418dc3f8c2fSKazu Hirata   //
419dc3f8c2fSKazu Hirata   // CallStackId 2:  f1 -- f4 -> f5
420dc3f8c2fSKazu Hirata   //                  |     |
421dc3f8c2fSKazu Hirata   // CallStackId 3:   |     +--> f6
422dc3f8c2fSKazu Hirata   //                  |
423dc3f8c2fSKazu Hirata   // CallStackId 1:   +--> f2 -> f3
424dc3f8c2fSKazu Hirata   //
425dc3f8c2fSKazu Hirata   // Notice that CallStackId 3 follows a pointer to a parent only once.
426dc3f8c2fSKazu Hirata   //
427dc3f8c2fSKazu Hirata   // All this is a quick-n-dirty trick to reduce the number of jumps.  The
428dc3f8c2fSKazu Hirata   // proper way would be to compute the weight of each radix tree node -- how
429dc3f8c2fSKazu Hirata   // many call stacks use a given radix tree node, and encode a radix tree from
430dc3f8c2fSKazu Hirata   // the heaviest node first.  We do not do so because that's a lot of work.
4315c0df5feSKazu Hirata   llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) {
4325c0df5feSKazu Hirata     // Call stacks are stored from leaf to root.  Perform comparisons from the
4335c0df5feSKazu Hirata     // root.
4345c0df5feSKazu Hirata     return std::lexicographical_compare(
4355c0df5feSKazu Hirata         L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
436e14827f0STeresa Johnson         [&](FrameIdTy F1, FrameIdTy F2) {
437dc3f8c2fSKazu Hirata           uint64_t H1 = FrameHistogram[F1].Count;
438dc3f8c2fSKazu Hirata           uint64_t H2 = FrameHistogram[F2].Count;
439dc3f8c2fSKazu Hirata           // Popular frames should come later because we encode call stacks from
440dc3f8c2fSKazu Hirata           // the last one in the list.
441dc3f8c2fSKazu Hirata           if (H1 != H2)
442dc3f8c2fSKazu Hirata             return H1 < H2;
443dc3f8c2fSKazu Hirata           // For sort stability.
444dc3f8c2fSKazu Hirata           return F1 < F2;
445dc3f8c2fSKazu Hirata         });
4465c0df5feSKazu Hirata   });
4475c0df5feSKazu Hirata 
4485c0df5feSKazu Hirata   // Reserve some reasonable amount of storage.
4495c0df5feSKazu Hirata   RadixArray.clear();
4505c0df5feSKazu Hirata   RadixArray.reserve(CallStacks.size() * 8);
4515c0df5feSKazu Hirata 
4525c0df5feSKazu Hirata   // Indexes will grow as long as the longest call stack.
4535c0df5feSKazu Hirata   Indexes.clear();
4545c0df5feSKazu Hirata   Indexes.reserve(512);
4555c0df5feSKazu Hirata 
4565c0df5feSKazu Hirata   // CallStackPos will grow to exactly CallStacks.size() entries.
4575c0df5feSKazu Hirata   CallStackPos.clear();
4585c0df5feSKazu Hirata   CallStackPos.reserve(CallStacks.size());
4595c0df5feSKazu Hirata 
4605c0df5feSKazu Hirata   // Compute the radix array.  We encode one call stack at a time, computing the
4615c0df5feSKazu Hirata   // longest prefix that's shared with the previous call stack we encode.  For
4625c0df5feSKazu Hirata   // each call stack we encode, we remember a mapping from CallStackId to its
4635c0df5feSKazu Hirata   // position within RadixArray.
4645c0df5feSKazu Hirata   //
4655c0df5feSKazu Hirata   // As an optimization, we encode from the last call stack in CallStacks to
4665c0df5feSKazu Hirata   // reduce the number of times we follow pointers to the parents.  Consider the
4675c0df5feSKazu Hirata   // list of call stacks that has been sorted in the dictionary order:
4685c0df5feSKazu Hirata   //
4695c0df5feSKazu Hirata   // Call Stack 1: F1
4705c0df5feSKazu Hirata   // Call Stack 2: F1 -> F2
4715c0df5feSKazu Hirata   // Call Stack 3: F1 -> F2 -> F3
4725c0df5feSKazu Hirata   //
4735c0df5feSKazu Hirata   // If we traversed CallStacks in the forward order, we would end up with a
4745c0df5feSKazu Hirata   // radix tree like:
4755c0df5feSKazu Hirata   //
4765c0df5feSKazu Hirata   // Call Stack 1:  F1
4775c0df5feSKazu Hirata   //                |
4785c0df5feSKazu Hirata   // Call Stack 2:  +---> F2
4795c0df5feSKazu Hirata   //                      |
4805c0df5feSKazu Hirata   // Call Stack 3:        +---> F3
4815c0df5feSKazu Hirata   //
4825c0df5feSKazu Hirata   // Notice that each call stack jumps to the previous one.  However, if we
4835c0df5feSKazu Hirata   // traverse CallStacks in the reverse order, then Call Stack 3 has the
4845c0df5feSKazu Hirata   // complete call stack encoded without any pointers.  Call Stack 1 and 2 point
4855c0df5feSKazu Hirata   // to appropriate prefixes of Call Stack 3.
486e14827f0STeresa Johnson   const llvm::SmallVector<FrameIdTy> *Prev = nullptr;
4875c0df5feSKazu Hirata   for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
4885c0df5feSKazu Hirata     LinearCallStackId Pos =
4895c0df5feSKazu Hirata         encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
4905c0df5feSKazu Hirata     CallStackPos.insert({CSId, Pos});
4915c0df5feSKazu Hirata     Prev = &CallStack;
4925c0df5feSKazu Hirata   }
4935c0df5feSKazu Hirata 
4945c0df5feSKazu Hirata   // "RadixArray.size() - 1" below is problematic if RadixArray is empty.
4955c0df5feSKazu Hirata   assert(!RadixArray.empty());
4965c0df5feSKazu Hirata 
4975c0df5feSKazu Hirata   // Reverse the radix array in place.  We do so mostly for intuitive
4985c0df5feSKazu Hirata   // deserialization where we would read the length field and then the call
4995c0df5feSKazu Hirata   // stack frames proper just like any other array deserialization, except
5005c0df5feSKazu Hirata   // that we have occasional jumps to take advantage of prefixes.
5015c0df5feSKazu Hirata   for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J)
5025c0df5feSKazu Hirata     std::swap(RadixArray[I], RadixArray[J]);
5035c0df5feSKazu Hirata 
5045c0df5feSKazu Hirata   // "Reverse" the indexes stored in CallStackPos.
5055c0df5feSKazu Hirata   for (auto &[K, V] : CallStackPos)
5065c0df5feSKazu Hirata     V = RadixArray.size() - 1 - V;
5075c0df5feSKazu Hirata }
5085c0df5feSKazu Hirata 
509e14827f0STeresa Johnson // Explicitly instantiate class with the utilized FrameIdTy.
510e14827f0STeresa Johnson template class CallStackRadixTreeBuilder<FrameId>;
511776476c2STeresa Johnson template class CallStackRadixTreeBuilder<LinearFrameId>;
512e14827f0STeresa Johnson 
513e14827f0STeresa Johnson template <typename FrameIdTy>
514e14827f0STeresa Johnson llvm::DenseMap<FrameIdTy, FrameStat>
515e14827f0STeresa Johnson computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
516dc3f8c2fSKazu Hirata                           &MemProfCallStackData) {
517e14827f0STeresa Johnson   llvm::DenseMap<FrameIdTy, FrameStat> Histogram;
518dc3f8c2fSKazu Hirata 
519dc3f8c2fSKazu Hirata   for (const auto &KV : MemProfCallStackData) {
520dc3f8c2fSKazu Hirata     const auto &CS = KV.second;
521dc3f8c2fSKazu Hirata     for (unsigned I = 0, E = CS.size(); I != E; ++I) {
522dc3f8c2fSKazu Hirata       auto &S = Histogram[CS[I]];
523dc3f8c2fSKazu Hirata       ++S.Count;
524dc3f8c2fSKazu Hirata       S.PositionSum += I;
525dc3f8c2fSKazu Hirata     }
526dc3f8c2fSKazu Hirata   }
527dc3f8c2fSKazu Hirata   return Histogram;
528dc3f8c2fSKazu Hirata }
529dc3f8c2fSKazu Hirata 
530e14827f0STeresa Johnson // Explicitly instantiate function with the utilized FrameIdTy.
531e14827f0STeresa Johnson template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
532e14827f0STeresa Johnson     llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
533e14827f0STeresa Johnson         &MemProfCallStackData);
534776476c2STeresa Johnson template llvm::DenseMap<LinearFrameId, FrameStat>
535776476c2STeresa Johnson computeFrameHistogram<LinearFrameId>(
536776476c2STeresa Johnson     llvm::MapVector<CallStackId, llvm::SmallVector<LinearFrameId>>
537776476c2STeresa Johnson         &MemProfCallStackData);
5380a418490SSnehasish Kumar } // namespace memprof
5390a418490SSnehasish Kumar } // namespace llvm
540