1 //===- CodeLayout.h - Code layout/placement algorithms ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Declares methods and data structures for code layout algorithms. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 15 #define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 19 #include <utility> 20 #include <vector> 21 22 namespace llvm::codelayout { 23 24 using EdgeT = std::pair<uint64_t, uint64_t>; 25 26 struct EdgeCount { 27 uint64_t src; 28 uint64_t dst; 29 uint64_t count; 30 }; 31 32 /// Find a layout of nodes (basic blocks) of a given CFG optimizing jump 33 /// locality and thus processor I-cache utilization. This is achieved via 34 /// increasing the number of fall-through jumps and co-locating frequently 35 /// executed nodes together. 36 /// The nodes are assumed to be indexed by integers from [0, |V|) so that the 37 /// current order is the identity permutation. 38 /// \p NodeSizes: The sizes of the nodes (in bytes). 39 /// \p NodeCounts: The execution counts of the nodes in the profile. 40 /// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The 41 /// map also defines the edges in CFG and should include 0-count edges. 42 /// \returns The best block order found. 43 std::vector<uint64_t> computeExtTspLayout(ArrayRef<uint64_t> NodeSizes, 44 ArrayRef<uint64_t> NodeCounts, 45 ArrayRef<EdgeCount> EdgeCounts); 46 47 /// Estimate the "quality" of a given node order in CFG. The higher the score, 48 /// the better the order is. The score is designed to reflect the locality of 49 /// the given order, which is anti-correlated with the number of I-cache misses 50 /// in a typical execution of the function. 51 double calcExtTspScore(ArrayRef<uint64_t> Order, ArrayRef<uint64_t> NodeSizes, 52 ArrayRef<EdgeCount> EdgeCounts); 53 54 /// Estimate the "quality" of the current node order in CFG. 55 double calcExtTspScore(ArrayRef<uint64_t> NodeSizes, 56 ArrayRef<EdgeCount> EdgeCounts); 57 58 /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for 59 /// the best performance of large-scale front-end bound binaries. 60 struct CDSortConfig { 61 /// The size of the cache. 62 unsigned CacheEntries = 16; 63 /// The size of a line in the cache. 64 unsigned CacheSize = 2048; 65 /// The maximum size of a chain to create. 66 unsigned MaxChainSize = 128; 67 /// The power exponent for the distance-based locality. 68 double DistancePower = 0.25; 69 /// The scale factor for the frequency-based locality. 70 double FrequencyScale = 0.25; 71 }; 72 73 /// Apply a Cache-Directed Sort for functions represented by a call graph. 74 /// The placement is done by optimizing the call locality by co-locating 75 /// frequently executed functions. 76 /// \p FuncSizes: The sizes of the nodes (in bytes). 77 /// \p FuncCounts: The execution counts of the nodes in the profile. 78 /// \p CallCounts: The execution counts of every edge (jump) in the profile. The 79 /// map also defines the edges in CFG and should include 0-count edges. 80 /// \p CallOffsets: The offsets of the calls from their source nodes. 81 /// \returns The best function order found. 82 std::vector<uint64_t> computeCacheDirectedLayout( 83 ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts, 84 ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets); 85 86 /// Apply a Cache-Directed Sort with a custom config. 87 std::vector<uint64_t> computeCacheDirectedLayout( 88 const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes, 89 ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts, 90 ArrayRef<uint64_t> CallOffsets); 91 92 } // namespace llvm::codelayout 93 94 #endif // LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 95