xref: /llvm-project/llvm/include/llvm/Transforms/Utils/CodeLayout.h (revision fbec1c2a08ce2ae9750ddf3cecc86c5dd2bbc9d8)
1 //===- CodeLayout.h - Code layout/placement algorithms  ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Declares methods and data structures for code layout algorithms.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
15 #define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 
19 #include <utility>
20 #include <vector>
21 
22 namespace llvm::codelayout {
23 
24 using EdgeT = std::pair<uint64_t, uint64_t>;
25 
26 struct EdgeCount {
27   uint64_t src;
28   uint64_t dst;
29   uint64_t count;
30 };
31 
32 /// Find a layout of nodes (basic blocks) of a given CFG optimizing jump
33 /// locality and thus processor I-cache utilization. This is achieved via
34 /// increasing the number of fall-through jumps and co-locating frequently
35 /// executed nodes together.
36 /// The nodes are assumed to be indexed by integers from [0, |V|) so that the
37 /// current order is the identity permutation.
38 /// \p NodeSizes: The sizes of the nodes (in bytes).
39 /// \p NodeCounts: The execution counts of the nodes in the profile.
40 /// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The
41 ///    map also defines the edges in CFG and should include 0-count edges.
42 /// \returns The best block order found.
43 std::vector<uint64_t> computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
44                                           ArrayRef<uint64_t> NodeCounts,
45                                           ArrayRef<EdgeCount> EdgeCounts);
46 
47 /// Estimate the "quality" of a given node order in CFG. The higher the score,
48 /// the better the order is. The score is designed to reflect the locality of
49 /// the given order, which is anti-correlated with the number of I-cache misses
50 /// in a typical execution of the function.
51 double calcExtTspScore(ArrayRef<uint64_t> Order, ArrayRef<uint64_t> NodeSizes,
52                        ArrayRef<EdgeCount> EdgeCounts);
53 
54 /// Estimate the "quality" of the current node order in CFG.
55 double calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
56                        ArrayRef<EdgeCount> EdgeCounts);
57 
58 /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for
59 /// the best performance of large-scale front-end bound binaries.
60 struct CDSortConfig {
61   /// The size of the cache.
62   unsigned CacheEntries = 16;
63   /// The size of a line in the cache.
64   unsigned CacheSize = 2048;
65   /// The maximum size of a chain to create.
66   unsigned MaxChainSize = 128;
67   /// The power exponent for the distance-based locality.
68   double DistancePower = 0.25;
69   /// The scale factor for the frequency-based locality.
70   double FrequencyScale = 0.25;
71 };
72 
73 /// Apply a Cache-Directed Sort for functions represented by a call graph.
74 /// The placement is done by optimizing the call locality by co-locating
75 /// frequently executed functions.
76 /// \p FuncSizes: The sizes of the nodes (in bytes).
77 /// \p FuncCounts: The execution counts of the nodes in the profile.
78 /// \p CallCounts: The execution counts of every edge (jump) in the profile. The
79 ///    map also defines the edges in CFG and should include 0-count edges.
80 /// \p CallOffsets: The offsets of the calls from their source nodes.
81 /// \returns The best function order found.
82 std::vector<uint64_t> computeCacheDirectedLayout(
83     ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
84     ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets);
85 
86 /// Apply a Cache-Directed Sort with a custom config.
87 std::vector<uint64_t> computeCacheDirectedLayout(
88     const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
89     ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
90     ArrayRef<uint64_t> CallOffsets);
91 
92 } // namespace llvm::codelayout
93 
94 #endif // LLVM_TRANSFORMS_UTILS_CODELAYOUT_H
95