xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Clustering.h (revision 389bf5d870b3bc014b004a750c539786eba8c543)
1 //===-- Clustering.h --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Utilities to compute benchmark result clusters.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
15 #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
16 
17 #include "BenchmarkResult.h"
18 #include "llvm/Support/Error.h"
19 #include <limits>
20 #include <vector>
21 
22 namespace llvm {
23 namespace exegesis {
24 
25 class BenchmarkClustering {
26 public:
27   enum ModeE { Dbscan, Naive };
28 
29   // Clusters `Points` using DBSCAN with the given parameters. See the cc file
30   // for more explanations on the algorithm.
31   static Expected<BenchmarkClustering>
32   create(const std::vector<Benchmark> &Points, ModeE Mode,
33          size_t DbscanMinPts, double AnalysisClusteringEpsilon,
34          const MCSubtargetInfo *SubtargetInfo = nullptr,
35          const MCInstrInfo *InstrInfo = nullptr);
36 
37   class ClusterId {
38   public:
noise()39     static ClusterId noise() { return ClusterId(kNoise); }
error()40     static ClusterId error() { return ClusterId(kError); }
41     static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
42       return ClusterId(Id, IsUnstable);
43     }
makeValidUnstable(size_t Id)44     static ClusterId makeValidUnstable(size_t Id) {
45       return makeValid(Id, /*IsUnstable=*/true);
46     }
47 
ClusterId()48     ClusterId() : Id_(kUndef), IsUnstable_(false) {}
49 
50     // Compare id's, ignoring the 'unstability' bit.
51     bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
52     bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
53 
isValid()54     bool isValid() const { return Id_ <= kMaxValid; }
isUnstable()55     bool isUnstable() const { return IsUnstable_; }
isNoise()56     bool isNoise() const { return Id_ == kNoise; }
isError()57     bool isError() const { return Id_ == kError; }
isUndef()58     bool isUndef() const { return Id_ == kUndef; }
59 
60     // Precondition: isValid().
getId()61     size_t getId() const {
62       assert(isValid());
63       return Id_;
64     }
65 
66   private:
67     ClusterId(size_t Id, bool IsUnstable = false)
Id_(Id)68         : Id_(Id), IsUnstable_(IsUnstable) {}
69 
70     static constexpr const size_t kMaxValid =
71         (std::numeric_limits<size_t>::max() >> 1) - 4;
72     static constexpr const size_t kNoise = kMaxValid + 1;
73     static constexpr const size_t kError = kMaxValid + 2;
74     static constexpr const size_t kUndef = kMaxValid + 3;
75 
76     size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
77     size_t IsUnstable_ : 1;
78   };
79   static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
80 
81   struct Cluster {
82     Cluster() = delete;
ClusterCluster83     explicit Cluster(const ClusterId &Id) : Id(Id) {}
84 
85     const ClusterId Id;
86     // Indices of benchmarks within the cluster.
87     std::vector<int> PointIndices;
88   };
89 
getClusterIdForPoint(size_t P)90   ClusterId getClusterIdForPoint(size_t P) const {
91     return ClusterIdForPoint_[P];
92   }
93 
getPoints()94   const std::vector<Benchmark> &getPoints() const { return Points_; }
95 
getCluster(ClusterId Id)96   const Cluster &getCluster(ClusterId Id) const {
97     assert(!Id.isUndef() && "unlabeled cluster");
98     if (Id.isNoise()) {
99       return NoiseCluster_;
100     }
101     if (Id.isError()) {
102       return ErrorCluster_;
103     }
104     return Clusters_[Id.getId()];
105   }
106 
getValidClusters()107   const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
108 
109   // Returns true if the given point is within a distance Epsilon of each other.
isNeighbour(const std::vector<BenchmarkMeasure> & P,const std::vector<BenchmarkMeasure> & Q,const double EpsilonSquared_)110   bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
111                    const std::vector<BenchmarkMeasure> &Q,
112                    const double EpsilonSquared_) const {
113     double DistanceSquared = 0.0;
114     for (size_t I = 0, E = P.size(); I < E; ++I) {
115       const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
116       DistanceSquared += Diff * Diff;
117     }
118     return DistanceSquared <= EpsilonSquared_;
119   }
120 
121 private:
122   BenchmarkClustering(
123       const std::vector<Benchmark> &Points,
124       double AnalysisClusteringEpsilonSquared);
125 
126   Error validateAndSetup();
127 
128   void clusterizeDbScan(size_t MinPts);
129   void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
130                        const MCInstrInfo &InstrInfo);
131 
132   // Stabilization is only needed if dbscan was used to clusterize.
133   void stabilize(unsigned NumOpcodes);
134 
135   void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
136 
137   bool areAllNeighbours(ArrayRef<size_t> Pts) const;
138 
139   const std::vector<Benchmark> &Points_;
140   const double AnalysisClusteringEpsilonSquared_;
141 
142   int NumDimensions_ = 0;
143   // ClusterForPoint_[P] is the cluster id for Points[P].
144   std::vector<ClusterId> ClusterIdForPoint_;
145   std::vector<Cluster> Clusters_;
146   Cluster NoiseCluster_;
147   Cluster ErrorCluster_;
148 };
149 
150 class SchedClassClusterCentroid {
151 public:
getStats()152   const std::vector<PerInstructionStats> &getStats() const {
153     return Representative;
154   }
155 
156   std::vector<BenchmarkMeasure> getAsPoint() const;
157 
158   void addPoint(ArrayRef<BenchmarkMeasure> Point);
159 
160   bool validate(Benchmark::ModeE Mode) const;
161 
162 private:
163   // Measurement stats for the points in the SchedClassCluster.
164   std::vector<PerInstructionStats> Representative;
165 };
166 
167 } // namespace exegesis
168 } // namespace llvm
169 
170 #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
171