1 //===-- Clustering.h --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Utilities to compute benchmark result clusters. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H 15 #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H 16 17 #include "BenchmarkResult.h" 18 #include "llvm/Support/Error.h" 19 #include <limits> 20 #include <vector> 21 22 namespace llvm { 23 namespace exegesis { 24 25 class BenchmarkClustering { 26 public: 27 enum ModeE { Dbscan, Naive }; 28 29 // Clusters `Points` using DBSCAN with the given parameters. See the cc file 30 // for more explanations on the algorithm. 31 static Expected<BenchmarkClustering> 32 create(const std::vector<Benchmark> &Points, ModeE Mode, 33 size_t DbscanMinPts, double AnalysisClusteringEpsilon, 34 const MCSubtargetInfo *SubtargetInfo = nullptr, 35 const MCInstrInfo *InstrInfo = nullptr); 36 37 class ClusterId { 38 public: noise()39 static ClusterId noise() { return ClusterId(kNoise); } error()40 static ClusterId error() { return ClusterId(kError); } 41 static ClusterId makeValid(size_t Id, bool IsUnstable = false) { 42 return ClusterId(Id, IsUnstable); 43 } makeValidUnstable(size_t Id)44 static ClusterId makeValidUnstable(size_t Id) { 45 return makeValid(Id, /*IsUnstable=*/true); 46 } 47 ClusterId()48 ClusterId() : Id_(kUndef), IsUnstable_(false) {} 49 50 // Compare id's, ignoring the 'unstability' bit. 51 bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } 52 bool operator<(const ClusterId &O) const { return Id_ < O.Id_; } 53 isValid()54 bool isValid() const { return Id_ <= kMaxValid; } isUnstable()55 bool isUnstable() const { return IsUnstable_; } isNoise()56 bool isNoise() const { return Id_ == kNoise; } isError()57 bool isError() const { return Id_ == kError; } isUndef()58 bool isUndef() const { return Id_ == kUndef; } 59 60 // Precondition: isValid(). getId()61 size_t getId() const { 62 assert(isValid()); 63 return Id_; 64 } 65 66 private: 67 ClusterId(size_t Id, bool IsUnstable = false) Id_(Id)68 : Id_(Id), IsUnstable_(IsUnstable) {} 69 70 static constexpr const size_t kMaxValid = 71 (std::numeric_limits<size_t>::max() >> 1) - 4; 72 static constexpr const size_t kNoise = kMaxValid + 1; 73 static constexpr const size_t kError = kMaxValid + 2; 74 static constexpr const size_t kUndef = kMaxValid + 3; 75 76 size_t Id_ : (std::numeric_limits<size_t>::digits - 1); 77 size_t IsUnstable_ : 1; 78 }; 79 static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field."); 80 81 struct Cluster { 82 Cluster() = delete; ClusterCluster83 explicit Cluster(const ClusterId &Id) : Id(Id) {} 84 85 const ClusterId Id; 86 // Indices of benchmarks within the cluster. 87 std::vector<int> PointIndices; 88 }; 89 getClusterIdForPoint(size_t P)90 ClusterId getClusterIdForPoint(size_t P) const { 91 return ClusterIdForPoint_[P]; 92 } 93 getPoints()94 const std::vector<Benchmark> &getPoints() const { return Points_; } 95 getCluster(ClusterId Id)96 const Cluster &getCluster(ClusterId Id) const { 97 assert(!Id.isUndef() && "unlabeled cluster"); 98 if (Id.isNoise()) { 99 return NoiseCluster_; 100 } 101 if (Id.isError()) { 102 return ErrorCluster_; 103 } 104 return Clusters_[Id.getId()]; 105 } 106 getValidClusters()107 const std::vector<Cluster> &getValidClusters() const { return Clusters_; } 108 109 // Returns true if the given point is within a distance Epsilon of each other. isNeighbour(const std::vector<BenchmarkMeasure> & P,const std::vector<BenchmarkMeasure> & Q,const double EpsilonSquared_)110 bool isNeighbour(const std::vector<BenchmarkMeasure> &P, 111 const std::vector<BenchmarkMeasure> &Q, 112 const double EpsilonSquared_) const { 113 double DistanceSquared = 0.0; 114 for (size_t I = 0, E = P.size(); I < E; ++I) { 115 const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; 116 DistanceSquared += Diff * Diff; 117 } 118 return DistanceSquared <= EpsilonSquared_; 119 } 120 121 private: 122 BenchmarkClustering( 123 const std::vector<Benchmark> &Points, 124 double AnalysisClusteringEpsilonSquared); 125 126 Error validateAndSetup(); 127 128 void clusterizeDbScan(size_t MinPts); 129 void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo, 130 const MCInstrInfo &InstrInfo); 131 132 // Stabilization is only needed if dbscan was used to clusterize. 133 void stabilize(unsigned NumOpcodes); 134 135 void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const; 136 137 bool areAllNeighbours(ArrayRef<size_t> Pts) const; 138 139 const std::vector<Benchmark> &Points_; 140 const double AnalysisClusteringEpsilonSquared_; 141 142 int NumDimensions_ = 0; 143 // ClusterForPoint_[P] is the cluster id for Points[P]. 144 std::vector<ClusterId> ClusterIdForPoint_; 145 std::vector<Cluster> Clusters_; 146 Cluster NoiseCluster_; 147 Cluster ErrorCluster_; 148 }; 149 150 class SchedClassClusterCentroid { 151 public: getStats()152 const std::vector<PerInstructionStats> &getStats() const { 153 return Representative; 154 } 155 156 std::vector<BenchmarkMeasure> getAsPoint() const; 157 158 void addPoint(ArrayRef<BenchmarkMeasure> Point); 159 160 bool validate(Benchmark::ModeE Mode) const; 161 162 private: 163 // Measurement stats for the points in the SchedClassCluster. 164 std::vector<PerInstructionStats> Representative; 165 }; 166 167 } // namespace exegesis 168 } // namespace llvm 169 170 #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H 171