1 //===-- Clustering.cpp ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Clustering.h" 10 #include "llvm/ADT/SetVector.h" 11 #include "llvm/ADT/SmallSet.h" 12 #include "llvm/ADT/SmallVector.h" 13 #include <algorithm> 14 #include <string> 15 #include <vector> 16 17 namespace llvm { 18 namespace exegesis { 19 20 // The clustering problem has the following characteristics: 21 // (A) - Low dimension (dimensions are typically proc resource units, 22 // typically < 10). 23 // (B) - Number of points : ~thousands (points are measurements of an MCInst) 24 // (C) - Number of clusters: ~tens. 25 // (D) - The number of clusters is not known /a priory/. 26 // (E) - The amount of noise is relatively small. 27 // The problem is rather small. In terms of algorithms, (D) disqualifies 28 // k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable. 29 // 30 // We've used DBSCAN here because it's simple to implement. This is a pretty 31 // straightforward and inefficient implementation of the pseudocode in [2]. 32 // 33 // [1] https://en.wikipedia.org/wiki/DBSCAN 34 // [2] https://en.wikipedia.org/wiki/OPTICS_algorithm 35 36 // Finds the points at distance less than sqrt(EpsilonSquared) of Q (not 37 // including Q). 38 void InstructionBenchmarkClustering::rangeQuery( 39 const size_t Q, std::vector<size_t> &Neighbors) const { 40 Neighbors.clear(); 41 Neighbors.reserve(Points_.size() - 1); // The Q itself isn't a neighbor. 42 const auto &QMeasurements = Points_[Q].Measurements; 43 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { 44 if (P == Q) 45 continue; 46 const auto &PMeasurements = Points_[P].Measurements; 47 if (PMeasurements.empty()) // Error point. 48 continue; 49 if (isNeighbour(PMeasurements, QMeasurements, 50 AnalysisClusteringEpsilonSquared_)) { 51 Neighbors.push_back(P); 52 } 53 } 54 } 55 56 InstructionBenchmarkClustering::InstructionBenchmarkClustering( 57 const std::vector<InstructionBenchmark> &Points, 58 const double AnalysisClusteringEpsilonSquared) 59 : Points_(Points), 60 AnalysisClusteringEpsilonSquared_(AnalysisClusteringEpsilonSquared), 61 NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {} 62 63 llvm::Error InstructionBenchmarkClustering::validateAndSetup() { 64 ClusterIdForPoint_.resize(Points_.size()); 65 // Mark erroneous measurements out. 66 // All points must have the same number of dimensions, in the same order. 67 const std::vector<BenchmarkMeasure> *LastMeasurement = nullptr; 68 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { 69 const auto &Point = Points_[P]; 70 if (!Point.Error.empty()) { 71 ClusterIdForPoint_[P] = ClusterId::error(); 72 ErrorCluster_.PointIndices.push_back(P); 73 continue; 74 } 75 const auto *CurMeasurement = &Point.Measurements; 76 if (LastMeasurement) { 77 if (LastMeasurement->size() != CurMeasurement->size()) { 78 return llvm::make_error<llvm::StringError>( 79 "inconsistent measurement dimensions", 80 llvm::inconvertibleErrorCode()); 81 } 82 for (size_t I = 0, E = LastMeasurement->size(); I < E; ++I) { 83 if (LastMeasurement->at(I).Key != CurMeasurement->at(I).Key) { 84 return llvm::make_error<llvm::StringError>( 85 "inconsistent measurement dimensions keys", 86 llvm::inconvertibleErrorCode()); 87 } 88 } 89 } 90 LastMeasurement = CurMeasurement; 91 } 92 if (LastMeasurement) { 93 NumDimensions_ = LastMeasurement->size(); 94 } 95 return llvm::Error::success(); 96 } 97 98 void InstructionBenchmarkClustering::dbScan(const size_t MinPts) { 99 std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs. 100 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { 101 if (!ClusterIdForPoint_[P].isUndef()) 102 continue; // Previously processed in inner loop. 103 rangeQuery(P, Neighbors); 104 if (Neighbors.size() + 1 < MinPts) { // Density check. 105 // The region around P is not dense enough to create a new cluster, mark 106 // as noise for now. 107 ClusterIdForPoint_[P] = ClusterId::noise(); 108 continue; 109 } 110 111 // Create a new cluster, add P. 112 Clusters_.emplace_back(ClusterId::makeValid(Clusters_.size())); 113 Cluster &CurrentCluster = Clusters_.back(); 114 ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */ 115 CurrentCluster.PointIndices.push_back(P); 116 117 // Process P's neighbors. 118 llvm::SetVector<size_t, std::deque<size_t>> ToProcess; 119 ToProcess.insert(Neighbors.begin(), Neighbors.end()); 120 while (!ToProcess.empty()) { 121 // Retrieve a point from the set. 122 const size_t Q = *ToProcess.begin(); 123 ToProcess.erase(ToProcess.begin()); 124 125 if (ClusterIdForPoint_[Q].isNoise()) { 126 // Change noise point to border point. 127 ClusterIdForPoint_[Q] = CurrentCluster.Id; 128 CurrentCluster.PointIndices.push_back(Q); 129 continue; 130 } 131 if (!ClusterIdForPoint_[Q].isUndef()) { 132 continue; // Previously processed. 133 } 134 // Add Q to the current custer. 135 ClusterIdForPoint_[Q] = CurrentCluster.Id; 136 CurrentCluster.PointIndices.push_back(Q); 137 // And extend to the neighbors of Q if the region is dense enough. 138 rangeQuery(Q, Neighbors); 139 if (Neighbors.size() + 1 >= MinPts) { 140 ToProcess.insert(Neighbors.begin(), Neighbors.end()); 141 } 142 } 143 } 144 // assert(Neighbors.capacity() == (Points_.size() - 1)); 145 // ^ True, but it is not quaranteed to be true in all the cases. 146 147 // Add noisy points to noise cluster. 148 for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { 149 if (ClusterIdForPoint_[P].isNoise()) { 150 NoiseCluster_.PointIndices.push_back(P); 151 } 152 } 153 } 154 155 // Given an instruction Opcode, we can make benchmarks (measurements) of the 156 // instruction characteristics/performance. Then, to facilitate further analysis 157 // we group the benchmarks with *similar* characteristics into clusters. 158 // Now, this is all not entirely deterministic. Some instructions have variable 159 // characteristics, depending on their arguments. And thus, if we do several 160 // benchmarks of the same instruction Opcode, we may end up with *different* 161 // performance characteristics measurements. And when we then do clustering, 162 // these several benchmarks of the same instruction Opcode may end up being 163 // clustered into *different* clusters. This is not great for further analysis. 164 // We shall find every opcode with benchmarks not in just one cluster, and move 165 // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode. 166 void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) { 167 // Given an instruction Opcode, in which clusters do benchmarks of this 168 // instruction lie? Normally, they all should be in the same cluster. 169 std::vector<llvm::SmallSet<ClusterId, 1>> OpcodeToClusterIDs; 170 OpcodeToClusterIDs.resize(NumOpcodes); 171 // The list of opcodes that have more than one cluster. 172 llvm::SetVector<size_t> UnstableOpcodes; 173 // Populate OpcodeToClusterIDs and UnstableOpcodes data structures. 174 assert(ClusterIdForPoint_.size() == Points_.size() && "size mismatch"); 175 for (const auto &Point : zip(Points_, ClusterIdForPoint_)) { 176 const ClusterId &ClusterIdOfPoint = std::get<1>(Point); 177 if (!ClusterIdOfPoint.isValid()) 178 continue; // Only process fully valid clusters. 179 const unsigned Opcode = std::get<0>(Point).keyInstruction().getOpcode(); 180 assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)"); 181 llvm::SmallSet<ClusterId, 1> &ClusterIDsOfOpcode = 182 OpcodeToClusterIDs[Opcode]; 183 ClusterIDsOfOpcode.insert(ClusterIdOfPoint); 184 // Is there more than one ClusterID for this opcode?. 185 if (ClusterIDsOfOpcode.size() < 2) 186 continue; // If not, then at this moment this Opcode is stable. 187 // Else let's record this unstable opcode for future use. 188 UnstableOpcodes.insert(Opcode); 189 } 190 assert(OpcodeToClusterIDs.size() == NumOpcodes && "sanity check"); 191 192 // We know with how many [new] clusters we will end up with. 193 const auto NewTotalClusterCount = Clusters_.size() + UnstableOpcodes.size(); 194 Clusters_.reserve(NewTotalClusterCount); 195 for (const size_t UnstableOpcode : UnstableOpcodes.getArrayRef()) { 196 const llvm::SmallSet<ClusterId, 1> &ClusterIDs = 197 OpcodeToClusterIDs[UnstableOpcode]; 198 assert(ClusterIDs.size() > 1 && 199 "Should only have Opcodes with more than one cluster."); 200 201 // Create a new unstable cluster, one per Opcode. 202 Clusters_.emplace_back(ClusterId::makeValidUnstable(Clusters_.size())); 203 Cluster &UnstableCluster = Clusters_.back(); 204 // We will find *at least* one point in each of these clusters. 205 UnstableCluster.PointIndices.reserve(ClusterIDs.size()); 206 207 // Go through every cluster which we recorded as containing benchmarks 208 // of this UnstableOpcode. NOTE: we only recorded valid clusters. 209 for (const ClusterId &CID : ClusterIDs) { 210 assert(CID.isValid() && 211 "We only recorded valid clusters, not noise/error clusters."); 212 Cluster &OldCluster = Clusters_[CID.getId()]; // Valid clusters storage. 213 // Within each cluster, go through each point, and either move it to the 214 // new unstable cluster, or 'keep' it. 215 // In this case, we'll reshuffle OldCluster.PointIndices vector 216 // so that all the points that are *not* for UnstableOpcode are first, 217 // and the rest of the points is for the UnstableOpcode. 218 const auto it = std::stable_partition( 219 OldCluster.PointIndices.begin(), OldCluster.PointIndices.end(), 220 [this, UnstableOpcode](size_t P) { 221 return Points_[P].keyInstruction().getOpcode() != UnstableOpcode; 222 }); 223 assert(std::distance(it, OldCluster.PointIndices.end()) > 0 && 224 "Should have found at least one bad point"); 225 // Mark to-be-moved points as belonging to the new cluster. 226 std::for_each(it, OldCluster.PointIndices.end(), 227 [this, &UnstableCluster](size_t P) { 228 ClusterIdForPoint_[P] = UnstableCluster.Id; 229 }); 230 // Actually append to-be-moved points to the new cluster. 231 UnstableCluster.PointIndices.insert(UnstableCluster.PointIndices.end(), 232 it, OldCluster.PointIndices.end()); 233 // And finally, remove "to-be-moved" points form the old cluster. 234 OldCluster.PointIndices.erase(it, OldCluster.PointIndices.end()); 235 // Now, the old cluster may end up being empty, but let's just keep it 236 // in whatever state it ended up. Purging empty clusters isn't worth it. 237 }; 238 assert(UnstableCluster.PointIndices.size() > 1 && 239 "New unstable cluster should end up with more than one point."); 240 assert(UnstableCluster.PointIndices.size() >= ClusterIDs.size() && 241 "New unstable cluster should end up with no less points than there " 242 "was clusters"); 243 } 244 assert(Clusters_.size() == NewTotalClusterCount && "sanity check"); 245 } 246 247 llvm::Expected<InstructionBenchmarkClustering> 248 InstructionBenchmarkClustering::create( 249 const std::vector<InstructionBenchmark> &Points, const size_t MinPts, 250 const double AnalysisClusteringEpsilon, 251 llvm::Optional<unsigned> NumOpcodes) { 252 InstructionBenchmarkClustering Clustering( 253 Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon); 254 if (auto Error = Clustering.validateAndSetup()) { 255 return std::move(Error); 256 } 257 if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) { 258 return Clustering; // Nothing to cluster. 259 } 260 261 Clustering.dbScan(MinPts); 262 263 if (NumOpcodes.hasValue()) 264 Clustering.stabilize(NumOpcodes.getValue()); 265 266 return Clustering; 267 } 268 269 } // namespace exegesis 270 } // namespace llvm 271