1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/IR/StructuralHash.h" 10 #include "llvm/IR/Function.h" 11 #include "llvm/IR/GlobalVariable.h" 12 #include "llvm/IR/InstrTypes.h" 13 #include "llvm/IR/Instructions.h" 14 #include "llvm/IR/IntrinsicInst.h" 15 #include "llvm/IR/Module.h" 16 17 using namespace llvm; 18 19 namespace { 20 21 // Basic hashing mechanism to detect structural change to the IR, used to verify 22 // pass return status consistency with actual change. In addition to being used 23 // by the MergeFunctions pass. 24 25 class StructuralHashImpl { 26 stable_hash Hash = 4; 27 28 bool DetailedHash; 29 30 // This random value acts as a block header, as otherwise the partition of 31 // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. 32 static constexpr stable_hash BlockHeaderHash = 45798; 33 static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; 34 static constexpr stable_hash GlobalHeaderHash = 23456; 35 36 /// IgnoreOp is a function that returns true if the operand should be ignored. 37 IgnoreOperandFunc IgnoreOp = nullptr; 38 /// A mapping from instruction indices to instruction pointers. 39 /// The index represents the position of an instruction based on the order in 40 /// which it is first encountered. 41 std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr; 42 /// A mapping from pairs of instruction indices and operand indices 43 /// to the hashes of the operands. 44 std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr; 45 46 /// Assign a unique ID to each Value in the order they are first seen. 47 DenseMap<const Value *, int> ValueToId; 48 49 stable_hash hashType(Type *ValueType) { 50 SmallVector<stable_hash> Hashes; 51 Hashes.emplace_back(ValueType->getTypeID()); 52 if (ValueType->isIntegerTy()) 53 Hashes.emplace_back(ValueType->getIntegerBitWidth()); 54 return stable_hash_combine(Hashes); 55 } 56 57 public: 58 StructuralHashImpl() = delete; 59 explicit StructuralHashImpl(bool DetailedHash, 60 IgnoreOperandFunc IgnoreOp = nullptr) 61 : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { 62 if (IgnoreOp) { 63 IndexInstruction = std::make_unique<IndexInstrMap>(); 64 IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>(); 65 } 66 } 67 68 stable_hash hashAPInt(const APInt &I) { 69 SmallVector<stable_hash> Hashes; 70 Hashes.emplace_back(I.getBitWidth()); 71 auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords()); 72 Hashes.append(RawVals.begin(), RawVals.end()); 73 return stable_hash_combine(Hashes); 74 } 75 76 stable_hash hashAPFloat(const APFloat &F) { 77 return hashAPInt(F.bitcastToAPInt()); 78 } 79 80 stable_hash hashGlobalValue(const GlobalValue *GV) { 81 if (!GV->hasName()) 82 return 0; 83 return stable_hash_name(GV->getName()); 84 } 85 86 // Compute a hash for a Constant. This function is logically similar to 87 // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here 88 // we're interested in computing a hash rather than comparing two Constants. 89 // Some of the logic is simplified, e.g, we don't expand GEPOperator. 90 stable_hash hashConstant(Constant *C) { 91 SmallVector<stable_hash> Hashes; 92 93 Type *Ty = C->getType(); 94 Hashes.emplace_back(hashType(Ty)); 95 96 if (C->isNullValue()) { 97 Hashes.emplace_back(static_cast<stable_hash>('N')); 98 return stable_hash_combine(Hashes); 99 } 100 101 if (auto *G = dyn_cast<GlobalValue>(C)) { 102 Hashes.emplace_back(hashGlobalValue(G)); 103 return stable_hash_combine(Hashes); 104 } 105 106 if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) { 107 Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); 108 return stable_hash_combine(Hashes); 109 } 110 111 switch (C->getValueID()) { 112 case Value::ConstantIntVal: { 113 const APInt &Int = cast<ConstantInt>(C)->getValue(); 114 Hashes.emplace_back(hashAPInt(Int)); 115 return stable_hash_combine(Hashes); 116 } 117 case Value::ConstantFPVal: { 118 const APFloat &APF = cast<ConstantFP>(C)->getValueAPF(); 119 Hashes.emplace_back(hashAPFloat(APF)); 120 return stable_hash_combine(Hashes); 121 } 122 case Value::ConstantArrayVal: 123 case Value::ConstantStructVal: 124 case Value::ConstantVectorVal: 125 case Value::ConstantExprVal: { 126 for (const auto &Op : C->operands()) { 127 auto H = hashConstant(cast<Constant>(Op)); 128 Hashes.emplace_back(H); 129 } 130 return stable_hash_combine(Hashes); 131 } 132 case Value::BlockAddressVal: { 133 const BlockAddress *BA = cast<BlockAddress>(C); 134 auto H = hashGlobalValue(BA->getFunction()); 135 Hashes.emplace_back(H); 136 return stable_hash_combine(Hashes); 137 } 138 case Value::DSOLocalEquivalentVal: { 139 const auto *Equiv = cast<DSOLocalEquivalent>(C); 140 auto H = hashGlobalValue(Equiv->getGlobalValue()); 141 Hashes.emplace_back(H); 142 return stable_hash_combine(Hashes); 143 } 144 default: 145 // Skip other types of constants for simplicity. 146 return stable_hash_combine(Hashes); 147 } 148 } 149 150 stable_hash hashValue(Value *V) { 151 // Check constant and return its hash. 152 Constant *C = dyn_cast<Constant>(V); 153 if (C) 154 return hashConstant(C); 155 156 // Hash argument number. 157 SmallVector<stable_hash> Hashes; 158 if (Argument *Arg = dyn_cast<Argument>(V)) 159 Hashes.emplace_back(Arg->getArgNo()); 160 161 // Get an index (an insertion order) for the non-constant value. 162 auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size()); 163 Hashes.emplace_back(It->second); 164 165 return stable_hash_combine(Hashes); 166 } 167 168 stable_hash hashOperand(Value *Operand) { 169 SmallVector<stable_hash> Hashes; 170 Hashes.emplace_back(hashType(Operand->getType())); 171 Hashes.emplace_back(hashValue(Operand)); 172 return stable_hash_combine(Hashes); 173 } 174 175 stable_hash hashInstruction(const Instruction &Inst) { 176 SmallVector<stable_hash> Hashes; 177 Hashes.emplace_back(Inst.getOpcode()); 178 179 if (!DetailedHash) 180 return stable_hash_combine(Hashes); 181 182 Hashes.emplace_back(hashType(Inst.getType())); 183 184 // Handle additional properties of specific instructions that cause 185 // semantic differences in the IR. 186 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) 187 Hashes.emplace_back(ComparisonInstruction->getPredicate()); 188 189 unsigned InstIdx = 0; 190 if (IndexInstruction) { 191 InstIdx = IndexInstruction->size(); 192 IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst)); 193 } 194 195 for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { 196 auto OpndHash = hashOperand(Op); 197 if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { 198 assert(IndexOperandHashMap); 199 IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash); 200 } else 201 Hashes.emplace_back(OpndHash); 202 } 203 204 return stable_hash_combine(Hashes); 205 } 206 207 // A function hash is calculated by considering only the number of arguments 208 // and whether a function is varargs, the order of basic blocks (given by the 209 // successors of each basic block in depth first order), and the order of 210 // opcodes of each instruction within each of these basic blocks. This mirrors 211 // the strategy FunctionComparator::compare() uses to compare functions by 212 // walking the BBs in depth first order and comparing each instruction in 213 // sequence. Because this hash currently does not look at the operands, it is 214 // insensitive to things such as the target of calls and the constants used in 215 // the function, which makes it useful when possibly merging functions which 216 // are the same modulo constants and call targets. 217 // 218 // Note that different users of StructuralHash will want different behavior 219 // out of it (i.e., MergeFunctions will want something different from PM 220 // expensive checks for pass modification status). When modifying this 221 // function, most changes should be gated behind an option and enabled 222 // selectively. 223 void update(const Function &F) { 224 // Declarations don't affect analyses. 225 if (F.isDeclaration()) 226 return; 227 228 SmallVector<stable_hash> Hashes; 229 Hashes.emplace_back(Hash); 230 Hashes.emplace_back(FunctionHeaderHash); 231 232 Hashes.emplace_back(F.isVarArg()); 233 Hashes.emplace_back(F.arg_size()); 234 235 SmallVector<const BasicBlock *, 8> BBs; 236 SmallPtrSet<const BasicBlock *, 16> VisitedBBs; 237 238 // Walk the blocks in the same order as 239 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the 240 // function "structure." (BB and opcode sequence) 241 BBs.push_back(&F.getEntryBlock()); 242 VisitedBBs.insert(BBs[0]); 243 while (!BBs.empty()) { 244 const BasicBlock *BB = BBs.pop_back_val(); 245 246 Hashes.emplace_back(BlockHeaderHash); 247 for (auto &Inst : *BB) 248 Hashes.emplace_back(hashInstruction(Inst)); 249 250 for (const BasicBlock *Succ : successors(BB)) 251 if (VisitedBBs.insert(Succ).second) 252 BBs.push_back(Succ); 253 } 254 255 // Update the combined hash in place. 256 Hash = stable_hash_combine(Hashes); 257 } 258 259 void update(const GlobalVariable &GV) { 260 // Declarations and used/compiler.used don't affect analyses. 261 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, 262 // we ignore anything with the `.llvm` prefix 263 if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) 264 return; 265 SmallVector<stable_hash> Hashes; 266 Hashes.emplace_back(Hash); 267 Hashes.emplace_back(GlobalHeaderHash); 268 Hashes.emplace_back(GV.getValueType()->getTypeID()); 269 270 // Update the combined hash in place. 271 Hash = stable_hash_combine(Hashes); 272 } 273 274 void update(const Module &M) { 275 for (const GlobalVariable &GV : M.globals()) 276 update(GV); 277 for (const Function &F : M) 278 update(F); 279 } 280 281 uint64_t getHash() const { return Hash; } 282 283 std::unique_ptr<IndexInstrMap> getIndexInstrMap() { 284 return std::move(IndexInstruction); 285 } 286 287 std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() { 288 return std::move(IndexOperandHashMap); 289 } 290 }; 291 292 } // namespace 293 294 stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { 295 StructuralHashImpl H(DetailedHash); 296 H.update(F); 297 return H.getHash(); 298 } 299 300 stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { 301 StructuralHashImpl H(DetailedHash); 302 H.update(M); 303 return H.getHash(); 304 } 305 306 FunctionHashInfo 307 llvm::StructuralHashWithDifferences(const Function &F, 308 IgnoreOperandFunc IgnoreOp) { 309 StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); 310 H.update(F); 311 return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), 312 H.getIndexPairOpndHashMap()); 313 } 314