1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/IR/StructuralHash.h" 10 #include "llvm/IR/Function.h" 11 #include "llvm/IR/GlobalVariable.h" 12 #include "llvm/IR/InstrTypes.h" 13 #include "llvm/IR/Instructions.h" 14 #include "llvm/IR/IntrinsicInst.h" 15 #include "llvm/IR/Module.h" 16 17 using namespace llvm; 18 19 namespace { 20 21 // Basic hashing mechanism to detect structural change to the IR, used to verify 22 // pass return status consistency with actual change. In addition to being used 23 // by the MergeFunctions pass. 24 25 class StructuralHashImpl { 26 stable_hash Hash = 4; 27 28 bool DetailedHash; 29 30 // This random value acts as a block header, as otherwise the partition of 31 // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. 32 static constexpr stable_hash BlockHeaderHash = 45798; 33 static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; 34 static constexpr stable_hash GlobalHeaderHash = 23456; 35 36 /// IgnoreOp is a function that returns true if the operand should be ignored. 37 IgnoreOperandFunc IgnoreOp = nullptr; 38 /// A mapping from instruction indices to instruction pointers. 39 /// The index represents the position of an instruction based on the order in 40 /// which it is first encountered. 41 std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr; 42 /// A mapping from pairs of instruction indices and operand indices 43 /// to the hashes of the operands. 44 std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr; 45 46 /// Assign a unique ID to each Value in the order they are first seen. 47 DenseMap<const Value *, int> ValueToId; 48 49 static stable_hash hashType(Type *ValueType) { 50 SmallVector<stable_hash> Hashes; 51 Hashes.emplace_back(ValueType->getTypeID()); 52 if (ValueType->isIntegerTy()) 53 Hashes.emplace_back(ValueType->getIntegerBitWidth()); 54 return stable_hash_combine(Hashes); 55 } 56 57 public: 58 StructuralHashImpl() = delete; 59 explicit StructuralHashImpl(bool DetailedHash, 60 IgnoreOperandFunc IgnoreOp = nullptr) 61 : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { 62 if (IgnoreOp) { 63 IndexInstruction = std::make_unique<IndexInstrMap>(); 64 IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>(); 65 } 66 } 67 68 static stable_hash hashAPInt(const APInt &I) { 69 SmallVector<stable_hash> Hashes; 70 Hashes.emplace_back(I.getBitWidth()); 71 auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords()); 72 Hashes.append(RawVals.begin(), RawVals.end()); 73 return stable_hash_combine(Hashes); 74 } 75 76 static stable_hash hashAPFloat(const APFloat &F) { 77 return hashAPInt(F.bitcastToAPInt()); 78 } 79 80 static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { 81 if (!GVar.hasInitializer()) 82 return hashGlobalValue(&GVar); 83 84 // Hash the contents of a string. 85 if (GVar.getName().starts_with(".str")) { 86 auto *C = GVar.getInitializer(); 87 if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) 88 if (Seq->isString()) 89 return stable_hash_name(Seq->getAsString()); 90 } 91 92 // Hash structural contents of Objective-C metadata in specific sections. 93 // This can be extended to other metadata if needed. 94 static constexpr const char *SectionNames[] = { 95 "__cfstring", "__cstring", "__objc_classrefs", 96 "__objc_methname", "__objc_selrefs", 97 }; 98 if (GVar.hasSection()) { 99 StringRef SectionName = GVar.getSection(); 100 for (const char *Name : SectionNames) 101 if (SectionName.contains(Name)) 102 return hashConstant(GVar.getInitializer()); 103 } 104 105 return hashGlobalValue(&GVar); 106 } 107 108 static stable_hash hashGlobalValue(const GlobalValue *GV) { 109 if (!GV->hasName()) 110 return 0; 111 return stable_hash_name(GV->getName()); 112 } 113 114 // Compute a hash for a Constant. This function is logically similar to 115 // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here 116 // we're interested in computing a hash rather than comparing two Constants. 117 // Some of the logic is simplified, e.g, we don't expand GEPOperator. 118 static stable_hash hashConstant(const Constant *C) { 119 SmallVector<stable_hash> Hashes; 120 121 Type *Ty = C->getType(); 122 Hashes.emplace_back(hashType(Ty)); 123 124 if (C->isNullValue()) { 125 Hashes.emplace_back(static_cast<stable_hash>('N')); 126 return stable_hash_combine(Hashes); 127 } 128 129 if (auto *GVar = dyn_cast<GlobalVariable>(C)) { 130 Hashes.emplace_back(hashGlobalVariable(*GVar)); 131 return stable_hash_combine(Hashes); 132 } 133 134 if (auto *G = dyn_cast<GlobalValue>(C)) { 135 Hashes.emplace_back(hashGlobalValue(G)); 136 return stable_hash_combine(Hashes); 137 } 138 139 if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) { 140 if (Seq->isString()) { 141 Hashes.emplace_back(stable_hash_name(Seq->getAsString())); 142 return stable_hash_combine(Hashes); 143 } 144 } 145 146 switch (C->getValueID()) { 147 case Value::ConstantIntVal: { 148 const APInt &Int = cast<ConstantInt>(C)->getValue(); 149 Hashes.emplace_back(hashAPInt(Int)); 150 return stable_hash_combine(Hashes); 151 } 152 case Value::ConstantFPVal: { 153 const APFloat &APF = cast<ConstantFP>(C)->getValueAPF(); 154 Hashes.emplace_back(hashAPFloat(APF)); 155 return stable_hash_combine(Hashes); 156 } 157 case Value::ConstantArrayVal: 158 case Value::ConstantStructVal: 159 case Value::ConstantVectorVal: 160 case Value::ConstantExprVal: { 161 for (const auto &Op : C->operands()) { 162 auto H = hashConstant(cast<Constant>(Op)); 163 Hashes.emplace_back(H); 164 } 165 return stable_hash_combine(Hashes); 166 } 167 case Value::BlockAddressVal: { 168 const BlockAddress *BA = cast<BlockAddress>(C); 169 auto H = hashGlobalValue(BA->getFunction()); 170 Hashes.emplace_back(H); 171 return stable_hash_combine(Hashes); 172 } 173 case Value::DSOLocalEquivalentVal: { 174 const auto *Equiv = cast<DSOLocalEquivalent>(C); 175 auto H = hashGlobalValue(Equiv->getGlobalValue()); 176 Hashes.emplace_back(H); 177 return stable_hash_combine(Hashes); 178 } 179 default: 180 // Skip other types of constants for simplicity. 181 return stable_hash_combine(Hashes); 182 } 183 } 184 185 stable_hash hashValue(Value *V) { 186 // Check constant and return its hash. 187 Constant *C = dyn_cast<Constant>(V); 188 if (C) 189 return hashConstant(C); 190 191 // Hash argument number. 192 SmallVector<stable_hash> Hashes; 193 if (Argument *Arg = dyn_cast<Argument>(V)) 194 Hashes.emplace_back(Arg->getArgNo()); 195 196 // Get an index (an insertion order) for the non-constant value. 197 auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size()); 198 Hashes.emplace_back(It->second); 199 200 return stable_hash_combine(Hashes); 201 } 202 203 stable_hash hashOperand(Value *Operand) { 204 SmallVector<stable_hash> Hashes; 205 Hashes.emplace_back(hashType(Operand->getType())); 206 Hashes.emplace_back(hashValue(Operand)); 207 return stable_hash_combine(Hashes); 208 } 209 210 stable_hash hashInstruction(const Instruction &Inst) { 211 SmallVector<stable_hash> Hashes; 212 Hashes.emplace_back(Inst.getOpcode()); 213 214 if (!DetailedHash) 215 return stable_hash_combine(Hashes); 216 217 Hashes.emplace_back(hashType(Inst.getType())); 218 219 // Handle additional properties of specific instructions that cause 220 // semantic differences in the IR. 221 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) 222 Hashes.emplace_back(ComparisonInstruction->getPredicate()); 223 224 unsigned InstIdx = 0; 225 if (IndexInstruction) { 226 InstIdx = IndexInstruction->size(); 227 IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst)); 228 } 229 230 for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { 231 auto OpndHash = hashOperand(Op); 232 if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { 233 assert(IndexOperandHashMap); 234 IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash); 235 } else 236 Hashes.emplace_back(OpndHash); 237 } 238 239 return stable_hash_combine(Hashes); 240 } 241 242 // A function hash is calculated by considering only the number of arguments 243 // and whether a function is varargs, the order of basic blocks (given by the 244 // successors of each basic block in depth first order), and the order of 245 // opcodes of each instruction within each of these basic blocks. This mirrors 246 // the strategy FunctionComparator::compare() uses to compare functions by 247 // walking the BBs in depth first order and comparing each instruction in 248 // sequence. Because this hash currently does not look at the operands, it is 249 // insensitive to things such as the target of calls and the constants used in 250 // the function, which makes it useful when possibly merging functions which 251 // are the same modulo constants and call targets. 252 // 253 // Note that different users of StructuralHash will want different behavior 254 // out of it (i.e., MergeFunctions will want something different from PM 255 // expensive checks for pass modification status). When modifying this 256 // function, most changes should be gated behind an option and enabled 257 // selectively. 258 void update(const Function &F) { 259 // Declarations don't affect analyses. 260 if (F.isDeclaration()) 261 return; 262 263 SmallVector<stable_hash> Hashes; 264 Hashes.emplace_back(Hash); 265 Hashes.emplace_back(FunctionHeaderHash); 266 267 Hashes.emplace_back(F.isVarArg()); 268 Hashes.emplace_back(F.arg_size()); 269 270 SmallVector<const BasicBlock *, 8> BBs; 271 SmallPtrSet<const BasicBlock *, 16> VisitedBBs; 272 273 // Walk the blocks in the same order as 274 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the 275 // function "structure." (BB and opcode sequence) 276 BBs.push_back(&F.getEntryBlock()); 277 VisitedBBs.insert(BBs[0]); 278 while (!BBs.empty()) { 279 const BasicBlock *BB = BBs.pop_back_val(); 280 281 Hashes.emplace_back(BlockHeaderHash); 282 for (auto &Inst : *BB) 283 Hashes.emplace_back(hashInstruction(Inst)); 284 285 for (const BasicBlock *Succ : successors(BB)) 286 if (VisitedBBs.insert(Succ).second) 287 BBs.push_back(Succ); 288 } 289 290 // Update the combined hash in place. 291 Hash = stable_hash_combine(Hashes); 292 } 293 294 void update(const GlobalVariable &GV) { 295 // Declarations and used/compiler.used don't affect analyses. 296 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, 297 // we ignore anything with the `.llvm` prefix 298 if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) 299 return; 300 SmallVector<stable_hash> Hashes; 301 Hashes.emplace_back(Hash); 302 Hashes.emplace_back(GlobalHeaderHash); 303 Hashes.emplace_back(GV.getValueType()->getTypeID()); 304 305 // Update the combined hash in place. 306 Hash = stable_hash_combine(Hashes); 307 } 308 309 void update(const Module &M) { 310 for (const GlobalVariable &GV : M.globals()) 311 update(GV); 312 for (const Function &F : M) 313 update(F); 314 } 315 316 uint64_t getHash() const { return Hash; } 317 318 std::unique_ptr<IndexInstrMap> getIndexInstrMap() { 319 return std::move(IndexInstruction); 320 } 321 322 std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() { 323 return std::move(IndexOperandHashMap); 324 } 325 }; 326 327 } // namespace 328 329 stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { 330 StructuralHashImpl H(DetailedHash); 331 H.update(F); 332 return H.getHash(); 333 } 334 335 stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { 336 return StructuralHashImpl::hashGlobalVariable(GVar); 337 } 338 339 stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { 340 StructuralHashImpl H(DetailedHash); 341 H.update(M); 342 return H.getHash(); 343 } 344 345 FunctionHashInfo 346 llvm::StructuralHashWithDifferences(const Function &F, 347 IgnoreOperandFunc IgnoreOp) { 348 StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); 349 H.update(F); 350 return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), 351 H.getIndexPairOpndHashMap()); 352 } 353