1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/IR/StructuralHash.h" 10 #include "llvm/ADT/Hashing.h" 11 #include "llvm/IR/Function.h" 12 #include "llvm/IR/GlobalVariable.h" 13 #include "llvm/IR/InstrTypes.h" 14 #include "llvm/IR/Instructions.h" 15 #include "llvm/IR/IntrinsicInst.h" 16 #include "llvm/IR/Module.h" 17 18 using namespace llvm; 19 20 namespace { 21 22 // Basic hashing mechanism to detect structural change to the IR, used to verify 23 // pass return status consistency with actual change. In addition to being used 24 // by the MergeFunctions pass. 25 26 class StructuralHashImpl { 27 stable_hash Hash = 4; 28 29 bool DetailedHash; 30 31 // This random value acts as a block header, as otherwise the partition of 32 // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. 33 static constexpr stable_hash BlockHeaderHash = 45798; 34 static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; 35 static constexpr stable_hash GlobalHeaderHash = 23456; 36 37 // This will produce different values on 32-bit and 64-bit systens as 38 // hash_combine returns a size_t. However, this is only used for 39 // detailed hashing which, in-tree, only needs to distinguish between 40 // differences in functions. 41 // TODO: This is not stable. 42 template <typename T> stable_hash hashArbitaryType(const T &V) { 43 return hash_combine(V); 44 } 45 46 stable_hash hashType(Type *ValueType) { 47 SmallVector<stable_hash> Hashes; 48 Hashes.emplace_back(ValueType->getTypeID()); 49 if (ValueType->isIntegerTy()) 50 Hashes.emplace_back(ValueType->getIntegerBitWidth()); 51 return stable_hash_combine(Hashes); 52 } 53 54 public: 55 StructuralHashImpl() = delete; 56 explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} 57 58 stable_hash hashConstant(Constant *C) { 59 SmallVector<stable_hash> Hashes; 60 // TODO: hashArbitaryType() is not stable. 61 if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) { 62 Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); 63 } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) { 64 Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); 65 } else if (Function *Func = dyn_cast<Function>(C)) { 66 // Hashing the name will be deterministic as LLVM's hashing infrastructure 67 // has explicit support for hashing strings and will not simply hash 68 // the pointer. 69 Hashes.emplace_back(hashArbitaryType(Func->getName())); 70 } 71 72 return stable_hash_combine(Hashes); 73 } 74 75 stable_hash hashValue(Value *V) { 76 // Check constant and return its hash. 77 Constant *C = dyn_cast<Constant>(V); 78 if (C) 79 return hashConstant(C); 80 81 // Hash argument number. 82 SmallVector<stable_hash> Hashes; 83 if (Argument *Arg = dyn_cast<Argument>(V)) 84 Hashes.emplace_back(Arg->getArgNo()); 85 86 return stable_hash_combine(Hashes); 87 } 88 89 stable_hash hashOperand(Value *Operand) { 90 SmallVector<stable_hash> Hashes; 91 Hashes.emplace_back(hashType(Operand->getType())); 92 Hashes.emplace_back(hashValue(Operand)); 93 return stable_hash_combine(Hashes); 94 } 95 96 stable_hash hashInstruction(const Instruction &Inst) { 97 SmallVector<stable_hash> Hashes; 98 Hashes.emplace_back(Inst.getOpcode()); 99 100 if (!DetailedHash) 101 return stable_hash_combine(Hashes); 102 103 Hashes.emplace_back(hashType(Inst.getType())); 104 105 // Handle additional properties of specific instructions that cause 106 // semantic differences in the IR. 107 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) 108 Hashes.emplace_back(ComparisonInstruction->getPredicate()); 109 110 for (const auto &Op : Inst.operands()) 111 Hashes.emplace_back(hashOperand(Op)); 112 113 return stable_hash_combine(Hashes); 114 } 115 116 // A function hash is calculated by considering only the number of arguments 117 // and whether a function is varargs, the order of basic blocks (given by the 118 // successors of each basic block in depth first order), and the order of 119 // opcodes of each instruction within each of these basic blocks. This mirrors 120 // the strategy FunctionComparator::compare() uses to compare functions by 121 // walking the BBs in depth first order and comparing each instruction in 122 // sequence. Because this hash currently does not look at the operands, it is 123 // insensitive to things such as the target of calls and the constants used in 124 // the function, which makes it useful when possibly merging functions which 125 // are the same modulo constants and call targets. 126 // 127 // Note that different users of StructuralHash will want different behavior 128 // out of it (i.e., MergeFunctions will want something different from PM 129 // expensive checks for pass modification status). When modifying this 130 // function, most changes should be gated behind an option and enabled 131 // selectively. 132 void update(const Function &F) { 133 // Declarations don't affect analyses. 134 if (F.isDeclaration()) 135 return; 136 137 SmallVector<stable_hash> Hashes; 138 Hashes.emplace_back(Hash); 139 Hashes.emplace_back(FunctionHeaderHash); 140 141 Hashes.emplace_back(F.isVarArg()); 142 Hashes.emplace_back(F.arg_size()); 143 144 SmallVector<const BasicBlock *, 8> BBs; 145 SmallPtrSet<const BasicBlock *, 16> VisitedBBs; 146 147 // Walk the blocks in the same order as 148 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the 149 // function "structure." (BB and opcode sequence) 150 BBs.push_back(&F.getEntryBlock()); 151 VisitedBBs.insert(BBs[0]); 152 while (!BBs.empty()) { 153 const BasicBlock *BB = BBs.pop_back_val(); 154 155 Hashes.emplace_back(BlockHeaderHash); 156 for (auto &Inst : *BB) 157 Hashes.emplace_back(hashInstruction(Inst)); 158 159 for (const BasicBlock *Succ : successors(BB)) 160 if (VisitedBBs.insert(Succ).second) 161 BBs.push_back(Succ); 162 } 163 164 // Update the combined hash in place. 165 Hash = stable_hash_combine(Hashes); 166 } 167 168 void update(const GlobalVariable &GV) { 169 // Declarations and used/compiler.used don't affect analyses. 170 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, 171 // we ignore anything with the `.llvm` prefix 172 if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) 173 return; 174 SmallVector<stable_hash> Hashes; 175 Hashes.emplace_back(Hash); 176 Hashes.emplace_back(GlobalHeaderHash); 177 Hashes.emplace_back(GV.getValueType()->getTypeID()); 178 179 // Update the combined hash in place. 180 Hash = stable_hash_combine(Hashes); 181 } 182 183 void update(const Module &M) { 184 for (const GlobalVariable &GV : M.globals()) 185 update(GV); 186 for (const Function &F : M) 187 update(F); 188 } 189 190 uint64_t getHash() const { return Hash; } 191 }; 192 193 } // namespace 194 195 stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { 196 StructuralHashImpl H(DetailedHash); 197 H.update(F); 198 return H.getHash(); 199 } 200 201 stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { 202 StructuralHashImpl H(DetailedHash); 203 H.update(M); 204 return H.getHash(); 205 } 206