xref: /llvm-project/llvm/lib/IR/StructuralHash.cpp (revision 4f41862c5a5241654a37ee994ed0074a815d3633)
1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/StructuralHash.h"
10 #include "llvm/IR/Function.h"
11 #include "llvm/IR/GlobalVariable.h"
12 #include "llvm/IR/InstrTypes.h"
13 #include "llvm/IR/Instructions.h"
14 #include "llvm/IR/IntrinsicInst.h"
15 #include "llvm/IR/Module.h"
16 
17 using namespace llvm;
18 
19 namespace {
20 
21 // Basic hashing mechanism to detect structural change to the IR, used to verify
22 // pass return status consistency with actual change. In addition to being used
23 // by the MergeFunctions pass.
24 
25 class StructuralHashImpl {
26   stable_hash Hash = 4;
27 
28   bool DetailedHash;
29 
30   // This random value acts as a block header, as otherwise the partition of
31   // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
32   static constexpr stable_hash BlockHeaderHash = 45798;
33   static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
34   static constexpr stable_hash GlobalHeaderHash = 23456;
35 
36   /// IgnoreOp is a function that returns true if the operand should be ignored.
37   IgnoreOperandFunc IgnoreOp = nullptr;
38   /// A mapping from instruction indices to instruction pointers.
39   /// The index represents the position of an instruction based on the order in
40   /// which it is first encountered.
41   std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
42   /// A mapping from pairs of instruction indices and operand indices
43   /// to the hashes of the operands.
44   std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
45 
46   /// Assign a unique ID to each Value in the order they are first seen.
47   DenseMap<const Value *, int> ValueToId;
48 
49   static stable_hash hashType(Type *ValueType) {
50     SmallVector<stable_hash> Hashes;
51     Hashes.emplace_back(ValueType->getTypeID());
52     if (ValueType->isIntegerTy())
53       Hashes.emplace_back(ValueType->getIntegerBitWidth());
54     return stable_hash_combine(Hashes);
55   }
56 
57 public:
58   StructuralHashImpl() = delete;
59   explicit StructuralHashImpl(bool DetailedHash,
60                               IgnoreOperandFunc IgnoreOp = nullptr)
61       : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
62     if (IgnoreOp) {
63       IndexInstruction = std::make_unique<IndexInstrMap>();
64       IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
65     }
66   }
67 
68   static stable_hash hashAPInt(const APInt &I) {
69     SmallVector<stable_hash> Hashes;
70     Hashes.emplace_back(I.getBitWidth());
71     auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
72     Hashes.append(RawVals.begin(), RawVals.end());
73     return stable_hash_combine(Hashes);
74   }
75 
76   static stable_hash hashAPFloat(const APFloat &F) {
77     return hashAPInt(F.bitcastToAPInt());
78   }
79 
80   static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
81     if (!GVar.hasInitializer())
82       return hashGlobalValue(&GVar);
83 
84     // Hash the contents of a string.
85     if (GVar.getName().starts_with(".str")) {
86       auto *C = GVar.getInitializer();
87       if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
88         if (Seq->isString())
89           return stable_hash_name(Seq->getAsString());
90     }
91 
92     // Hash structural contents of Objective-C metadata in specific sections.
93     // This can be extended to other metadata if needed.
94     static constexpr const char *SectionNames[] = {
95         "__cfstring",      "__cstring",      "__objc_classrefs",
96         "__objc_methname", "__objc_selrefs",
97     };
98     if (GVar.hasSection()) {
99       StringRef SectionName = GVar.getSection();
100       for (const char *Name : SectionNames)
101         if (SectionName.contains(Name))
102           return hashConstant(GVar.getInitializer());
103     }
104 
105     return hashGlobalValue(&GVar);
106   }
107 
108   static stable_hash hashGlobalValue(const GlobalValue *GV) {
109     if (!GV->hasName())
110       return 0;
111     return stable_hash_name(GV->getName());
112   }
113 
114   // Compute a hash for a Constant. This function is logically similar to
115   // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
116   // we're interested in computing a hash rather than comparing two Constants.
117   // Some of the logic is simplified, e.g, we don't expand GEPOperator.
118   static stable_hash hashConstant(const Constant *C) {
119     SmallVector<stable_hash> Hashes;
120 
121     Type *Ty = C->getType();
122     Hashes.emplace_back(hashType(Ty));
123 
124     if (C->isNullValue()) {
125       Hashes.emplace_back(static_cast<stable_hash>('N'));
126       return stable_hash_combine(Hashes);
127     }
128 
129     if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
130       Hashes.emplace_back(hashGlobalVariable(*GVar));
131       return stable_hash_combine(Hashes);
132     }
133 
134     if (auto *G = dyn_cast<GlobalValue>(C)) {
135       Hashes.emplace_back(hashGlobalValue(G));
136       return stable_hash_combine(Hashes);
137     }
138 
139     if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
140       if (Seq->isString()) {
141         Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
142         return stable_hash_combine(Hashes);
143       }
144     }
145 
146     switch (C->getValueID()) {
147     case Value::ConstantIntVal: {
148       const APInt &Int = cast<ConstantInt>(C)->getValue();
149       Hashes.emplace_back(hashAPInt(Int));
150       return stable_hash_combine(Hashes);
151     }
152     case Value::ConstantFPVal: {
153       const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
154       Hashes.emplace_back(hashAPFloat(APF));
155       return stable_hash_combine(Hashes);
156     }
157     case Value::ConstantArrayVal:
158     case Value::ConstantStructVal:
159     case Value::ConstantVectorVal:
160     case Value::ConstantExprVal: {
161       for (const auto &Op : C->operands()) {
162         auto H = hashConstant(cast<Constant>(Op));
163         Hashes.emplace_back(H);
164       }
165       return stable_hash_combine(Hashes);
166     }
167     case Value::BlockAddressVal: {
168       const BlockAddress *BA = cast<BlockAddress>(C);
169       auto H = hashGlobalValue(BA->getFunction());
170       Hashes.emplace_back(H);
171       return stable_hash_combine(Hashes);
172     }
173     case Value::DSOLocalEquivalentVal: {
174       const auto *Equiv = cast<DSOLocalEquivalent>(C);
175       auto H = hashGlobalValue(Equiv->getGlobalValue());
176       Hashes.emplace_back(H);
177       return stable_hash_combine(Hashes);
178     }
179     default:
180       // Skip other types of constants for simplicity.
181       return stable_hash_combine(Hashes);
182     }
183   }
184 
185   stable_hash hashValue(Value *V) {
186     // Check constant and return its hash.
187     Constant *C = dyn_cast<Constant>(V);
188     if (C)
189       return hashConstant(C);
190 
191     // Hash argument number.
192     SmallVector<stable_hash> Hashes;
193     if (Argument *Arg = dyn_cast<Argument>(V))
194       Hashes.emplace_back(Arg->getArgNo());
195 
196     // Get an index (an insertion order) for the non-constant value.
197     auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
198     Hashes.emplace_back(It->second);
199 
200     return stable_hash_combine(Hashes);
201   }
202 
203   stable_hash hashOperand(Value *Operand) {
204     SmallVector<stable_hash> Hashes;
205     Hashes.emplace_back(hashType(Operand->getType()));
206     Hashes.emplace_back(hashValue(Operand));
207     return stable_hash_combine(Hashes);
208   }
209 
210   stable_hash hashInstruction(const Instruction &Inst) {
211     SmallVector<stable_hash> Hashes;
212     Hashes.emplace_back(Inst.getOpcode());
213 
214     if (!DetailedHash)
215       return stable_hash_combine(Hashes);
216 
217     Hashes.emplace_back(hashType(Inst.getType()));
218 
219     // Handle additional properties of specific instructions that cause
220     // semantic differences in the IR.
221     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
222       Hashes.emplace_back(ComparisonInstruction->getPredicate());
223 
224     unsigned InstIdx = 0;
225     if (IndexInstruction) {
226       InstIdx = IndexInstruction->size();
227       IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
228     }
229 
230     for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
231       auto OpndHash = hashOperand(Op);
232       if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
233         assert(IndexOperandHashMap);
234         IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
235       } else
236         Hashes.emplace_back(OpndHash);
237     }
238 
239     return stable_hash_combine(Hashes);
240   }
241 
242   // A function hash is calculated by considering only the number of arguments
243   // and whether a function is varargs, the order of basic blocks (given by the
244   // successors of each basic block in depth first order), and the order of
245   // opcodes of each instruction within each of these basic blocks. This mirrors
246   // the strategy FunctionComparator::compare() uses to compare functions by
247   // walking the BBs in depth first order and comparing each instruction in
248   // sequence. Because this hash currently does not look at the operands, it is
249   // insensitive to things such as the target of calls and the constants used in
250   // the function, which makes it useful when possibly merging functions which
251   // are the same modulo constants and call targets.
252   //
253   // Note that different users of StructuralHash will want different behavior
254   // out of it (i.e., MergeFunctions will want something different from PM
255   // expensive checks for pass modification status). When modifying this
256   // function, most changes should be gated behind an option and enabled
257   // selectively.
258   void update(const Function &F) {
259     // Declarations don't affect analyses.
260     if (F.isDeclaration())
261       return;
262 
263     SmallVector<stable_hash> Hashes;
264     Hashes.emplace_back(Hash);
265     Hashes.emplace_back(FunctionHeaderHash);
266 
267     Hashes.emplace_back(F.isVarArg());
268     Hashes.emplace_back(F.arg_size());
269 
270     SmallVector<const BasicBlock *, 8> BBs;
271     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
272 
273     // Walk the blocks in the same order as
274     // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
275     // function "structure." (BB and opcode sequence)
276     BBs.push_back(&F.getEntryBlock());
277     VisitedBBs.insert(BBs[0]);
278     while (!BBs.empty()) {
279       const BasicBlock *BB = BBs.pop_back_val();
280 
281       Hashes.emplace_back(BlockHeaderHash);
282       for (auto &Inst : *BB)
283         Hashes.emplace_back(hashInstruction(Inst));
284 
285       for (const BasicBlock *Succ : successors(BB))
286         if (VisitedBBs.insert(Succ).second)
287           BBs.push_back(Succ);
288     }
289 
290     // Update the combined hash in place.
291     Hash = stable_hash_combine(Hashes);
292   }
293 
294   void update(const GlobalVariable &GV) {
295     // Declarations and used/compiler.used don't affect analyses.
296     // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
297     // we ignore anything with the `.llvm` prefix
298     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
299       return;
300     SmallVector<stable_hash> Hashes;
301     Hashes.emplace_back(Hash);
302     Hashes.emplace_back(GlobalHeaderHash);
303     Hashes.emplace_back(GV.getValueType()->getTypeID());
304 
305     // Update the combined hash in place.
306     Hash = stable_hash_combine(Hashes);
307   }
308 
309   void update(const Module &M) {
310     for (const GlobalVariable &GV : M.globals())
311       update(GV);
312     for (const Function &F : M)
313       update(F);
314   }
315 
316   uint64_t getHash() const { return Hash; }
317 
318   std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
319     return std::move(IndexInstruction);
320   }
321 
322   std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
323     return std::move(IndexOperandHashMap);
324   }
325 };
326 
327 } // namespace
328 
329 stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
330   StructuralHashImpl H(DetailedHash);
331   H.update(F);
332   return H.getHash();
333 }
334 
335 stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
336   return StructuralHashImpl::hashGlobalVariable(GVar);
337 }
338 
339 stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
340   StructuralHashImpl H(DetailedHash);
341   H.update(M);
342   return H.getHash();
343 }
344 
345 FunctionHashInfo
346 llvm::StructuralHashWithDifferences(const Function &F,
347                                     IgnoreOperandFunc IgnoreOp) {
348   StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
349   H.update(F);
350   return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
351                           H.getIndexPairOpndHashMap());
352 }
353