xref: /llvm-project/llvm/lib/IR/StructuralHash.cpp (revision 6f10b65297707c1e964d570421ab4559dc2928d4)
1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/StructuralHash.h"
10 #include "llvm/IR/Function.h"
11 #include "llvm/IR/GlobalVariable.h"
12 #include "llvm/IR/InstrTypes.h"
13 #include "llvm/IR/Instructions.h"
14 #include "llvm/IR/IntrinsicInst.h"
15 #include "llvm/IR/Module.h"
16 
17 using namespace llvm;
18 
19 namespace {
20 
21 // Basic hashing mechanism to detect structural change to the IR, used to verify
22 // pass return status consistency with actual change. In addition to being used
23 // by the MergeFunctions pass.
24 
25 class StructuralHashImpl {
26   stable_hash Hash = 4;
27 
28   bool DetailedHash;
29 
30   // This random value acts as a block header, as otherwise the partition of
31   // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
32   static constexpr stable_hash BlockHeaderHash = 45798;
33   static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
34   static constexpr stable_hash GlobalHeaderHash = 23456;
35 
36   /// IgnoreOp is a function that returns true if the operand should be ignored.
37   IgnoreOperandFunc IgnoreOp = nullptr;
38   /// A mapping from instruction indices to instruction pointers.
39   /// The index represents the position of an instruction based on the order in
40   /// which it is first encountered.
41   std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
42   /// A mapping from pairs of instruction indices and operand indices
43   /// to the hashes of the operands.
44   std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
45 
46   /// Assign a unique ID to each Value in the order they are first seen.
47   DenseMap<const Value *, int> ValueToId;
48 
49   stable_hash hashType(Type *ValueType) {
50     SmallVector<stable_hash> Hashes;
51     Hashes.emplace_back(ValueType->getTypeID());
52     if (ValueType->isIntegerTy())
53       Hashes.emplace_back(ValueType->getIntegerBitWidth());
54     return stable_hash_combine(Hashes);
55   }
56 
57 public:
58   StructuralHashImpl() = delete;
59   explicit StructuralHashImpl(bool DetailedHash,
60                               IgnoreOperandFunc IgnoreOp = nullptr)
61       : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
62     if (IgnoreOp) {
63       IndexInstruction = std::make_unique<IndexInstrMap>();
64       IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
65     }
66   }
67 
68   stable_hash hashAPInt(const APInt &I) {
69     SmallVector<stable_hash> Hashes;
70     Hashes.emplace_back(I.getBitWidth());
71     auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
72     Hashes.append(RawVals.begin(), RawVals.end());
73     return stable_hash_combine(Hashes);
74   }
75 
76   stable_hash hashAPFloat(const APFloat &F) {
77     return hashAPInt(F.bitcastToAPInt());
78   }
79 
80   stable_hash hashGlobalValue(const GlobalValue *GV) {
81     if (!GV->hasName())
82       return 0;
83     return stable_hash_name(GV->getName());
84   }
85 
86   // Compute a hash for a Constant. This function is logically similar to
87   // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
88   // we're interested in computing a hash rather than comparing two Constants.
89   // Some of the logic is simplified, e.g, we don't expand GEPOperator.
90   stable_hash hashConstant(Constant *C) {
91     SmallVector<stable_hash> Hashes;
92 
93     Type *Ty = C->getType();
94     Hashes.emplace_back(hashType(Ty));
95 
96     if (C->isNullValue()) {
97       Hashes.emplace_back(static_cast<stable_hash>('N'));
98       return stable_hash_combine(Hashes);
99     }
100 
101     if (auto *G = dyn_cast<GlobalValue>(C)) {
102       Hashes.emplace_back(hashGlobalValue(G));
103       return stable_hash_combine(Hashes);
104     }
105 
106     if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
107       Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
108       return stable_hash_combine(Hashes);
109     }
110 
111     switch (C->getValueID()) {
112     case Value::ConstantIntVal: {
113       const APInt &Int = cast<ConstantInt>(C)->getValue();
114       Hashes.emplace_back(hashAPInt(Int));
115       return stable_hash_combine(Hashes);
116     }
117     case Value::ConstantFPVal: {
118       const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
119       Hashes.emplace_back(hashAPFloat(APF));
120       return stable_hash_combine(Hashes);
121     }
122     case Value::ConstantArrayVal:
123     case Value::ConstantStructVal:
124     case Value::ConstantVectorVal:
125     case Value::ConstantExprVal: {
126       for (const auto &Op : C->operands()) {
127         auto H = hashConstant(cast<Constant>(Op));
128         Hashes.emplace_back(H);
129       }
130       return stable_hash_combine(Hashes);
131     }
132     case Value::BlockAddressVal: {
133       const BlockAddress *BA = cast<BlockAddress>(C);
134       auto H = hashGlobalValue(BA->getFunction());
135       Hashes.emplace_back(H);
136       return stable_hash_combine(Hashes);
137     }
138     case Value::DSOLocalEquivalentVal: {
139       const auto *Equiv = cast<DSOLocalEquivalent>(C);
140       auto H = hashGlobalValue(Equiv->getGlobalValue());
141       Hashes.emplace_back(H);
142       return stable_hash_combine(Hashes);
143     }
144     default:
145       // Skip other types of constants for simplicity.
146       return stable_hash_combine(Hashes);
147     }
148   }
149 
150   stable_hash hashValue(Value *V) {
151     // Check constant and return its hash.
152     Constant *C = dyn_cast<Constant>(V);
153     if (C)
154       return hashConstant(C);
155 
156     // Hash argument number.
157     SmallVector<stable_hash> Hashes;
158     if (Argument *Arg = dyn_cast<Argument>(V))
159       Hashes.emplace_back(Arg->getArgNo());
160 
161     // Get an index (an insertion order) for the non-constant value.
162     auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
163     Hashes.emplace_back(It->second);
164 
165     return stable_hash_combine(Hashes);
166   }
167 
168   stable_hash hashOperand(Value *Operand) {
169     SmallVector<stable_hash> Hashes;
170     Hashes.emplace_back(hashType(Operand->getType()));
171     Hashes.emplace_back(hashValue(Operand));
172     return stable_hash_combine(Hashes);
173   }
174 
175   stable_hash hashInstruction(const Instruction &Inst) {
176     SmallVector<stable_hash> Hashes;
177     Hashes.emplace_back(Inst.getOpcode());
178 
179     if (!DetailedHash)
180       return stable_hash_combine(Hashes);
181 
182     Hashes.emplace_back(hashType(Inst.getType()));
183 
184     // Handle additional properties of specific instructions that cause
185     // semantic differences in the IR.
186     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
187       Hashes.emplace_back(ComparisonInstruction->getPredicate());
188 
189     unsigned InstIdx = 0;
190     if (IndexInstruction) {
191       InstIdx = IndexInstruction->size();
192       IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
193     }
194 
195     for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
196       auto OpndHash = hashOperand(Op);
197       if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
198         assert(IndexOperandHashMap);
199         IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
200       } else
201         Hashes.emplace_back(OpndHash);
202     }
203 
204     return stable_hash_combine(Hashes);
205   }
206 
207   // A function hash is calculated by considering only the number of arguments
208   // and whether a function is varargs, the order of basic blocks (given by the
209   // successors of each basic block in depth first order), and the order of
210   // opcodes of each instruction within each of these basic blocks. This mirrors
211   // the strategy FunctionComparator::compare() uses to compare functions by
212   // walking the BBs in depth first order and comparing each instruction in
213   // sequence. Because this hash currently does not look at the operands, it is
214   // insensitive to things such as the target of calls and the constants used in
215   // the function, which makes it useful when possibly merging functions which
216   // are the same modulo constants and call targets.
217   //
218   // Note that different users of StructuralHash will want different behavior
219   // out of it (i.e., MergeFunctions will want something different from PM
220   // expensive checks for pass modification status). When modifying this
221   // function, most changes should be gated behind an option and enabled
222   // selectively.
223   void update(const Function &F) {
224     // Declarations don't affect analyses.
225     if (F.isDeclaration())
226       return;
227 
228     SmallVector<stable_hash> Hashes;
229     Hashes.emplace_back(Hash);
230     Hashes.emplace_back(FunctionHeaderHash);
231 
232     Hashes.emplace_back(F.isVarArg());
233     Hashes.emplace_back(F.arg_size());
234 
235     SmallVector<const BasicBlock *, 8> BBs;
236     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
237 
238     // Walk the blocks in the same order as
239     // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
240     // function "structure." (BB and opcode sequence)
241     BBs.push_back(&F.getEntryBlock());
242     VisitedBBs.insert(BBs[0]);
243     while (!BBs.empty()) {
244       const BasicBlock *BB = BBs.pop_back_val();
245 
246       Hashes.emplace_back(BlockHeaderHash);
247       for (auto &Inst : *BB)
248         Hashes.emplace_back(hashInstruction(Inst));
249 
250       for (const BasicBlock *Succ : successors(BB))
251         if (VisitedBBs.insert(Succ).second)
252           BBs.push_back(Succ);
253     }
254 
255     // Update the combined hash in place.
256     Hash = stable_hash_combine(Hashes);
257   }
258 
259   void update(const GlobalVariable &GV) {
260     // Declarations and used/compiler.used don't affect analyses.
261     // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
262     // we ignore anything with the `.llvm` prefix
263     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
264       return;
265     SmallVector<stable_hash> Hashes;
266     Hashes.emplace_back(Hash);
267     Hashes.emplace_back(GlobalHeaderHash);
268     Hashes.emplace_back(GV.getValueType()->getTypeID());
269 
270     // Update the combined hash in place.
271     Hash = stable_hash_combine(Hashes);
272   }
273 
274   void update(const Module &M) {
275     for (const GlobalVariable &GV : M.globals())
276       update(GV);
277     for (const Function &F : M)
278       update(F);
279   }
280 
281   uint64_t getHash() const { return Hash; }
282 
283   std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
284     return std::move(IndexInstruction);
285   }
286 
287   std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
288     return std::move(IndexOperandHashMap);
289   }
290 };
291 
292 } // namespace
293 
294 stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
295   StructuralHashImpl H(DetailedHash);
296   H.update(F);
297   return H.getHash();
298 }
299 
300 stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
301   StructuralHashImpl H(DetailedHash);
302   H.update(M);
303   return H.getHash();
304 }
305 
306 FunctionHashInfo
307 llvm::StructuralHashWithDifferences(const Function &F,
308                                     IgnoreOperandFunc IgnoreOp) {
309   StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
310   H.update(F);
311   return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
312                           H.getIndexPairOpndHashMap());
313 }
314