xref: /llvm-project/llvm/lib/IR/StructuralHash.cpp (revision 1941c5180b91d792200d5e868d45c96e99bda35e)
1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/StructuralHash.h"
10 #include "llvm/ADT/Hashing.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/GlobalVariable.h"
13 #include "llvm/IR/InstrTypes.h"
14 #include "llvm/IR/Instructions.h"
15 #include "llvm/IR/IntrinsicInst.h"
16 #include "llvm/IR/Module.h"
17 
18 using namespace llvm;
19 
20 namespace {
21 
22 // Basic hashing mechanism to detect structural change to the IR, used to verify
23 // pass return status consistency with actual change. In addition to being used
24 // by the MergeFunctions pass.
25 
26 class StructuralHashImpl {
27   stable_hash Hash = 4;
28 
29   bool DetailedHash;
30 
31   // This random value acts as a block header, as otherwise the partition of
32   // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
33   static constexpr stable_hash BlockHeaderHash = 45798;
34   static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
35   static constexpr stable_hash GlobalHeaderHash = 23456;
36 
37   // This will produce different values on 32-bit and 64-bit systens as
38   // hash_combine returns a size_t. However, this is only used for
39   // detailed hashing which, in-tree, only needs to distinguish between
40   // differences in functions.
41   // TODO: This is not stable.
42   template <typename T> stable_hash hashArbitaryType(const T &V) {
43     return hash_combine(V);
44   }
45 
46   stable_hash hashType(Type *ValueType) {
47     SmallVector<stable_hash> Hashes;
48     Hashes.emplace_back(ValueType->getTypeID());
49     if (ValueType->isIntegerTy())
50       Hashes.emplace_back(ValueType->getIntegerBitWidth());
51     return stable_hash_combine(Hashes);
52   }
53 
54 public:
55   StructuralHashImpl() = delete;
56   explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
57 
58   stable_hash hashConstant(Constant *C) {
59     SmallVector<stable_hash> Hashes;
60     // TODO: hashArbitaryType() is not stable.
61     if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
62       Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
63     } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
64       Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
65     } else if (Function *Func = dyn_cast<Function>(C)) {
66       // Hashing the name will be deterministic as LLVM's hashing infrastructure
67       // has explicit support for hashing strings and will not simply hash
68       // the pointer.
69       Hashes.emplace_back(hashArbitaryType(Func->getName()));
70     }
71 
72     return stable_hash_combine(Hashes);
73   }
74 
75   stable_hash hashValue(Value *V) {
76     // Check constant and return its hash.
77     Constant *C = dyn_cast<Constant>(V);
78     if (C)
79       return hashConstant(C);
80 
81     // Hash argument number.
82     SmallVector<stable_hash> Hashes;
83     if (Argument *Arg = dyn_cast<Argument>(V))
84       Hashes.emplace_back(Arg->getArgNo());
85 
86     return stable_hash_combine(Hashes);
87   }
88 
89   stable_hash hashOperand(Value *Operand) {
90     SmallVector<stable_hash> Hashes;
91     Hashes.emplace_back(hashType(Operand->getType()));
92     Hashes.emplace_back(hashValue(Operand));
93     return stable_hash_combine(Hashes);
94   }
95 
96   stable_hash hashInstruction(const Instruction &Inst) {
97     SmallVector<stable_hash> Hashes;
98     Hashes.emplace_back(Inst.getOpcode());
99 
100     if (!DetailedHash)
101       return stable_hash_combine(Hashes);
102 
103     Hashes.emplace_back(hashType(Inst.getType()));
104 
105     // Handle additional properties of specific instructions that cause
106     // semantic differences in the IR.
107     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
108       Hashes.emplace_back(ComparisonInstruction->getPredicate());
109 
110     for (const auto &Op : Inst.operands())
111       Hashes.emplace_back(hashOperand(Op));
112 
113     return stable_hash_combine(Hashes);
114   }
115 
116   // A function hash is calculated by considering only the number of arguments
117   // and whether a function is varargs, the order of basic blocks (given by the
118   // successors of each basic block in depth first order), and the order of
119   // opcodes of each instruction within each of these basic blocks. This mirrors
120   // the strategy FunctionComparator::compare() uses to compare functions by
121   // walking the BBs in depth first order and comparing each instruction in
122   // sequence. Because this hash currently does not look at the operands, it is
123   // insensitive to things such as the target of calls and the constants used in
124   // the function, which makes it useful when possibly merging functions which
125   // are the same modulo constants and call targets.
126   //
127   // Note that different users of StructuralHash will want different behavior
128   // out of it (i.e., MergeFunctions will want something different from PM
129   // expensive checks for pass modification status). When modifying this
130   // function, most changes should be gated behind an option and enabled
131   // selectively.
132   void update(const Function &F) {
133     // Declarations don't affect analyses.
134     if (F.isDeclaration())
135       return;
136 
137     SmallVector<stable_hash> Hashes;
138     Hashes.emplace_back(Hash);
139     Hashes.emplace_back(FunctionHeaderHash);
140 
141     Hashes.emplace_back(F.isVarArg());
142     Hashes.emplace_back(F.arg_size());
143 
144     SmallVector<const BasicBlock *, 8> BBs;
145     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
146 
147     // Walk the blocks in the same order as
148     // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
149     // function "structure." (BB and opcode sequence)
150     BBs.push_back(&F.getEntryBlock());
151     VisitedBBs.insert(BBs[0]);
152     while (!BBs.empty()) {
153       const BasicBlock *BB = BBs.pop_back_val();
154 
155       Hashes.emplace_back(BlockHeaderHash);
156       for (auto &Inst : *BB)
157         Hashes.emplace_back(hashInstruction(Inst));
158 
159       for (const BasicBlock *Succ : successors(BB))
160         if (VisitedBBs.insert(Succ).second)
161           BBs.push_back(Succ);
162     }
163 
164     // Update the combined hash in place.
165     Hash = stable_hash_combine(Hashes);
166   }
167 
168   void update(const GlobalVariable &GV) {
169     // Declarations and used/compiler.used don't affect analyses.
170     // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
171     // we ignore anything with the `.llvm` prefix
172     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
173       return;
174     SmallVector<stable_hash> Hashes;
175     Hashes.emplace_back(Hash);
176     Hashes.emplace_back(GlobalHeaderHash);
177     Hashes.emplace_back(GV.getValueType()->getTypeID());
178 
179     // Update the combined hash in place.
180     Hash = stable_hash_combine(Hashes);
181   }
182 
183   void update(const Module &M) {
184     for (const GlobalVariable &GV : M.globals())
185       update(GV);
186     for (const Function &F : M)
187       update(F);
188   }
189 
190   uint64_t getHash() const { return Hash; }
191 };
192 
193 } // namespace
194 
195 stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
196   StructuralHashImpl H(DetailedHash);
197   H.update(F);
198   return H.getHash();
199 }
200 
201 stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
202   StructuralHashImpl H(DetailedHash);
203   H.update(M);
204   return H.getHash();
205 }
206