xref: /freebsd-src/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/StructuralHash.h"
10 #include "llvm/ADT/Hashing.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/GlobalVariable.h"
13 #include "llvm/IR/InstrTypes.h"
14 #include "llvm/IR/Instructions.h"
15 #include "llvm/IR/IntrinsicInst.h"
16 #include "llvm/IR/Module.h"
17 
18 using namespace llvm;
19 
20 namespace {
21 
22 // Basic hashing mechanism to detect structural change to the IR, used to verify
23 // pass return status consistency with actual change. In addition to being used
24 // by the MergeFunctions pass.
25 
26 class StructuralHashImpl {
27   uint64_t Hash;
28 
29   void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
30 
31   // This will produce different values on 32-bit and 64-bit systens as
32   // hash_combine returns a size_t. However, this is only used for
33   // detailed hashing which, in-tree, only needs to distinguish between
34   // differences in functions.
35   template <typename T> void hashArbitaryType(const T &V) {
36     hash(hash_combine(V));
37   }
38 
39   void hashType(Type *ValueType) {
40     hash(ValueType->getTypeID());
41     if (ValueType->isIntegerTy())
42       hash(ValueType->getIntegerBitWidth());
43   }
44 
45 public:
46   StructuralHashImpl() : Hash(4) {}
47 
48   void updateOperand(Value *Operand) {
49     hashType(Operand->getType());
50 
51     // The cases enumerated below are not exhaustive and are only aimed to
52     // get decent coverage over the function.
53     if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
54       hashArbitaryType(ConstInt->getValue());
55     } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
56       hashArbitaryType(ConstFP->getValue());
57     } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
58       hash(Arg->getArgNo());
59     } else if (Function *Func = dyn_cast<Function>(Operand)) {
60       // Hashing the name will be deterministic as LLVM's hashing infrastructure
61       // has explicit support for hashing strings and will not simply hash
62       // the pointer.
63       hashArbitaryType(Func->getName());
64     }
65   }
66 
67   void updateInstruction(const Instruction &Inst, bool DetailedHash) {
68     hash(Inst.getOpcode());
69 
70     if (!DetailedHash)
71       return;
72 
73     hashType(Inst.getType());
74 
75     // Handle additional properties of specific instructions that cause
76     // semantic differences in the IR.
77     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
78       hash(ComparisonInstruction->getPredicate());
79 
80     for (const auto &Op : Inst.operands())
81       updateOperand(Op);
82   }
83 
84   // A function hash is calculated by considering only the number of arguments
85   // and whether a function is varargs, the order of basic blocks (given by the
86   // successors of each basic block in depth first order), and the order of
87   // opcodes of each instruction within each of these basic blocks. This mirrors
88   // the strategy FunctionComparator::compare() uses to compare functions by
89   // walking the BBs in depth first order and comparing each instruction in
90   // sequence. Because this hash currently does not look at the operands, it is
91   // insensitive to things such as the target of calls and the constants used in
92   // the function, which makes it useful when possibly merging functions which
93   // are the same modulo constants and call targets.
94   //
95   // Note that different users of StructuralHash will want different behavior
96   // out of it (i.e., MergeFunctions will want something different from PM
97   // expensive checks for pass modification status). When modifying this
98   // function, most changes should be gated behind an option and enabled
99   // selectively.
100   void update(const Function &F, bool DetailedHash) {
101     // Declarations don't affect analyses.
102     if (F.isDeclaration())
103       return;
104 
105     hash(0x62642d6b6b2d6b72); // Function header
106 
107     hash(F.isVarArg());
108     hash(F.arg_size());
109 
110     SmallVector<const BasicBlock *, 8> BBs;
111     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
112 
113     // Walk the blocks in the same order as
114     // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
115     // function "structure." (BB and opcode sequence)
116     BBs.push_back(&F.getEntryBlock());
117     VisitedBBs.insert(BBs[0]);
118     while (!BBs.empty()) {
119       const BasicBlock *BB = BBs.pop_back_val();
120 
121       // This random value acts as a block header, as otherwise the partition of
122       // opcodes into BBs wouldn't affect the hash, only the order of the
123       // opcodes
124       hash(45798);
125       for (auto &Inst : *BB)
126         updateInstruction(Inst, DetailedHash);
127 
128       const Instruction *Term = BB->getTerminator();
129       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
130         if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
131           continue;
132         BBs.push_back(Term->getSuccessor(i));
133       }
134     }
135   }
136 
137   void update(const GlobalVariable &GV) {
138     // Declarations and used/compiler.used don't affect analyses.
139     // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
140     // we ignore anything with the `.llvm` prefix
141     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
142       return;
143     hash(23456); // Global header
144     hash(GV.getValueType()->getTypeID());
145   }
146 
147   void update(const Module &M, bool DetailedHash) {
148     for (const GlobalVariable &GV : M.globals())
149       update(GV);
150     for (const Function &F : M)
151       update(F, DetailedHash);
152   }
153 
154   uint64_t getHash() const { return Hash; }
155 };
156 
157 } // namespace
158 
159 IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
160   StructuralHashImpl H;
161   H.update(F, DetailedHash);
162   return H.getHash();
163 }
164 
165 IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
166   StructuralHashImpl H;
167   H.update(M, DetailedHash);
168   return H.getHash();
169 }
170