106c3fb27SDimitry Andric //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric
9e8d8bef9SDimitry Andric #include "llvm/IR/StructuralHash.h"
105f757f3fSDimitry Andric #include "llvm/ADT/Hashing.h"
11e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
1206c3fb27SDimitry Andric #include "llvm/IR/GlobalVariable.h"
135f757f3fSDimitry Andric #include "llvm/IR/InstrTypes.h"
145f757f3fSDimitry Andric #include "llvm/IR/Instructions.h"
155f757f3fSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
16e8d8bef9SDimitry Andric #include "llvm/IR/Module.h"
17e8d8bef9SDimitry Andric
18e8d8bef9SDimitry Andric using namespace llvm;
19e8d8bef9SDimitry Andric
2006c3fb27SDimitry Andric namespace {
21e8d8bef9SDimitry Andric
22e8d8bef9SDimitry Andric // Basic hashing mechanism to detect structural change to the IR, used to verify
235f757f3fSDimitry Andric // pass return status consistency with actual change. In addition to being used
245f757f3fSDimitry Andric // by the MergeFunctions pass.
25e8d8bef9SDimitry Andric
2606c3fb27SDimitry Andric class StructuralHashImpl {
275f757f3fSDimitry Andric uint64_t Hash;
28e8d8bef9SDimitry Andric
hash(uint64_t V)295f757f3fSDimitry Andric void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
305f757f3fSDimitry Andric
315f757f3fSDimitry Andric // This will produce different values on 32-bit and 64-bit systens as
325f757f3fSDimitry Andric // hash_combine returns a size_t. However, this is only used for
335f757f3fSDimitry Andric // detailed hashing which, in-tree, only needs to distinguish between
345f757f3fSDimitry Andric // differences in functions.
hashArbitaryType(const T & V)355f757f3fSDimitry Andric template <typename T> void hashArbitaryType(const T &V) {
365f757f3fSDimitry Andric hash(hash_combine(V));
375f757f3fSDimitry Andric }
385f757f3fSDimitry Andric
hashType(Type * ValueType)395f757f3fSDimitry Andric void hashType(Type *ValueType) {
405f757f3fSDimitry Andric hash(ValueType->getTypeID());
415f757f3fSDimitry Andric if (ValueType->isIntegerTy())
425f757f3fSDimitry Andric hash(ValueType->getIntegerBitWidth());
435f757f3fSDimitry Andric }
44e8d8bef9SDimitry Andric
45e8d8bef9SDimitry Andric public:
StructuralHashImpl()4606c3fb27SDimitry Andric StructuralHashImpl() : Hash(4) {}
47e8d8bef9SDimitry Andric
updateOperand(Value * Operand)485f757f3fSDimitry Andric void updateOperand(Value *Operand) {
495f757f3fSDimitry Andric hashType(Operand->getType());
505f757f3fSDimitry Andric
515f757f3fSDimitry Andric // The cases enumerated below are not exhaustive and are only aimed to
525f757f3fSDimitry Andric // get decent coverage over the function.
535f757f3fSDimitry Andric if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
545f757f3fSDimitry Andric hashArbitaryType(ConstInt->getValue());
555f757f3fSDimitry Andric } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
565f757f3fSDimitry Andric hashArbitaryType(ConstFP->getValue());
575f757f3fSDimitry Andric } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
585f757f3fSDimitry Andric hash(Arg->getArgNo());
595f757f3fSDimitry Andric } else if (Function *Func = dyn_cast<Function>(Operand)) {
605f757f3fSDimitry Andric // Hashing the name will be deterministic as LLVM's hashing infrastructure
615f757f3fSDimitry Andric // has explicit support for hashing strings and will not simply hash
625f757f3fSDimitry Andric // the pointer.
635f757f3fSDimitry Andric hashArbitaryType(Func->getName());
645f757f3fSDimitry Andric }
655f757f3fSDimitry Andric }
665f757f3fSDimitry Andric
updateInstruction(const Instruction & Inst,bool DetailedHash)675f757f3fSDimitry Andric void updateInstruction(const Instruction &Inst, bool DetailedHash) {
685f757f3fSDimitry Andric hash(Inst.getOpcode());
695f757f3fSDimitry Andric
705f757f3fSDimitry Andric if (!DetailedHash)
715f757f3fSDimitry Andric return;
725f757f3fSDimitry Andric
735f757f3fSDimitry Andric hashType(Inst.getType());
745f757f3fSDimitry Andric
755f757f3fSDimitry Andric // Handle additional properties of specific instructions that cause
765f757f3fSDimitry Andric // semantic differences in the IR.
775f757f3fSDimitry Andric if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
785f757f3fSDimitry Andric hash(ComparisonInstruction->getPredicate());
795f757f3fSDimitry Andric
805f757f3fSDimitry Andric for (const auto &Op : Inst.operands())
815f757f3fSDimitry Andric updateOperand(Op);
825f757f3fSDimitry Andric }
835f757f3fSDimitry Andric
845f757f3fSDimitry Andric // A function hash is calculated by considering only the number of arguments
855f757f3fSDimitry Andric // and whether a function is varargs, the order of basic blocks (given by the
865f757f3fSDimitry Andric // successors of each basic block in depth first order), and the order of
875f757f3fSDimitry Andric // opcodes of each instruction within each of these basic blocks. This mirrors
885f757f3fSDimitry Andric // the strategy FunctionComparator::compare() uses to compare functions by
895f757f3fSDimitry Andric // walking the BBs in depth first order and comparing each instruction in
905f757f3fSDimitry Andric // sequence. Because this hash currently does not look at the operands, it is
915f757f3fSDimitry Andric // insensitive to things such as the target of calls and the constants used in
925f757f3fSDimitry Andric // the function, which makes it useful when possibly merging functions which
935f757f3fSDimitry Andric // are the same modulo constants and call targets.
945f757f3fSDimitry Andric //
955f757f3fSDimitry Andric // Note that different users of StructuralHash will want different behavior
965f757f3fSDimitry Andric // out of it (i.e., MergeFunctions will want something different from PM
975f757f3fSDimitry Andric // expensive checks for pass modification status). When modifying this
985f757f3fSDimitry Andric // function, most changes should be gated behind an option and enabled
995f757f3fSDimitry Andric // selectively.
update(const Function & F,bool DetailedHash)1005f757f3fSDimitry Andric void update(const Function &F, bool DetailedHash) {
10106c3fb27SDimitry Andric // Declarations don't affect analyses.
10206c3fb27SDimitry Andric if (F.isDeclaration())
103e8d8bef9SDimitry Andric return;
104e8d8bef9SDimitry Andric
1055f757f3fSDimitry Andric hash(0x62642d6b6b2d6b72); // Function header
10606c3fb27SDimitry Andric
10706c3fb27SDimitry Andric hash(F.isVarArg());
10806c3fb27SDimitry Andric hash(F.arg_size());
109e8d8bef9SDimitry Andric
110e8d8bef9SDimitry Andric SmallVector<const BasicBlock *, 8> BBs;
111e8d8bef9SDimitry Andric SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
112e8d8bef9SDimitry Andric
1135f757f3fSDimitry Andric // Walk the blocks in the same order as
1145f757f3fSDimitry Andric // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
1155f757f3fSDimitry Andric // function "structure." (BB and opcode sequence)
116e8d8bef9SDimitry Andric BBs.push_back(&F.getEntryBlock());
117e8d8bef9SDimitry Andric VisitedBBs.insert(BBs[0]);
118e8d8bef9SDimitry Andric while (!BBs.empty()) {
119e8d8bef9SDimitry Andric const BasicBlock *BB = BBs.pop_back_val();
1205f757f3fSDimitry Andric
1215f757f3fSDimitry Andric // This random value acts as a block header, as otherwise the partition of
1225f757f3fSDimitry Andric // opcodes into BBs wouldn't affect the hash, only the order of the
1235f757f3fSDimitry Andric // opcodes
1245f757f3fSDimitry Andric hash(45798);
125e8d8bef9SDimitry Andric for (auto &Inst : *BB)
1265f757f3fSDimitry Andric updateInstruction(Inst, DetailedHash);
127e8d8bef9SDimitry Andric
128*7a6dacacSDimitry Andric for (const BasicBlock *Succ : successors(BB))
129*7a6dacacSDimitry Andric if (VisitedBBs.insert(Succ).second)
130*7a6dacacSDimitry Andric BBs.push_back(Succ);
131e8d8bef9SDimitry Andric }
132e8d8bef9SDimitry Andric }
133e8d8bef9SDimitry Andric
update(const GlobalVariable & GV)13406c3fb27SDimitry Andric void update(const GlobalVariable &GV) {
13506c3fb27SDimitry Andric // Declarations and used/compiler.used don't affect analyses.
13606c3fb27SDimitry Andric // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
13706c3fb27SDimitry Andric // we ignore anything with the `.llvm` prefix
13806c3fb27SDimitry Andric if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
13906c3fb27SDimitry Andric return;
14006c3fb27SDimitry Andric hash(23456); // Global header
14106c3fb27SDimitry Andric hash(GV.getValueType()->getTypeID());
14206c3fb27SDimitry Andric }
14306c3fb27SDimitry Andric
update(const Module & M,bool DetailedHash)1445f757f3fSDimitry Andric void update(const Module &M, bool DetailedHash) {
14506c3fb27SDimitry Andric for (const GlobalVariable &GV : M.globals())
14606c3fb27SDimitry Andric update(GV);
147e8d8bef9SDimitry Andric for (const Function &F : M)
1485f757f3fSDimitry Andric update(F, DetailedHash);
149e8d8bef9SDimitry Andric }
150e8d8bef9SDimitry Andric
getHash() const151e8d8bef9SDimitry Andric uint64_t getHash() const { return Hash; }
152e8d8bef9SDimitry Andric };
153e8d8bef9SDimitry Andric
15406c3fb27SDimitry Andric } // namespace
155e8d8bef9SDimitry Andric
StructuralHash(const Function & F,bool DetailedHash)1565f757f3fSDimitry Andric IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
15706c3fb27SDimitry Andric StructuralHashImpl H;
1585f757f3fSDimitry Andric H.update(F, DetailedHash);
159e8d8bef9SDimitry Andric return H.getHash();
160e8d8bef9SDimitry Andric }
161e8d8bef9SDimitry Andric
StructuralHash(const Module & M,bool DetailedHash)1625f757f3fSDimitry Andric IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
16306c3fb27SDimitry Andric StructuralHashImpl H;
1645f757f3fSDimitry Andric H.update(M, DetailedHash);
165e8d8bef9SDimitry Andric return H.getHash();
166e8d8bef9SDimitry Andric }
167