xref: /llvm-project/bolt/lib/Core/HashUtilities.cpp (revision eeb987f6f35aa614ed7a555be33d67c5cb1be230)
1 //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Computation of hash values over BinaryFunction and BinaryBasicBlock.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/HashUtilities.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Utils/NameResolver.h"
16 #include "llvm/MC/MCInstPrinter.h"
17 
18 namespace llvm {
19 namespace bolt {
20 
21 std::string hashInteger(uint64_t Value) {
22   std::string HashString;
23   if (Value == 0)
24     HashString.push_back(0);
25 
26   while (Value) {
27     uint8_t LSB = Value & 0xff;
28     HashString.push_back(LSB);
29     Value >>= 8;
30   }
31 
32   return HashString;
33 }
34 
35 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
36   std::string HashString;
37 
38   // Ignore function references.
39   if (BC.getFunctionForSymbol(&Symbol))
40     return HashString;
41 
42   llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
43   if (!ErrorOrValue)
44     return HashString;
45 
46   // Ignore jump table references.
47   if (BC.getJumpTableContainingAddress(*ErrorOrValue))
48     return HashString;
49 
50   return HashString.append(hashInteger(*ErrorOrValue));
51 }
52 
53 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
54   switch (Expr.getKind()) {
55   case MCExpr::Constant:
56     return hashInteger(cast<MCConstantExpr>(Expr).getValue());
57   case MCExpr::SymbolRef:
58     return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
59   case MCExpr::Unary: {
60     const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
61     return hashInteger(UnaryExpr.getOpcode())
62         .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
63   }
64   case MCExpr::Binary: {
65     const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
66     return hashExpr(BC, *BinaryExpr.getLHS())
67         .append(hashInteger(BinaryExpr.getOpcode()))
68         .append(hashExpr(BC, *BinaryExpr.getRHS()));
69   }
70   case MCExpr::Target:
71     return std::string();
72   }
73 
74   llvm_unreachable("invalid expression kind");
75 }
76 
77 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
78   if (Operand.isImm())
79     return hashInteger(Operand.getImm());
80   if (Operand.isReg())
81     return hashInteger(Operand.getReg());
82   if (Operand.isExpr())
83     return hashExpr(BC, *Operand.getExpr());
84 
85   return std::string();
86 }
87 
88 std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
89                       OperandHashFuncTy OperandHashFunc) {
90   const bool IsX86 = BC.isX86();
91 
92   // The hash is computed by creating a string of all instruction opcodes and
93   // possibly their operands and then hashing that string with std::hash.
94   std::string HashString;
95 
96   for (const MCInst &Inst : BB) {
97     if (BC.MIB->isPseudo(Inst))
98       continue;
99 
100     unsigned Opcode = Inst.getOpcode();
101 
102     // Ignore unconditional jumps since we check CFG consistency by processing
103     // basic blocks in order and do not rely on branches to be in-sync with
104     // CFG. Note that we still use condition code of conditional jumps.
105     if (BC.MIB->isUnconditionalBranch(Inst))
106       continue;
107 
108     if (IsX86 && BC.MIB->isConditionalBranch(Inst))
109       Opcode = BC.MIB->getShortBranchOpcode(Opcode);
110 
111     if (Opcode == 0) {
112       HashString.push_back(0);
113     } else {
114       StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
115       HashString.append(OpcodeName.str());
116     }
117 
118     for (const MCOperand &Op : MCPlus::primeOperands(Inst))
119       HashString.append(OperandHashFunc(Op));
120   }
121   return HashString;
122 }
123 
124 /// A "loose" hash of a basic block to use with the stale profile matching. The
125 /// computed value will be the same for blocks with minor changes (such as
126 /// reordering of instructions or using different operands) but may result in
127 /// collisions that need to be resolved by a stronger hashing.
128 std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
129   // The hash is computed by creating a string of all lexicographically ordered
130   // instruction opcodes, which is then hashed with std::hash.
131   std::set<std::string> Opcodes;
132   for (const MCInst &Inst : BB) {
133     // Skip pseudo instructions and nops.
134     if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
135       continue;
136 
137     // Ignore unconditional jumps, as they can be added / removed as a result
138     // of basic block reordering.
139     if (BC.MIB->isUnconditionalBranch(Inst))
140       continue;
141 
142     // Do not distinguish different types of conditional jumps.
143     if (BC.MIB->isConditionalBranch(Inst)) {
144       Opcodes.insert("JMP");
145       continue;
146     }
147 
148     std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first;
149     llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
150     Opcodes.insert(Mnemonic);
151   }
152 
153   std::string HashString;
154   for (const std::string &Opcode : Opcodes)
155     HashString.append(Opcode);
156   return HashString;
157 }
158 
159 /// An even looser hash level relative to $ hashBlockLoose to use with stale
160 /// profile matching, composed of the names of a block's called functions in
161 /// lexicographic order.
162 std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
163   // The hash is computed by creating a string of all lexicographically ordered
164   // called function names.
165   std::vector<std::string> FunctionNames;
166   for (const MCInst &Instr : BB) {
167     // Skip non-call instructions.
168     if (!BC.MIB->isCall(Instr))
169       continue;
170     const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
171     if (!CallSymbol)
172       continue;
173     FunctionNames.push_back(std::string(CallSymbol->getName()));
174   }
175   std::sort(FunctionNames.begin(), FunctionNames.end());
176   std::string HashString;
177   for (const std::string &FunctionName : FunctionNames)
178     HashString.append(FunctionName);
179 
180   return HashString;
181 }
182 
183 /// The same as the $hashBlockCalls function, but for profiled functions.
184 std::string
185 hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>
186                    &IdToYamlFunction,
187                const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
188   std::vector<std::string> FunctionNames;
189   for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
190     auto It = IdToYamlFunction.find(CallSiteInfo.DestId);
191     if (It == IdToYamlFunction.end())
192       continue;
193     StringRef Name = NameResolver::dropNumNames(It->second->Name);
194     FunctionNames.push_back(std::string(Name));
195   }
196   std::sort(FunctionNames.begin(), FunctionNames.end());
197   std::string HashString;
198   for (const std::string &FunctionName : FunctionNames)
199     HashString.append(FunctionName);
200 
201   return HashString;
202 }
203 
204 } // namespace bolt
205 } // namespace llvm
206