xref: /llvm-project/bolt/lib/Core/HashUtilities.cpp (revision eeb987f6f35aa614ed7a555be33d67c5cb1be230)
13e3a926bSspupyrev //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
23e3a926bSspupyrev //
33e3a926bSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43e3a926bSspupyrev // See https://llvm.org/LICENSE.txt for license information.
53e3a926bSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63e3a926bSspupyrev //
73e3a926bSspupyrev //===----------------------------------------------------------------------===//
83e3a926bSspupyrev //
93e3a926bSspupyrev // Computation of hash values over BinaryFunction and BinaryBasicBlock.
103e3a926bSspupyrev //
113e3a926bSspupyrev //===----------------------------------------------------------------------===//
123e3a926bSspupyrev 
133e3a926bSspupyrev #include "bolt/Core/HashUtilities.h"
143e3a926bSspupyrev #include "bolt/Core/BinaryContext.h"
15131eb305SShaw Young #include "bolt/Utils/NameResolver.h"
166fcb91b2SAmir Ayupov #include "llvm/MC/MCInstPrinter.h"
173e3a926bSspupyrev 
183e3a926bSspupyrev namespace llvm {
193e3a926bSspupyrev namespace bolt {
203e3a926bSspupyrev 
213e3a926bSspupyrev std::string hashInteger(uint64_t Value) {
223e3a926bSspupyrev   std::string HashString;
233e3a926bSspupyrev   if (Value == 0)
243e3a926bSspupyrev     HashString.push_back(0);
253e3a926bSspupyrev 
263e3a926bSspupyrev   while (Value) {
273e3a926bSspupyrev     uint8_t LSB = Value & 0xff;
283e3a926bSspupyrev     HashString.push_back(LSB);
293e3a926bSspupyrev     Value >>= 8;
303e3a926bSspupyrev   }
313e3a926bSspupyrev 
323e3a926bSspupyrev   return HashString;
333e3a926bSspupyrev }
343e3a926bSspupyrev 
353e3a926bSspupyrev std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
363e3a926bSspupyrev   std::string HashString;
373e3a926bSspupyrev 
383e3a926bSspupyrev   // Ignore function references.
393e3a926bSspupyrev   if (BC.getFunctionForSymbol(&Symbol))
403e3a926bSspupyrev     return HashString;
413e3a926bSspupyrev 
423e3a926bSspupyrev   llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
433e3a926bSspupyrev   if (!ErrorOrValue)
443e3a926bSspupyrev     return HashString;
453e3a926bSspupyrev 
463e3a926bSspupyrev   // Ignore jump table references.
473e3a926bSspupyrev   if (BC.getJumpTableContainingAddress(*ErrorOrValue))
483e3a926bSspupyrev     return HashString;
493e3a926bSspupyrev 
503e3a926bSspupyrev   return HashString.append(hashInteger(*ErrorOrValue));
513e3a926bSspupyrev }
523e3a926bSspupyrev 
533e3a926bSspupyrev std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
543e3a926bSspupyrev   switch (Expr.getKind()) {
553e3a926bSspupyrev   case MCExpr::Constant:
563e3a926bSspupyrev     return hashInteger(cast<MCConstantExpr>(Expr).getValue());
573e3a926bSspupyrev   case MCExpr::SymbolRef:
583e3a926bSspupyrev     return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
593e3a926bSspupyrev   case MCExpr::Unary: {
603e3a926bSspupyrev     const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
613e3a926bSspupyrev     return hashInteger(UnaryExpr.getOpcode())
623e3a926bSspupyrev         .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
633e3a926bSspupyrev   }
643e3a926bSspupyrev   case MCExpr::Binary: {
653e3a926bSspupyrev     const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
663e3a926bSspupyrev     return hashExpr(BC, *BinaryExpr.getLHS())
673e3a926bSspupyrev         .append(hashInteger(BinaryExpr.getOpcode()))
683e3a926bSspupyrev         .append(hashExpr(BC, *BinaryExpr.getRHS()));
693e3a926bSspupyrev   }
703e3a926bSspupyrev   case MCExpr::Target:
713e3a926bSspupyrev     return std::string();
723e3a926bSspupyrev   }
733e3a926bSspupyrev 
743e3a926bSspupyrev   llvm_unreachable("invalid expression kind");
753e3a926bSspupyrev }
763e3a926bSspupyrev 
773e3a926bSspupyrev std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
783e3a926bSspupyrev   if (Operand.isImm())
793e3a926bSspupyrev     return hashInteger(Operand.getImm());
803e3a926bSspupyrev   if (Operand.isReg())
813e3a926bSspupyrev     return hashInteger(Operand.getReg());
823e3a926bSspupyrev   if (Operand.isExpr())
833e3a926bSspupyrev     return hashExpr(BC, *Operand.getExpr());
843e3a926bSspupyrev 
853e3a926bSspupyrev   return std::string();
863e3a926bSspupyrev }
873e3a926bSspupyrev 
883e3a926bSspupyrev std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
893e3a926bSspupyrev                       OperandHashFuncTy OperandHashFunc) {
903e3a926bSspupyrev   const bool IsX86 = BC.isX86();
913e3a926bSspupyrev 
923e3a926bSspupyrev   // The hash is computed by creating a string of all instruction opcodes and
933e3a926bSspupyrev   // possibly their operands and then hashing that string with std::hash.
943e3a926bSspupyrev   std::string HashString;
953e3a926bSspupyrev 
963e3a926bSspupyrev   for (const MCInst &Inst : BB) {
973e3a926bSspupyrev     if (BC.MIB->isPseudo(Inst))
983e3a926bSspupyrev       continue;
993e3a926bSspupyrev 
1003e3a926bSspupyrev     unsigned Opcode = Inst.getOpcode();
1013e3a926bSspupyrev 
1023e3a926bSspupyrev     // Ignore unconditional jumps since we check CFG consistency by processing
1033e3a926bSspupyrev     // basic blocks in order and do not rely on branches to be in-sync with
1043e3a926bSspupyrev     // CFG. Note that we still use condition code of conditional jumps.
1053e3a926bSspupyrev     if (BC.MIB->isUnconditionalBranch(Inst))
1063e3a926bSspupyrev       continue;
1073e3a926bSspupyrev 
1083e3a926bSspupyrev     if (IsX86 && BC.MIB->isConditionalBranch(Inst))
1093e3a926bSspupyrev       Opcode = BC.MIB->getShortBranchOpcode(Opcode);
1103e3a926bSspupyrev 
1116fcb91b2SAmir Ayupov     if (Opcode == 0) {
1123e3a926bSspupyrev       HashString.push_back(0);
1136fcb91b2SAmir Ayupov     } else {
1146fcb91b2SAmir Ayupov       StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
1156fcb91b2SAmir Ayupov       HashString.append(OpcodeName.str());
1163e3a926bSspupyrev     }
1173e3a926bSspupyrev 
1183e3a926bSspupyrev     for (const MCOperand &Op : MCPlus::primeOperands(Inst))
1193e3a926bSspupyrev       HashString.append(OperandHashFunc(Op));
1203e3a926bSspupyrev   }
1213e3a926bSspupyrev   return HashString;
1223e3a926bSspupyrev }
1233e3a926bSspupyrev 
1241256ef27Sspupyrev /// A "loose" hash of a basic block to use with the stale profile matching. The
1251256ef27Sspupyrev /// computed value will be the same for blocks with minor changes (such as
1261256ef27Sspupyrev /// reordering of instructions or using different operands) but may result in
1271256ef27Sspupyrev /// collisions that need to be resolved by a stronger hashing.
1281256ef27Sspupyrev std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
1291256ef27Sspupyrev   // The hash is computed by creating a string of all lexicographically ordered
1301256ef27Sspupyrev   // instruction opcodes, which is then hashed with std::hash.
1311256ef27Sspupyrev   std::set<std::string> Opcodes;
1321256ef27Sspupyrev   for (const MCInst &Inst : BB) {
13342da84fdSspupyrev     // Skip pseudo instructions and nops.
13442da84fdSspupyrev     if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
1351256ef27Sspupyrev       continue;
1361256ef27Sspupyrev 
1371256ef27Sspupyrev     // Ignore unconditional jumps, as they can be added / removed as a result
1381256ef27Sspupyrev     // of basic block reordering.
1391256ef27Sspupyrev     if (BC.MIB->isUnconditionalBranch(Inst))
1401256ef27Sspupyrev       continue;
1411256ef27Sspupyrev 
1421256ef27Sspupyrev     // Do not distinguish different types of conditional jumps.
1431256ef27Sspupyrev     if (BC.MIB->isConditionalBranch(Inst)) {
1441256ef27Sspupyrev       Opcodes.insert("JMP");
1451256ef27Sspupyrev       continue;
1461256ef27Sspupyrev     }
1471256ef27Sspupyrev 
148*eeb987f6SSergei Barannikov     std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first;
149eab5d337SKazu Hirata     llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
1501256ef27Sspupyrev     Opcodes.insert(Mnemonic);
1511256ef27Sspupyrev   }
1521256ef27Sspupyrev 
1531256ef27Sspupyrev   std::string HashString;
1541256ef27Sspupyrev   for (const std::string &Opcode : Opcodes)
1551256ef27Sspupyrev     HashString.append(Opcode);
1561256ef27Sspupyrev   return HashString;
1571256ef27Sspupyrev }
1581256ef27Sspupyrev 
159131eb305SShaw Young /// An even looser hash level relative to $ hashBlockLoose to use with stale
160131eb305SShaw Young /// profile matching, composed of the names of a block's called functions in
161131eb305SShaw Young /// lexicographic order.
162131eb305SShaw Young std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
163131eb305SShaw Young   // The hash is computed by creating a string of all lexicographically ordered
164131eb305SShaw Young   // called function names.
165131eb305SShaw Young   std::vector<std::string> FunctionNames;
166131eb305SShaw Young   for (const MCInst &Instr : BB) {
167131eb305SShaw Young     // Skip non-call instructions.
168131eb305SShaw Young     if (!BC.MIB->isCall(Instr))
169131eb305SShaw Young       continue;
170131eb305SShaw Young     const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
171131eb305SShaw Young     if (!CallSymbol)
172131eb305SShaw Young       continue;
173131eb305SShaw Young     FunctionNames.push_back(std::string(CallSymbol->getName()));
174131eb305SShaw Young   }
175131eb305SShaw Young   std::sort(FunctionNames.begin(), FunctionNames.end());
176131eb305SShaw Young   std::string HashString;
177131eb305SShaw Young   for (const std::string &FunctionName : FunctionNames)
178131eb305SShaw Young     HashString.append(FunctionName);
179131eb305SShaw Young 
180131eb305SShaw Young   return HashString;
181131eb305SShaw Young }
182131eb305SShaw Young 
183131eb305SShaw Young /// The same as the $hashBlockCalls function, but for profiled functions.
184131eb305SShaw Young std::string
185131eb305SShaw Young hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>
186131eb305SShaw Young                    &IdToYamlFunction,
187131eb305SShaw Young                const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
188131eb305SShaw Young   std::vector<std::string> FunctionNames;
189131eb305SShaw Young   for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
190131eb305SShaw Young     auto It = IdToYamlFunction.find(CallSiteInfo.DestId);
191131eb305SShaw Young     if (It == IdToYamlFunction.end())
192131eb305SShaw Young       continue;
193131eb305SShaw Young     StringRef Name = NameResolver::dropNumNames(It->second->Name);
194131eb305SShaw Young     FunctionNames.push_back(std::string(Name));
195131eb305SShaw Young   }
196131eb305SShaw Young   std::sort(FunctionNames.begin(), FunctionNames.end());
197131eb305SShaw Young   std::string HashString;
198131eb305SShaw Young   for (const std::string &FunctionName : FunctionNames)
199131eb305SShaw Young     HashString.append(FunctionName);
200131eb305SShaw Young 
201131eb305SShaw Young   return HashString;
202131eb305SShaw Young }
203131eb305SShaw Young 
2043e3a926bSspupyrev } // namespace bolt
2053e3a926bSspupyrev } // namespace llvm
206