13e3a926bSspupyrev //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===// 23e3a926bSspupyrev // 33e3a926bSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 43e3a926bSspupyrev // See https://llvm.org/LICENSE.txt for license information. 53e3a926bSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 63e3a926bSspupyrev // 73e3a926bSspupyrev //===----------------------------------------------------------------------===// 83e3a926bSspupyrev // 93e3a926bSspupyrev // Computation of hash values over BinaryFunction and BinaryBasicBlock. 103e3a926bSspupyrev // 113e3a926bSspupyrev //===----------------------------------------------------------------------===// 123e3a926bSspupyrev 133e3a926bSspupyrev #include "bolt/Core/HashUtilities.h" 143e3a926bSspupyrev #include "bolt/Core/BinaryContext.h" 15131eb305SShaw Young #include "bolt/Utils/NameResolver.h" 166fcb91b2SAmir Ayupov #include "llvm/MC/MCInstPrinter.h" 173e3a926bSspupyrev 183e3a926bSspupyrev namespace llvm { 193e3a926bSspupyrev namespace bolt { 203e3a926bSspupyrev 213e3a926bSspupyrev std::string hashInteger(uint64_t Value) { 223e3a926bSspupyrev std::string HashString; 233e3a926bSspupyrev if (Value == 0) 243e3a926bSspupyrev HashString.push_back(0); 253e3a926bSspupyrev 263e3a926bSspupyrev while (Value) { 273e3a926bSspupyrev uint8_t LSB = Value & 0xff; 283e3a926bSspupyrev HashString.push_back(LSB); 293e3a926bSspupyrev Value >>= 8; 303e3a926bSspupyrev } 313e3a926bSspupyrev 323e3a926bSspupyrev return HashString; 333e3a926bSspupyrev } 343e3a926bSspupyrev 353e3a926bSspupyrev std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 363e3a926bSspupyrev std::string HashString; 373e3a926bSspupyrev 383e3a926bSspupyrev // Ignore function references. 393e3a926bSspupyrev if (BC.getFunctionForSymbol(&Symbol)) 403e3a926bSspupyrev return HashString; 413e3a926bSspupyrev 423e3a926bSspupyrev llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 433e3a926bSspupyrev if (!ErrorOrValue) 443e3a926bSspupyrev return HashString; 453e3a926bSspupyrev 463e3a926bSspupyrev // Ignore jump table references. 473e3a926bSspupyrev if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 483e3a926bSspupyrev return HashString; 493e3a926bSspupyrev 503e3a926bSspupyrev return HashString.append(hashInteger(*ErrorOrValue)); 513e3a926bSspupyrev } 523e3a926bSspupyrev 533e3a926bSspupyrev std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 543e3a926bSspupyrev switch (Expr.getKind()) { 553e3a926bSspupyrev case MCExpr::Constant: 563e3a926bSspupyrev return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 573e3a926bSspupyrev case MCExpr::SymbolRef: 583e3a926bSspupyrev return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 593e3a926bSspupyrev case MCExpr::Unary: { 603e3a926bSspupyrev const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 613e3a926bSspupyrev return hashInteger(UnaryExpr.getOpcode()) 623e3a926bSspupyrev .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 633e3a926bSspupyrev } 643e3a926bSspupyrev case MCExpr::Binary: { 653e3a926bSspupyrev const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 663e3a926bSspupyrev return hashExpr(BC, *BinaryExpr.getLHS()) 673e3a926bSspupyrev .append(hashInteger(BinaryExpr.getOpcode())) 683e3a926bSspupyrev .append(hashExpr(BC, *BinaryExpr.getRHS())); 693e3a926bSspupyrev } 703e3a926bSspupyrev case MCExpr::Target: 713e3a926bSspupyrev return std::string(); 723e3a926bSspupyrev } 733e3a926bSspupyrev 743e3a926bSspupyrev llvm_unreachable("invalid expression kind"); 753e3a926bSspupyrev } 763e3a926bSspupyrev 773e3a926bSspupyrev std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 783e3a926bSspupyrev if (Operand.isImm()) 793e3a926bSspupyrev return hashInteger(Operand.getImm()); 803e3a926bSspupyrev if (Operand.isReg()) 813e3a926bSspupyrev return hashInteger(Operand.getReg()); 823e3a926bSspupyrev if (Operand.isExpr()) 833e3a926bSspupyrev return hashExpr(BC, *Operand.getExpr()); 843e3a926bSspupyrev 853e3a926bSspupyrev return std::string(); 863e3a926bSspupyrev } 873e3a926bSspupyrev 883e3a926bSspupyrev std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB, 893e3a926bSspupyrev OperandHashFuncTy OperandHashFunc) { 903e3a926bSspupyrev const bool IsX86 = BC.isX86(); 913e3a926bSspupyrev 923e3a926bSspupyrev // The hash is computed by creating a string of all instruction opcodes and 933e3a926bSspupyrev // possibly their operands and then hashing that string with std::hash. 943e3a926bSspupyrev std::string HashString; 953e3a926bSspupyrev 963e3a926bSspupyrev for (const MCInst &Inst : BB) { 973e3a926bSspupyrev if (BC.MIB->isPseudo(Inst)) 983e3a926bSspupyrev continue; 993e3a926bSspupyrev 1003e3a926bSspupyrev unsigned Opcode = Inst.getOpcode(); 1013e3a926bSspupyrev 1023e3a926bSspupyrev // Ignore unconditional jumps since we check CFG consistency by processing 1033e3a926bSspupyrev // basic blocks in order and do not rely on branches to be in-sync with 1043e3a926bSspupyrev // CFG. Note that we still use condition code of conditional jumps. 1053e3a926bSspupyrev if (BC.MIB->isUnconditionalBranch(Inst)) 1063e3a926bSspupyrev continue; 1073e3a926bSspupyrev 1083e3a926bSspupyrev if (IsX86 && BC.MIB->isConditionalBranch(Inst)) 1093e3a926bSspupyrev Opcode = BC.MIB->getShortBranchOpcode(Opcode); 1103e3a926bSspupyrev 1116fcb91b2SAmir Ayupov if (Opcode == 0) { 1123e3a926bSspupyrev HashString.push_back(0); 1136fcb91b2SAmir Ayupov } else { 1146fcb91b2SAmir Ayupov StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode); 1156fcb91b2SAmir Ayupov HashString.append(OpcodeName.str()); 1163e3a926bSspupyrev } 1173e3a926bSspupyrev 1183e3a926bSspupyrev for (const MCOperand &Op : MCPlus::primeOperands(Inst)) 1193e3a926bSspupyrev HashString.append(OperandHashFunc(Op)); 1203e3a926bSspupyrev } 1213e3a926bSspupyrev return HashString; 1223e3a926bSspupyrev } 1233e3a926bSspupyrev 1241256ef27Sspupyrev /// A "loose" hash of a basic block to use with the stale profile matching. The 1251256ef27Sspupyrev /// computed value will be the same for blocks with minor changes (such as 1261256ef27Sspupyrev /// reordering of instructions or using different operands) but may result in 1271256ef27Sspupyrev /// collisions that need to be resolved by a stronger hashing. 1281256ef27Sspupyrev std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { 1291256ef27Sspupyrev // The hash is computed by creating a string of all lexicographically ordered 1301256ef27Sspupyrev // instruction opcodes, which is then hashed with std::hash. 1311256ef27Sspupyrev std::set<std::string> Opcodes; 1321256ef27Sspupyrev for (const MCInst &Inst : BB) { 13342da84fdSspupyrev // Skip pseudo instructions and nops. 13442da84fdSspupyrev if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst)) 1351256ef27Sspupyrev continue; 1361256ef27Sspupyrev 1371256ef27Sspupyrev // Ignore unconditional jumps, as they can be added / removed as a result 1381256ef27Sspupyrev // of basic block reordering. 1391256ef27Sspupyrev if (BC.MIB->isUnconditionalBranch(Inst)) 1401256ef27Sspupyrev continue; 1411256ef27Sspupyrev 1421256ef27Sspupyrev // Do not distinguish different types of conditional jumps. 1431256ef27Sspupyrev if (BC.MIB->isConditionalBranch(Inst)) { 1441256ef27Sspupyrev Opcodes.insert("JMP"); 1451256ef27Sspupyrev continue; 1461256ef27Sspupyrev } 1471256ef27Sspupyrev 148*eeb987f6SSergei Barannikov std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first; 149eab5d337SKazu Hirata llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); }); 1501256ef27Sspupyrev Opcodes.insert(Mnemonic); 1511256ef27Sspupyrev } 1521256ef27Sspupyrev 1531256ef27Sspupyrev std::string HashString; 1541256ef27Sspupyrev for (const std::string &Opcode : Opcodes) 1551256ef27Sspupyrev HashString.append(Opcode); 1561256ef27Sspupyrev return HashString; 1571256ef27Sspupyrev } 1581256ef27Sspupyrev 159131eb305SShaw Young /// An even looser hash level relative to $ hashBlockLoose to use with stale 160131eb305SShaw Young /// profile matching, composed of the names of a block's called functions in 161131eb305SShaw Young /// lexicographic order. 162131eb305SShaw Young std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) { 163131eb305SShaw Young // The hash is computed by creating a string of all lexicographically ordered 164131eb305SShaw Young // called function names. 165131eb305SShaw Young std::vector<std::string> FunctionNames; 166131eb305SShaw Young for (const MCInst &Instr : BB) { 167131eb305SShaw Young // Skip non-call instructions. 168131eb305SShaw Young if (!BC.MIB->isCall(Instr)) 169131eb305SShaw Young continue; 170131eb305SShaw Young const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr); 171131eb305SShaw Young if (!CallSymbol) 172131eb305SShaw Young continue; 173131eb305SShaw Young FunctionNames.push_back(std::string(CallSymbol->getName())); 174131eb305SShaw Young } 175131eb305SShaw Young std::sort(FunctionNames.begin(), FunctionNames.end()); 176131eb305SShaw Young std::string HashString; 177131eb305SShaw Young for (const std::string &FunctionName : FunctionNames) 178131eb305SShaw Young HashString.append(FunctionName); 179131eb305SShaw Young 180131eb305SShaw Young return HashString; 181131eb305SShaw Young } 182131eb305SShaw Young 183131eb305SShaw Young /// The same as the $hashBlockCalls function, but for profiled functions. 184131eb305SShaw Young std::string 185131eb305SShaw Young hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *> 186131eb305SShaw Young &IdToYamlFunction, 187131eb305SShaw Young const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { 188131eb305SShaw Young std::vector<std::string> FunctionNames; 189131eb305SShaw Young for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) { 190131eb305SShaw Young auto It = IdToYamlFunction.find(CallSiteInfo.DestId); 191131eb305SShaw Young if (It == IdToYamlFunction.end()) 192131eb305SShaw Young continue; 193131eb305SShaw Young StringRef Name = NameResolver::dropNumNames(It->second->Name); 194131eb305SShaw Young FunctionNames.push_back(std::string(Name)); 195131eb305SShaw Young } 196131eb305SShaw Young std::sort(FunctionNames.begin(), FunctionNames.end()); 197131eb305SShaw Young std::string HashString; 198131eb305SShaw Young for (const std::string &FunctionName : FunctionNames) 199131eb305SShaw Young HashString.append(FunctionName); 200131eb305SShaw Young 201131eb305SShaw Young return HashString; 202131eb305SShaw Young } 203131eb305SShaw Young 2043e3a926bSspupyrev } // namespace bolt 2053e3a926bSspupyrev } // namespace llvm 206