1 //===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Computation of hash values over BinaryFunction and BinaryBasicBlock. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/HashUtilities.h" 14 #include "bolt/Core/BinaryContext.h" 15 #include "bolt/Utils/NameResolver.h" 16 #include "llvm/MC/MCInstPrinter.h" 17 18 namespace llvm { 19 namespace bolt { 20 21 std::string hashInteger(uint64_t Value) { 22 std::string HashString; 23 if (Value == 0) 24 HashString.push_back(0); 25 26 while (Value) { 27 uint8_t LSB = Value & 0xff; 28 HashString.push_back(LSB); 29 Value >>= 8; 30 } 31 32 return HashString; 33 } 34 35 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 36 std::string HashString; 37 38 // Ignore function references. 39 if (BC.getFunctionForSymbol(&Symbol)) 40 return HashString; 41 42 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 43 if (!ErrorOrValue) 44 return HashString; 45 46 // Ignore jump table references. 47 if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 48 return HashString; 49 50 return HashString.append(hashInteger(*ErrorOrValue)); 51 } 52 53 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 54 switch (Expr.getKind()) { 55 case MCExpr::Constant: 56 return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 57 case MCExpr::SymbolRef: 58 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 59 case MCExpr::Unary: { 60 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 61 return hashInteger(UnaryExpr.getOpcode()) 62 .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 63 } 64 case MCExpr::Binary: { 65 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 66 return hashExpr(BC, *BinaryExpr.getLHS()) 67 .append(hashInteger(BinaryExpr.getOpcode())) 68 .append(hashExpr(BC, *BinaryExpr.getRHS())); 69 } 70 case MCExpr::Target: 71 return std::string(); 72 } 73 74 llvm_unreachable("invalid expression kind"); 75 } 76 77 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 78 if (Operand.isImm()) 79 return hashInteger(Operand.getImm()); 80 if (Operand.isReg()) 81 return hashInteger(Operand.getReg()); 82 if (Operand.isExpr()) 83 return hashExpr(BC, *Operand.getExpr()); 84 85 return std::string(); 86 } 87 88 std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB, 89 OperandHashFuncTy OperandHashFunc) { 90 const bool IsX86 = BC.isX86(); 91 92 // The hash is computed by creating a string of all instruction opcodes and 93 // possibly their operands and then hashing that string with std::hash. 94 std::string HashString; 95 96 for (const MCInst &Inst : BB) { 97 if (BC.MIB->isPseudo(Inst)) 98 continue; 99 100 unsigned Opcode = Inst.getOpcode(); 101 102 // Ignore unconditional jumps since we check CFG consistency by processing 103 // basic blocks in order and do not rely on branches to be in-sync with 104 // CFG. Note that we still use condition code of conditional jumps. 105 if (BC.MIB->isUnconditionalBranch(Inst)) 106 continue; 107 108 if (IsX86 && BC.MIB->isConditionalBranch(Inst)) 109 Opcode = BC.MIB->getShortBranchOpcode(Opcode); 110 111 if (Opcode == 0) { 112 HashString.push_back(0); 113 } else { 114 StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode); 115 HashString.append(OpcodeName.str()); 116 } 117 118 for (const MCOperand &Op : MCPlus::primeOperands(Inst)) 119 HashString.append(OperandHashFunc(Op)); 120 } 121 return HashString; 122 } 123 124 /// A "loose" hash of a basic block to use with the stale profile matching. The 125 /// computed value will be the same for blocks with minor changes (such as 126 /// reordering of instructions or using different operands) but may result in 127 /// collisions that need to be resolved by a stronger hashing. 128 std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { 129 // The hash is computed by creating a string of all lexicographically ordered 130 // instruction opcodes, which is then hashed with std::hash. 131 std::set<std::string> Opcodes; 132 for (const MCInst &Inst : BB) { 133 // Skip pseudo instructions and nops. 134 if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst)) 135 continue; 136 137 // Ignore unconditional jumps, as they can be added / removed as a result 138 // of basic block reordering. 139 if (BC.MIB->isUnconditionalBranch(Inst)) 140 continue; 141 142 // Do not distinguish different types of conditional jumps. 143 if (BC.MIB->isConditionalBranch(Inst)) { 144 Opcodes.insert("JMP"); 145 continue; 146 } 147 148 std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first; 149 llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); }); 150 Opcodes.insert(Mnemonic); 151 } 152 153 std::string HashString; 154 for (const std::string &Opcode : Opcodes) 155 HashString.append(Opcode); 156 return HashString; 157 } 158 159 /// An even looser hash level relative to $ hashBlockLoose to use with stale 160 /// profile matching, composed of the names of a block's called functions in 161 /// lexicographic order. 162 std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) { 163 // The hash is computed by creating a string of all lexicographically ordered 164 // called function names. 165 std::vector<std::string> FunctionNames; 166 for (const MCInst &Instr : BB) { 167 // Skip non-call instructions. 168 if (!BC.MIB->isCall(Instr)) 169 continue; 170 const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr); 171 if (!CallSymbol) 172 continue; 173 FunctionNames.push_back(std::string(CallSymbol->getName())); 174 } 175 std::sort(FunctionNames.begin(), FunctionNames.end()); 176 std::string HashString; 177 for (const std::string &FunctionName : FunctionNames) 178 HashString.append(FunctionName); 179 180 return HashString; 181 } 182 183 /// The same as the $hashBlockCalls function, but for profiled functions. 184 std::string 185 hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *> 186 &IdToYamlFunction, 187 const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { 188 std::vector<std::string> FunctionNames; 189 for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) { 190 auto It = IdToYamlFunction.find(CallSiteInfo.DestId); 191 if (It == IdToYamlFunction.end()) 192 continue; 193 StringRef Name = NameResolver::dropNumNames(It->second->Name); 194 FunctionNames.push_back(std::string(Name)); 195 } 196 std::sort(FunctionNames.begin(), FunctionNames.end()); 197 std::string HashString; 198 for (const std::string &FunctionName : FunctionNames) 199 HashString.append(FunctionName); 200 201 return HashString; 202 } 203 204 } // namespace bolt 205 } // namespace llvm 206