//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im, unsigned cl)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true), CallLatency(cl) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
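  // Illustrative example (hypothetical model): if a unit PSub declares PSuper
  // as its "Super" resource, the cycles consumed on PSub are also accumulated
  // here under PSuper's mask. The discounting loop below then subtracts
  // A.second.size() - SuperResources[A.first] rather than the full amount,
  // which matches what ExpandProcResource() emits.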
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ReleaseAtCycles = [2, 2, 3];
  //  }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2-cycle latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI, unsigned CallLatency,
                              bool IsCall) {
  if (IsCall) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency.
    ID.MaxLatency = CallLatency;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume the MaxLatency set for
  // calls.
  ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
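  // For example, an MCInst with operands (reg, imm, reg) and two explicit
  // definitions in MCDesc passes this check: the immediate in between is
  // simply skipped while counting register operands.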
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
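  // Note on layout: ID.Writes is populated as [explicit defs][implicit defs]
  // [optional def, if any][variadic defs], and is shrunk at the end of this
  // function to the number of entries actually filled in.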
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register or constant register
  // operands. The first NumExplicitDefs register operands are expected to be
  // register definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }
    if (MRI.isConstant(Op.getReg())) {
      CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
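  // For example, with two explicit uses and three implicit uses, the first
  // implicit use gets UseIndex 2; that index is what ReadAdvance entries in
  // the scheduling model are resolved against.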
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    if (MRI.isConstant(Read.RegisterID))
      continue;
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

hash_code hashMCOperand(const MCOperand &MCO) {
  hash_code TypeHash = hash_combine(MCO.isReg(), MCO.isImm(), MCO.isSFPImm(),
                                    MCO.isDFPImm(), MCO.isExpr(), MCO.isInst());
  if (MCO.isReg())
    return hash_combine(TypeHash, MCO.getReg());

  return TypeHash;
}

hash_code hashMCInst(const MCInst &MCI) {
  hash_code InstructionHash = hash_combine(MCI.getOpcode(), MCI.getFlags());
  for (unsigned I = 0; I < MCI.getNumOperands(); ++I) {
    InstructionHash =
        hash_combine(InstructionHash, hashMCOperand(MCI.getOperand(I)));
  }
  return InstructionHash;
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // into CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<unsigned> InstrBuilder::getVariantSchedClassID(const MCInst &MCI,
                                                        unsigned SchedClassID) {
  const MCSchedModel &SM = STI.getSchedModel();
  unsigned CPUID = SM.getProcessorID();
  while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
    SchedClassID =
        STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

  if (!SchedClassID) {
    return make_error<InstructionError<MCInst>>(
        "unable to resolve scheduling class for write variant.", MCI);
  }

  return SchedClassID;
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to resolve variant scheduling classes.
  if (IsVariant) {
    Expected<unsigned> VariantSchedClassIDOrErr =
        getVariantSchedClassID(MCI, SchedClassID);
    if (!VariantSchedClassIDOrErr) {
      return VariantSchedClassIDOrErr.takeError();
    }

    SchedClassID = *VariantSchedClassIDOrErr;
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence", MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  bool IsCall = MCIA->isCall(MCI);
  if (IsCall && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of " << CallLatency << "cy.\n";
    FirstCallInst = false;
  }

  if (MCIA->isReturn(MCI) && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, SCDesc, STI, CallLatency, IsCall);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
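  // Non-variant, non-variadic descriptors are keyed by (opcode, sched class)
  // and marked recyclable; variant or variadic instructions are instead keyed
  // by a hash of the full MCInst, since their descriptor depends on the
  // specific operands.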
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  assert(
      !VariantDescriptors.contains(VDKey) &&
      "Expected VariantDescriptors to not already have a value for this key.");
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using the SchedClassID from Instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  Expected<unsigned> VariantSchedClassIDOrErr =
      getVariantSchedClassID(MCI, SchedClassID);
  if (!VariantSchedClassIDOrErr) {
    return VariantSchedClassIDOrErr.takeError();
  }

  SchedClassID = *VariantSchedClassIDOrErr;

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  auto It = VariantDescriptors.find(VDKey);
  if (It != VariantDescriptors.end())
    return *It->second;

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means that dependencies are broken for all
      // explicit input register operands.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? MCRegister(WD.RegisterID)
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg or a write
    // to a constant register.
    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm