xref: /llvm-project/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h (revision 6784202b6bb0930d2e4bf231e915c61e446b0585)
1 //===--------------------- ResourceManager.h --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// The classes here represent processor resource units and their management
11 /// strategy.  These classes are managed by the Scheduler.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H
16 #define LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H
17 
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/MC/MCSchedule.h"
21 #include "llvm/MCA/Instruction.h"
22 #include "llvm/MCA/Support.h"
23 
24 namespace llvm {
25 namespace mca {
26 
27 /// Used to notify the internal state of a processor resource.
28 ///
29 /// A processor resource is available if it is not reserved, and there are
30 /// available slots in the buffer.  A processor resource is unavailable if it
31 /// is either reserved, or the associated buffer is full. A processor resource
32 /// with a buffer size of -1 is always available if it is not reserved.
33 ///
34 /// Values of type ResourceStateEvent are returned by method
35 /// ResourceManager::canBeDispatched()
36 ///
37 /// The naming convention for resource state events is:
38 ///  * Event names start with prefix RS_
39 ///  * Prefix RS_ is followed by a string describing the actual resource state.
40 enum ResourceStateEvent {
41   RS_BUFFER_AVAILABLE,
42   RS_BUFFER_UNAVAILABLE,
43   RS_RESERVED
44 };
45 
46 /// Resource allocation strategy used by hardware scheduler resources.
47 class ResourceStrategy {
48   ResourceStrategy(const ResourceStrategy &) = delete;
49   ResourceStrategy &operator=(const ResourceStrategy &) = delete;
50 
51 public:
52   ResourceStrategy() = default;
53   virtual ~ResourceStrategy();
54 
55   /// Selects a processor resource unit from a ReadyMask.
56   virtual uint64_t select(uint64_t ReadyMask) = 0;
57 
58   /// Called by the ResourceManager when a processor resource group, or a
59   /// processor resource with multiple units has become unavailable.
60   ///
61   /// The default strategy uses this information to bias its selection logic.
62   virtual void used(uint64_t ResourceMask) {}
63 };
64 
65 /// Default resource allocation strategy used by processor resource groups and
66 /// processor resources with multiple units.
67 class DefaultResourceStrategy final : public ResourceStrategy {
68   /// A Mask of resource unit identifiers.
69   ///
70   /// There is one bit set for every available resource unit.
71   /// It defaults to the value of field ResourceSizeMask in ResourceState.
72   const uint64_t ResourceUnitMask;
73 
74   /// A simple round-robin selector for processor resource units.
75   /// Each bit of this mask identifies a sub resource within a group.
76   ///
77   /// As an example, lets assume that this is a default policy for a
78   /// processor resource group composed by the following three units:
79   ///   ResourceA -- 0b001
80   ///   ResourceB -- 0b010
81   ///   ResourceC -- 0b100
82   ///
83   /// Field NextInSequenceMask is used to select the next unit from the set of
84   /// resource units. It defaults to the value of field `ResourceUnitMasks` (in
85   /// this example, it defaults to mask '0b111').
86   ///
87   /// The round-robin selector would firstly select 'ResourceC', then
88   /// 'ResourceB', and eventually 'ResourceA'.  When a resource R is used, the
89   /// corresponding bit in NextInSequenceMask is cleared.  For example, if
90   /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
91   /// 0xb011.
92   ///
93   /// When NextInSequenceMask becomes zero, it is automatically reset to the
94   /// default value (i.e. ResourceUnitMask).
95   uint64_t NextInSequenceMask;
96 
97   /// This field is used to track resource units that are used (i.e. selected)
98   /// by other groups other than the one associated with this strategy object.
99   ///
100   /// In LLVM processor resource groups are allowed to partially (or fully)
101   /// overlap. That means, a same unit may be visible to multiple groups.
102   /// This field keeps track of uses that have originated from outside of
103   /// this group. The idea is to bias the selection strategy, so that resources
104   /// that haven't been used by other groups get prioritized.
105   ///
106   /// The end goal is to (try to) keep the resource distribution as much uniform
107   /// as possible. By construction, this mask only tracks one-level of resource
108   /// usage. Therefore, this strategy is expected to be less accurate when same
109   /// units are used multiple times by other groups within a single round of
110   /// select.
111   ///
112   /// Note: an LRU selector would have a better accuracy at the cost of being
113   /// slightly more expensive (mostly in terms of runtime cost). Methods
114   /// 'select' and 'used', are always in the hot execution path of llvm-mca.
115   /// Therefore, a slow implementation of 'select' would have a negative impact
116   /// on the overall performance of the tool.
117   uint64_t RemovedFromNextInSequence;
118 
119 public:
120   DefaultResourceStrategy(uint64_t UnitMask)
121       : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask),
122         RemovedFromNextInSequence(0) {}
123   virtual ~DefaultResourceStrategy() = default;
124 
125   uint64_t select(uint64_t ReadyMask) override;
126   void used(uint64_t Mask) override;
127 };
128 
129 /// A processor resource descriptor.
130 ///
131 /// There is an instance of this class for every processor resource defined by
132 /// the machine scheduling model.
133 /// Objects of class ResourceState dynamically track the usage of processor
134 /// resource units.
135 class ResourceState {
136   /// An index to the MCProcResourceDesc entry in the processor model.
137   const unsigned ProcResourceDescIndex;
138   /// A resource mask. This is generated by the tool with the help of
139   /// function `mca::computeProcResourceMasks' (see Support.h).
140   ///
141   /// Field ResourceMask only has one bit set if this resource state describes a
142   /// processor resource unit (i.e. this is not a group). That means, we can
143   /// quickly check if a resource is a group by simply counting the number of
144   /// bits that are set in the mask.
145   ///
146   /// The most significant bit of a mask (MSB) uniquely identifies a resource.
147   /// Remaining bits are used to describe the composition of a group (Group).
148   ///
149   /// Example (little endian):
150   ///            Resource |  Mask      |  MSB       |  Group
151   ///            ---------+------------+------------+------------
152   ///            A        |  0b000001  |  0b000001  |  0b000000
153   ///                     |            |            |
154   ///            B        |  0b000010  |  0b000010  |  0b000000
155   ///                     |            |            |
156   ///            C        |  0b010000  |  0b010000  |  0b000000
157   ///                     |            |            |
158   ///            D        |  0b110010  |  0b100000  |  0b010010
159   ///
160   /// In this example, resources A, B and C are processor resource units.
161   /// Only resource D is a group resource, and it contains resources B and C.
162   /// That is because MSB(B) and MSB(C) are both contained within Group(D).
163   const uint64_t ResourceMask;
164 
165   /// A ProcResource can have multiple units.
166   ///
167   /// For processor resource groups this field is a mask of contained resource
168   /// units. It is obtained from ResourceMask by clearing the highest set bit.
169   /// The number of resource units in a group can be simply computed as the
170   /// population count of this field.
171   ///
172   /// For normal (i.e. non-group) resources, the number of bits set in this mask
173   /// is equivalent to the number of units declared by the processor model (see
174   /// field 'NumUnits' in 'ProcResourceUnits').
175   uint64_t ResourceSizeMask;
176 
177   /// A mask of ready units.
178   uint64_t ReadyMask;
179 
180   /// Buffered resources will have this field set to a positive number different
181   /// than zero. A buffered resource behaves like a reservation station
182   /// implementing its own buffer for out-of-order execution.
183   ///
184   /// A BufferSize of 1 is used by scheduler resources that force in-order
185   /// execution.
186   ///
187   /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
188   /// Since in-order issue/dispatch resources don't implement buffers, dispatch
189   /// events coincide with issue events.
190   /// Also, no other instruction ca be dispatched/issue while this resource is
191   /// in use. Only when all the "resource cycles" are consumed (after the issue
192   /// event), a new instruction ca be dispatched.
193   const int BufferSize;
194 
195   /// Available slots in the buffer (zero, if this is not a buffered resource).
196   unsigned AvailableSlots;
197 
198   /// This field is set if this resource is currently reserved.
199   ///
200   /// Resources can be reserved for a number of cycles.
201   /// Instructions can still be dispatched to reserved resources. However,
202   /// istructions dispatched to a reserved resource cannot be issued to the
203   /// underlying units (i.e. pipelines) until the resource is released.
204   bool Unavailable;
205 
206   const bool IsAGroup;
207 
208   /// Checks for the availability of unit 'SubResMask' in the group.
209   bool isSubResourceReady(uint64_t SubResMask) const {
210     return ReadyMask & SubResMask;
211   }
212 
213 public:
214   ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask);
215 
216   unsigned getProcResourceID() const { return ProcResourceDescIndex; }
217   uint64_t getResourceMask() const { return ResourceMask; }
218   uint64_t getReadyMask() const { return ReadyMask; }
219   int getBufferSize() const { return BufferSize; }
220 
221   bool isBuffered() const { return BufferSize > 0; }
222   bool isInOrder() const { return BufferSize == 1; }
223 
224   /// Returns true if this is an in-order dispatch/issue resource.
225   bool isADispatchHazard() const { return BufferSize == 0; }
226   bool isReserved() const { return Unavailable; }
227 
228   void setReserved() { Unavailable = true; }
229   void clearReserved() { Unavailable = false; }
230 
231   /// Returs true if this resource is not reserved, and if there are at least
232   /// `NumUnits` available units.
233   bool isReady(unsigned NumUnits = 1) const;
234 
235   uint64_t getNumReadyUnits() const { return llvm::popcount(ReadyMask); }
236 
237   bool isAResourceGroup() const { return IsAGroup; }
238 
239   bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
240 
241   void markSubResourceAsUsed(uint64_t ID) {
242     assert(isSubResourceReady(ID));
243     ReadyMask ^= ID;
244   }
245 
246   void releaseSubResource(uint64_t ID) {
247     assert(!isSubResourceReady(ID));
248     ReadyMask ^= ID;
249   }
250 
251   unsigned getNumUnits() const {
252     return isAResourceGroup() ? 1U : llvm::popcount(ResourceSizeMask);
253   }
254 
255   /// Checks if there is an available slot in the resource buffer.
256   ///
257   /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if
258   /// there is a slot available.
259   ///
260   /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it
261   /// is reserved.
262   ///
263   /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
264   ResourceStateEvent isBufferAvailable() const;
265 
266   /// Reserve a buffer slot.
267   ///
268   /// Returns true if the buffer is not full.
269   /// It always returns true if BufferSize is set to zero.
270   bool reserveBuffer() {
271     if (BufferSize <= 0)
272       return true;
273 
274     --AvailableSlots;
275     assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
276     return AvailableSlots;
277   }
278 
279   /// Releases a slot in the buffer.
280   void releaseBuffer() {
281     // Ignore dispatch hazards or invalid buffer sizes.
282     if (BufferSize <= 0)
283       return;
284 
285     ++AvailableSlots;
286     assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
287   }
288 
289 #ifndef NDEBUG
290   void dump() const;
291 #endif
292 };
293 
294 /// A resource unit identifier.
295 ///
296 /// This is used to identify a specific processor resource unit using a pair
297 /// of indices where the 'first' index is a processor resource mask, and the
298 /// 'second' index is an index for a "sub-resource" (i.e. unit).
299 typedef std::pair<uint64_t, uint64_t> ResourceRef;
300 
301 // First: a MCProcResourceDesc index identifying a buffered resource.
302 // Second: max number of buffer entries used in this resource.
303 typedef std::pair<unsigned, unsigned> BufferUsageEntry;
304 
305 /// A resource manager for processor resource units and groups.
306 ///
307 /// This class owns all the ResourceState objects, and it is responsible for
308 /// acting on requests from a Scheduler by updating the internal state of
309 /// ResourceState objects.
310 /// This class doesn't know about instruction itineraries and functional units.
311 /// In future, it can be extended to support itineraries too through the same
312 /// public interface.
313 class ResourceManager {
314   // Set of resources available on the subtarget.
315   //
316   // There is an instance of ResourceState for every resource declared by the
317   // target scheduling model.
318   //
319   // Elements of this vector are ordered by resource kind. In particular,
320   // resource units take precedence over resource groups.
321   //
322   // The index of a processor resource in this vector depends on the value of
323   // its mask (see the description of field ResourceState::ResourceMask).  In
324   // particular, it is computed as the position of the most significant bit set
325   // (MSB) in the mask plus one (since we want to ignore the invalid resource
326   // descriptor at index zero).
327   //
328   // Example (little endian):
329   //
330   //             Resource | Mask    |  MSB    | Index
331   //             ---------+---------+---------+-------
332   //                 A    | 0b00001 | 0b00001 |   1
333   //                      |         |         |
334   //                 B    | 0b00100 | 0b00100 |   3
335   //                      |         |         |
336   //                 C    | 0b10010 | 0b10000 |   5
337   //
338   //
339   // The same index is also used to address elements within vector `Strategies`
340   // and vector `Resource2Groups`.
341   std::vector<std::unique_ptr<ResourceState>> Resources;
342   std::vector<std::unique_ptr<ResourceStrategy>> Strategies;
343 
344   // Used to quickly identify groups that own a particular resource unit.
345   std::vector<uint64_t> Resource2Groups;
346 
347   // A table that maps processor resource IDs to processor resource masks.
348   SmallVector<uint64_t, 8> ProcResID2Mask;
349 
350   // A table that maps resource indices to actual processor resource IDs in the
351   // scheduling model.
352   SmallVector<unsigned, 8> ResIndex2ProcResID;
353 
354   // Keeps track of which resources are busy, and how many cycles are left
355   // before those become usable again.
356   SmallDenseMap<ResourceRef, unsigned> BusyResources;
357 
358   // Set of processor resource units available on the target.
359   uint64_t ProcResUnitMask;
360 
361   // Set of processor resource units that are available during this cycle.
362   uint64_t AvailableProcResUnits;
363 
364   // Set of processor resources that are currently reserved.
365   uint64_t ReservedResourceGroups;
366 
367   // Set of unavailable scheduler buffer resources. This is used internally to
368   // speedup `canBeDispatched()` queries.
369   uint64_t AvailableBuffers;
370 
371   // Set of dispatch hazard buffer resources that are currently unavailable.
372   uint64_t ReservedBuffers;
373 
374   // Returns the actual resource unit that will be used.
375   ResourceRef selectPipe(uint64_t ResourceID);
376 
377   void use(const ResourceRef &RR);
378   void release(const ResourceRef &RR);
379 
380   unsigned getNumUnits(uint64_t ResourceID) const;
381 
382   // Overrides the selection strategy for the processor resource with the given
383   // mask.
384   void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
385                              uint64_t ResourceMask);
386 
387 public:
388   ResourceManager(const MCSchedModel &SM);
389   virtual ~ResourceManager() = default;
390 
391   // Overrides the selection strategy for the resource at index ResourceID in
392   // the MCProcResourceDesc table.
393   void setCustomStrategy(std::unique_ptr<ResourceStrategy> S,
394                          unsigned ResourceID) {
395     assert(ResourceID < ProcResID2Mask.size() &&
396            "Invalid resource index in input!");
397     return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]);
398   }
399 
400   // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
401   // there are enough available slots in the buffers.
402   ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const;
403 
404   // Return the processor resource identifier associated to this Mask.
405   unsigned resolveResourceMask(uint64_t Mask) const;
406 
407   // Acquires a slot from every buffered resource in mask `ConsumedBuffers`.
408   // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
409   void reserveBuffers(uint64_t ConsumedBuffers);
410 
411   // Releases a slot from every buffered resource in mask `ConsumedBuffers`.
412   // ConsumedBuffers is a bitmask of previously acquired buffers (using method
413   // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are
414   // not automatically unreserved by this method.
415   void releaseBuffers(uint64_t ConsumedBuffers);
416 
417   // Reserve a processor resource. A reserved resource is not available for
418   // instruction issue until it is released.
419   void reserveResource(uint64_t ResourceID);
420 
421   // Release a previously reserved processor resource.
422   void releaseResource(uint64_t ResourceID);
423 
424   // Returns a zero mask if resources requested by Desc are all available during
425   // this cycle. It returns a non-zero mask value only if there are unavailable
426   // processor resources; each bit set in the mask represents a busy processor
427   // resource unit or a reserved processor resource group.
428   uint64_t checkAvailability(const InstrDesc &Desc) const;
429 
430   uint64_t getProcResUnitMask() const { return ProcResUnitMask; }
431   uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; }
432 
433   using ResourceWithCycles = std::pair<ResourceRef, ReleaseAtCycles>;
434 
435   void issueInstruction(const InstrDesc &Desc,
436                         SmallVectorImpl<ResourceWithCycles> &Pipes) {
437     if (Desc.HasPartiallyOverlappingGroups)
438       return issueInstructionImpl(Desc, Pipes);
439 
440     return fastIssueInstruction(Desc, Pipes);
441   }
442 
443   // Selects pipeline resources consumed by an instruction.
444   // This method works under the assumption that used group resources don't
445   // partially overlap. The logic is guaranteed to find a valid resource unit
446   // schedule, no matter in which order individual uses are processed. For that
447   // reason, the vector of resource uses is simply (and quickly) processed in
448   // sequence. The resulting schedule is eventually stored into vector `Pipes`.
449   void fastIssueInstruction(const InstrDesc &Desc,
450                             SmallVectorImpl<ResourceWithCycles> &Pipes);
451 
452   // Selects pipeline resources consumed by an instruction.
453   // This method works under the assumption that used resource groups may
454   // partially overlap. This complicates the selection process, because the
455   // order in which uses are processed matters. The logic internally prioritizes
456   // groups which are more constrained than others.
457   void issueInstructionImpl(const InstrDesc &Desc,
458                             SmallVectorImpl<ResourceWithCycles> &Pipes);
459 
460   void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed);
461 
462 #ifndef NDEBUG
463   void dump() const {
464     for (const std::unique_ptr<ResourceState> &Resource : Resources)
465       Resource->dump();
466   }
467 #endif
468 };
469 } // namespace mca
470 } // namespace llvm
471 
472 #endif // LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H
473