AMDGPUSubtarget.cpp - OpenGrok cross reference for /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines Matching +full:- +full:- +full:requested +full:- +full:by
1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
33 #define DEBUG_TYPE "amdgpu-subtarget"
41 // Returns the maximum per-workgroup LDS allocation size (in bytes) that still
50       std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
66   // consider the occupancy to be when the number of requested registers in a
74       -> std::tuple<const unsigned, const unsigned, unsigned> {
90   // is limited by LDS usage or barrier resources.
102       unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
106         // possible size that requires MaxWavesPerWG - E waves where E is
109         // 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize
110         MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
111                                                 MaxWavesPerWG - MinWavesPerWG);
118     unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
125       // 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize
127                                               ((MaxWGSize - 1) / WaveSize) + 1 -
141   return getOccupancyWithWorkGroupSizes(MFI->getLDSSize(), MF.getFunction());
165   // Requested minimum/maximum flat work group sizes.
166   std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
167     F, "amdgpu-flat-work-group-size", Default);
169   // Make sure requested minimum does not exceed requested maximum.
170   if (Requested.first > Requested.second)
173   // Make sure requested values do not violate subtarget's specifications.
174   if (Requested.first < getMinFlatWorkGroupSize())
176   if (Requested.second > getMaxFlatWorkGroupSize())
179   return Requested;
183     std::pair<unsigned, unsigned> Requested,
188   // If minimum/maximum flat work group sizes were explicitly requested using
189   // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
190   // number of waves per execution unit to values implied by requested
196   // Make sure requested minimum does not exceed requested maximum.
197   if (Requested.second && Requested.first > Requested.second)
200   // Make sure requested values do not violate subtarget's specifications.
201   if (Requested.first < getMinWavesPerEU() ||
202       Requested.second > getMaxWavesPerEU())
205   // Make sure requested values are compatible with values implied by requested
207   if (Requested.first < MinImpliedByFlatWorkGroupSize)
210   return Requested;
218   // Requested minimum/maximum number of waves per execution unit.
219   std::pair<unsigned, unsigned> Requested =
220       AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true);
221   return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes);
226   if (Node && Node->getNumOperands() == 3)
227     return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
239     return ReqdSize - 1;
240   return getFlatWorkGroupSizes(Kernel).second - 1;
253   Function *Kernel = I->getParent()->getParent();
260     const Function *F = CI->getCalledFunction();
263       switch (F->getIntrinsicID()) {
311     CI->addRangeRetAttr(Range);
313     MDBuilder MDB(I->getContext());
315     I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
325   if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
331   // Assume all implicit inputs are used by default
335   return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
349     if (Arg.hasAttribute("amdgpu-hidden-argument"))
407   return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,