Lines Matching +full:- +full:- +full:requested +full:- +full:by

1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
33 #define DEBUG_TYPE "amdgpu-subtarget"
41 // Returns the maximum per-workgroup LDS allocation size (in bytes) that still
50 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
66 // consider the occupancy to be when the number of requested registers in a
74 -> std::tuple<const unsigned, const unsigned, unsigned> {
90 // is limited by LDS usage or barrier resources.
102 unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
106 // possible size that requires MaxWavesPerWG - E waves where E is
109 // 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize
110 MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
111 MaxWavesPerWG - MinWavesPerWG);
118 unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
125 // 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize
127 ((MaxWGSize - 1) / WaveSize) + 1 -
141 return getOccupancyWithWorkGroupSizes(MFI->getLDSSize(), MF.getFunction());
165 // Requested minimum/maximum flat work group sizes.
166 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
167 F, "amdgpu-flat-work-group-size", Default);
169 // Make sure requested minimum does not exceed requested maximum.
170 if (Requested.first > Requested.second)
173 // Make sure requested values do not violate subtarget's specifications.
174 if (Requested.first < getMinFlatWorkGroupSize())
176 if (Requested.second > getMaxFlatWorkGroupSize())
179 return Requested;
183 std::pair<unsigned, unsigned> Requested,
188 // If minimum/maximum flat work group sizes were explicitly requested using
189 // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
190 // number of waves per execution unit to values implied by requested
196 // Make sure requested minimum does not exceed requested maximum.
197 if (Requested.second && Requested.first > Requested.second)
200 // Make sure requested values do not violate subtarget's specifications.
201 if (Requested.first < getMinWavesPerEU() ||
202 Requested.second > getMaxWavesPerEU())
205 // Make sure requested values are compatible with values implied by requested
207 if (Requested.first < MinImpliedByFlatWorkGroupSize)
210 return Requested;
218 // Requested minimum/maximum number of waves per execution unit.
219 std::pair<unsigned, unsigned> Requested =
220 AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true);
221 return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes);
226 if (Node && Node->getNumOperands() == 3)
227 return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
239 return ReqdSize - 1;
240 return getFlatWorkGroupSizes(Kernel).second - 1;
253 Function *Kernel = I->getParent()->getParent();
260 const Function *F = CI->getCalledFunction();
263 switch (F->getIntrinsicID()) {
311 CI->addRangeRetAttr(Range);
313 MDBuilder MDB(I->getContext());
315 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
325 if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
331 // Assume all implicit inputs are used by default
335 return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
349 if (Arg.hasAttribute("amdgpu-hidden-argument"))
407 return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,