Lines Matching defs:forOp
76 static Operation::operand_range getLowerBoundOperands(AffineForOp forOp) {
77 return forOp.getLowerBoundOperands();
81 static Operation::operand_range getUpperBoundOperands(AffineForOp forOp) {
82 return forOp.getUpperBoundOperands();
87 static Value getOrCreateStep(AffineForOp forOp, OpBuilder &builder) {
88 return builder.create<arith::ConstantIndexOp>(forOp.getLoc(),
89 forOp.getStepAsInt());
94 static Value getOrEmitLowerBound(AffineForOp forOp, OpBuilder &builder) {
95 return lowerAffineLowerBound(forOp, builder);
100 static Value getOrEmitUpperBound(AffineForOp forOp, OpBuilder &builder) {
101 return lowerAffineUpperBound(forOp, builder);
110 static LogicalResult checkAffineLoopNestMappableImpl(AffineForOp forOp,
112 Region &limit = forOp.getRegion();
114 Operation *nested = &forOp.getBody()->front();
115 if (!areValuesDefinedAbove(getLowerBoundOperands(forOp), limit) ||
116 !areValuesDefinedAbove(getUpperBoundOperands(forOp), limit))
117 return forOp.emitError(
126 auto begin = forOp.getBody()->begin(), end = forOp.getBody()->end();
127 if (forOp.getBody()->empty() || std::next(begin, 2) != end)
128 return forOp.emitError("expected perfectly nested loops in the body");
130 if (!(forOp = dyn_cast<AffineForOp>(nested)))
136 static LogicalResult checkAffineLoopNestMappable(AffineForOp forOp,
145 return forOp.emitError("cannot map to more than 3 block dimensions");
148 return forOp.emitError("cannot map to more than 3 thread dimensions");
150 return checkAffineLoopNestMappableImpl(forOp, numBlockDims + numThreadDims);
157 std::optional<AffineForOp> collectBounds(AffineForOp forOp,
175 // mapping a loop nest of depth "numLoops" rooted at "forOp" to a GPU kernel.
180 AffineLoopToGpuConverter::collectBounds(AffineForOp forOp, unsigned numLoops) {
181 OpBuilder builder(forOp.getOperation());
186 AffineForOp currentLoop = forOp;
281 static LogicalResult convertAffineLoopNestToGPULaunch(AffineForOp forOp,
284 if (failed(checkAffineLoopNestMappable(forOp, numBlockDims, numThreadDims)))
289 converter.collectBounds(forOp, numBlockDims + numThreadDims);
292 converter.createLaunch(forOp, *maybeInnerLoop, numBlockDims, numThreadDims);
297 LogicalResult mlir::convertAffineLoopNestToGPULaunch(AffineForOp forOp,
300 return ::convertAffineLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims);