AsyncParallelFor.cpp - OpenGrok cross reference for /llvm-project/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp

Lines Matching full:create
201     coords[i] = b.create<arith::RemSIOp>(index, tripCounts[i]);
202     index = b.create<arith::DivSIOp>(index, tripCounts[i]);
248 // Create a parallel compute function from the parallel operation.
261   func::FuncOp func = func::FuncOp::create(
273   // Create function entry block.
286   Value c0 = b.create<arith::ConstantIndexOp>(0);
287   Value c1 = b.create<arith::ConstantIndexOp>(1);
294             return b.create<arith::ConstantOp>(attr);
313     tripCount = b.create<arith::MulIOp>(tripCount, tripCounts[i]);
317   Value blockFirstIndex = b.create<arith::MulIOp>(blockIndex, blockSize);
321   Value blockEnd0 = b.create<arith::AddIOp>(blockFirstIndex, blockSize);
322   Value blockEnd1 = b.create<arith::MinSIOp>(blockEnd0, tripCount);
323   Value blockLastIndex = b.create<arith::SubIOp>(blockEnd1, c1);
336     blockEndCoord[i] = b.create<arith::AddIOp>(blockLastCoord[i], c1);
379       computeBlockInductionVars[loopIdx] = b.create<arith::AddIOp>(
380           lowerBounds[loopIdx], b.create<arith::MulIOp>(iv, steps[loopIdx]));
383       isBlockFirstCoord[loopIdx] = b.create<arith::CmpIOp>(
385       isBlockLastCoord[loopIdx] = b.create<arith::CmpIOp>(
390         isBlockFirstCoord[loopIdx] = b.create<arith::AndIOp>(
392         isBlockLastCoord[loopIdx] = b.create<arith::AndIOp>(
401           b.create<scf::ForOp>(c0, tripCounts[loopIdx + 1], c1, ValueRange(),
407           auto lb = b.create<arith::SelectOp>(isBlockFirstCoord[loopIdx],
410           auto ub = b.create<arith::SelectOp>(isBlockLastCoord[loopIdx],
414           b.create<scf::ForOp>(lb, ub, c1, ValueRange(),
418         b.create<scf::YieldOp>(loc);
429       b.create<scf::YieldOp>(loc);
433   b.create<scf::ForOp>(blockFirstCoord[0], blockEndCoord[0], c1, ValueRange(),
435   b.create<func::ReturnOp>(ValueRange());
481   func::FuncOp func = func::FuncOp::create(loc, "async_dispatch_fn", type);
489   // Create function entry block.
495   Value c1 = b.create<arith::ConstantIndexOp>(1);
496   Value c2 = b.create<arith::ConstantIndexOp>(2);
505   // Create a work splitting while loop for the [blockStart, blockEnd) range.
510   // Create a recursive dispatch loop.
511   scf::WhileOp whileOp = b.create<scf::WhileOp>(types, operands);
521     Value distance = b.create<arith::SubIOp>(end, start);
523         b.create<arith::CmpIOp>(arith::CmpIPredicate::sgt, distance, c1);
524     b.create<scf::ConditionOp>(dispatch, before->getArguments());
533     Value distance = b.create<arith::SubIOp>(end, start);
534     Value halfDistance = b.create<arith::DivSIOp>(distance, c2);
535     Value midIndex = b.create<arith::AddIOp>(start, halfDistance);
546       executeBuilder.create<func::CallOp>(executeLoc, func.getSymName(),
548       executeBuilder.create<async::YieldOp>(executeLoc, ValueRange());
551     // Create async.execute operation to dispatch half of the block range.
552     auto execute = b.create<ExecuteOp>(TypeRange(), ValueRange(), ValueRange(),
554     b.create<AddToGroupOp>(indexTy, execute.getToken(), group);
555     b.create<scf::YieldOp>(ValueRange({start, midIndex}));
567   b.create<func::CallOp>(computeFunc.func.getSymName(),
570   b.create<func::ReturnOp>(ValueRange());
588   Value c0 = b.create<arith::ConstantIndexOp>(0);
589   Value c1 = b.create<arith::ConstantIndexOp>(1);
605       b.create<arith::CmpIOp>(arith::CmpIPredicate::eq, blockCount, c1);
614     b.create<func::CallOp>(parallelComputeFunction.func.getSymName(),
617     b.create<scf::YieldOp>();
623     // Create an async.group to wait on all async tokens from the concurrent
626     Value groupSize = b.create<arith::SubIOp>(blockCount, c1);
627     Value group = b.create<CreateGroupOp>(GroupType::get(ctx), groupSize);
633     b.create<func::CallOp>(asyncDispatchFunction.getSymName(),
637     b.create<AwaitAllOp>(group);
639     b.create<scf::YieldOp>();
643   b.create<scf::IfOp>(isSingleBlock, syncDispatch, asyncDispatch);
657   Value c0 = b.create<arith::ConstantIndexOp>(0);
658   Value c1 = b.create<arith::ConstantIndexOp>(1);
660   // Create an async.group to wait on all async tokens from the concurrent
663   Value groupSize = b.create<arith::SubIOp>(blockCount, c1);
664   Value group = b.create<CreateGroupOp>(GroupType::get(ctx), groupSize);
691       executeBuilder.create<func::CallOp>(executeLoc, compute.getSymName(),
694       executeBuilder.create<async::YieldOp>(executeLoc, ValueRange());
697     // Create async.execute operation to launch parallel compute function.
698     auto execute = b.create<ExecuteOp>(TypeRange(), ValueRange(), ValueRange(),
700     b.create<AddToGroupOp>(rewriter.getIndexType(), execute.getToken(), group);
701     b.create<scf::YieldOp>();
705   b.create<scf::ForOp>(c1, blockCount, c1, ValueRange(), loopBuilder);
708   b.create<func::CallOp>(compute.getSymName(), compute.getResultTypes(),
712   b.create<AwaitAllOp>(group);
748     tripCount = b.create<arith::MulIOp>(tripCount, tripCounts[i]);
752   Value c0 = b.create<arith::ConstantIndexOp>(0);
754       b.create<arith::CmpIOp>(arith::CmpIPredicate::eq, tripCount, c0);
758     nestedBuilder.create<scf::YieldOp>(loc);
808       numWorkerThreadsVal = b.create<arith::ConstantIndexOp>(numWorkerThreads);
810       numWorkerThreadsVal = b.create<async::RuntimeNumWorkerThreadsOp>();
829     Value scalingFactor = b.create<arith::ConstantFloatOp>(
832       Value bracketBegin = b.create<arith::ConstantIndexOp>(p.first);
833       Value inBracket = b.create<arith::CmpIOp>(
835       Value bracketScalingFactor = b.create<arith::ConstantFloatOp>(
837       scalingFactor = b.create<arith::SelectOp>(inBracket, bracketScalingFactor,
841         b.create<arith::IndexCastOp>(b.getI32Type(), numWorkerThreadsVal);
843         b.create<arith::SIToFPOp>(b.getF32Type(), numWorkersIndex);
845         b.create<arith::MulFOp>(scalingFactor, numWorkersFloat);
847         b.create<arith::FPToSIOp>(b.getI32Type(), scaledNumWorkers);
849         b.create<arith::IndexCastOp>(b.getIndexType(), scaledNumInt);
851     Value maxComputeBlocks = b.create<arith::MaxSIOp>(
852         b.create<arith::ConstantIndexOp>(1), scaledWorkers);
858     Value bs0 = b.create<arith::CeilDivSIOp>(tripCount, maxComputeBlocks);
859     Value bs1 = b.create<arith::MaxSIOp>(bs0, minTaskSize);
860     Value blockSize = b.create<arith::MinSIOp>(tripCount, bs1);
866     // Create a parallel compute function that takes a block id and computes
870     Value blockCount = b.create<arith::CeilDivSIOp>(tripCount, blockSize);
879       b.create<scf::YieldOp>();
890       Value numIters = b.create<arith::ConstantIndexOp>(
892       Value alignedBlockSize = b.create<arith::MulIOp>(
893           b.create<arith::CeilDivSIOp>(blockSize, numIters), numIters);
896       b.create<scf::YieldOp>();
903       Value numIters = b.create<arith::ConstantIndexOp>(
905       Value useBlockAlignedComputeFn = b.create<arith::CmpIOp>(
908       b.create<scf::IfOp>(useBlockAlignedComputeFn, dispatchBlockAligned,
910       b.create<scf::YieldOp>();
917   b.create<scf::IfOp>(isZeroIterations, noOp, dispatch);
932         return builder.create<arith::ConstantIndexOp>(minTaskSize);