Lines Matching full:create
201 coords[i] = b.create<arith::RemSIOp>(index, tripCounts[i]);
202 index = b.create<arith::DivSIOp>(index, tripCounts[i]);
248 // Create a parallel compute function from the parallel operation.
261 func::FuncOp func = func::FuncOp::create(
273 // Create function entry block.
286 Value c0 = b.create<arith::ConstantIndexOp>(0);
287 Value c1 = b.create<arith::ConstantIndexOp>(1);
294 return b.create<arith::ConstantOp>(attr);
313 tripCount = b.create<arith::MulIOp>(tripCount, tripCounts[i]);
317 Value blockFirstIndex = b.create<arith::MulIOp>(blockIndex, blockSize);
321 Value blockEnd0 = b.create<arith::AddIOp>(blockFirstIndex, blockSize);
322 Value blockEnd1 = b.create<arith::MinSIOp>(blockEnd0, tripCount);
323 Value blockLastIndex = b.create<arith::SubIOp>(blockEnd1, c1);
336 blockEndCoord[i] = b.create<arith::AddIOp>(blockLastCoord[i], c1);
379 computeBlockInductionVars[loopIdx] = b.create<arith::AddIOp>(
380 lowerBounds[loopIdx], b.create<arith::MulIOp>(iv, steps[loopIdx]));
383 isBlockFirstCoord[loopIdx] = b.create<arith::CmpIOp>(
385 isBlockLastCoord[loopIdx] = b.create<arith::CmpIOp>(
390 isBlockFirstCoord[loopIdx] = b.create<arith::AndIOp>(
392 isBlockLastCoord[loopIdx] = b.create<arith::AndIOp>(
401 b.create<scf::ForOp>(c0, tripCounts[loopIdx + 1], c1, ValueRange(),
407 auto lb = b.create<arith::SelectOp>(isBlockFirstCoord[loopIdx],
410 auto ub = b.create<arith::SelectOp>(isBlockLastCoord[loopIdx],
414 b.create<scf::ForOp>(lb, ub, c1, ValueRange(),
418 b.create<scf::YieldOp>(loc);
429 b.create<scf::YieldOp>(loc);
433 b.create<scf::ForOp>(blockFirstCoord[0], blockEndCoord[0], c1, ValueRange(),
435 b.create<func::ReturnOp>(ValueRange());
481 func::FuncOp func = func::FuncOp::create(loc, "async_dispatch_fn", type);
489 // Create function entry block.
495 Value c1 = b.create<arith::ConstantIndexOp>(1);
496 Value c2 = b.create<arith::ConstantIndexOp>(2);
505 // Create a work splitting while loop for the [blockStart, blockEnd) range.
510 // Create a recursive dispatch loop.
511 scf::WhileOp whileOp = b.create<scf::WhileOp>(types, operands);
521 Value distance = b.create<arith::SubIOp>(end, start);
523 b.create<arith::CmpIOp>(arith::CmpIPredicate::sgt, distance, c1);
524 b.create<scf::ConditionOp>(dispatch, before->getArguments());
533 Value distance = b.create<arith::SubIOp>(end, start);
534 Value halfDistance = b.create<arith::DivSIOp>(distance, c2);
535 Value midIndex = b.create<arith::AddIOp>(start, halfDistance);
546 executeBuilder.create<func::CallOp>(executeLoc, func.getSymName(),
548 executeBuilder.create<async::YieldOp>(executeLoc, ValueRange());
551 // Create async.execute operation to dispatch half of the block range.
552 auto execute = b.create<ExecuteOp>(TypeRange(), ValueRange(), ValueRange(),
554 b.create<AddToGroupOp>(indexTy, execute.getToken(), group);
555 b.create<scf::YieldOp>(ValueRange({start, midIndex}));
567 b.create<func::CallOp>(computeFunc.func.getSymName(),
570 b.create<func::ReturnOp>(ValueRange());
588 Value c0 = b.create<arith::ConstantIndexOp>(0);
589 Value c1 = b.create<arith::ConstantIndexOp>(1);
605 b.create<arith::CmpIOp>(arith::CmpIPredicate::eq, blockCount, c1);
614 b.create<func::CallOp>(parallelComputeFunction.func.getSymName(),
617 b.create<scf::YieldOp>();
623 // Create an async.group to wait on all async tokens from the concurrent
626 Value groupSize = b.create<arith::SubIOp>(blockCount, c1);
627 Value group = b.create<CreateGroupOp>(GroupType::get(ctx), groupSize);
633 b.create<func::CallOp>(asyncDispatchFunction.getSymName(),
637 b.create<AwaitAllOp>(group);
639 b.create<scf::YieldOp>();
643 b.create<scf::IfOp>(isSingleBlock, syncDispatch, asyncDispatch);
657 Value c0 = b.create<arith::ConstantIndexOp>(0);
658 Value c1 = b.create<arith::ConstantIndexOp>(1);
660 // Create an async.group to wait on all async tokens from the concurrent
663 Value groupSize = b.create<arith::SubIOp>(blockCount, c1);
664 Value group = b.create<CreateGroupOp>(GroupType::get(ctx), groupSize);
691 executeBuilder.create<func::CallOp>(executeLoc, compute.getSymName(),
694 executeBuilder.create<async::YieldOp>(executeLoc, ValueRange());
697 // Create async.execute operation to launch parallel compute function.
698 auto execute = b.create<ExecuteOp>(TypeRange(), ValueRange(), ValueRange(),
700 b.create<AddToGroupOp>(rewriter.getIndexType(), execute.getToken(), group);
701 b.create<scf::YieldOp>();
705 b.create<scf::ForOp>(c1, blockCount, c1, ValueRange(), loopBuilder);
708 b.create<func::CallOp>(compute.getSymName(), compute.getResultTypes(),
712 b.create<AwaitAllOp>(group);
748 tripCount = b.create<arith::MulIOp>(tripCount, tripCounts[i]);
752 Value c0 = b.create<arith::ConstantIndexOp>(0);
754 b.create<arith::CmpIOp>(arith::CmpIPredicate::eq, tripCount, c0);
758 nestedBuilder.create<scf::YieldOp>(loc);
808 numWorkerThreadsVal = b.create<arith::ConstantIndexOp>(numWorkerThreads);
810 numWorkerThreadsVal = b.create<async::RuntimeNumWorkerThreadsOp>();
829 Value scalingFactor = b.create<arith::ConstantFloatOp>(
832 Value bracketBegin = b.create<arith::ConstantIndexOp>(p.first);
833 Value inBracket = b.create<arith::CmpIOp>(
835 Value bracketScalingFactor = b.create<arith::ConstantFloatOp>(
837 scalingFactor = b.create<arith::SelectOp>(inBracket, bracketScalingFactor,
841 b.create<arith::IndexCastOp>(b.getI32Type(), numWorkerThreadsVal);
843 b.create<arith::SIToFPOp>(b.getF32Type(), numWorkersIndex);
845 b.create<arith::MulFOp>(scalingFactor, numWorkersFloat);
847 b.create<arith::FPToSIOp>(b.getI32Type(), scaledNumWorkers);
849 b.create<arith::IndexCastOp>(b.getIndexType(), scaledNumInt);
851 Value maxComputeBlocks = b.create<arith::MaxSIOp>(
852 b.create<arith::ConstantIndexOp>(1), scaledWorkers);
858 Value bs0 = b.create<arith::CeilDivSIOp>(tripCount, maxComputeBlocks);
859 Value bs1 = b.create<arith::MaxSIOp>(bs0, minTaskSize);
860 Value blockSize = b.create<arith::MinSIOp>(tripCount, bs1);
866 // Create a parallel compute function that takes a block id and computes
870 Value blockCount = b.create<arith::CeilDivSIOp>(tripCount, blockSize);
879 b.create<scf::YieldOp>();
890 Value numIters = b.create<arith::ConstantIndexOp>(
892 Value alignedBlockSize = b.create<arith::MulIOp>(
893 b.create<arith::CeilDivSIOp>(blockSize, numIters), numIters);
896 b.create<scf::YieldOp>();
903 Value numIters = b.create<arith::ConstantIndexOp>(
905 Value useBlockAlignedComputeFn = b.create<arith::CmpIOp>(
908 b.create<scf::IfOp>(useBlockAlignedComputeFn, dispatchBlockAligned,
910 b.create<scf::YieldOp>();
917 b.create<scf::IfOp>(isZeroIterations, noOp, dispatch);
932 return builder.create<arith::ConstantIndexOp>(minTaskSize);