Lines Matching defs:barrier

210 /// async load for which there is not yet a barrier.
868 /// Build a tma load from global memory to shared memory using `barrier` to
873 TypedValue<nvgpu::MBarrierGroupType> barrier,
875 void buildBarrierArriveTx(TypedValue<nvgpu::MBarrierGroupType> barrier,
884 TypedValue<nvgpu::MBarrierGroupType> barrier);
886 void buildTryWaitParity(TypedValue<nvgpu::MBarrierGroupType> barrier);
895 TypedValue<nvgpu::MBarrierGroupType> barrier) {
911 OpFoldResult sz = buildTmaAsyncLoad(desc, shmem, barrier, loadOps);
914 // TODO: Note that cutlass predeclares the barrier arrive tx before the tma.async.load.
916 buildBarrierArriveTx(barrier, sizes);
923 buildBarrierArriveTx(barrier, getAsIndexOpFoldResult(rewriter.getContext(), 0));
939 Value barrier = rewriter.create<nvgpu::MBarrierCreateOp>(
944 loc, barrier, getValueOrCreateConstantIndexOp(rewriter, loc, numThreads),
947 return cast<TypedValue<nvgpu::MBarrierGroupType>>(barrier);
982 TypedValue<nvgpu::MBarrierGroupType> barrier,
987 loc, sharedMemref, barrier, globalDesc, ValueRange{zero, zero}, zero,
1002 TypedValue<nvgpu::MBarrierGroupType> barrier,
1013 rewriter.create<nvgpu::MBarrierArriveExpectTxOp>(loc, barrier, sizeVal, zero,
1018 TypedValue<nvgpu::MBarrierGroupType> barrier) {
1027 rewriter.create<nvgpu::MBarrierTryWaitParityOp>(loc, barrier, parity,
1051 // 1. Init a barrier object in shared memory.
1062 TypedValue<nvgpu::MBarrierGroupType> barrier =
1089 buildPredicateLoadsOnThread0(globalDescs, shmems, barrier);
1092 buildTryWaitParity(barrier);