17a7eacc7SStephan Herhut //===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
27a7eacc7SStephan Herhut //
37a7eacc7SStephan Herhut // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47a7eacc7SStephan Herhut // See https://llvm.org/LICENSE.txt for license information.
57a7eacc7SStephan Herhut // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67a7eacc7SStephan Herhut //
77a7eacc7SStephan Herhut //===----------------------------------------------------------------------===//
87a7eacc7SStephan Herhut //
97a7eacc7SStephan Herhut // This file implements utilities to generate mappings for parallel loops to
107a7eacc7SStephan Herhut // GPU devices.
117a7eacc7SStephan Herhut //
127a7eacc7SStephan Herhut //===----------------------------------------------------------------------===//
137a7eacc7SStephan Herhut
14039b969bSMichele Scuttari #include "mlir/Dialect/GPU/Transforms/Passes.h"
1567d0d7acSMichele Scuttari
1667d0d7acSMichele Scuttari #include "mlir/Dialect/Func/IR/FuncOps.h"
1767d0d7acSMichele Scuttari #include "mlir/Dialect/GPU/IR/GPUDialect.h"
1867d0d7acSMichele Scuttari #include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
198b68da2cSAlex Zinenko #include "mlir/Dialect/SCF/IR/SCF.h"
207a7eacc7SStephan Herhut #include "mlir/IR/AffineMap.h"
21bcf3d524SChristian Sigg
2246bb6613SMaheshRavishankar namespace mlir {
2367d0d7acSMichele Scuttari #define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
2467d0d7acSMichele Scuttari #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
2567d0d7acSMichele Scuttari } // namespace mlir
2667d0d7acSMichele Scuttari
2767d0d7acSMichele Scuttari namespace mlir {
2846bb6613SMaheshRavishankar
29bcf3d524SChristian Sigg using scf::ParallelOp;
3046bb6613SMaheshRavishankar
getMappingAttrName()31bcf3d524SChristian Sigg StringRef gpu::getMappingAttrName() { return "mapping"; }
32bcf3d524SChristian Sigg
337bdd3722SMogball LogicalResult
setMappingAttr(ParallelOp ploopOp,ArrayRef<ParallelLoopDimMappingAttr> mapping)347bdd3722SMogball gpu::setMappingAttr(ParallelOp ploopOp,
357bdd3722SMogball ArrayRef<ParallelLoopDimMappingAttr> mapping) {
3646bb6613SMaheshRavishankar // Verify that each processor is mapped to only once.
3746bb6613SMaheshRavishankar llvm::DenseSet<gpu::Processor> specifiedMappings;
3846bb6613SMaheshRavishankar for (auto dimAttr : mapping) {
397bdd3722SMogball gpu::Processor processor = dimAttr.getProcessor();
4046bb6613SMaheshRavishankar if (processor != gpu::Processor::Sequential &&
4146bb6613SMaheshRavishankar specifiedMappings.count(processor))
4246bb6613SMaheshRavishankar return ploopOp.emitError(
4346bb6613SMaheshRavishankar "invalid mapping multiple loops to same processor");
44*76e79b0bSThomas Preud'homme specifiedMappings.insert(processor);
4546bb6613SMaheshRavishankar }
4646bb6613SMaheshRavishankar ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
471ffc1aaaSChristian Sigg ploopOp->setAttr(getMappingAttrName(),
48c2c83e97STres Popp ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
4946bb6613SMaheshRavishankar return success();
5046bb6613SMaheshRavishankar }
5146bb6613SMaheshRavishankar
52bcf3d524SChristian Sigg namespace gpu {
namespace {
/// Nesting level at which a parallel loop is mapped. The numeric order
/// matters: the bounded increment operator steps through the levels in this
/// order and stops at Sequential.
enum MappingLevel {
  /// Loop dimensions are assigned block ids.
  MapGrid = 0,
  /// Loop dimensions are assigned thread ids.
  MapBlock = 1,
  /// Loop dimensions are executed sequentially.
  Sequential = 2
};
} // namespace

/// Number of loop dimensions per level that can receive a hardware id
/// (x, y and z); any further dimension is mapped sequentially.
static constexpr int kNumHardwareIds = 3;
587a7eacc7SStephan Herhut
597a7eacc7SStephan Herhut /// Bounded increment on MappingLevel. Increments to the next
607a7eacc7SStephan Herhut /// level unless Sequential was already reached.
operator ++(MappingLevel & mappingLevel)61bcf3d524SChristian Sigg static MappingLevel &operator++(MappingLevel &mappingLevel) {
627a7eacc7SStephan Herhut if (mappingLevel < Sequential) {
637a7eacc7SStephan Herhut mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
647a7eacc7SStephan Herhut }
657a7eacc7SStephan Herhut return mappingLevel;
667a7eacc7SStephan Herhut }
677a7eacc7SStephan Herhut
687a7eacc7SStephan Herhut /// Computed the hardware id to use for a given mapping level. Will
697a7eacc7SStephan Herhut /// assign x,y and z hardware ids for the first 3 dimensions and use
707a7eacc7SStephan Herhut /// sequential after.
719db53a18SRiver Riddle /// TODO: Make this use x for the inner-most loop that is
7246bb6613SMaheshRavishankar /// distributed to map to x, the next innermost to y and the next innermost to
7346bb6613SMaheshRavishankar /// z.
getHardwareIdForMapping(MappingLevel level,int dimension)74bcf3d524SChristian Sigg static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
7546bb6613SMaheshRavishankar
767a7eacc7SStephan Herhut if (dimension >= kNumHardwareIds || level == Sequential)
7746bb6613SMaheshRavishankar return Processor::Sequential;
7846bb6613SMaheshRavishankar switch (level) {
7946bb6613SMaheshRavishankar case MapGrid:
8046bb6613SMaheshRavishankar switch (dimension) {
8146bb6613SMaheshRavishankar case 0:
8246bb6613SMaheshRavishankar return Processor::BlockX;
8346bb6613SMaheshRavishankar case 1:
8446bb6613SMaheshRavishankar return Processor::BlockY;
8546bb6613SMaheshRavishankar case 2:
8646bb6613SMaheshRavishankar return Processor::BlockZ;
8746bb6613SMaheshRavishankar default:
8846bb6613SMaheshRavishankar return Processor::Sequential;
8946bb6613SMaheshRavishankar }
9046bb6613SMaheshRavishankar break;
9146bb6613SMaheshRavishankar case MapBlock:
9246bb6613SMaheshRavishankar switch (dimension) {
9346bb6613SMaheshRavishankar case 0:
9446bb6613SMaheshRavishankar return Processor::ThreadX;
9546bb6613SMaheshRavishankar case 1:
9646bb6613SMaheshRavishankar return Processor::ThreadY;
9746bb6613SMaheshRavishankar case 2:
9846bb6613SMaheshRavishankar return Processor::ThreadZ;
9946bb6613SMaheshRavishankar default:
10046bb6613SMaheshRavishankar return Processor::Sequential;
10146bb6613SMaheshRavishankar }
10246bb6613SMaheshRavishankar default:;
10346bb6613SMaheshRavishankar }
10446bb6613SMaheshRavishankar return Processor::Sequential;
1057a7eacc7SStephan Herhut }
1067a7eacc7SStephan Herhut
1077a7eacc7SStephan Herhut /// Add mapping information to the given parallel loop. Do not add
1087a7eacc7SStephan Herhut /// mapping information if the loop already has it. Also, don't
1097a7eacc7SStephan Herhut /// start a mapping at a nested loop.
mapParallelOp(ParallelOp parallelOp,MappingLevel mappingLevel=MapGrid)1107a7eacc7SStephan Herhut static void mapParallelOp(ParallelOp parallelOp,
1117a7eacc7SStephan Herhut MappingLevel mappingLevel = MapGrid) {
1127a7eacc7SStephan Herhut // Do not try to add a mapping to already mapped loops or nested loops.
1131ffc1aaaSChristian Sigg if (parallelOp->getAttr(getMappingAttrName()) ||
1140bf4a82aSChristian Sigg ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
1157a7eacc7SStephan Herhut return;
1167a7eacc7SStephan Herhut
1177a7eacc7SStephan Herhut MLIRContext *ctx = parallelOp.getContext();
1187a7eacc7SStephan Herhut Builder b(ctx);
1197bdd3722SMogball SmallVector<ParallelLoopDimMappingAttr, 4> attrs;
120c2d03e4eSAlexander Belyaev attrs.reserve(parallelOp.getNumLoops());
121c2d03e4eSAlexander Belyaev for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
1227bdd3722SMogball attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
12346bb6613SMaheshRavishankar getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
12446bb6613SMaheshRavishankar b.getDimIdentityMap()));
1257a7eacc7SStephan Herhut }
126e21adfa3SRiver Riddle (void)setMappingAttr(parallelOp, attrs);
1277a7eacc7SStephan Herhut ++mappingLevel;
1287a7eacc7SStephan Herhut // Parallel loop operations are immediately nested, so do not use
1297a7eacc7SStephan Herhut // walk but just iterate over the operations.
1307a7eacc7SStephan Herhut for (Operation &op : *parallelOp.getBody()) {
1317a7eacc7SStephan Herhut if (ParallelOp nested = dyn_cast<ParallelOp>(op))
1327a7eacc7SStephan Herhut mapParallelOp(nested, mappingLevel);
1337a7eacc7SStephan Herhut }
1347a7eacc7SStephan Herhut }
1357a7eacc7SStephan Herhut
136bcf3d524SChristian Sigg namespace {
137bcf3d524SChristian Sigg struct GpuMapParallelLoopsPass
13867d0d7acSMichele Scuttari : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
runOnOperationmlir::gpu::__anon7c5d29460211::GpuMapParallelLoopsPass139bcf3d524SChristian Sigg void runOnOperation() override {
140bcf3d524SChristian Sigg for (Region ®ion : getOperation()->getRegions()) {
1417a7eacc7SStephan Herhut region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
1427a7eacc7SStephan Herhut }
143bcf3d524SChristian Sigg }
144bcf3d524SChristian Sigg };
145bcf3d524SChristian Sigg
146bcf3d524SChristian Sigg } // namespace
147bcf3d524SChristian Sigg } // namespace gpu
148bcf3d524SChristian Sigg } // namespace mlir
149bcf3d524SChristian Sigg
150bcf3d524SChristian Sigg std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
createGpuMapParallelLoopsPass()151bcf3d524SChristian Sigg mlir::createGpuMapParallelLoopsPass() {
152bcf3d524SChristian Sigg return std::make_unique<gpu::GpuMapParallelLoopsPass>();
153bcf3d524SChristian Sigg }
154